Property-Based Testing in Python with Hypothesis
February 10, 2026 · 5 min read
Example-based tests check specific inputs. Property-based tests check invariants that should hold for any valid input. Hypothesis generates hundreds or thousands of inputs automatically, shrinks failures to minimal examples, and finds edge cases — empty strings, zero, very large integers, Unicode characters, NaN — that humans forget to test.
The Core Idea
Instead of testing validate_email("user@example.com") == True, you test properties:
- "Any valid email should pass validation"
- "Any string with spaces should fail validation"
- "Parsing then serializing should round-trip"
- "Sorting then checking is_sorted should always be True"
from hypothesis import given, strategies as st
@given(st.text())
def test_split_and_join_roundtrip(s):
"""Joining split parts should reconstruct the original string."""
delimiter = ","
parts = s.split(delimiter)
reconstructed = delimiter.join(parts)
assert reconstructed == sRun it — Hypothesis generates hundreds of strings automatically.
Strategies: Generating Test Data
Strategies describe what kind of data to generate:
from hypothesis import given, strategies as st
from hypothesis.strategies import SearchStrategy
# Primitives
st.integers() # Any integer
st.integers(min_value=0, max_value=100) # Bounded integers
st.floats(allow_nan=False, allow_infinity=False)
st.text() # Any Unicode text
st.text(alphabet=st.characters(whitelist_categories=["Lu", "Ll"])) # Letters only
st.booleans()
st.none()
# Collections
st.lists(st.integers(), min_size=1, max_size=100)
st.dictionaries(st.text(), st.integers())
st.tuples(st.integers(), st.text(), st.booleans())
st.sets(st.text())
# Complex
st.emails()
st.urls()
st.datetimes()
st.uuids()
st.binary()
# Composing
st.one_of(st.integers(), st.text(), st.none()) # Union types
st.sampled_from(["admin", "editor", "viewer"]) # Fixed choicesTesting a Real Validator
from hypothesis import given, assume, strategies as st
import re
def validate_slug(slug: str) -> bool:
    """Return True if *slug* is a well-formed URL slug.

    A valid slug is one or more runs of ASCII lowercase letters or digits,
    with a single hyphen between consecutive runs. The empty string, leading
    or trailing hyphens, doubled hyphens, uppercase letters and whitespace
    are all rejected.
    """
    # fullmatch anchors at the true end of the string. With re.match, a
    # trailing "$" also matches just before a final newline, so "abc\n"
    # used to be accepted as a valid slug.
    return re.fullmatch(r'[a-z0-9]+(?:-[a-z0-9]+)*', slug) is not None
@given(st.text(
    alphabet="abcdefghijklmnopqrstuvwxyz0123456789",  # ASCII lowercase + digits
    min_size=1,
))
def test_lowercase_alphanumeric_is_always_valid(slug):
    """Any non-empty string of ASCII lowercase letters and digits is a valid slug."""
    # The alphabet is spelled out explicitly: the Unicode categories "Ll" and
    # "Nd" also contain non-ASCII characters such as "ß" or "٣", which pass
    # str.islower()/str.isdigit() but are rejected by the ASCII-only validator.
    # min_size=1 already excludes the empty string, so no assume() is needed.
    assert validate_slug(slug)
@given(st.text(alphabet=" \t\n\r", min_size=1))
def test_whitespace_always_invalid(s):
    """A non-empty string made only of whitespace is never a valid slug."""
    # min_size=1 keeps the empty string out of the strategy itself, instead of
    # generating it and then discarding the example with assume().
    assert not validate_slug(s)
@given(st.text())
def test_slug_with_uppercase_invalid(s):
if any(c.isupper() for c in s):
assert not validate_slug(s)Finding Real Bugs with Hypothesis
Here's a real example where Hypothesis finds a bug:
from datetime import date, datetime


def parse_date_range(start: str, end: str) -> tuple[date, date]:
    """Parse two ``YYYY-MM-DD`` strings and return them as ``(start, end)`` dates.

    Args:
        start: First day of the range, formatted as ``%Y-%m-%d``.
        end: Last day of the range, formatted as ``%Y-%m-%d``.

    Returns:
        A ``(start, end)`` tuple of ``date`` objects.

    Raises:
        ValueError: If either string does not match ``%Y-%m-%d``, or if the
            end date is earlier than the start date. Equal dates are allowed.
    """
    start_date = datetime.strptime(start, "%Y-%m-%d").date()
    end_date = datetime.strptime(end, "%Y-%m-%d").date()
    if end_date < start_date:
        # A same-day range is accepted, so the message says "not before"
        # rather than "after".
        raise ValueError("end must not be before start")
    return start_date, end_date
@given(
start=st.dates(min_value=date(2000, 1, 1)),
end=st.dates(min_value=date(2000, 1, 1)),
)
def test_parse_date_range_roundtrip(start, end):
assume(end >= start)
s_str = start.strftime("%Y-%m-%d")
e_str = end.strftime("%Y-%m-%d")
parsed_start, parsed_end = parse_date_range(s_str, e_str)
assert parsed_start == start
assert parsed_end == end
assert parsed_start <= parsed_endHypothesis will generate thousands of date pairs, including edge cases like 2000-01-01 to 2000-01-01 (same date), leap year dates, and year transitions.
Stateful Testing
RuleBasedStateMachine tests stateful objects by executing random sequences of operations and checking invariants after each step:
from hypothesis.stateful import RuleBasedStateMachine, rule, initialize, invariant
from hypothesis import strategies as st
class ShoppingCartMachine(RuleBasedStateMachine):
    """State machine that applies random add/remove/clear steps to a cart list.

    Alongside the cart, the machine keeps ``expected_count``, an independent
    tally of how many items the cart should hold. The invariants compare the
    cart against that tally after every step.
    """

    def __init__(self):
        super().__init__()
        self.cart = []
        self.expected_count = 0

    @initialize()
    def init_cart(self):
        """Start every generated run from an empty cart."""
        self.cart = []
        self.expected_count = 0

    @rule(item=st.text(min_size=1), price=st.decimals(min_value=0, max_value=1000))
    def add_item(self, item, price):
        """Append one item; the price is stored as a float."""
        self.cart.append({"name": item, "price": float(price)})
        self.expected_count += 1

    @rule()
    def remove_last_item(self):
        """Pop the most recently added item; a no-op on an empty cart."""
        if self.cart:
            self.cart.pop()
            self.expected_count -= 1

    @rule()
    def clear_cart(self):
        """Remove every item from the cart."""
        self.cart.clear()
        self.expected_count = 0

    @invariant()
    def cart_total_is_non_negative(self):
        total = sum(item["price"] for item in self.cart)
        assert total >= 0, f"Cart total was negative: {total}"

    @invariant()
    def cart_count_matches_items(self):
        # Comparing against the tracked tally makes this invariant falsifiable;
        # `len(self.cart) >= 0` holds for every list and could never fail.
        assert len(self.cart) == self.expected_count
TestShoppingCart = ShoppingCartMachine.TestCase

Hypothesis will generate sequences like: add 3 items, remove last, add 2 more, clear, add 1 — and check invariants after each step. It finds bugs in state machines that would take months to reproduce manually.
Composing Custom Strategies
Build domain-specific strategies for your data models:
from hypothesis import given, strategies as st
from datetime import datetime, timezone
@st.composite
def article_strategy(draw):
    """Draw a dict of article fields: title, content, published flag and tags."""
    tag_text = st.text(
        min_size=1,
        max_size=30,
        alphabet=st.characters(whitelist_categories=["Ll", "Nd"]),
    )
    # Values are drawn in the order title, content, published, tags.
    return {
        # filter(str.strip) rejects titles that consist only of whitespace
        "title": draw(st.text(min_size=1, max_size=200).filter(str.strip)),
        "content": draw(st.text(min_size=10)),
        "published": draw(st.booleans()),
        "tags": draw(st.lists(tag_text, max_size=10, unique=True)),
    }
@given(article_strategy())
def test_article_schema_validates_any_valid_input(article_data):
from myapp.schemas import ArticleSchema
# Should not raise ValidationError for any valid article
schema = ArticleSchema(**article_data)
assert schema.title == article_data["title"]
assert schema.content == article_data["content"]Integration with pytest
Hypothesis integrates with pytest naturally — add the @given decorator:
from hypothesis import given, settings, HealthCheck
from hypothesis import strategies as st
@given(st.lists(st.integers()))
@settings(max_examples=500)  # 500 examples rather than the default 100
def test_sort_is_idempotent(lst):
    """Sorting an already-sorted list leaves it unchanged."""
    once = sorted(lst)
    assert sorted(once) == once
@given(st.integers(), st.integers())
@settings(
max_examples=1000,
suppress_health_check=[HealthCheck.too_slow],
)
def test_addition_is_commutative(a, b):
assert a + b == b + aThe Hypothesis Database
When Hypothesis finds a failure, it saves the minimal failing example to .hypothesis/. On the next run, it checks the saved examples first — so once you've found a bug, Hypothesis never forgets it:
.hypothesis/
  examples/
    test_module/
      test_function_name/
        # Saved failure cases

Commit this directory — it's part of your test infrastructure.
When to Use Property-Based Testing
Property-based testing shines for:
- Parser/serializer roundtrips — parse then serialize equals original
- Mathematical invariants — commutativity, associativity, idempotence
- Data transformation — any valid input produces valid output
- State machines — random operation sequences don't corrupt state
- Security-sensitive code — input validation, sanitization
It's less suited for:
- Tests that require specific fixed inputs (mocked external services)
- Tests checking specific business outcomes from known scenarios
- Snapshot tests
Property-based testing and example-based testing are complementary, not competing. Use both: examples for documenting expected behavior, Hypothesis for finding the unexpected.