Skip to content

re

re.compile() - Compile regular expression pattern.

re.search() - Search for pattern anywhere in string.

re.match() - Match pattern at beginning of string.

re.fullmatch() - Match entire string.

re.findall() - Find all non-overlapping matches.

re.finditer() - Iterator over match objects.

re.sub() - Replace pattern matches.

re.subn() - Replace and return count.

re.split() - Split string by pattern.

Pattern groups and capturing.

Regular expression flags.

Special character classes and escapes.

Pattern quantifiers (* + ? {m,n}).

Pattern anchors (^ $ \b \B).

Lookahead and lookbehind assertions.

Edge cases and special scenarios.

Basic: Compile simple pattern.

def test_compile_basic(self):
    """Basic: Compile simple pattern."""
    pattern = re.compile('hello')
    assert pattern.pattern == 'hello'

Verification: ✅ Tested in CI

Feature: Compile with flags.

def test_compile_with_flags(self):
    """Feature: Compile with flags."""
    pattern = re.compile('hello', re.IGNORECASE)
    assert pattern.flags & re.IGNORECASE

Verification: ✅ Tested in CI

Property: Compiled patterns can be reused.

def test_compile_reuse(self):
    """Property: Compiled patterns can be reused."""
    pattern = re.compile('\\d+')
    assert pattern.search('abc123')
    assert pattern.search('xyz789')

Verification: ✅ Tested in CI

Error: Invalid pattern raises re.error.

def test_compile_invalid_pattern_raises(self):
    """An unterminated named group is rejected with ``re.error``."""
    malformed = "(?P<"
    with pytest.raises(re.error):
        re.compile(malformed)

Verification: ✅ Tested in CI

Basic: Search finds pattern in string.

def test_search_basic(self):
    """Basic: Search finds pattern in string."""
    match = re.search('world', 'hello world')
    assert match is not None
    assert match.group() == 'world'

Verification: ✅ Tested in CI

Basic: No match returns None.

def test_search_no_match_returns_none(self):
    """Basic: No match returns None."""
    match = re.search('xyz', 'hello world')
    assert match is None

Verification: ✅ Tested in CI

Feature: Search finds first occurrence.

def test_search_finds_first_occurrence(self):
    """Feature: Search finds first occurrence."""
    match = re.search('\\d+', 'abc123def456')
    assert match.group() == '123'

Verification: ✅ Tested in CI

Property: Match object has position info.

def test_search_position(self):
    """Property: Match object has position info."""
    match = re.search('world', 'hello world')
    assert match.start() == 6
    assert match.end() == 11
    assert match.span() == (6, 11)

Verification: ✅ Tested in CI

Basic: Match at start of string.

def test_match_basic(self):
    """Basic: Match at start of string."""
    match = re.match('hello', 'hello world')
    assert match is not None
    assert match.group() == 'hello'

Verification: ✅ Tested in CI

Feature: Match requires pattern at start.

def test_match_middle_fails(self):
    """Feature: Match requires pattern at start."""
    match = re.match('world', 'hello world')
    assert match is None

Verification: ✅ Tested in CI

Property: match() anchors at start, search() doesn't.

def test_match_vs_search(self):
    """Property: match() anchors at start, search() doesn't."""
    text = 'abc123'
    assert re.match('\\d+', text) is None
    assert re.search('\\d+', text) is not None

Verification: ✅ Tested in CI

Basic: Fullmatch requires complete match.

def test_fullmatch_basic(self):
    """Basic: Fullmatch requires complete match."""
    match = re.fullmatch('hello', 'hello')
    assert match is not None

Verification: ✅ Tested in CI

Feature: Partial match fails.

def test_fullmatch_partial_fails(self):
    """Feature: Partial match fails."""
    match = re.fullmatch('hello', 'hello world')
    assert match is None

Verification: ✅ Tested in CI

Feature: Useful for validation.

def test_fullmatch_use_case(self):
    """Feature: Useful for validation."""
    pattern = '\\d{3}-\\d{3}-\\d{4}'
    assert re.fullmatch(pattern, '555-123-4567') is not None
    assert re.fullmatch(pattern, '555-123-4567x') is None

Verification: ✅ Tested in CI

Basic: Find all occurrences.

def test_findall_basic(self):
    """Basic: Find all occurrences."""
    matches = re.findall('\\d+', 'abc123def456ghi789')
    assert matches == ['123', '456', '789']

Verification: ✅ Tested in CI

Edge: No matches returns empty list.

def test_findall_no_matches(self):
    """Edge: No matches returns empty list."""
    matches = re.findall('\\d+', 'abcdef')
    assert matches == []

Verification: ✅ Tested in CI

Feature: Groups affect returned values.

def test_findall_with_groups(self):
    """Feature: Groups affect returned values."""
    matches = re.findall('(\\w+)@(\\w+)', 'alice@example bob@test')
    assert matches == [('alice', 'example'), ('bob', 'test')]

Verification: ✅ Tested in CI

Property: Matches are non-overlapping.

def test_findall_non_overlapping(self):
    """Property: Matches are non-overlapping."""
    matches = re.findall('.{2}', 'abcdef')
    assert matches == ['ab', 'cd', 'ef']

Verification: ✅ Tested in CI

Basic: Iterate over matches.

def test_finditer_basic(self):
    """Basic: Iterate over matches."""
    matches = list(re.finditer('\\d+', 'abc123def456'))
    assert len(matches) == 2
    assert matches[0].group() == '123'
    assert matches[1].group() == '456'

Verification: ✅ Tested in CI

Feature: Returns full match objects.

def test_finditer_match_objects(self):
    """Feature: Returns full match objects."""
    matches = list(re.finditer('\\d+', 'abc123def456'))
    assert matches[0].start() == 3
    assert matches[0].end() == 6
    assert matches[1].start() == 9
    assert matches[1].end() == 12

Verification: ✅ Tested in CI

Property: Iterator is memory-efficient.

def test_finditer_memory_efficient(self):
    """Property: Iterator is memory-efficient."""
    iterator = re.finditer('\\d+', 'a1b2c3d4e5')
    first = next(iterator)
    assert first.group() == '1'

Verification: ✅ Tested in CI

Basic: Replace pattern with string.

def test_sub_basic(self):
    """Basic: Replace pattern with string."""
    result = re.sub('\\d+', 'X', 'abc123def456')
    assert result == 'abcXdefX'

Verification: ✅ Tested in CI

Feature: Limit number of replacements.

def test_sub_count(self):
    """Feature: Limit number of replacements."""
    result = re.sub('\\d+', 'X', 'abc123def456', count=1)
    assert result == 'abcXdef456'

Verification: ✅ Tested in CI

Feature: Replacement can be a function.

def test_sub_with_function(self):
    """Feature: Replacement can be a function."""

    def double(match):
        return str(int(match.group()) * 2)
    result = re.sub('\\d+', double, 'abc5def10')
    assert result == 'abc10def20'

Verification: ✅ Tested in CI

Feature: Use backreferences in replacement.

def test_sub_backreferences(self):
    """Feature: Use backreferences in replacement."""
    result = re.sub('(\\w+) (\\w+)', '\\2 \\1', 'hello world')
    assert result == 'world hello'

Verification: ✅ Tested in CI

Basic: Returns tuple (new_string, count).

def test_subn_basic(self):
    """Basic: Returns tuple (new_string, count)."""
    result, count = re.subn('\\d+', 'X', 'abc123def456')
    assert result == 'abcXdefX'
    assert count == 2

Verification: ✅ Tested in CI

Edge: No matches returns count of 0.

def test_subn_no_matches(self):
    """Edge: No matches returns count of 0."""
    result, count = re.subn('\\d+', 'X', 'abcdef')
    assert result == 'abcdef'
    assert count == 0

Verification: ✅ Tested in CI

Basic: Split by pattern.

def test_split_basic(self):
    """Basic: Split by pattern."""
    parts = re.split('\\s+', 'hello world  from   python')
    assert parts == ['hello', 'world', 'from', 'python']

Verification: ✅ Tested in CI

Feature: Limit number of splits.

def test_split_maxsplit(self):
    """Feature: Limit number of splits."""
    parts = re.split('\\s+', 'a b c d', maxsplit=2)
    assert parts == ['a', 'b', 'c d']

Verification: ✅ Tested in CI

Feature: Capturing groups are included in result.

def test_split_with_groups(self):
    """Feature: Capturing groups are included in result."""
    parts = re.split('(\\s+)', 'hello world')
    assert parts == ['hello', ' ', 'world']

Verification: ✅ Tested in CI

Edge: Empty matches at edges.

def test_split_empty_matches(self):
    """Edge: Empty matches at edges."""
    parts = re.split('\\d+', 'a1b2c')
    assert parts == ['a', 'b', 'c']

Verification: ✅ Tested in CI

Basic: Access captured groups.

def test_groups_basic(self):
    """Basic: Access captured groups."""
    match = re.search('(\\w+)@(\\w+)', 'alice@example')
    assert match.group(0) == 'alice@example'
    assert match.group(1) == 'alice'
    assert match.group(2) == 'example'

Verification: ✅ Tested in CI

Feature: groups() returns all groups as tuple.

def test_groups_tuple(self):
    """Feature: groups() returns all groups as tuple."""
    match = re.search('(\\w+)@(\\w+)', 'alice@example')
    assert match.groups() == ('alice', 'example')

Verification: ✅ Tested in CI

Feature: Named groups with ?P<name>.

def test_named_groups(self):
    """Feature: Named groups with ?P<name>."""
    match = re.search('(?P<user>\\w+)@(?P<domain>\\w+)', 'alice@example')
    assert match.group('user') == 'alice'
    assert match.group('domain') == 'example'

Verification: ✅ Tested in CI

Feature: groupdict() returns named groups as dict.

def test_groupdict(self):
    """Feature: groupdict() returns named groups as dict."""
    match = re.search('(?P<user>\\w+)@(?P<domain>\\w+)', 'alice@example')
    assert match.groupdict() == {'user': 'alice', 'domain': 'example'}

Verification: ✅ Tested in CI

Feature: (?:...) is non-capturing.

def test_non_capturing_group(self):
    """Feature: (?:...) is non-capturing."""
    match = re.search('(?:\\w+)@(\\w+)', 'alice@example')
    assert match.groups() == ('example',)

Verification: ✅ Tested in CI

Feature: IGNORECASE makes pattern case-insensitive.

def test_ignorecase_flag(self):
    """Feature: IGNORECASE makes pattern case-insensitive."""
    pattern = re.compile('hello', re.IGNORECASE)
    assert pattern.search('HELLO') is not None
    assert pattern.search('Hello') is not None

Verification: ✅ Tested in CI

Feature: MULTILINE makes ^ and $ match line boundaries.

def test_multiline_flag(self):
    """Feature: MULTILINE makes ^ and $ match line boundaries."""
    text = 'first line\nsecond line'
    assert len(re.findall('^second', text)) == 0
    assert len(re.findall('^second', text, re.MULTILINE)) == 1

Verification: ✅ Tested in CI

Feature: DOTALL makes . match newlines.

def test_dotall_flag(self):
    """Feature: DOTALL makes . match newlines."""
    text = 'hello\nworld'
    assert re.search('hello.world', text) is None
    assert re.search('hello.world', text, re.DOTALL) is not None

Verification: ✅ Tested in CI

Feature: VERBOSE allows whitespace and comments in pattern.

def test_verbose_flag(self):
    """Feature: VERBOSE allows whitespace and comments in pattern."""
    pattern = re.compile('\n            (\\d{3})  # Area code\n            -        # Separator\n            (\\d{4})  # Number\n        ', re.VERBOSE)
    match = pattern.search('555-1234')
    assert match.groups() == ('555', '1234')

Verification: ✅ Tested in CI

Feature: Multiple flags can be combined.

def test_combined_flags(self):
    """Feature: Multiple flags can be combined."""
    pattern = re.compile('hello.world', re.IGNORECASE | re.DOTALL)
    assert pattern.search('HELLO\nWORLD') is not None

Verification: ✅ Tested in CI

Feature: \d matches digits.

def test_digit_class(self):
    """Feature: \\d matches digits."""
    assert re.findall('\\d', 'abc123def') == ['1', '2', '3']

Verification: ✅ Tested in CI

Feature: \w matches word characters.

def test_word_class(self):
    """Feature: \\w matches word characters."""
    assert re.findall('\\w+', 'hello world_123') == ['hello', 'world_123']

Verification: ✅ Tested in CI

Feature: \s matches whitespace.

def test_whitespace_class(self):
    """Feature: \\s matches whitespace."""
    assert re.findall('\\S+', 'hello world') == ['hello', 'world']

Verification: ✅ Tested in CI

Feature: \D, \W, \S match non-digits, non-word, non-whitespace.

def test_negated_classes(self):
    """Feature: \\D, \\W, \\S match non-digits, non-word, non-whitespace."""
    assert re.findall('\\D+', 'abc123def') == ['abc', 'def']

Verification: ✅ Tested in CI

Feature: [abc] matches any of a, b, c.

def test_custom_character_class(self):
    """Feature: [abc] matches any of a, b, c."""
    assert re.findall('[aeiou]', 'hello world') == ['e', 'o', 'o']

Verification: ✅ Tested in CI

Feature: [a-z] matches range.

def test_range_character_class(self):
    """Feature: [a-z] matches range."""
    assert re.findall('[0-9]+', 'abc123def456') == ['123', '456']

Verification: ✅ Tested in CI

Feature: * matches 0 or more.

def test_star_quantifier(self):
    """Feature: * matches 0 or more."""
    assert re.findall('a*b', 'b ab aab aaab') == ['b', 'ab', 'aab', 'aaab']

Verification: ✅ Tested in CI

Feature: + matches 1 or more.

def test_plus_quantifier(self):
    """Feature: + matches 1 or more."""
    assert re.findall('a+b', 'b ab aab aaab') == ['ab', 'aab', 'aaab']

Verification: ✅ Tested in CI

Feature: ? matches 0 or 1.

def test_question_quantifier(self):
    """Feature: ? matches 0 or 1."""
    assert re.findall('a?b', 'b ab aab') == ['b', 'ab', 'ab']

Verification: ✅ Tested in CI

Feature: {n} matches exactly n.

def test_exact_quantifier(self):
    """Feature: {n} matches exactly n."""
    assert re.findall('a{3}', 'a aa aaa aaaa') == ['aaa', 'aaa']

Verification: ✅ Tested in CI

Feature: {m,n} matches m to n.

def test_range_quantifier(self):
    """Feature: {m,n} matches m to n."""
    assert re.findall('a{2,3}', 'a aa aaa aaaa') == ['aa', 'aaa', 'aaa']

Verification: ✅ Tested in CI

Property: Quantifiers are greedy by default.

def test_greedy_vs_non_greedy(self):
    """Property: Quantifiers are greedy by default."""
    assert re.findall('<.*>', '<a>text</a>') == ['<a>text</a>']
    assert re.findall('<.*?>', '<a>text</a>') == ['<a>', '</a>']

Verification: ✅ Tested in CI

Feature: ^ matches start of string.

def test_start_anchor(self):
    """Feature: ^ matches start of string."""
    assert re.match('^hello', 'hello world') is not None
    assert re.match('^world', 'hello world') is None

Verification: ✅ Tested in CI

Feature: $ matches end of string.

def test_end_anchor(self):
    """Feature: $ matches end of string."""
    assert re.search('world$', 'hello world') is not None
    assert re.search('hello$', 'hello world') is None

Verification: ✅ Tested in CI

Feature: \b matches word boundary.

def test_word_boundary(self):
    """Feature: \\b matches word boundary."""
    text = 'the cat in the hat'
    assert re.findall('\\bcat\\b', text) == ['cat']
    assert re.findall('\\bcat\\b', 'cats') == []

Verification: ✅ Tested in CI

Feature: \B matches non-word boundary.

def test_non_word_boundary(self):
    """Feature: \\B matches non-word boundary."""
    assert re.findall('\\Bcat', 'concatenate') == ['cat']

Verification: ✅ Tested in CI

Feature: (?=...) positive lookahead.

def test_positive_lookahead(self):
    """Feature: (?=...) positive lookahead."""
    assert re.search('hello(?= world)', 'hello world') is not None
    assert re.search('hello(?= world)', 'hello there') is None

Verification: ✅ Tested in CI

Feature: (?!...) negative lookahead.

def test_negative_lookahead(self):
    """Feature: (?!...) negative lookahead."""
    assert re.search('hello(?! world)', 'hello there') is not None
    assert re.search('hello(?! world)', 'hello world') is None

Verification: ✅ Tested in CI

Feature: (?<=...) positive lookbehind.

def test_positive_lookbehind(self):
    """Feature: (?<=...) positive lookbehind."""
    assert re.findall('(?<=\\$)\\d+', 'Price: $100') == ['100']

Verification: ✅ Tested in CI

Feature: (?<!...) negative lookbehind.

def test_negative_lookbehind(self):
    """Feature: (?<!...) negative lookbehind."""
    assert re.findall('(?<!\\$)\\d+', 'Item 5 costs $100') == ['5', '00']

Verification: ✅ Tested in CI

Edge: Empty string handling.

def test_empty_string(self):
    """Edge: Empty string handling."""
    assert re.search('.*', '') is not None
    assert re.findall('\\d+', '') == []

Verification: ✅ Tested in CI

Feature: re.escape() escapes special characters.

def test_escape_special_chars(self):
    """Feature: re.escape() escapes special characters."""
    text = 'cost: $5.00'
    assert re.search('$5.00', text) is None
    escaped = re.escape('$5.00')
    assert re.search(escaped, text) is not None

Verification: ✅ Tested in CI

Feature: Unicode patterns work correctly.

def test_unicode_support(self):
    """Feature: Unicode patterns work correctly."""
    text = 'Hello 世界 привет'
    assert re.findall('[^\\x00-\\x7F]+', text) == ['世界', 'привет']

Verification: ✅ Tested in CI

Edge: Some patterns can cause performance issues.

def test_catastrophic_backtracking(self):
    """Edge: Some patterns can cause performance issues."""
    text = 'a' * 20 + 'b'
    match = re.search('(a+)+b', text)
    assert match is not None

Verification: ✅ Tested in CI

Feature: Groups can be nested.

def test_nested_groups(self):
    """Feature: Groups can be nested."""
    pattern = '((\\w+)@(\\w+)\\.(\\w+))'
    match = re.search(pattern, 'alice@example.com')
    assert match.group(1) == 'alice@example.com'
    assert match.group(2) == 'alice'
    assert match.group(3) == 'example'
    assert match.group(4) == 'com'

Verification: ✅ Tested in CI

Feature: \1 references first captured group.

def test_backreference(self):
    """Feature: \\1 references first captured group."""
    pattern = '\\b(\\w+)\\s+\\1\\b'
    assert re.search(pattern, 'hello hello') is not None
    assert re.search(pattern, 'hello world') is None

Verification: ✅ Tested in CI