Skip to content

Commit e5266fc

Browse files
[3.14] gh-140797: Forbid capturing groups in re.Scanner lexicon patterns (GH-140944) (GH-140982)
(cherry picked from commit fa9c3ee) Co-authored-by: Abhishek Tiwari <[email protected]>
1 parent 331b4b8 commit e5266fc

File tree

3 files changed

+24
-1
lines changed

3 files changed

+24
-1
lines changed

Lib/re/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,9 +399,12 @@ def __init__(self, lexicon, flags=0):
399399
s = _parser.State()
400400
s.flags = flags
401401
for phrase, action in lexicon:
402+
sub_pattern = _parser.parse(phrase, flags)
403+
if sub_pattern.state.groups != 1:
404+
raise ValueError("Cannot use capturing groups in re.Scanner")
402405
gid = s.opengroup()
403406
p.append(_parser.SubPattern(s, [
404-
(SUBPATTERN, (gid, 0, 0, _parser.parse(phrase, flags))),
407+
(SUBPATTERN, (gid, 0, 0, sub_pattern)),
405408
]))
406409
s.closegroup(gid, p[-1])
407410
p = _parser.SubPattern(s, [(BRANCH, (None, p))])

Lib/test/test_re.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,6 +1639,24 @@ def s_int(scanner, token): return int(token)
16391639
(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
16401640
'op+', 'bar'], ''))
16411641

1642+
def test_bug_gh140797(self):
1643+
# gh140797: Capturing groups are not allowed in re.Scanner
1644+
1645+
msg = r"Cannot use capturing groups in re\.Scanner"
1646+
# Capturing group throws an error
1647+
with self.assertRaisesRegex(ValueError, msg):
1648+
Scanner([("(a)b", None)])
1649+
1650+
# Named Group
1651+
with self.assertRaisesRegex(ValueError, msg):
1652+
Scanner([("(?P<name>a)", None)])
1653+
1654+
# Non-capturing groups should pass normally
1655+
s = Scanner([("(?:a)b", lambda scanner, token: token)])
1656+
result, rem = s.scan("ab")
1657+
self.assertEqual(result,['ab'])
1658+
self.assertEqual(rem,'')
1659+
16421660
def test_bug_448951(self):
16431661
# bug 448951 (similar to 429357, but with single char match)
16441662
# (Also test greedy matches.)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The undocumented :class:`!re.Scanner` class now forbids regular expressions containing capturing groups in its lexicon patterns. Patterns using capturing groups could
2+
previously lead to crashes with a segmentation fault. Use non-capturing groups ``(?:...)`` instead.

0 commit comments

Comments
 (0)