86 lines
3.3 KiB
Diff
86 lines
3.3 KiB
Diff
|
The Ply lexer, which doesn't seem too active a project, wraps regular
|
||
|
expressions from grammar definitions in its own regular expressions that name
|
||
|
groups. This breaks re.compile in Python >= 3.11 when the original expressions
|
||
|
contain global flags, because the compiler requires that global flags appear at
|
||
|
the start of the expression instead of inside the named group.
|
||
|
|
||
|
This patch wraps re.compile to scan the expression for any global flags and,
|
||
|
when found, moves them to the start of the expression.
|
||
|
|
||
|
--- a/third_party/python/ply/ply/lex.py
|
||
|
+++ b/third_party/python/ply/ply/lex.py
|
||
|
@@ -49,6 +49,37 @@
|
||
|
# Python 3.0
|
||
|
StringTypes = (str, bytes)
|
||
|
|
||
|
+
|
||
|
+def _re_compile(expression, *args, **kwargs):
|
||
|
+ '''
|
||
|
+ Rearrange global flags in the regular expression to appear at the
|
||
|
+ beginning, avoiding deprecation warnings on Python < 3.11 and hard
|
||
|
+ errors on Python >= 3.11.
|
||
|
+ '''
|
||
|
+ flags = set()
|
||
|
+ remainder = ''
|
||
|
+
|
||
|
+ pattern = re.compile(r'\(\?([aiLmsux]+)\)')
|
||
|
+ while m := pattern.search(expression):
|
||
|
+ # Location of the global flag spec
|
||
|
+ l, h = m.span(0)
|
||
|
+ # Accumulate global flags from this spec
|
||
|
+ flags.update(m.group(1))
|
||
|
+ # Capture all text leading up to the match
|
||
|
+ remainder += expression[:l]
|
||
|
+ # Trim to the end fo the flag spec
|
||
|
+ expression = expression[h:]
|
||
|
+
|
||
|
+ # Any remaining expression contains no flags
|
||
|
+ remainder += expression
|
||
|
+
|
||
|
+ # If there are flags, they belong at the beginning
|
||
|
+ if flags:
|
||
|
+ remainder = f'(?{"".join(sorted(flags))})' + remainder
|
||
|
+
|
||
|
+ return re.compile(remainder, *args, **kwargs)
|
||
|
+
|
||
|
+
|
||
|
# This regular expression is used to match valid token names
|
||
|
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
|
||
|
|
||
|
@@ -230,7 +261,7 @@
|
||
|
titem = []
|
||
|
txtitem = []
|
||
|
for pat, func_name in lre:
|
||
|
- titem.append((re.compile(pat, lextab._lexreflags), _names_to_funcs(func_name, fdict)))
|
||
|
+ titem.append((_re_compile(pat, lextab._lexreflags), _names_to_funcs(func_name, fdict)))
|
||
|
|
||
|
self.lexstatere[statename] = titem
|
||
|
self.lexstateretext[statename] = txtitem
|
||
|
@@ -495,7 +526,7 @@
|
||
|
return []
|
||
|
regex = '|'.join(relist)
|
||
|
try:
|
||
|
- lexre = re.compile(regex, reflags)
|
||
|
+ lexre = _re_compile(regex, reflags)
|
||
|
|
||
|
# Build the index to function map for the matching engine
|
||
|
lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1)
|
||
|
@@ -758,7 +789,7 @@
|
||
|
continue
|
||
|
|
||
|
try:
|
||
|
- c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags)
|
||
|
+ c = _re_compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags)
|
||
|
if c.match(''):
|
||
|
self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__)
|
||
|
self.error = True
|
||
|
@@ -782,7 +813,7 @@
|
||
|
continue
|
||
|
|
||
|
try:
|
||
|
- c = re.compile('(?P<%s>%s)' % (name, r), self.reflags)
|
||
|
+ c = _re_compile('(?P<%s>%s)' % (name, r), self.reflags)
|
||
|
if (c.match('')):
|
||
|
self.log.error("Regular expression for rule '%s' matches empty string", name)
|
||
|
self.error = True
|