Skip to content

Commit 02b616c

Browse files
committed
Fix f/t-string edge cases
1 parent 670d177 commit 02b616c

File tree

5 files changed

+319
-170
lines changed

5 files changed

+319
-170
lines changed

src/python_minifier/f_string.py

Lines changed: 80 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def complete_debug_specifier(self, partial_specifier_candidates, value_node):
6262
def _generate_candidates_with_processor(self, prefix, str_processor):
6363
"""Generate f-string candidates using the given prefix and string processor function."""
6464
candidates = []
65-
65+
6666
for quote in self.allowed_quotes:
6767
quote_candidates = ['']
6868
debug_specifier_candidates = []
@@ -99,7 +99,7 @@ def _generate_candidates_with_processor(self, prefix, str_processor):
9999
raise RuntimeError('Unexpected JoinedStr value')
100100

101101
candidates += [prefix + quote + x + quote for x in quote_candidates]
102-
102+
103103
return candidates
104104

105105
def candidates(self):
@@ -134,7 +134,33 @@ def _contains_literal_backslashes(self):
134134

135135

136136
def str_for(self, s, quote):
    """
    Escape a literal f-string segment so it can be written into source.

    Braces are doubled. Carriage returns, tabs and every other control
    character below 0x20 (including NUL) are written as backslash escapes.
    Newlines are escaped unless the string is triple-quoted.

    :param s: The literal text of the segment
    :param quote: The quote sequence that will delimit the f-string
    :return: The escaped text, without surrounding quotes
    """
    # A three-character quote means a triple-quoted string, which may
    # contain a real newline in the source text.
    is_multiline = len(quote) == 3

    pieces = []
    for c in s:
        if c == '{':
            pieces.append('{{')
        elif c == '}':
            pieces.append('}}')
        elif c == '\n':
            pieces.append(c if is_multiline else '\\n')
        elif c == '\r':
            # Always escaped, even in triple-quoted strings: Python
            # normalizes a raw carriage return to a newline when parsing,
            # which would change the value of the string.
            pieces.append('\\r')
        elif c == '\t':
            # Always escaped for consistency (not strictly necessary
            # in triple-quoted strings).
            pieces.append('\\t')
        elif ord(c) < 32:
            # NUL and the remaining control characters: \x00 .. \x1f
            pieces.append('\\x%02x' % ord(c))
        else:
            pieces.append(c)

    return ''.join(pieces)
138164

139165

140166
class OuterFString(FString):
@@ -316,7 +342,9 @@ def _literals(self):
316342
if literal == '':
317343
literal += self.current_quote
318344

319-
if c == '\n':
345+
if c == '\0':
346+
literal += '\\x00'
347+
elif c == '\n':
320348
literal += '\\n'
321349
elif c == '\r':
322350
literal += '\\r'
@@ -333,7 +361,7 @@ def __str__(self):
333361
if self._s == '':
334362
return str(min(self.allowed_quotes, key=len)) * 2
335363

336-
if '\0' in self._s or ('\\' in self._s and not self.pep701):
364+
if '\\' in self._s and not self.pep701:
337365
raise ValueError('Impossible to represent a character in f-string expression part')
338366

339367
if not self.pep701 and ('\n' in self._s or '\r' in self._s):
@@ -391,14 +419,35 @@ def candidates(self):
391419
return candidates
392420

393421
def str_for(self, s):
    """
    Escape the literal text of a format spec so it can be written into source.

    Braces are doubled. Carriage returns and control characters below 0x20
    (including NUL, but not tab or newline) are written as backslash
    escapes. Backslashes are doubled only on Python 3.12 and 3.13.

    :param s: The literal text of the format spec
    :return: The escaped text
    """
    # For the Python 3.12+ raw f-string regression (fixed in 3.14rc2),
    # backslashes in format specs must be escaped so they round-trip
    # correctly. The interpreter version cannot change mid-call, so the
    # check is made once rather than per character.
    double_backslash = (3, 12) <= sys.version_info < (3, 14)

    pieces = []
    for c in s:
        if c == '{':
            pieces.append('{{')
        elif c == '}':
            pieces.append('}}')
        elif c == '\\':
            pieces.append('\\\\' if double_backslash else c)
        elif c == '\r':
            # Always escaped: Python normalizes carriage returns to
            # newlines during parsing, which would cause an AST mismatch.
            pieces.append('\\r')
        elif c in '\t\n':
            # Tab and newline are written through unchanged
            pieces.append(c)
        elif ord(c) < 32:
            # NUL and the remaining control characters
            pieces.append('\\x%02x' % ord(c))
        else:
            pieces.append(c)

    return ''.join(pieces)
402451

403452

404453
class Bytes(object):
@@ -449,7 +498,24 @@ def _literals(self):
449498

450499
if literal == '':
451500
literal = 'b' + self.current_quote
452-
literal += chr(b)
501+
502+
# Handle special characters that need escaping
503+
if b == 0: # null byte
504+
literal += '\\x00'
505+
elif b == ord('\\'): # backslash
506+
literal += '\\\\'
507+
elif b == ord('\n'): # newline
508+
literal += '\\n'
509+
elif b == ord('\r'): # carriage return
510+
literal += '\\r'
511+
elif b == ord('\t'): # tab
512+
literal += '\\t'
513+
elif len(self.current_quote) == 1 and b == ord(self.current_quote): # single quote character
514+
literal += '\\' + self.current_quote
515+
elif 32 <= b <= 126: # printable ASCII
516+
literal += chr(b)
517+
else: # other non-printable characters
518+
literal += f'\\x{b:02x}'
453519

454520
if literal:
455521
literal += self.current_quote
@@ -459,8 +525,6 @@ def __str__(self):
459525
if self._b == b'':
460526
return 'b' + str(min(self.allowed_quotes, key=len)) * 2
461527

462-
if b'\0' in self._b or b'\\' in self._b:
463-
raise ValueError('Impossible to represent a %r character in f-string expression part')
464528

465529
if b'\n' in self._b or b'\r' in self._b:
466530
if '"""' not in self.allowed_quotes and "'''" not in self.allowed_quotes:

src/python_minifier/t_string.py

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -71,27 +71,40 @@ def candidates(self):
7171
"""Generate all possible representations"""
7272
actual_candidates = []
7373

74+
# Normal t-string candidates
75+
actual_candidates.extend(self._generate_candidates_with_processor('t', self.str_for))
76+
77+
# Raw t-string candidates (if we detect backslashes)
78+
if self._contains_literal_backslashes():
79+
actual_candidates.extend(self._generate_candidates_with_processor('rt', self.raw_str_for))
80+
81+
return filter(self.is_correct_ast, actual_candidates)
82+
83+
def _generate_candidates_with_processor(self, prefix, str_processor):
84+
"""Generate t-string candidates using the given prefix and string processor function."""
85+
candidates = []
86+
7487
for quote in self.allowed_quotes:
75-
candidates = ['']
88+
quote_candidates = ['']
7689
debug_specifier_candidates = []
7790

7891
for v in self.node.values:
7992
if is_constant_node(v, ast.Constant) and isinstance(v.value, str):
8093
# String literal part - check for debug specifiers
8194

8295
# Could this be used as a debug specifier?
83-
if len(candidates) < 10:
96+
if len(quote_candidates) < 10:
8497
import re
8598
debug_specifier = re.match(r'.*=\s*$', v.value)
8699
if debug_specifier:
87100
# Maybe! Save for potential debug specifier completion
88101
try:
89-
debug_specifier_candidates = [x + '{' + v.value for x in candidates]
102+
debug_specifier_candidates = [x + '{' + v.value for x in quote_candidates]
90103
except Exception:
91104
continue
92105

93106
try:
94-
candidates = [x + self.str_for(v.value, quote) for x in candidates]
107+
quote_candidates = [x + str_processor(v.value, quote) for x in quote_candidates]
95108
except Exception:
96109
continue
97110

@@ -103,17 +116,17 @@ def candidates(self):
103116

104117
# Regular interpolation processing
105118
interpolation_candidates = InterpolationValue(v).get_candidates()
106-
candidates = [x + y for x in candidates for y in interpolation_candidates] + completed
119+
quote_candidates = [x + y for x in quote_candidates for y in interpolation_candidates] + completed
107120

108121
debug_specifier_candidates = []
109122
except Exception:
110123
continue
111124
else:
112125
raise RuntimeError('Unexpected TemplateStr value: %r' % v)
113126

114-
actual_candidates.extend(['t' + quote + x + quote for x in candidates])
127+
candidates.extend([prefix + quote + x + quote for x in quote_candidates])
115128

116-
return filter(self.is_correct_ast, actual_candidates)
129+
return candidates
117130

118131
def str_for(self, s, quote):
119132
"""Convert string literal to properly escaped form"""
@@ -125,6 +138,24 @@ def str_for(self, s, quote):
125138
return '\\\n'
126139
return mini_s
127140

141+
def raw_str_for(self, s, quote=None):
    """
    Generate string representation for raw t-strings.

    Don't escape backslashes like MiniString does; only braces are doubled.

    :param s: The literal text of the segment
    :param quote: Ignored. Accepted so this method can be used as a string
        processor, which is called as ``str_processor(value, quote)``;
        without it that call raises TypeError.
    :return: The text with '{' and '}' doubled
    """
    return s.replace('{', '{{').replace('}', '}}')
147+
148+
def _contains_literal_backslashes(self):
    """
    Check if this t-string contains literal backslashes in constant values.
    This indicates it may need to be a raw t-string.

    :return: True if any string constant found by walking self.node
        contains a backslash, otherwise False
    """
    # Test Constant/.value rather than Str/.s: the Str class and the
    # Constant.s alias were removed from the standard ast module in
    # Python 3.14. NOTE(review): assumes `ast` here is, or mirrors, the
    # standard module - confirm against the file's imports.
    return any(
        is_constant_node(node, ast.Constant)
        and isinstance(node.value, str)
        and '\\' in node.value
        for node in ast.walk(self.node)
    )
158+
128159
def __str__(self):
129160
"""Generate the shortest valid t-string representation"""
130161
if len(self.node.values) == 0:
@@ -195,20 +226,27 @@ def get_candidates(self):
195226
format_candidates = python_minifier.f_string.OuterFString(
196227
self.node.format_spec, pep701=True
197228
).candidates()
198-
# Remove the f prefix and quotes to get just the format part
229+
# Remove the f/rf prefix and quotes to get just the format part
199230
format_parts = []
200231
for fmt in format_candidates:
201-
if fmt.startswith('f'):
232+
# Handle both f"..." and rf"..." patterns
233+
if fmt.startswith('rf'):
234+
# Remove rf prefix and outer quotes
235+
inner = fmt[2:]
236+
elif fmt.startswith('f'):
202237
# Remove f prefix and outer quotes
203238
inner = fmt[1:]
204-
if (inner.startswith('"') and inner.endswith('"')) or \
205-
(inner.startswith("'") and inner.endswith("'")):
206-
format_parts.append(inner[1:-1])
207-
elif (inner.startswith('"""') and inner.endswith('"""')) or \
208-
(inner.startswith("'''") and inner.endswith("'''")):
209-
format_parts.append(inner[3:-3])
210-
else:
211-
format_parts.append(inner)
239+
else:
240+
continue
241+
242+
if (inner.startswith('"') and inner.endswith('"')) or \
243+
(inner.startswith("'") and inner.endswith("'")):
244+
format_parts.append(inner[1:-1])
245+
elif (inner.startswith('"""') and inner.endswith('"""')) or \
246+
(inner.startswith("'''") and inner.endswith("'''")):
247+
format_parts.append(inner[3:-3])
248+
else:
249+
format_parts.append(inner)
212250

213251
if format_parts:
214252
self._append(format_parts)

0 commit comments

Comments
 (0)