Skip to content

Commit da5d928

Browse files
committed
Fix parsing of integer literals with base prefix
MicroPython 1.25.0 introduced a breaking change, aligning the behaviour of the int() function with the behaviour of CPython (assume a decimal number, unless a base is specified. Only if a base of 0 is specified will the base be inferred from the string). This commit implements a new custom parsing function `parse_int`. It can correctly parse the following string literals: * 0x[0-9a-fA-F]+ -> treated as hex * 0b[01]+ -> treated as binary * 0o[0-7]+ -> treated as octal (Python style) * 0[0-7]+ -> treated as octal (GNU as style) * anything else parsed as decimal. It only handles the GNU as style octal case directly, letting the original `int()` function handle the other cases (using base 0). In fact, the GNU as octal case was not handled correctly previously, and this commit fixes that. Some new tests for previous functionality were added to show that both new and previous cases are being handled correctly. Note: GNU as does not actually accept the octal prefix 0o..., but we accept it as a convenience, as this is accepted in Python code. This means, however, that our assembler accepts code which GNU as does not accept. But the other way around, we still accept all code that GNU as accepts, which was one of our goals.
1 parent 407a790 commit da5d928

File tree

7 files changed

+99
-16
lines changed

7 files changed

+99
-16
lines changed

esp32_ulp/assemble.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,13 +219,13 @@ def fill(self, section, amount, fill_byte):
219219
raise ValueError('fill in bss section not allowed')
220220
if section is TEXT: # TODO: text section should be filled with NOPs
221221
raise ValueError('fill/skip/align in text section not supported')
222-
fill = int(fill_byte or 0).to_bytes(1, 'little') * amount
222+
fill = int(self.opcodes.eval_arg(str(fill_byte or 0))).to_bytes(1, 'little') * amount
223223
self.offsets[section] += len(fill)
224224
if section is not BSS:
225225
self.sections[section].append(fill)
226226

227227
def d_skip(self, amount, fill=None):
228-
amount = int(amount)
228+
amount = int(self.opcodes.eval_arg(amount))
229229
self.fill(self.section, amount, fill)
230230

231231
d_space = d_skip
@@ -246,7 +246,7 @@ def d_global(self, symbol):
246246
self.symbols.set_global(symbol)
247247

248248
def append_data(self, wordlen, args):
249-
data = [int(arg).to_bytes(wordlen, 'little') for arg in args]
249+
data = [int(self.opcodes.eval_arg(arg)).to_bytes(wordlen, 'little') for arg in args]
250250
self.append_section(b''.join(data))
251251

252252
def d_byte(self, *args):

esp32_ulp/opcodes.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from uctypes import struct, addressof, LITTLE_ENDIAN, UINT32, BFUINT32, BF_POS, BF_LEN
1414

1515
from .soc import *
16-
from .util import split_tokens, validate_expression
16+
from .util import split_tokens, validate_expression, parse_int
1717

1818
# XXX dirty hack: use a global for the symbol table
1919
symbols = None
@@ -285,7 +285,12 @@ def eval_arg(arg):
285285
_, _, sym_value = symbols.get_sym(token)
286286
parts.append(str(sym_value))
287287
else:
288-
parts.append(token)
288+
try:
289+
# attempt to parse, to convert numbers with base prefix correctly
290+
int_token = parse_int(token)
291+
parts.append(str(int_token))
292+
except ValueError:
293+
parts.append(token)
289294
parts = "".join(parts)
290295
if not validate_expression(parts):
291296
raise ValueError('Unsupported expression: %s' % parts)
@@ -311,7 +316,7 @@ def arg_qualify(arg):
311316
if arg_lower in ['--', 'eq', 'ov', 'lt', 'gt', 'ge', 'le']:
312317
return ARG(COND, arg_lower, arg)
313318
try:
314-
return ARG(IMM, int(arg), arg)
319+
return ARG(IMM, parse_int(arg), arg)
315320
except ValueError:
316321
pass
317322
try:

esp32_ulp/opcodes_s2.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from ucollections import namedtuple
1313
from uctypes import struct, addressof, LITTLE_ENDIAN, UINT32, BFUINT32, BF_POS, BF_LEN
1414

15-
from .util import split_tokens, validate_expression
15+
from .util import split_tokens, validate_expression, parse_int
1616

1717
# XXX dirty hack: use a global for the symbol table
1818
symbols = None
@@ -301,7 +301,12 @@ def eval_arg(arg):
301301
_, _, sym_value = symbols.get_sym(token)
302302
parts.append(str(sym_value))
303303
else:
304-
parts.append(token)
304+
try:
305+
# attempt to parse, to convert numbers with base prefix correctly
306+
int_token = parse_int(token)
307+
parts.append(str(int_token))
308+
except ValueError:
309+
parts.append(token)
305310
parts = "".join(parts)
306311
if not validate_expression(parts):
307312
raise ValueError('Unsupported expression: %s' % parts)
@@ -327,7 +332,7 @@ def arg_qualify(arg):
327332
if arg_lower in ['--', 'eq', 'ov', 'lt', 'gt', 'ge', 'le']:
328333
return ARG(COND, arg_lower, arg)
329334
try:
330-
return ARG(IMM, int(arg), arg)
335+
return ARG(IMM, parse_int(arg), arg)
331336
except ValueError:
332337
pass
333338
try:

esp32_ulp/util.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,18 @@ def validate_expression(param):
7777
return True
7878

7979

80+
def parse_int(literal):
    """
    GNU as compatible parsing of a string literal into an integer

    GNU as treats a literal consisting only of decimal digits with a
    leading zero (e.g. 0100) as octal. Those literals are parsed here with
    base 8. Every other literal (0x.., 0b.., 0o.., plain decimal) is passed
    to int() with base 0, which infers the base from the prefix.

    A single leading "+" or "-" may precede either form, so "+010" and
    "-010" are both recognised as octal.

    See: https://sourceware.org/binutils/docs/as/Integers.html

    :param literal: string holding a single integer literal
    :return: the parsed integer
    :raises ValueError: if the literal is not valid for the detected base
                        (e.g. "0900" or "0b123")
    """
    # Peel off at most one sign character so that positive and negative
    # literals are classified by the same rule.
    digits = literal[1:] if literal[:1] in ("+", "-") else literal

    # At least two characters are required: a lone "0" is plain decimal zero.
    if len(digits) >= 2 and digits[0] == "0" and digits.isdigit():
        # Pass the original literal (sign included) so int() applies the sign
        # and reports the full literal if it contains a non-octal digit.
        return int(literal, 8)
    return int(literal, 0)
90+
91+
8092
def file_exists(filename):
8193
try:
8294
os.stat(filename)

tests/opcodes.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN
99
from esp32_ulp.opcodes import make_ins, make_ins_struct_def
10-
from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND
10+
from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, parse_int, eval_arg, ARG, REG, IMM, SYM, COND
1111
from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT
1212
import esp32_ulp.opcodes as opcodes
1313

@@ -46,6 +46,7 @@ def test_arg_qualify():
4646
assert arg_qualify('-1') == ARG(IMM, -1, '-1')
4747
assert arg_qualify('1') == ARG(IMM, 1, '1')
4848
assert arg_qualify('0x20') == ARG(IMM, 32, '0x20')
49+
assert arg_qualify('0100') == ARG(IMM, 64, '0100')
4950
assert arg_qualify('0o100') == ARG(IMM, 64, '0o100')
5051
assert arg_qualify('0b1000') == ARG(IMM, 8, '0b1000')
5152
assert arg_qualify('eq') == ARG(COND, 'eq', 'eq')
@@ -96,6 +97,11 @@ def test_eval_arg():
9697
assert eval_arg('const >> 1') == 21
9798
assert eval_arg('(const|4)&0xf') == 0xe
9899

100+
assert eval_arg('0x7') == 7
101+
assert eval_arg('010') == 8
102+
assert eval_arg('-0x7') == -7 # negative
103+
assert eval_arg('~0x7') == -8 # complement
104+
99105
assert_raises(ValueError, eval_arg, 'evil()')
100106
assert_raises(ValueError, eval_arg, 'def cafe()')
101107
assert_raises(ValueError, eval_arg, '1 ^ 2')
@@ -105,14 +111,17 @@ def test_eval_arg():
105111
opcodes.symbols = None
106112

107113

108-
def assert_raises(exception, func, *args):
114+
def assert_raises(exception, func, *args, message=None):
109115
try:
110116
func(*args)
111-
except exception:
117+
except exception as e:
112118
raised = True
119+
actual_message = e.args[0]
113120
else:
114121
raised = False
115122
assert raised
123+
if message:
124+
assert actual_message == message, '%s == %s' % (actual_message, message)
116125

117126

118127
def test_reg_direct_ulp_addressing():

tests/opcodes_s2.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN
99
from esp32_ulp.opcodes_s2 import make_ins, make_ins_struct_def
10-
from esp32_ulp.opcodes_s2 import get_reg, get_imm, get_cond, arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND
10+
from esp32_ulp.opcodes_s2 import get_reg, get_imm, get_cond, arg_qualify, parse_int, eval_arg, ARG, REG, IMM, SYM, COND
1111
from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT
1212
import esp32_ulp.opcodes_s2 as opcodes
1313

@@ -46,6 +46,7 @@ def test_arg_qualify():
4646
assert arg_qualify('-1') == ARG(IMM, -1, '-1')
4747
assert arg_qualify('1') == ARG(IMM, 1, '1')
4848
assert arg_qualify('0x20') == ARG(IMM, 32, '0x20')
49+
assert arg_qualify('0100') == ARG(IMM, 64, '0100')
4950
assert arg_qualify('0o100') == ARG(IMM, 64, '0o100')
5051
assert arg_qualify('0b1000') == ARG(IMM, 8, '0b1000')
5152
assert arg_qualify('eq') == ARG(COND, 'eq', 'eq')
@@ -96,6 +97,11 @@ def test_eval_arg():
9697
assert eval_arg('const >> 1') == 21
9798
assert eval_arg('(const|4)&0xf') == 0xe
9899

100+
assert eval_arg('0x7') == 7
101+
assert eval_arg('010') == 8
102+
assert eval_arg('-0x7') == -7 # negative
103+
assert eval_arg('~0x7') == -8 # complement
104+
99105
assert_raises(ValueError, eval_arg, 'evil()')
100106
assert_raises(ValueError, eval_arg, 'def cafe()')
101107
assert_raises(ValueError, eval_arg, '1 ^ 2')
@@ -105,14 +111,17 @@ def test_eval_arg():
105111
opcodes.symbols = None
106112

107113

108-
def assert_raises(exception, func, *args):
114+
def assert_raises(exception, func, *args, message=None):
109115
try:
110116
func(*args)
111-
except exception:
117+
except exception as e:
112118
raised = True
119+
actual_message = e.args[0]
113120
else:
114121
raised = False
115122
assert raised
123+
if message:
124+
assert actual_message == message, '%s == %s' % (actual_message, message)
116125

117126

118127
def test_reg_direct_ulp_addressing():

tests/util.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# SPDX-License-Identifier: MIT
77

88
import os
9-
from esp32_ulp.util import split_tokens, validate_expression, file_exists
9+
from esp32_ulp.util import split_tokens, validate_expression, parse_int, file_exists
1010

1111
tests = []
1212

@@ -18,6 +18,19 @@ def test(param):
1818
tests.append(param)
1919

2020

21+
def assert_raises(exception, func, *args, message=None):
    """
    Assert that calling func(*args) raises the given exception

    :param exception: exception class expected to be raised
    :param func: callable under test
    :param args: positional arguments passed on to func
    :param message: if truthy, the first argument of the raised exception
                    must be equal to this value
    """
    actual_message = None
    try:
        func(*args)
    except exception as e:
        raised = True
        # An exception raised without arguments has an empty args tuple;
        # indexing it unconditionally would fail with an unrelated IndexError.
        if e.args:
            actual_message = e.args[0]
    else:
        raised = False
    assert raised, '%s was not raised' % (exception,)
    if message:
        assert actual_message == message, '%s == %s' % (actual_message, message)
32+
33+
2134
@test
2235
def test_split_tokens():
2336
assert split_tokens("") == []
@@ -69,6 +82,36 @@ def test_validate_expression():
6982
assert validate_expression('def CAFE()') is False
7083

7184

85+
@test
def test_parse_int():
    """Check parse_int for each supported literal style and for malformed input."""
    # decimal
    assert parse_int("0") == 0, "0 == 0"
    assert parse_int("5") == 5, "5 == 5"
    assert parse_int("-0") == 0, "-0 == 0"
    assert parse_int("-5") == -5, "-5 == -5"
    # hex
    assert parse_int("0x5") == 5, "0x5 == 5"
    assert parse_int("0x5a") == 90, "0x5a == 90"
    assert parse_int("-0x5a") == -90, "-0x5a == -90"
    # binary
    assert parse_int("0b1001") == 9, "0b1001 == 9"
    assert parse_int("-0b1001") == -9, "-0b1001 == -9"
    # octal (both the GNU as style "0..." and the Python style "0o...")
    assert parse_int("07") == 7, "07 == 7"
    assert parse_int("0100") == 64, "0100 == 64"
    assert parse_int("0o210") == 136, "0o210 == 136"
    assert parse_int("00000010") == 8, "00000010 == 8"
    assert parse_int("-07") == -7, "-07 == -7"
    assert parse_int("-0100") == -64, "-0100 == -64"
    assert parse_int("-0o210") == -136, "-0o210 == -136"
    assert parse_int("-00000010") == -8, "-00000010 == -8"
    # negative cases
    # NOTE(review): the expected texts are presumably the wording of the
    # interpreter's int() error messages - confirm when changing runtime.
    assert_raises(ValueError, parse_int, '0b123', message="invalid syntax for integer with base 2: '123'")
    assert_raises(ValueError, parse_int, '0900', message="invalid syntax for integer with base 8: '0900'")
    assert_raises(ValueError, parse_int, '0o900', message="invalid syntax for integer with base 8: '900'")
    assert_raises(ValueError, parse_int, '0xg', message="invalid syntax for integer with base 16: 'g'")
113+
114+
72115
@test
73116
def test_file_exists():
74117
testfile = '.testfile'

0 commit comments

Comments
 (0)