Skip to content

Commit 1098afb

Browse files
authored
Merge pull request #106 from wnienhaus/fix-int-parsing-with-base-prefix
Fix parsing of integer literals with base prefix
2 parents 407a790 + da5d928 commit 1098afb

File tree

7 files changed: +99 additions, -16 deletions (lines changed)

esp32_ulp/assemble.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,13 +219,13 @@ def fill(self, section, amount, fill_byte):
219219
raise ValueError('fill in bss section not allowed')
220220
if section is TEXT: # TODO: text section should be filled with NOPs
221221
raise ValueError('fill/skip/align in text section not supported')
222-
fill = int(fill_byte or 0).to_bytes(1, 'little') * amount
222+
fill = int(self.opcodes.eval_arg(str(fill_byte or 0))).to_bytes(1, 'little') * amount
223223
self.offsets[section] += len(fill)
224224
if section is not BSS:
225225
self.sections[section].append(fill)
226226

227227
def d_skip(self, amount, fill=None):
228-
amount = int(amount)
228+
amount = int(self.opcodes.eval_arg(amount))
229229
self.fill(self.section, amount, fill)
230230

231231
d_space = d_skip
@@ -246,7 +246,7 @@ def d_global(self, symbol):
246246
self.symbols.set_global(symbol)
247247

248248
def append_data(self, wordlen, args):
249-
data = [int(arg).to_bytes(wordlen, 'little') for arg in args]
249+
data = [int(self.opcodes.eval_arg(arg)).to_bytes(wordlen, 'little') for arg in args]
250250
self.append_section(b''.join(data))
251251

252252
def d_byte(self, *args):

esp32_ulp/opcodes.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from uctypes import struct, addressof, LITTLE_ENDIAN, UINT32, BFUINT32, BF_POS, BF_LEN
1414

1515
from .soc import *
16-
from .util import split_tokens, validate_expression
16+
from .util import split_tokens, validate_expression, parse_int
1717

1818
# XXX dirty hack: use a global for the symbol table
1919
symbols = None
@@ -285,7 +285,12 @@ def eval_arg(arg):
285285
_, _, sym_value = symbols.get_sym(token)
286286
parts.append(str(sym_value))
287287
else:
288-
parts.append(token)
288+
try:
289+
# attempt to parse, to convert numbers with base prefix correctly
290+
int_token = parse_int(token)
291+
parts.append(str(int_token))
292+
except ValueError:
293+
parts.append(token)
289294
parts = "".join(parts)
290295
if not validate_expression(parts):
291296
raise ValueError('Unsupported expression: %s' % parts)
@@ -311,7 +316,7 @@ def arg_qualify(arg):
311316
if arg_lower in ['--', 'eq', 'ov', 'lt', 'gt', 'ge', 'le']:
312317
return ARG(COND, arg_lower, arg)
313318
try:
314-
return ARG(IMM, int(arg), arg)
319+
return ARG(IMM, parse_int(arg), arg)
315320
except ValueError:
316321
pass
317322
try:

esp32_ulp/opcodes_s2.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from ucollections import namedtuple
1313
from uctypes import struct, addressof, LITTLE_ENDIAN, UINT32, BFUINT32, BF_POS, BF_LEN
1414

15-
from .util import split_tokens, validate_expression
15+
from .util import split_tokens, validate_expression, parse_int
1616

1717
# XXX dirty hack: use a global for the symbol table
1818
symbols = None
@@ -301,7 +301,12 @@ def eval_arg(arg):
301301
_, _, sym_value = symbols.get_sym(token)
302302
parts.append(str(sym_value))
303303
else:
304-
parts.append(token)
304+
try:
305+
# attempt to parse, to convert numbers with base prefix correctly
306+
int_token = parse_int(token)
307+
parts.append(str(int_token))
308+
except ValueError:
309+
parts.append(token)
305310
parts = "".join(parts)
306311
if not validate_expression(parts):
307312
raise ValueError('Unsupported expression: %s' % parts)
@@ -327,7 +332,7 @@ def arg_qualify(arg):
327332
if arg_lower in ['--', 'eq', 'ov', 'lt', 'gt', 'ge', 'le']:
328333
return ARG(COND, arg_lower, arg)
329334
try:
330-
return ARG(IMM, int(arg), arg)
335+
return ARG(IMM, parse_int(arg), arg)
331336
except ValueError:
332337
pass
333338
try:

esp32_ulp/util.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,18 @@ def validate_expression(param):
7777
return True
7878

7979

80+
def parse_int(literal):
81+
"""
82+
GNU as compatible parsing of string literals into integers
83+
Specifically, GNU as treats literals starting with 0 as octal
84+
All other literals are correctly parsed by Python
85+
See: https://sourceware.org/binutils/docs/as/Integers.html
86+
"""
87+
if len(literal) >= 2 and (literal.startswith("0") or literal.startswith("-0")) and literal.lstrip("-0").isdigit():
88+
return int(literal, 8)
89+
return int(literal, 0)
90+
91+
8092
def file_exists(filename):
8193
try:
8294
os.stat(filename)

tests/opcodes.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN
99
from esp32_ulp.opcodes import make_ins, make_ins_struct_def
10-
from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND
10+
from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, parse_int, eval_arg, ARG, REG, IMM, SYM, COND
1111
from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT
1212
import esp32_ulp.opcodes as opcodes
1313

@@ -46,6 +46,7 @@ def test_arg_qualify():
4646
assert arg_qualify('-1') == ARG(IMM, -1, '-1')
4747
assert arg_qualify('1') == ARG(IMM, 1, '1')
4848
assert arg_qualify('0x20') == ARG(IMM, 32, '0x20')
49+
assert arg_qualify('0100') == ARG(IMM, 64, '0100')
4950
assert arg_qualify('0o100') == ARG(IMM, 64, '0o100')
5051
assert arg_qualify('0b1000') == ARG(IMM, 8, '0b1000')
5152
assert arg_qualify('eq') == ARG(COND, 'eq', 'eq')
@@ -96,6 +97,11 @@ def test_eval_arg():
9697
assert eval_arg('const >> 1') == 21
9798
assert eval_arg('(const|4)&0xf') == 0xe
9899

100+
assert eval_arg('0x7') == 7
101+
assert eval_arg('010') == 8
102+
assert eval_arg('-0x7') == -7 # negative
103+
assert eval_arg('~0x7') == -8 # complement
104+
99105
assert_raises(ValueError, eval_arg, 'evil()')
100106
assert_raises(ValueError, eval_arg, 'def cafe()')
101107
assert_raises(ValueError, eval_arg, '1 ^ 2')
@@ -105,14 +111,17 @@ def test_eval_arg():
105111
opcodes.symbols = None
106112

107113

108-
def assert_raises(exception, func, *args):
114+
def assert_raises(exception, func, *args, message=None):
109115
try:
110116
func(*args)
111-
except exception:
117+
except exception as e:
112118
raised = True
119+
actual_message = e.args[0]
113120
else:
114121
raised = False
115122
assert raised
123+
if message:
124+
assert actual_message == message, '%s == %s' % (actual_message, message)
116125

117126

118127
def test_reg_direct_ulp_addressing():

tests/opcodes_s2.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN
99
from esp32_ulp.opcodes_s2 import make_ins, make_ins_struct_def
10-
from esp32_ulp.opcodes_s2 import get_reg, get_imm, get_cond, arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND
10+
from esp32_ulp.opcodes_s2 import get_reg, get_imm, get_cond, arg_qualify, parse_int, eval_arg, ARG, REG, IMM, SYM, COND
1111
from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT
1212
import esp32_ulp.opcodes_s2 as opcodes
1313

@@ -46,6 +46,7 @@ def test_arg_qualify():
4646
assert arg_qualify('-1') == ARG(IMM, -1, '-1')
4747
assert arg_qualify('1') == ARG(IMM, 1, '1')
4848
assert arg_qualify('0x20') == ARG(IMM, 32, '0x20')
49+
assert arg_qualify('0100') == ARG(IMM, 64, '0100')
4950
assert arg_qualify('0o100') == ARG(IMM, 64, '0o100')
5051
assert arg_qualify('0b1000') == ARG(IMM, 8, '0b1000')
5152
assert arg_qualify('eq') == ARG(COND, 'eq', 'eq')
@@ -96,6 +97,11 @@ def test_eval_arg():
9697
assert eval_arg('const >> 1') == 21
9798
assert eval_arg('(const|4)&0xf') == 0xe
9899

100+
assert eval_arg('0x7') == 7
101+
assert eval_arg('010') == 8
102+
assert eval_arg('-0x7') == -7 # negative
103+
assert eval_arg('~0x7') == -8 # complement
104+
99105
assert_raises(ValueError, eval_arg, 'evil()')
100106
assert_raises(ValueError, eval_arg, 'def cafe()')
101107
assert_raises(ValueError, eval_arg, '1 ^ 2')
@@ -105,14 +111,17 @@ def test_eval_arg():
105111
opcodes.symbols = None
106112

107113

108-
def assert_raises(exception, func, *args):
114+
def assert_raises(exception, func, *args, message=None):
109115
try:
110116
func(*args)
111-
except exception:
117+
except exception as e:
112118
raised = True
119+
actual_message = e.args[0]
113120
else:
114121
raised = False
115122
assert raised
123+
if message:
124+
assert actual_message == message, '%s == %s' % (actual_message, message)
116125

117126

118127
def test_reg_direct_ulp_addressing():

tests/util.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# SPDX-License-Identifier: MIT
77

88
import os
9-
from esp32_ulp.util import split_tokens, validate_expression, file_exists
9+
from esp32_ulp.util import split_tokens, validate_expression, parse_int, file_exists
1010

1111
tests = []
1212

@@ -18,6 +18,19 @@ def test(param):
1818
tests.append(param)
1919

2020

21+
def assert_raises(exception, func, *args, message=None):
22+
try:
23+
func(*args)
24+
except exception as e:
25+
raised = True
26+
actual_message = e.args[0]
27+
else:
28+
raised = False
29+
assert raised
30+
if message:
31+
assert actual_message == message, '%s == %s' % (actual_message, message)
32+
33+
2134
@test
2235
def test_split_tokens():
2336
assert split_tokens("") == []
@@ -69,6 +82,36 @@ def test_validate_expression():
6982
assert validate_expression('def CAFE()') is False
7083

7184

85+
@test
86+
def test_parse_int():
87+
# decimal
88+
assert parse_int("0") == 0, "0 == 0"
89+
assert parse_int("5") == 5, "5 == 5"
90+
assert parse_int("-0") == 0, "-0 == 0"
91+
assert parse_int("-5") == -5, "-5 == -5"
92+
# hex
93+
assert parse_int("0x5") == 5, "0x5 == 5"
94+
assert parse_int("0x5a") == 90, "0x5a == 90"
95+
assert parse_int("-0x5a") == -90, "-0x5a == -90"
96+
# binary
97+
assert parse_int("0b1001") == 9, "0b1001 == 9"
98+
assert parse_int("-0b1001") == -9, "-0b1001 == 9"
99+
# octal
100+
assert parse_int("07") == 7, "07 == 7"
101+
assert parse_int("0100") == 64, "0100 == 64"
102+
assert parse_int("0o210") == 136, "0o210 == 136"
103+
assert parse_int("00000010") == 8, "00000010 == 8"
104+
assert parse_int("-07") == -7, "-07 == -7"
105+
assert parse_int("-0100") == -64, "-0100 == -64"
106+
assert parse_int("-0o210") == -136, "-0o210 == -136"
107+
assert parse_int("-00000010") == -8, "-00000010 == -8"
108+
# negative cases
109+
assert_raises(ValueError, parse_int, '0b123', message="invalid syntax for integer with base 2: '123'")
110+
assert_raises(ValueError, parse_int, '0900', message="invalid syntax for integer with base 8: '0900'")
111+
assert_raises(ValueError, parse_int, '0o900', message="invalid syntax for integer with base 8: '900'")
112+
assert_raises(ValueError, parse_int, '0xg', message="invalid syntax for integer with base 16: 'g'")
113+
114+
72115
@test
73116
def test_file_exists():
74117
testfile = '.testfile'

0 commit comments

Comments
 (0)