Skip to content

Commit 4899c2e

Browse files
committed
Enhance _parse_simple_number() to handle more cases
1 parent 1182135 commit 4899c2e

File tree

2 files changed

+39
-11
lines changed

2 files changed

+39
-11
lines changed

Lib/test/test_tomllib/test_misc.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,11 +132,26 @@ def test_parse_simple_number(self):
132132
parse_simple_number = tomllib._parser._parse_simple_number
133133
self.assertEqual(parse_simple_number("123", 0), (3, 123))
134134
self.assertEqual(parse_simple_number("123\n", 0), (3, 123))
135+
self.assertEqual(parse_simple_number("123 456", 0), (3, 123))
136+
self.assertEqual(parse_simple_number("+123\n", 0), (4, 123))
137+
self.assertEqual(parse_simple_number("-123\n", 0), (4, -123))
135138
self.assertEqual(parse_simple_number("0\n", 0), (1, 0))
136-
139+
self.assertEqual(parse_simple_number("+0\n", 0), (2, 0))
140+
self.assertEqual(parse_simple_number("-0\n", 0), (2, 0))
141+
self.assertEqual(parse_simple_number("[23]\n", 1), (3, 23))
142+
self.assertEqual(parse_simple_number("[23, 24]\n", 1), (3, 23))
143+
self.assertEqual(parse_simple_number("[23]\n", 1), (3, 23))
144+
self.assertEqual(parse_simple_number("{x = 42}\n", 5), (7, 42))
145+
146+
self.assertIsNone(parse_simple_number("+", 0), None)
147+
self.assertIsNone(parse_simple_number("-", 0), None)
148+
self.assertIsNone(parse_simple_number("+\n", 0), None)
149+
self.assertIsNone(parse_simple_number("-\n", 0), None)
150+
self.assertIsNone(parse_simple_number("+inf\n", 0), None)
151+
self.assertIsNone(parse_simple_number("-nan\n", 0), None)
137152
self.assertIsNone(parse_simple_number("0123\n", 0))
138-
self.assertIsNone(parse_simple_number("123-456\n", 0))
139-
self.assertIsNone(parse_simple_number("123:456\n", 0))
153+
self.assertIsNone(parse_simple_number("1979-05-27\n", 0))
154+
self.assertIsNone(parse_simple_number("12:32:00\n", 0))
140155
self.assertIsNone(parse_simple_number("1.0\n", 0))
141156
self.assertIsNone(parse_simple_number("1_000\n", 0))
142157
self.assertIsNone(parse_simple_number("x123\n", 0))

Lib/tomllib/_parser.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS | frozenset("\"'")
4545
HEXDIGIT_CHARS: Final = frozenset("abcdef" "ABCDEF" "0123456789")
4646
_DECDIGIT_CHARS: Final = frozenset("0123456789")
47+
_NUMBER_INITIAL_CHARS: Final = _DECDIGIT_CHARS | frozenset("+-")
48+
_NUMBER_END_CHARS: Final = frozenset(",]}") | TOML_WS_AND_NEWLINE
4749

4850
BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType(
4951
{
@@ -672,19 +674,29 @@ def _parse_simple_number(
672674
src: str, pos: Pos
673675
) -> None | tuple[Pos, int]:
674676
start = pos
675-
src = src.rstrip()
676677
end = len(src)
678+
end_chars = _NUMBER_END_CHARS
679+
if src[pos] in '+-':
680+
pos += 1
681+
if pos >= end:
682+
return None
683+
if src[pos] not in _DECDIGIT_CHARS:
684+
return None
685+
686+
if src[pos] == '0':
687+
pos += 1
688+
if pos < end and src[pos] not in end_chars:
689+
return None
690+
return pos, 0
691+
677692
while src[pos] in _DECDIGIT_CHARS:
678693
pos += 1
679694
if pos >= end:
680695
break
681696
else:
682-
if src[pos] != "\n":
697+
if src[pos] not in end_chars:
683698
return None
684-
digits = src[start:pos]
685-
if digits.startswith("0") and len(digits) > 1:
686-
return None
687-
return pos, int(digits)
699+
return pos, int(src[start:pos])
688700

689701

690702
def parse_value(
@@ -725,8 +737,9 @@ def parse_value(
725737
if char == "{":
726738
return parse_inline_table(src, pos, parse_float)
727739

728-
# Simple number parser avoiding regex
729-
if char in _DECDIGIT_CHARS:
740+
# First try a simple number parser, which defers importing tomllib._re
741+
# to speed up tomllib import time
742+
if char in _NUMBER_INITIAL_CHARS:
730743
res = _parse_simple_number(src, pos)
731744
if res is not None:
732745
return res

0 commit comments

Comments
 (0)