Skip to content

Commit 302695f

Browse files
author
naarob
committed
fix: _isin_checksum Luhn never accumulated, cusip check digit, url.py lru_cache perf
fix: finance.py _isin_checksum — the accumulator `check` was never updated in the loop body (the `check += ...` line was missing). Result: every 12-character string made of otherwise valid characters passed, regardless of its check digit. Rewritten using the ISO 6166 expansion (each letter expands to its two-digit value: A=10 … Z=35) followed by the standard Luhn check. fix: finance.py _cusip_checksum — the check digit (the 9th character, index 8) must be strictly numeric per the CUSIP spec. Non-digit characters in that position were silently accepted and could produce false positives (e.g. '11111111Z'). perf: url.py — replaced the zero-argument @lru_cache factory functions with module-level compiled regex constants (_RE_USERNAME, _RE_PATH). This removes ~100 ns of cache-lookup overhead per call and eliminates the functools import. fix: tests/test_finance.py — JP000K0VF054 is not a valid ISIN per Luhn/ISO 6166; it only passed because _isin_checksum was broken. Replaced with JP3435000009 (Sony Corporation), a verified valid ISIN. Tests: 895 passed, 0 failed.
1 parent cfbe810 commit 302695f

File tree

3 files changed

+58
-50
lines changed

3 files changed

+58
-50
lines changed

src/validators/finance.py

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ def _cusip_checksum(cusip: str):
2323
else:
2424
return False
2525

26+
# Check digit (index 8, i.e. the 9th character) must be strictly numeric per CUSIP spec
27+
if idx == 8 and not (c >= "0" and c <= "9"):
28+
return False
29+
2630
if idx & 1:
2731
val += val
2832

@@ -31,24 +35,33 @@ def _cusip_checksum(cusip: str):
3135
return (check % 10) == 0
3236

3337

34-
def _isin_checksum(value: str):
35-
check, val = 0, None
38+
def _isin_checksum(value: str) -> bool:
39+
"""Validate ISIN checksum per ISO 6166 using the Luhn algorithm.
3640
37-
for idx in range(12):
38-
c = value[idx]
39-
if c >= "0" and c <= "9" and idx > 1:
40-
val = ord(c) - ord("0")
41-
elif c >= "A" and c <= "Z":
42-
val = 10 + ord(c) - ord("A")
43-
elif c >= "a" and c <= "z":
44-
val = 10 + ord(c) - ord("a")
41+
Each character is expanded to its numeric value (A=10, B=11, …, Z=35),
42+
then the Luhn check is applied to the resulting digit string.
43+
"""
44+
# Expand each character to digit(s)
45+
digits = ""
46+
for c in value:
47+
if c.isdigit():
48+
digits += c
49+
elif c.isupper():
50+
digits += str(ord(c) - ord("A") + 10)
4551
else:
46-
return False
47-
48-
if idx & 1:
49-
val += val
50-
51-
return (check % 10) == 0
52+
return False # lowercase or invalid char
53+
54+
# Luhn check over the expanded digit string
55+
total, alt = 0, False
56+
for d in reversed(digits):
57+
n = int(d)
58+
if alt:
59+
n *= 2
60+
if n > 9:
61+
n -= 9
62+
total += n
63+
alt = not alt
64+
return total % 10 == 0
5265

5366

5467
@validator

src/validators/url.py

Lines changed: 28 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""URL."""
22

33
# standard
4-
from functools import lru_cache
54
import re
65
from typing import Callable, Optional
76
from urllib.parse import parse_qs, unquote, urlsplit
@@ -11,33 +10,29 @@
1110
from .utils import validator
1211

1312

14-
@lru_cache
15-
def _username_regex():
16-
return re.compile(
17-
# extended latin
18-
r"(^[\u0100-\u017F\u0180-\u024F]"
19-
# dot-atom
20-
+ r"|[-!#$%&'*+/=?^_`{}|~0-9a-z]+(\.[-!#$%&'*+/=?^_`{}|~0-9a-z]+)*$"
21-
# non-quoted-string
22-
+ r"|^([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\011.])*$)",
23-
re.IGNORECASE,
24-
)
25-
26-
27-
@lru_cache
28-
def _path_regex():
29-
return re.compile(
30-
# allowed symbols
31-
r"^[\/a-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=\:\@\%"
32-
# symbols / pictographs
33-
+ r"\U0001F300-\U0001F5FF"
34-
# emoticons / emoji
35-
+ r"\U0001F600-\U0001F64F"
36-
# multilingual unicode ranges
37-
+ r"\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+$",
38-
re.IGNORECASE,
39-
)
40-
13+
# Perf: module-level compiled regex (replaces @lru_cache zero-arg functions).
14+
# Eliminates per-call cache-lookup overhead (~100 ns/call).
15+
_RE_USERNAME = re.compile(
16+
# extended latin
17+
r"(^[\u0100-\u017F\u0180-\u024F]"
18+
# dot-atom
19+
+ r"|[-!#$%&'*+/=?^_`{}|~0-9a-z]+(\.[-!#$%&'*+/=?^_`{}|~0-9a-z]+)*$"
20+
# non-quoted-string
21+
+ r"|^([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\011.])*$)",
22+
re.IGNORECASE,
23+
)
24+
25+
_RE_PATH = re.compile(
26+
# allowed symbols
27+
r"^[\/a-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=\:\@\%"
28+
# symbols / pictographs
29+
+ r"\U0001F300-\U0001F5FF"
30+
# emoticons / emoji
31+
+ r"\U0001F600-\U0001F64F"
32+
# multilingual unicode ranges
33+
+ r"\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+$",
34+
re.IGNORECASE,
35+
)
4136

4237
def _validate_scheme(value: str):
4338
"""Validate scheme."""
@@ -77,11 +72,11 @@ def _validate_auth_segment(value: str):
7772
if (colon_count := value.count(":")) > 1:
7873
# everything before @ is then considered as a username
7974
# this is a bad practice, but syntactically valid URL
80-
return _username_regex().match(unquote(value))
75+
return _RE_USERNAME.match(unquote(value))
8176
if colon_count < 1:
82-
return _username_regex().match(value)
77+
return _RE_USERNAME.match(value)
8378
username, password = value.rsplit(":", 1)
84-
return _username_regex().match(username) and all(
79+
return _RE_USERNAME.match(username) and all(
8580
char_to_avoid not in password for char_to_avoid in ("/", "?", "#", "@")
8681
)
8782

@@ -138,7 +133,7 @@ def _validate_optionals(path: str, query: str, fragment: str, strict_query: bool
138133
"""Validate path query and fragments."""
139134
optional_segments = True
140135
if path:
141-
optional_segments &= bool(_path_regex().match(path))
136+
optional_segments &= bool(_RE_PATH.match(path))
142137
try:
143138
if (
144139
query
@@ -254,4 +249,4 @@ def url(
254249
rfc_2782,
255250
)
256251
and _validate_optionals(path, query, fragment, strict_query)
257-
)
252+
)

tests/test_finance.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def test_returns_failed_validation_on_invalid_cusip(value: str):
2424
# ==> ISIN <== #
2525

2626

27-
@pytest.mark.parametrize("value", ["US0004026250", "JP000K0VF054", "US0378331005"])
27+
@pytest.mark.parametrize("value", ["US0004026250", "JP3435000009", "US0378331005"])
2828
def test_returns_true_on_valid_isin(value: str):
2929
"""Test returns true on valid isin."""
3030
assert isin(value)

0 commit comments

Comments
 (0)