Skip to content

Commit 1c89817

Browse files
eendebakpt, claude, and Fidget-Spinner
authored
gh-148276: Optimize object creation and method calls in the JIT by resolving __init__ at trace optimization time (GH-148277)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-authored-by: Ken Jin <kenjin4096@gmail.com>
1 parent a71b043 commit 1c89817

File tree

6 files changed

+180
-6
lines changed

6 files changed

+180
-6
lines changed

Include/internal/pycore_opcode_metadata.h

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/test/test_capi/test_opt.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,6 +1521,29 @@ class Foo:
15211521
Foo.attr = 0
15221522
self.assertFalse(ex.is_valid())
15231523

1524+
def test_guard_type_version_locked_removed(self):
1525+
"""
1526+
Verify that redundant _GUARD_TYPE_VERSION_LOCKED guards are
1527+
eliminated for sequential STORE_ATTR_INSTANCE_VALUE in __init__.
1528+
"""
1529+
1530+
class Foo:
1531+
def __init__(self):
1532+
self.a = 1
1533+
self.b = 2
1534+
self.c = 3
1535+
1536+
def thing(n):
1537+
for _ in range(n):
1538+
Foo()
1539+
1540+
res, ex = self._run_with_optimizer(thing, TIER2_THRESHOLD)
1541+
self.assertIsNotNone(ex)
1542+
opnames = list(iter_opnames(ex))
1543+
guard_locked_count = opnames.count("_GUARD_TYPE_VERSION_LOCKED")
1544+
# Only the first store needs the guard; the rest should be NOPed.
1545+
self.assertEqual(guard_locked_count, 1)
1546+
15241547
def test_type_version_doesnt_segfault(self):
15251548
"""
15261549
Tests that setting a type version doesn't cause a segfault when later looking at the stack.
@@ -1542,6 +1565,98 @@ def fn(a):
15421565

15431566
fn(A())
15441567

1568+
def test_init_resolves_callable(self):
1569+
"""
1570+
_CHECK_AND_ALLOCATE_OBJECT should resolve __init__ to a constant,
1571+
enabling the optimizer to propagate type information through the frame
1572+
and eliminate redundant function version and arg count checks.
1573+
"""
1574+
class MyPoint:
1575+
def __init__(self, x, y):
1576+
# If __init__ callable is propagated through, then
1577+
# these will get promoted from globals to constants.
1578+
self.x = range(1)
1579+
self.y = range(1)
1580+
1581+
def testfunc(n):
1582+
for _ in range(n):
1583+
p = MyPoint(1.0, 2.0)
1584+
1585+
_, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
1586+
self.assertIsNotNone(ex)
1587+
uops = get_opnames(ex)
1588+
# The __init__ call should be traced through via _PUSH_FRAME
1589+
self.assertIn("_PUSH_FRAME", uops)
1590+
# __init__ resolution allows promotion of range to constant
1591+
self.assertNotIn("_LOAD_GLOBAL_BUILTINS", uops)
1592+
1593+
def test_guard_type_version_locked_propagates(self):
1594+
"""
1595+
_GUARD_TYPE_VERSION_LOCKED should set the type version on the
1596+
symbol so repeated accesses to the same type can benefit.
1597+
"""
1598+
class Item:
1599+
def __init__(self, val):
1600+
self.val = val
1601+
1602+
def get(self):
1603+
return self.val
1604+
1605+
def get2(self):
1606+
return self.val + 1
1607+
1608+
def testfunc(n):
1609+
item = Item(42)
1610+
total = 0
1611+
for _ in range(n):
1612+
# Two method calls on the same object — the second
1613+
# should benefit from type info set by the first.
1614+
total += item.get() + item.get2()
1615+
return total
1616+
1617+
res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
1618+
self.assertEqual(res, TIER2_THRESHOLD * (42 + 43))
1619+
self.assertIsNotNone(ex)
1620+
uops = get_opnames(ex)
1621+
# Both methods should be traced through
1622+
self.assertEqual(uops.count("_PUSH_FRAME"), 2)
1623+
# Type version propagation: one guard covers both method lookups
1624+
self.assertEqual(uops.count("_GUARD_TYPE_VERSION"), 1)
1625+
# Function checks eliminated (type info resolves the callable)
1626+
self.assertNotIn("_CHECK_FUNCTION_VERSION", uops)
1627+
self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops)
1628+
1629+
def test_method_chain_guard_elimination(self):
1630+
"""
1631+
Calling two methods on the same object should share the outer
1632+
type guard — only one _GUARD_TYPE_VERSION for the two lookups.
1633+
"""
1634+
class Calc:
1635+
def __init__(self, val):
1636+
self.val = val
1637+
1638+
def add(self, x):
1639+
self.val += x
1640+
return self
1641+
1642+
def testfunc(n):
1643+
c = Calc(0)
1644+
for _ in range(n):
1645+
c.add(1).add(2)
1646+
return c.val
1647+
1648+
res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
1649+
self.assertEqual(res, TIER2_THRESHOLD * 3)
1650+
self.assertIsNotNone(ex)
1651+
uops = get_opnames(ex)
1652+
# Both add() calls should be inlined
1653+
push_count = uops.count("_PUSH_FRAME")
1654+
self.assertEqual(push_count, 2)
1655+
# Only one outer type version guard for the two method lookups
1656+
# on the same object c (the second lookup reuses type info)
1657+
guard_version_count = uops.count("_GUARD_TYPE_VERSION")
1658+
self.assertEqual(guard_version_count, 1)
1659+
15451660
def test_func_guards_removed_or_reduced(self):
15461661
def testfunc(n):
15471662
for i in range(n):

Python/bytecodes.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3011,6 +3011,7 @@ dummy_func(
30113011

30123012
macro(STORE_ATTR_INSTANCE_VALUE) =
30133013
unused/1 +
3014+
_RECORD_TOS_TYPE +
30143015
_LOCK_OBJECT +
30153016
_GUARD_TYPE_VERSION_LOCKED +
30163017
_GUARD_DORV_NO_DICT +

Python/optimizer_bytecodes.c

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,20 @@ dummy_func(void) {
134134
assert(!PyJitRef_IsUnique(value));
135135
}
136136

137+
op(_GUARD_TYPE_VERSION_LOCKED, (type_version/2, owner -- owner)) {
138+
assert(type_version);
139+
if (sym_matches_type_version(owner, type_version)) {
140+
ADD_OP(_NOP, 0, 0);
141+
} else {
142+
PyTypeObject *probable_type = sym_get_probable_type(owner);
143+
if (probable_type->tp_version_tag == type_version && sym_set_type_version(owner, type_version)) {
144+
// Promote the probable type version to a known one.
145+
PyType_Watch(TYPE_WATCHER_ID, (PyObject *)probable_type);
146+
_Py_BloomFilter_Add(dependencies, probable_type);
147+
}
148+
}
149+
}
150+
137151
op(_STORE_ATTR_INSTANCE_VALUE, (offset/1, value, owner -- o)) {
138152
(void)offset;
139153
(void)value;
@@ -1043,9 +1057,27 @@ dummy_func(void) {
10431057
}
10441058

10451059
op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
1046-
(void)type_version;
10471060
(void)args;
1048-
callable = sym_new_not_null(ctx);
1061+
PyObject *probable_callable = sym_get_probable_value(callable);
1062+
assert(probable_callable != NULL);
1063+
assert(PyType_Check(probable_callable));
1064+
PyTypeObject *tp = (PyTypeObject *)probable_callable;
1065+
if (tp->tp_version_tag == type_version) {
1066+
// If the type version has not changed since we last saw it,
1067+
// then we know this __init__ is definitely the same one as in the cache.
1068+
// We can promote callable to a known constant. This does not need a
1069+
// type watcher, as we do not remove this _CHECK_AND_ALLOCATE_OBJECT guard.
1070+
// TODO: split up _CHECK_AND_ALLOCATE_OBJECT into a check and then an allocate, so we can
1071+
// eliminate the check.
1072+
PyHeapTypeObject *cls = (PyHeapTypeObject *)probable_callable;
1073+
PyObject *init = cls->_spec_cache.init;
1074+
assert(init != NULL);
1075+
assert(PyFunction_Check(init));
1076+
callable = sym_new_const(ctx, init);
1077+
}
1078+
else {
1079+
callable = sym_new_not_null(ctx);
1080+
}
10491081
self_or_null = sym_new_not_null(ctx);
10501082
}
10511083

Python/optimizer_cases.c.h

Lines changed: 27 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/record_functions.c.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)