diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index 8a3819dabe44ce..6d712471095ed7 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -1953,6 +1953,49 @@ def testfunc(n):
         self.assertNotIn("_GUARD_NOS_INT", uops)
         self.assertNotIn("_GUARD_TOS_INT", uops)
 
+    def test_call_len_known_length_small_int(self):
+        def testfunc(n):
+            x = 0
+            for _ in range(n):
+                t = (1, 2, 3, 4, 5)
+                if len(t) == 5:
+                    x += 1
+            return x
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        # When the length is < _PY_NSMALLPOSINTS, the len() call is replaced
+        # with just an inline load.
+        self.assertNotIn("_CALL_LEN", uops)
+        self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops)
+        self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops)
+        self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops)
+
+    def test_call_len_known_length(self):
+        def testfunc(n):
+            class C:
+                t = tuple(range(300))
+
+            x = 0
+            for _ in range(n):
+                if len(C.t) == 300:  # comparison + guard removed
+                    x += 1
+            return x
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        # When the length is >= _PY_NSMALLPOSINTS, we cannot replace
+        # the len() call with an inline load, but knowing the exact
+        # length allows us to optimize more code, such as conditionals
+        # in this case
+        self.assertIn("_CALL_LEN", uops)
+        self.assertNotIn("_COMPARE_OP_INT", uops)
+        self.assertNotIn("_GUARD_IS_TRUE_POP", uops)
+
     def test_get_len_with_const_tuple(self):
         def testfunc(n):
             x = 0.0
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-08-14-24-29.gh-issue-131798.qfw91T.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-08-14-24-29.gh-issue-131798.qfw91T.rst
new file mode 100644
index 00000000000000..7965169d46e820
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-08-14-24-29.gh-issue-131798.qfw91T.rst
@@ -0,0 +1 @@
+Optimize _CALL_LEN in the JIT when the length is known. Patch by Tomas Roun
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index fbf4dfd3db629c..9abaad0466d5a1 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -1190,8 +1190,21 @@ dummy_func(void) {
         sym_set_const(callable, (PyObject *)&PyUnicode_Type);
     }
 
-    op(_CALL_LEN, (unused, unused, unused -- res)) {
+    op(_CALL_LEN, (callable, null, arg -- res)) {
         res = sym_new_type(ctx, &PyLong_Type);
+        int tuple_length = sym_tuple_length(arg);
+        if (tuple_length >= 0) {
+            PyObject *temp = PyLong_FromLong(tuple_length);
+            if (temp == NULL) {
+                goto error;
+            }
+            if (_Py_IsImmortal(temp)) {
+                REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW,
+                           0, (uintptr_t)temp);
+            }
+            res = sym_new_const(ctx, temp);
+            Py_DECREF(temp);
+        }
     }
 
     op(_GET_LEN, (obj -- obj, len)) {
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index b42f47c75eaf50..c2048caf38bb70 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -2215,8 +2215,27 @@
         }
 
         case _CALL_LEN: {
+            JitOptSymbol *arg;
             JitOptSymbol *res;
+            arg = stack_pointer[-1];
             res = sym_new_type(ctx, &PyLong_Type);
+            int tuple_length = sym_tuple_length(arg);
+            if (tuple_length >= 0) {
+                PyObject *temp = PyLong_FromLong(tuple_length);
+                if (temp == NULL) {
+                    goto error;
+                }
+                if (_Py_IsImmortal(temp)) {
+                    REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW,
+                               0, (uintptr_t)temp);
+                }
+                res = sym_new_const(ctx, temp);
+                stack_pointer[-3] = res;
+                stack_pointer += -2;
+                assert(WITHIN_STACK_BOUNDS());
+                Py_DECREF(temp);
+                stack_pointer += 2;
+            }
             stack_pointer[-3] = res;
             stack_pointer += -2;
             assert(WITHIN_STACK_BOUNDS());