Propagate result uniqueness through _BINARY_OP_EXTEND in tier 2

eendebakpt · eendebakpt · commit e8263f97577c · 2026-04-05T22:04:23.000+02:00
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
@@ -499,6 +499,10 @@ typedef struct {
     /* Static type of the result, or NULL if unknown. Used by the tier 2
        optimizer to propagate type information through _BINARY_OP_EXTEND. */
     PyTypeObject *result_type;
+    /* Nonzero iff `action` always returns a freshly allocated object (not
+       aliased to either operand). Used by the tier 2 optimizer to enable
+       inplace follow-up ops. */
+    int result_unique;
 } _PyBinaryOpSpecializationDescr;
 
 /* Comparison bit masks. */
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
@@ -3813,6 +3813,29 @@ def f(n):
         self.assertIn("_UNPACK_SEQUENCE_TWO_TUPLE", uops)
         self.assertNotIn("_GUARD_TOS_TUPLE", uops)
 
+    def test_binary_op_extend_float_result_enables_inplace_multiply(self):
+        # (2 + x) * y with x, y floats: `2 + x` goes through _BINARY_OP_EXTEND
+        # (int + float). The result_type/result_unique info should let the
+        # subsequent float multiply use the inplace variant.
+        def testfunc(n):
+            x = 3.5
+            y = 2.0
+            res = 0.0
+            for _ in range(n):
+                res = (2 + x) * y
+            return res
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, 11.0)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        self.assertIn("_BINARY_OP_EXTEND", uops)
+        self.assertIn("_BINARY_OP_MULTIPLY_FLOAT_INPLACE", uops)
+        self.assertNotIn("_BINARY_OP_MULTIPLY_FLOAT", uops)
+        # NOS guard on the multiply is eliminated because _BINARY_OP_EXTEND
+        # propagates PyFloat_Type.
+        self.assertNotIn("_GUARD_NOS_FLOAT", uops)
+
     def test_binary_op_extend_list_concat_type_propagation(self):
         # list + list is specialized via BINARY_OP_EXTEND. The tier 2 optimizer
         # should learn that the result is a list and eliminate subsequent
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst
@@ -1 +1,3 @@
-Specialize ``BINARY_OP`` for concatenation of lists and tuples.
+Specialize ``BINARY_OP`` for concatenation of lists and tuples, and propagate
+the result type and uniqueness through ``_BINARY_OP_EXTEND`` in the tier 2
+optimizer so follow-up type guards can be removed and in-place ops used.
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
@@ -413,6 +413,9 @@ dummy_func(void) {
         _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr;
         if (d != NULL && d->result_type != NULL) {
             res = sym_new_type(ctx, d->result_type);
+            if (d->result_unique) {
+                res = PyJitRef_MakeUnique(res);
+            }
         }
         else {
             res = sym_new_not_null(ctx);
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
diff --git a/Python/specialize.c b/Python/specialize.c
@@ -2224,28 +2224,31 @@ LONG_FLOAT_ACTION(compactlong_float_true_div, /)
 
 static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = {
     /* long-long arithmetic */
-    {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type},
-    {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type},
-    {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type},
-    {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type},
-    {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type},
-    {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type},
+    {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1},
+    {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1},
+    {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1},
+    {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1},
+    {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1},
+    {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1},
 
     /* float-long arithemetic */
-    {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type},
-    {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type},
-    {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type},
-    {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type},
-
-    /* float-float arithmetic */
-    {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type},
-    {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type},
-    {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type},
-    {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type},
-
-    /* list-list and tuple-tuple concatenation */
-    {NB_ADD, list_list_guard, list_list_add, &PyList_Type},
-    {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type},
+    {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type, 1},
+    {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type, 1},
+    {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type, 1},
+    {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type, 1},
+
+    /* long-float arithmetic */
+    {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type, 1},
+    {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type, 1},
+    {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type, 1},
+    {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type, 1},
+
+    /* list-list concatenation: _PyList_Concat always allocates a new list */
+    {NB_ADD, list_list_guard, list_list_add, &PyList_Type, 1},
+    /* tuple-tuple concatenation: _PyTuple_Concat has a zero-length shortcut
+       that can return one of the operands, so the result is not guaranteed
+       to be a freshly allocated object. */
+    {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type, 0},
 };
 
 static int