diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index b4cbfb6d774080..85c43055d0dcec 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -262,9 +262,11 @@ class Cyclic(tuple): # finalizer. def __del__(self): - # 5. Create a weakref to `func` now. If we had created - # it earlier, it would have been cleared by the - # garbage collector before calling the finalizers. + # 5. Create a weakref to `func` now. In previous + # versions of Python, this would avoid having it + # cleared by the garbage collector before calling + # the finalizers. Now, weakrefs get cleared after + # calling finalizers. self[1].ref = weakref.ref(self[0]) # 6. Drop the global reference to `latefin`. The only @@ -293,14 +295,18 @@ def func(): # which will find `cyc` and `func` as garbage. gc.collect() - # 9. Previously, this would crash because `func_qualname` - # had been NULL-ed out by func_clear(). + # 9. Previously, this would crash because the weakref + # created in the finalizer revealed the function after + # `tp_clear` was called and `func_qualname` + # had been NULL-ed out by func_clear(). Now, we clear + # weakrefs to unreachable objects before calling `tp_clear` + # but after calling finalizers. print(f"{func=}") """ - # We're mostly just checking that this doesn't crash. rc, stdout, stderr = assert_python_ok("-c", code) self.assertEqual(rc, 0) - self.assertRegex(stdout, rb"""\A\s*func=\s*\z""") + # The `func` global is None because the weakref was cleared. + self.assertRegex(stdout, rb"""\A\s*func=None""") self.assertFalse(stderr) @refcount_test diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst new file mode 100644 index 00000000000000..af1c8cac3e547a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst @@ -0,0 +1,16 @@ +Fix two bugs related to the interaction of weakrefs and the garbage +collector. The weakrefs in the ``tp_subclasses`` dictionary are needed in +order to correctly invalidate type caches (for example, by calling +``PyType_Modified()``). Clearing weakrefs before calling finalizers causes +the caches to not be correctly invalidated. That can cause crashes since the +caches can refer to invalid objects. This is fixed by deferring the +clearing of weakrefs to classes and without callbacks until after finalizers +are executed. + +The second bug is caused by weakrefs created while running finalizers. Those +weakrefs can be outside the set of unreachable garbage and therefore survive +the ``delete_garbage()`` phase (where ``tp_clear()`` is called on objects). +Those weakrefs can expose to Python-level code objects that have had +``tp_clear()`` called on them. See GH-91636 as an example of this kind of +bug. This is fixes be clearing all weakrefs to unreachable objects after +finalizers have been executed. diff --git a/Python/gc.c b/Python/gc.c index 02135a3fb442d6..4471cc3f220adf 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -858,58 +858,31 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers) } } -/* Clear all weakrefs to unreachable objects, and if such a weakref has a - * callback, invoke it if necessary. Note that it's possible for such - * weakrefs to be outside the unreachable set -- indeed, those are precisely - * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for - * overview & some details. Some weakrefs with callbacks may be reclaimed - * directly by this routine; the number reclaimed is the return value. Other - * weakrefs with callbacks may be moved into the `old` generation. Objects - * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in - * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns, - * no object in `unreachable` is weakly referenced anymore. +/* Handle weakref callbacks. Note that it's possible for such weakrefs to be + * outside the unreachable set -- indeed, those are precisely the weakrefs + * whose callbacks must be invoked. See gc_weakref.txt for overview & some + * details. */ static int -handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) +handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old) { PyGC_Head *gc; - PyObject *op; /* generally FROM_GC(gc) */ - PyWeakReference *wr; /* generally a cast of op */ PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */ PyGC_Head *next; int num_freed = 0; gc_list_init(&wrcb_to_call); - /* Clear all weakrefs to the objects in unreachable. If such a weakref - * also has a callback, move it into `wrcb_to_call` if the callback - * needs to be invoked. Note that we cannot invoke any callbacks until - * all weakrefs to unreachable objects are cleared, lest the callback - * resurrect an unreachable object via a still-active weakref. We - * make another pass over wrcb_to_call, invoking callbacks, after this - * pass completes. + /* Find all weakrefs with callbacks and move into `wrcb_to_call` if the + * callback needs to be invoked. We make another pass over wrcb_to_call, + * invoking callbacks, after this pass completes. */ for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { PyWeakReference **wrlist; - op = FROM_GC(gc); + PyObject *op = FROM_GC(gc); next = GC_NEXT(gc); - if (PyWeakref_Check(op)) { - /* A weakref inside the unreachable set must be cleared. If we - * allow its callback to execute inside delete_garbage(), it - * could expose objects that have tp_clear already called on - * them. Or, it could resurrect unreachable objects. One way - * this can happen is if some container objects do not implement - * tp_traverse. Then, wr_object can be outside the unreachable - * set but can be deallocated as a result of breaking the - * reference cycle. If we don't clear the weakref, the callback - * will run and potentially cause a crash. See bpo-38006 for - * one example. - */ - _PyWeakref_ClearRef((PyWeakReference *)op); - } - if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { continue; } @@ -921,20 +894,47 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) */ wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); - /* `op` may have some weakrefs. March over the list, clear - * all the weakrefs, and move the weakrefs with callbacks - * that must be called into wrcb_to_call. + /* `op` may have some weakrefs. March over the list and move the + * weakrefs with callbacks that must be called into wrcb_to_call. */ - for (wr = *wrlist; wr != NULL; wr = *wrlist) { - PyGC_Head *wrasgc; /* AS_GC(wr) */ + PyWeakReference *next_wr; + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { + // Get the next list element to get iterator progress if we omit + // clearing of the weakref (because _PyWeakref_ClearRef changes + // next pointer in the wrlist). + next_wr = wr->wr_next; + + // Weakrefs with callbacks always need to be cleared before + // executing the callback. Sometimes the callback will call + // the ref object, to check if it's actually a dead reference + // (KeyedRef does this, for example). We want to indicate that it + // is dead, even though it is possible a finalizer might resurrect + // it. Clearing also prevents the callback from being executing + // more than once. + // + // Since Python 2.3, all weakrefs to cyclic garbage have + // been cleared *before* calling finalizers. However, since + // tp_subclasses started being necessary to invalidate caches + // (e.g. by PyType_Modified()), that clearing has created a bug. + // If the weakref to the subclass is cleared before a finalizer + // is called, the cache may not be correctly invalidated. That + // can lead to segfaults since the caches can refer to deallocated + // objects. Delaying the clear of weakrefs until *after* + // finalizers have been called fixes that bug. However, that + // deferral could introduce other problems if some finalizer + // code expects that the weakrefs will be cleared first. So, we + // have the PyType_Check() test below to only defer the clear of + // weakrefs to types. That solves the issue for tp_subclasses. + // In a future version of Python, we should likely defer the + // weakref clear for all objects, not just types. + if (wr->wr_callback != NULL || !PyType_Check(wr->wr_object)) { + // _PyWeakref_ClearRef clears the weakref but leaves the + // callback pointer intact. Obscure: it also changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } - /* _PyWeakref_ClearRef clears the weakref but leaves - * the callback pointer intact. Obscure: it also - * changes *wrlist. - */ - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); - _PyWeakref_ClearRef(wr); - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); if (wr->wr_callback == NULL) { /* no callback */ continue; @@ -955,10 +955,10 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) * outside the current generation, CT may be reachable from the * callback. Then the callback could resurrect insane objects. * - * Since the callback is never needed and may be unsafe in this case, - * wr is simply left in the unreachable set. Note that because we - * already called _PyWeakref_ClearRef(wr), its callback will never - * trigger. + * Since the callback is never needed and may be unsafe in this + * case, wr is simply left in the unreachable set. Note that + * clear_weakrefs() will ensure its callback will not trigger + * inside delete_garbage(). * * OTOH, if wr isn't part of CT, we should invoke the callback: the * weakref outlived the trash. Note that since wr isn't CT in this @@ -969,8 +969,6 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) * is moved to wrcb_to_call in this case. */ if (gc_is_collecting(AS_GC((PyObject *)wr))) { - /* it should already have been cleared above */ - _PyObject_ASSERT((PyObject*)wr, wr->wr_object == Py_None); continue; } @@ -980,7 +978,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) Py_INCREF(wr); /* Move wr to wrcb_to_call, for the next pass. */ - wrasgc = AS_GC((PyObject *)wr); + PyGC_Head *wrasgc = AS_GC((PyObject *)wr); // wrasgc is reachable, but next isn't, so they can't be the same _PyObject_ASSERT((PyObject *)wr, wrasgc != next); gc_list_move(wrasgc, &wrcb_to_call); @@ -996,9 +994,9 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) PyObject *callback; gc = (PyGC_Head*)wrcb_to_call._gc_next; - op = FROM_GC(gc); + PyObject *op = FROM_GC(gc); _PyObject_ASSERT(op, PyWeakref_Check(op)); - wr = (PyWeakReference *)op; + PyWeakReference *wr = (PyWeakReference *)op; callback = wr->wr_callback; _PyObject_ASSERT(op, callback != NULL); @@ -1037,6 +1035,62 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) return num_freed; } +/* Clear all weakrefs to unreachable objects. When this returns, no object in + * `unreachable` is weakly referenced anymore. + */ +static void +clear_weakrefs(PyGC_Head *unreachable) +{ + PyGC_Head *gc; + PyGC_Head *next; + + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { + PyWeakReference **wrlist; + + PyObject *op = FROM_GC(gc); + next = GC_NEXT(gc); + + if (PyWeakref_Check(op)) { + /* A weakref inside the unreachable set must be cleared. If we + * allow its callback to execute inside delete_garbage(), it + * could expose objects that have tp_clear already called on + * them. Or, it could resurrect unreachable objects. One way + * this can happen is if some container objects do not implement + * tp_traverse. Then, wr_object can be outside the unreachable + * set but can be deallocated as a result of breaking the + * reference cycle. If we don't clear the weakref, the callback + * will run and potentially cause a crash. See bpo-38006 for + * one example. + */ + _PyWeakref_ClearRef((PyWeakReference *)op); + } + + if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + /* It supports weakrefs. Does it have any? + * + * This is never triggered for static types so we can avoid the + * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + */ + wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + /* `op` may have some weakrefs. March over the list, clear + * all the weakrefs. + */ + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { + /* _PyWeakref_ClearRef clears the weakref but leaves + * the callback pointer intact. Obscure: it also + * changes *wrlist. + */ + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } + } +} + static void debug_cycle(const char *msg, PyObject *op) { @@ -1736,8 +1790,8 @@ gc_collect_region(PyThreadState *tstate, } } - /* Clear weakrefs and invoke callbacks as necessary. */ - stats->collected += handle_weakrefs(&unreachable, to); + /* Invoke weakref callbacks as necessary. */ + stats->collected += handle_weakref_callbacks(&unreachable, to); gc_list_validate_space(to, gcstate->visited_space); validate_list(to, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); @@ -1751,6 +1805,22 @@ gc_collect_region(PyThreadState *tstate, gc_list_init(&final_unreachable); handle_resurrected_objects(&unreachable, &final_unreachable, to); + /* Clear weakrefs to objects in the unreachable set. No Python-level + * code must be allowed to access those unreachable objects. During + * delete_garbage(), finalizers outside the unreachable set might run + * and if those weakrefs were not cleared, that could reveal unreachable + * objects. + * + * We used to clear weakrefs earlier, before calling finalizers. That + * causes at least two problems. First, the finalizers could create + * new weakrefs, that refer to unreachable objects. Those would not be + * cleared and could cause the problem described above (see GH-91636 as + * an example). Second, we need the weakrefs in the tp_subclasses to + * *not* be cleared so that caches based on the type version are correctly + * invalidated (see GH-135552 as a bug caused by this). + */ + clear_weakrefs(&final_unreachable); + /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 5aaa68c5b51f95..0a2d75836a217b 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1491,8 +1491,82 @@ move_legacy_finalizer_reachable(struct collection_state *state) return 0; } -// Clear all weakrefs to unreachable objects. Weakrefs with callbacks are -// enqueued in `wrcb_to_call`, but not invoked yet. +// Weakrefs with callbacks are enqueued in `wrcb_to_call`, but not invoked +// yet. +static void +find_weakref_callbacks(struct collection_state *state) +{ + PyObject *op; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + // NOTE: This is never triggered for static types so we can avoid the + // (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + // `op` may have some weakrefs. March over the list, clear + // all the weakrefs, and enqueue the weakrefs with callbacks + // that must be called into wrcb_to_call. + PyWeakReference *next_wr; + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { + // Get the next list element to get iterator progress if we omit + // clearing of the weakref (because _PyWeakref_ClearRef changes + // next pointer in the wrlist). + next_wr = wr->wr_next; + + // Weakrefs with callbacks always need to be cleared before + // executing the callback. Sometimes the callback will call + // the ref object, to check if it's actually a dead reference + // (KeyedRef does this, for example). We want to indicate that it + // is dead, even though it is possible a finalizer might resurrect + // it. Clearing also prevents the callback from being executing + // more than once. + // + // Since Python 2.3, all weakrefs to cyclic garbage have + // been cleared *before* calling finalizers. However, since + // tp_subclasses started being necessary to invalidate caches + // (e.g. by PyType_Modified()), that clearing has created a bug. + // If the weakref to the subclass is cleared before a finalizer + // is called, the cache may not be correctly invalidated. That + // can lead to segfaults since the caches can refer to deallocated + // objects. Delaying the clear of weakrefs until *after* + // finalizers have been called fixes that bug. However, that + // deferral could introduce other problems if some finalizer + // code expects that the weakrefs will be cleared first. So, we + // have the PyType_Check() test below to only defer the clear of + // weakrefs to types. That solves the issue for tp_subclasses. + // In a future version of Python, we should likely defer the + // weakref clear for all objects, not just types. + if (wr->wr_callback != NULL || !PyType_Check(wr->wr_object)) { + // _PyWeakref_ClearRef clears the weakref but leaves the + // callback pointer intact. Obscure: it also changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } + + // We do not invoke callbacks for weakrefs that are themselves + // unreachable. This is partly for historical reasons: weakrefs + // predate safe object finalization, and a weakref that is itself + // unreachable may have a callback that resurrects other + // unreachable objects. + if (wr->wr_callback == NULL || gc_is_unreachable((PyObject *)wr)) { + continue; + } + + // Create a new reference so that wr can't go away before we can + // process it again. + merge_refcount((PyObject *)wr, 1); + + // Enqueue weakref to be called later. + worklist_push(&state->wrcb_to_call, (PyObject *)wr); + } + } +} + +// Clear all weakrefs to unreachable objects. static void clear_weakrefs(struct collection_state *state) { @@ -1525,22 +1599,6 @@ clear_weakrefs(struct collection_state *state) _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); _PyWeakref_ClearRef(wr); _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); - - // We do not invoke callbacks for weakrefs that are themselves - // unreachable. This is partly for historical reasons: weakrefs - // predate safe object finalization, and a weakref that is itself - // unreachable may have a callback that resurrects other - // unreachable objects. - if (wr->wr_callback == NULL || gc_is_unreachable((PyObject *)wr)) { - continue; - } - - // Create a new reference so that wr can't go away before we can - // process it again. - merge_refcount((PyObject *)wr, 1); - - // Enqueue weakref to be called later. - worklist_push(&state->wrcb_to_call, (PyObject *)wr); } } } @@ -2210,8 +2268,8 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // Record the number of live GC objects interp->gc.long_lived_total = state->long_lived_total; - // Clear weakrefs and enqueue callbacks (but do not call them). - clear_weakrefs(state); + // Find weakref callbacks we will honor (but do not call them). + find_weakref_callbacks(state); _PyEval_StartTheWorld(interp); // Deallocate any object from the refcount merge step @@ -2222,11 +2280,28 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, call_weakref_callbacks(state); finalize_garbage(state); - // Handle any objects that may have resurrected after the finalization. _PyEval_StopTheWorld(interp); + // Handle any objects that may have resurrected after the finalization. err = handle_resurrected_objects(state); // Clear free lists in all threads _PyGC_ClearAllFreeLists(interp); + if (err == 0) { + // Clear weakrefs to objects in the unreachable set. No Python-level + // code must be allowed to access those unreachable objects. During + // delete_garbage(), finalizers outside the unreachable set might + // run and if those weakrefs were not cleared, that could reveal + // unreachable objects. + // + // We used to clear weakrefs earlier, before calling finalizers. + // That causes at least two problems. First, the finalizers could + // create new weakrefs, that refer to unreachable objects. Those + // would not be cleared and could cause the problem described above + // (see GH-91636 as an example). Second, we need the weakrefs in the + // tp_subclasses to *not* be cleared so that caches based on the type + // version are correctly invalidated (see GH-135552 as a bug caused by + // this). + clear_weakrefs(state); + } _PyEval_StartTheWorld(interp); if (err < 0) {