Skip to content

Commit

Permalink
[3.9] gh-116773: Fix overlapped memory corruption crash (GH-116774) (G…
Browse files Browse the repository at this point in the history
…H-117080)

Co-authored-by: Łukasz Langa <lukasz@langa.pl>
  • Loading branch information
jkriegshauser and ambv committed Mar 27, 2024
1 parent f7c7e72 commit 40d77b9
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 12 deletions.
14 changes: 7 additions & 7 deletions Lib/asyncio/windows_events.py
Expand Up @@ -323,13 +323,13 @@ def run_forever(self):
if self._self_reading_future is not None:
ov = self._self_reading_future._ov
self._self_reading_future.cancel()
# self_reading_future was just cancelled so if it hasn't been
# finished yet, it never will be (it's possible that it has
# already finished and its callback is waiting in the queue,
# where it could still happen if the event loop is restarted).
# Unregister it otherwise IocpProactor.close will wait for it
# forever
if ov is not None:
# self_reading_future always uses IOCP, so even though it's
# been cancelled, we need to make sure that the IOCP message
# is received so that the kernel is not holding on to the
# memory, possibly causing memory corruption later. Only
# unregister it if IO is complete in all respects. Otherwise
# we need another _poll() later to complete the IO.
if ov is not None and not ov.pending:
self._proactor._unregister(ov)
self._self_reading_future = None

Expand Down
50 changes: 45 additions & 5 deletions Lib/test/test_asyncio/test_windows_events.py
Expand Up @@ -36,7 +36,23 @@ def data_received(self, data):
self.trans.close()


class ProactorLoopCtrlC(test_utils.TestCase):
class WindowsEventsTestCase(test_utils.TestCase):
def _unraisablehook(self, unraisable):
# Storing unraisable.object can resurrect an object which is being
# finalized. Storing unraisable.exc_value creates a reference cycle.
self._unraisable = unraisable
print(unraisable)

def setUp(self):
self._prev_unraisablehook = sys.unraisablehook
self._unraisable = None
sys.unraisablehook = self._unraisablehook

def tearDown(self):
sys.unraisablehook = self._prev_unraisablehook
self.assertIsNone(self._unraisable)

class ProactorLoopCtrlC(WindowsEventsTestCase):

def test_ctrl_c(self):

Expand All @@ -58,7 +74,7 @@ def SIGINT_after_delay():
thread.join()


class ProactorMultithreading(test_utils.TestCase):
class ProactorMultithreading(WindowsEventsTestCase):
def test_run_from_nonmain_thread(self):
finished = False

Expand All @@ -79,7 +95,7 @@ def func():
self.assertTrue(finished)


class ProactorTests(test_utils.TestCase):
class ProactorTests(WindowsEventsTestCase):

def setUp(self):
super().setUp()
Expand Down Expand Up @@ -239,8 +255,32 @@ def test_read_self_pipe_restart(self):
self.close_loop(self.loop)
self.assertFalse(self.loop.call_exception_handler.called)


class WinPolicyTests(test_utils.TestCase):
def test_loop_restart(self):
# We're fishing for the "RuntimeError: <_overlapped.Overlapped object at XXX>
# still has pending operation at deallocation, the process may crash" error
stop = threading.Event()
def threadMain():
while not stop.is_set():
self.loop.call_soon_threadsafe(lambda: None)
time.sleep(0.01)
thr = threading.Thread(target=threadMain)

# In 10 60-second runs of this test prior to the fix:
# time in seconds until failure: (none), 15.0, 6.4, (none), 7.6, 8.3, 1.7, 22.2, 23.5, 8.3
# 10 seconds had a 50% failure rate but longer would be more costly
end_time = time.time() + 10 # Run for 10 seconds
self.loop.call_soon(thr.start)
while not self._unraisable: # Stop if we got an unraisable exc
self.loop.stop()
self.loop.run_forever()
if time.time() >= end_time:
break

stop.set()
thr.join()


class WinPolicyTests(WindowsEventsTestCase):

def test_selector_win_policy(self):
async def main():
Expand Down
@@ -0,0 +1 @@
Fix instances of ``<_overlapped.Overlapped object at 0xXXX> still has pending operation at deallocation, the process may crash``.
18 changes: 18 additions & 0 deletions Modules/overlapped.c
Expand Up @@ -624,6 +624,24 @@ Overlapped_dealloc(OverlappedObject *self)
if (!HasOverlappedIoCompleted(&self->overlapped) &&
self->type != TYPE_NOT_STARTED)
{
// NOTE: We should not get here, if we do then something is wrong in
// the IocpProactor or ProactorEventLoop. Since everything uses IOCP if
// the overlapped IO hasn't completed yet then we should not be
// deallocating!
//
// The problem is likely that this OverlappedObject was removed from
// the IocpProactor._cache before it was complete. The _cache holds a
// reference while IO is pending so that it does not get deallocated
// while the kernel has retained the OVERLAPPED structure.
//
// CancelIoEx (likely called from self.cancel()) may have successfully
// completed, but the OVERLAPPED is still in use until either
// HasOverlappedIoCompleted() is true or GetQueuedCompletionStatus has
// returned this OVERLAPPED object.
//
// NOTE: Waiting when IOCP is in use can hang indefinitely, but this
// CancelIoEx is superfluous in that self.cancel() was already called,
// so I've only ever seen this return FALSE with GLE=ERROR_NOT_FOUND
if (Py_CancelIoEx && Py_CancelIoEx(self->handle, &self->overlapped))
wait = TRUE;

Expand Down

0 comments on commit 40d77b9

Please sign in to comment.