Skip to content

PYTHON-5413 Handle flaky tests #2395

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 36 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
82a6392
PYTHON-5413 Handle flaky tests
blink1073 Jun 17, 2025
c30c898
clean up flaky test handling
blink1073 Jun 17, 2025
a8a1dd2
debug
blink1073 Jun 17, 2025
c6ce564
fix usage
blink1073 Jun 17, 2025
45efac9
increase retries for csot
blink1073 Jun 17, 2025
b83f9ba
try again
blink1073 Jun 17, 2025
7a316f7
try 3 times for csot
blink1073 Jun 17, 2025
6184475
use tailored list of skips
blink1073 Jun 18, 2025
c36dd26
fix skip handling
blink1073 Jun 18, 2025
85bc62d
fix skip
blink1073 Jun 18, 2025
35a3300
fix skips
blink1073 Jun 18, 2025
3ba3720
fix skips
blink1073 Jun 18, 2025
08e7324
handle more flakiness
blink1073 Jun 18, 2025
3cace1a
fix flaky test setup
blink1073 Jun 18, 2025
b2ddcc0
fix flaky test setup
blink1073 Jun 18, 2025
ae4e571
fix flaky test
blink1073 Jun 18, 2025
54fe7c2
fix flaky test
blink1073 Jun 18, 2025
ad91418
skip test on pypy
blink1073 Jun 18, 2025
da6c68a
fix skip
blink1073 Jun 18, 2025
89de6b2
fix skip
blink1073 Jun 18, 2025
5f1bbc6
add another skip
blink1073 Jun 18, 2025
22b8373
fix teardown
blink1073 Jun 18, 2025
78bcd99
add more retries
blink1073 Jun 18, 2025
ebcf174
add comment
blink1073 Jun 18, 2025
11dfd73
Skip test on Windows
blink1073 Jun 18, 2025
29e55c5
handle PYTHON-3689
blink1073 Jun 19, 2025
15ae713
fix import
blink1073 Jun 19, 2025
9d8759e
skip more tests
blink1073 Jun 20, 2025
9065639
more cleanup
blink1073 Jun 20, 2025
b40dc2c
skip more tests
blink1073 Jun 20, 2025
7e29849
fix skip
blink1073 Jun 20, 2025
e7da55b
cleanup
blink1073 Jun 20, 2025
5430ee2
add a required reason parameter
blink1073 Jun 20, 2025
a1965e5
fix test
blink1073 Jun 20, 2025
3feed33
Merge branch 'master' of github.com:mongodb/mongo-python-driver into …
blink1073 Jun 20, 2025
36d1aa7
fix skip condition
blink1073 Jun 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .evergreen/scripts/setup_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,6 @@ def handle_test_env() -> None:
write_env("PIP_PREFER_BINARY") # Prefer binary dists by default.
write_env("UV_FROZEN") # Do not modify lock files.

# Skip CSOT tests on non-linux platforms.
if PLATFORM != "linux":
write_env("SKIP_CSOT_TESTS")

# Set an environment variable for the test name and sub test name.
write_env(f"TEST_{test_name.upper()}")
write_env("TEST_NAME", test_name)
Expand Down
7 changes: 7 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,13 @@ If you are running one of the `no-responder` tests, omit the `run-server` step.
- Regenerate the test variants and tasks using `pre-commit run --all-files generate-config`.
- Make sure to add instructions for running the test suite to `CONTRIBUTING.md`.

## Handling flaky tests

We have a custom `flaky` decorator in [test/asynchronous/utils.py](test/asynchronous/utils.py) that can be used for
tests that are `flaky`. By default, the decorator only applies when not running on CPython on Linux, since other
runtimes tend to have more variation. When using the `flaky` decorator, open a corresponding ticket and
use the ticket number as the "reason" parameter to the decorator, e.g. `@flaky(reason="PYTHON-1234")`.

## Specification Tests

The MongoDB [specifications repository](https://github.com/mongodb/specifications)
Expand Down
18 changes: 12 additions & 6 deletions test/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import warnings
from asyncio import iscoroutinefunction

from pymongo.errors import AutoReconnect
from pymongo.synchronous.uri_parser import parse_uri

try:
Expand Down Expand Up @@ -1219,12 +1220,17 @@ def teardown():
c = client_context.client
if c:
if not client_context.is_data_lake:
c.drop_database("pymongo-pooling-tests")
c.drop_database("pymongo_test")
c.drop_database("pymongo_test1")
c.drop_database("pymongo_test2")
c.drop_database("pymongo_test_mike")
c.drop_database("pymongo_test_bernie")
try:
c.drop_database("pymongo-pooling-tests")
c.drop_database("pymongo_test")
c.drop_database("pymongo_test1")
c.drop_database("pymongo_test2")
c.drop_database("pymongo_test_mike")
c.drop_database("pymongo_test_bernie")
except AutoReconnect:
# PYTHON-4982
if sys.implementation.name.lower() != "pypy":
raise
c.close()
print_running_clients()

Expand Down
18 changes: 12 additions & 6 deletions test/asynchronous/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from asyncio import iscoroutinefunction

from pymongo.asynchronous.uri_parser import parse_uri
from pymongo.errors import AutoReconnect

try:
import ipaddress
Expand Down Expand Up @@ -1235,12 +1236,17 @@ async def async_teardown():
c = async_client_context.client
if c:
if not async_client_context.is_data_lake:
await c.drop_database("pymongo-pooling-tests")
await c.drop_database("pymongo_test")
await c.drop_database("pymongo_test1")
await c.drop_database("pymongo_test2")
await c.drop_database("pymongo_test_mike")
await c.drop_database("pymongo_test_bernie")
try:
await c.drop_database("pymongo-pooling-tests")
await c.drop_database("pymongo_test")
await c.drop_database("pymongo_test1")
await c.drop_database("pymongo_test2")
await c.drop_database("pymongo_test_mike")
await c.drop_database("pymongo_test_bernie")
except AutoReconnect:
# PYTHON-4982
if sys.implementation.name.lower() != "pypy":
raise
await c.close()
print_running_clients()

Expand Down
6 changes: 4 additions & 2 deletions test/asynchronous/test_client_bulk_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
async_client_context,
unittest,
)
from test.asynchronous.utils import flaky
from test.utils_shared import (
OvertCommandListener,
)
Expand Down Expand Up @@ -619,16 +620,17 @@ async def test_15_unacknowledged_write_across_batches(self):
# https://github.com/mongodb/specifications/blob/master/source/client-side-operations-timeout/tests/README.md#11-multi-batch-bulkwrites
class TestClientBulkWriteCSOT(AsyncIntegrationTest):
async def asyncSetUp(self):
if os.environ.get("SKIP_CSOT_TESTS", ""):
raise unittest.SkipTest("SKIP_CSOT_TESTS is set, skipping...")
await super().asyncSetUp()
self.max_write_batch_size = await async_client_context.max_write_batch_size
self.max_bson_object_size = await async_client_context.max_bson_size
self.max_message_size_bytes = await async_client_context.max_message_size_bytes

@async_client_context.require_version_min(8, 0, 0, -24)
@async_client_context.require_failCommand_fail_point
@flaky(reason="PYTHON-5290", max_runs=3, affects_cpython_linux=True)
async def test_timeout_in_multi_batch_bulk_write(self):
if sys.platform != "linux":
self.skipTest("PYTHON-3522 CSOT test runs too slow on Windows and MacOS")
_OVERHEAD = 500

internal_client = await self.async_rs_or_single_client(timeoutMS=None)
Expand Down
7 changes: 3 additions & 4 deletions test/asynchronous/test_csot.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from test.asynchronous import AsyncIntegrationTest, async_client_context, unittest
from test.asynchronous.unified_format import generate_test_classes
from test.asynchronous.utils import flaky

import pymongo
from pymongo import _csot
Expand All @@ -43,9 +44,8 @@
class TestCSOT(AsyncIntegrationTest):
RUN_ON_LOAD_BALANCER = True

@flaky(reason="PYTHON-3522")
async def test_timeout_nested(self):
if os.environ.get("SKIP_CSOT_TESTS", ""):
raise unittest.SkipTest("SKIP_CSOT_TESTS is set, skipping...")
coll = self.db.coll
self.assertEqual(_csot.get_timeout(), None)
self.assertEqual(_csot.get_deadline(), float("inf"))
Expand Down Expand Up @@ -82,9 +82,8 @@ async def test_timeout_nested(self):
self.assertEqual(_csot.get_rtt(), 0.0)

@async_client_context.require_change_streams
@flaky(reason="PYTHON-3522")
async def test_change_stream_can_resume_after_timeouts(self):
if os.environ.get("SKIP_CSOT_TESTS", ""):
raise unittest.SkipTest("SKIP_CSOT_TESTS is set, skipping...")
coll = self.db.test
await coll.insert_one({})
async with await coll.watch() as stream:
Expand Down
7 changes: 3 additions & 4 deletions test/asynchronous/test_cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
sys.path[0:0] = [""]

from test.asynchronous import AsyncIntegrationTest, async_client_context, unittest
from test.asynchronous.utils import flaky
from test.utils_shared import (
AllowListEventListener,
EventListener,
Expand Down Expand Up @@ -1406,9 +1407,8 @@ async def test_to_list_length(self):
docs = await c.to_list(3)
self.assertEqual(len(docs), 2)

@flaky(reason="PYTHON-3522")
async def test_to_list_csot_applied(self):
if os.environ.get("SKIP_CSOT_TESTS", ""):
raise unittest.SkipTest("SKIP_CSOT_TESTS is set, skipping...")
client = await self.async_single_client(timeoutMS=500, w=1)
coll = client.pymongo.test
# Initialize the client with a larger timeout to help make test less flakey
Expand Down Expand Up @@ -1449,9 +1449,8 @@ async def test_command_cursor_to_list_length(self):
self.assertEqual(len(await result.to_list(1)), 1)

@async_client_context.require_failCommand_blockConnection
@flaky(reason="PYTHON-3522")
async def test_command_cursor_to_list_csot_applied(self):
if os.environ.get("SKIP_CSOT_TESTS", ""):
raise unittest.SkipTest("SKIP_CSOT_TESTS is set, skipping...")
client = await self.async_single_client(timeoutMS=500, w=1)
coll = client.pymongo.test
# Initialize the client with a larger timeout to help make test less flakey
Expand Down
6 changes: 6 additions & 0 deletions test/asynchronous/test_encryption.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import warnings
from test.asynchronous import AsyncIntegrationTest, AsyncPyMongoTestCase, async_client_context
from test.asynchronous.test_bulk import AsyncBulkTestBase
from test.asynchronous.utils import flaky
from test.asynchronous.utils_spec_runner import AsyncSpecRunner, AsyncSpecTestCreator
from threading import Thread
from typing import Any, Dict, Mapping, Optional
Expand Down Expand Up @@ -3247,6 +3248,7 @@ async def test_kms_retry(self):
class TestAutomaticDecryptionKeys(AsyncEncryptionIntegrationTest):
@async_client_context.require_no_standalone
@async_client_context.require_version_min(7, 0, -1)
@flaky(reason="PYTHON-4982")
async def asyncSetUp(self):
await super().asyncSetUp()
self.key1_document = json_data("etc", "data", "keys", "key1-document.json")
Expand Down Expand Up @@ -3489,6 +3491,8 @@ async def test_implicit_session_ignored_when_unsupported(self):

self.assertNotIn("lsid", self.listener.started_events[1].command)

await self.mongocryptd_client.close()

async def test_explicit_session_errors_when_unsupported(self):
self.listener.reset()
async with self.mongocryptd_client.start_session() as s:
Expand All @@ -3501,6 +3505,8 @@ async def test_explicit_session_errors_when_unsupported(self):
):
await self.mongocryptd_client.db.test.insert_one({"x": 1}, session=s)

await self.mongocryptd_client.close()


if __name__ == "__main__":
unittest.main()
3 changes: 2 additions & 1 deletion test/asynchronous/test_retryable_writes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import pprint
import sys
import threading
from test.asynchronous.utils import async_set_fail_point
from test.asynchronous.utils import async_set_fail_point, flaky

sys.path[0:0] = [""]

Expand Down Expand Up @@ -466,6 +466,7 @@ class TestPoolPausedError(AsyncIntegrationTest):
@async_client_context.require_failCommand_blockConnection
@async_client_context.require_retryable_writes
@client_knobs(heartbeat_frequency=0.05, min_heartbeat_interval=0.05)
@flaky(reason="PYTHON-5291")
async def test_pool_paused_error_is_retryable(self):
cmap_listener = CMAPListener()
cmd_listener = OvertCommandListener()
Expand Down
2 changes: 2 additions & 0 deletions test/asynchronous/test_server_selection_in_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from pathlib import Path
from test.asynchronous import AsyncIntegrationTest, async_client_context, unittest
from test.asynchronous.helpers import ConcurrentRunner
from test.asynchronous.utils import flaky
from test.asynchronous.utils_selection_tests import create_topology
from test.asynchronous.utils_spec_runner import AsyncSpecTestCreator
from test.utils_shared import (
Expand Down Expand Up @@ -137,6 +138,7 @@ async def frequencies(self, client, listener, n_finds=10):

@async_client_context.require_failCommand_appName
@async_client_context.require_multiple_mongoses
@flaky(reason="PYTHON-3689")
async def test_load_balancing(self):
listener = OvertCommandListener()
cmap_listener = CMAPListener()
Expand Down
2 changes: 2 additions & 0 deletions test/asynchronous/test_srv_polling.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import asyncio
import sys
import time
from test.asynchronous.utils import flaky
from test.utils_shared import FunctionCallRecorder
from typing import Any

Expand Down Expand Up @@ -254,6 +255,7 @@ def final_callback():
# Nodelist should reflect new valid DNS resolver response.
await self.assert_nodelist_change(response_final, client)

@flaky(reason="PYTHON-5315")
async def test_recover_from_initially_empty_seedlist(self):
def empty_seedlist():
return []
Expand Down
80 changes: 41 additions & 39 deletions test/asynchronous/unified_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,11 @@
client_knobs,
unittest,
)
from test.asynchronous.utils import async_get_pool
from test.asynchronous.utils import async_get_pool, flaky
from test.asynchronous.utils_spec_runner import SpecRunnerTask
from test.unified_format_shared import (
KMS_TLS_OPTS,
PLACEHOLDER_MAP,
SKIP_CSOT_TESTS,
EventListenerUtil,
MatchEvaluatorUtil,
coerce_result,
Expand Down Expand Up @@ -518,20 +517,30 @@ def maybe_skip_test(self, spec):
self.skipTest("Implement PYTHON-1894")
if "timeoutMS applied to entire download" in spec["description"]:
self.skipTest("PyMongo's open_download_stream does not cap the stream's lifetime")
if (
"Error returned from connection pool clear with interruptInUseConnections=true is retryable"
in spec["description"]
and not _IS_SYNC
):
self.skipTest("PYTHON-5170 tests are flakey")
if "Driver extends timeout while streaming" in spec["description"] and not _IS_SYNC:
self.skipTest("PYTHON-5174 tests are flakey")

class_name = self.__class__.__name__.lower()
description = spec["description"].lower()
if "csot" in class_name:
if "gridfs" in class_name and sys.platform == "win32":
self.skipTest("PYTHON-3522 CSOT GridFS tests are flaky on Windows")
# Skip tests that are too slow to run on a given platform.
slow_macos = [
"operation fails after two consecutive socket timeouts.*",
"operation succeeds after one socket timeout.*",
"Non-tailable cursor lifetime remaining timeoutMS applied to getMore if timeoutMode is unset",
]
slow_win32 = [
*slow_macos,
"maxTimeMS value in the command is less than timeoutMS",
]
if sys.platform == "win32" and "gridfs" in class_name:
self.skipTest("PYTHON-3522 CSOT GridFS test runs too slow on Windows")
if sys.platform == "win32":
for pat in slow_win32:
if re.match(pat.lower(), description):
self.skipTest("PYTHON-3522 CSOT test runs too slow on Windows")
if sys.platform == "darwin":
for pat in slow_macos:
if re.match(pat.lower(), description):
self.skipTest("PYTHON-3522 CSOT test runs too slow on MacOS")
if "change" in description or "change" in class_name:
self.skipTest("CSOT not implemented for watch()")
if "cursors" in class_name:
Expand Down Expand Up @@ -1347,38 +1356,31 @@ async def verify_outcome(self, spec):
self.assertListEqual(sorted_expected_documents, actual_documents)

async def run_scenario(self, spec, uri=None):
if "csot" in self.id().lower() and SKIP_CSOT_TESTS:
raise unittest.SkipTest("SKIP_CSOT_TESTS is set, skipping...")

# Kill all sessions before and after each test to prevent an open
# transaction (from a test failure) from blocking collection/database
# operations during test set up and tear down.
await self.kill_all_sessions()

if "csot" in self.id().lower():
# Retry CSOT tests up to 2 times to deal with flakey tests.
attempts = 3
for i in range(attempts):
try:
return await self._run_scenario(spec, uri)
except (AssertionError, OperationFailure) as exc:
if isinstance(exc, OperationFailure) and (
_IS_SYNC or "failpoint" not in exc._message
):
raise
if i < attempts - 1:
print(
f"Retrying after attempt {i+1} of {self.id()} failed with:\n"
f"{traceback.format_exc()}",
file=sys.stderr,
)
await self.asyncSetUp()
continue
raise
return None
else:
await self._run_scenario(spec, uri)
return None
# Handle flaky tests.
flaky_tests = [
("PYTHON-5170", ".*test_discovery_and_monitoring.*"),
("PYTHON-5174", ".*Driver_extends_timeout_while_streaming"),
("PYTHON-5315", ".*TestSrvPolling.test_recover_from_initially_.*"),
("PYTHON-4987", ".*UnknownTransactionCommitResult_labels_to_connection_errors"),
("PYTHON-3689", ".*TestProse.test_load_balancing"),
("PYTHON-3522", ".*csot.*"),
]
for reason, flaky_test in flaky_tests:
if re.match(flaky_test.lower(), self.id().lower()) is not None:
func_name = self.id()
options = dict(reason=reason, reset_func=self.asyncSetUp, func_name=func_name)
if "csot" in func_name.lower():
options["max_runs"] = 3
options["affects_cpython_linux"] = True
decorator = flaky(**options)
await decorator(self._run_scenario)(spec, uri)
return
await self._run_scenario(spec, uri)

async def _run_scenario(self, spec, uri=None):
# maybe skip test manually
Expand Down
Loading
Loading