Skip to content

cluster_test.py::test_migration_rebalance_node #4551

Closed
@BagritsevichStepan

Description

@BagritsevichStepan

https://github.com/dragonflydb/dragonfly/actions/runs/13063852208/job/36452588898

...
stderr=b''.join(stderr_seq) if stderr_seq else None)
E           subprocess.TimeoutExpired: Command '['/__w/dragonfly/dragonfly/build/dragonfly', '--proactor_threads=4', '--cluster_mode=yes', '--port=30123', '--admin_port=30124', '--vmodule=outgoing_slot_migration=2,cluster_family=2,incoming_slot_migration=2,streamer=2', '--dbfilename=', '--noversion_check', '--maxmemory=8G', '--jsonpathv2', '--list_experimental_v2', '--log_dir=/tmp/dragonfly_logs/test_migration_rebalance_node_df_seeder_factory0-df_factory0_', '--serialization_max_chunk_size=300000', '--fiber_safety_margin=4096', '--num_shards=3']' timed out after 120 seconds

/usr/lib/python3.8/subprocess.py:1072: TimeoutExpired

During handling of the above exception, another exception occurred:

self = Factory({'proactor_threads': 4, 'cluster_mode': 'yes'})

    async def stop_all(self):
        """Stop all launched instances."""
        exceptions = []  # To collect exceptions
        for instance in self.instances:
            await instance.close_clients()
            try:
               instance.stop()

dragonfly/instance.py:464: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = :30123, kill = False

    def stop(self, kill=False):
        proc, self.proc = self.proc, None
        if proc is None:
            return
    
        logging.debug(f"Stopping instance on {self._port}")
        try:
            if kill:
                proc.kill()
            else:
                proc.terminate()
                proc.communicate(timeout=120)
                # if the return code is 0 it means normal termination
                # if the return code is negative it means termination by signal
                # if the return code is positive it means abnormal exit
                if proc.returncode != 0:
                    raise Exception(
                        f"Dragonfly did not terminate gracefully, exit code {proc.returncode}, "
                        f"pid: {proc.pid}"
                    )
    
        except subprocess.TimeoutExpired:
            # We need to send SIGUSR1 to DF such that it prints the stacktrace
            proc.send_signal(signal.SIGUSR1)
            # Then we sleep for 5 seconds such that DF has enough time to print the stacktraces
            # We can't really synchronize here because SIGTERM and SIGKILL do not block even if
            # sigaction explicitly blocks other incoming signals until it handles SIGUSR1.
            # Even worse, on SIGTERM and SIGKILL none of the handlers registered via sigaction
            # are guaranteed to run
            time.sleep(5)
            logging.debug(f"Unable to kill the process on port {self._port}")
            logging.debug(f"INFO LOGS of DF are:")
            self.print_info_logs_to_debug_log()
            proc.kill()
            proc.communicate()
           raise Exception("Unable to terminate DragonflyDB gracefully, it was killed")
E           Exception: Unable to terminate DragonflyDB gracefully, it was killed

dragonfly/instance.py:258: Exception

The above exception was the direct cause of the following exception:

    def finalizer() -> None:
        """Yield again, to finalize."""
    
        async def async_finalizer() -> None:
            try:
                await gen_obj.__anext__()
            except StopAsyncIteration:
                pass
            else:
                msg = "Async generator fixture didn't stop."
                msg += "Yield only once."
                raise ValueError(msg)
    
       event_loop.run_until_complete(async_finalizer())

/usr/local/lib/python3.8/dist-packages/pytest_asyncio/plugin.py:276: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/usr/lib/python3.8/asyncio/base_events.py:616: in run_until_complete
    return future.result()
/usr/local/lib/python3.8/dist-packages/pytest_asyncio/plugin.py:268: in async_finalizer
    await gen_obj.__anext__()
dragonfly/conftest.py:146: in df_factory
    await factory.stop_all()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = Factory({'proactor_threads': 4, 'cluster_mode': 'yes'})

    async def stop_all(self):
        """Stop all launched instances."""
        exceptions = []  # To collect exceptions
        for instance in self.instances:
            await instance.close_clients()
            try:
                instance.stop()
            except Exception as e:
                exceptions.append(e)  # Collect the exception
        if exceptions:
            first_exception = exceptions[0]
           raise Exception(
                f"One or more errors occurred while stopping instances. "
                f"First exception: {first_exception}"
            ) from first_exception
E           Exception: One or more errors occurred while stopping instances. First exception: Unable to terminate DragonflyDB gracefully, it was killed

dragonfly/instance.py:469: Exception
----------------------------- Captured stdout call -----------------------------
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
cpu time 2.44960880279541 batches 1717 commands 171700
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
------------------------------ Captured log call -------------------------------

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions