Skip to content

Flaky test_localcluster_start_exception #6769

Description

@gjoseph92
____________ ERROR at teardown of test_localcluster_start_exception ____________
cls = <class '_pytest.runner.CallInfo'>
func = <function call_runtest_hook.<locals>.<lambda> at 0x7f52ce314550>
when = 'teardown'
reraise = (<class '_pytest.outcomes.Exit'>, <class 'KeyboardInterrupt'>)
@classmethod
def from_call(
cls,
        func: "Callable[[], TResult]",
        when: "Literal['collect', 'setup', 'call', 'teardown']",
        reraise: Optional[
            Union[Type[BaseException], Tuple[Type[BaseException], ...]]
        ] = None,
    ) -> "CallInfo[TResult]":
"""Call func, wrapping the result in a CallInfo.
    :param func:
        The function to call. Called without arguments.
    :param when:
        The phase in which the function is called.
    :param reraise:
        Exception or exceptions that shall propagate if raised by the
        function, instead of being wrapped in the CallInfo.
    """
        excinfo = None
        start = timing.time()
        precise_start = timing.perf_counter()
try:
>           result: Optional[TResult] = func()
/usr/share/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/_pytest/runner.py:338: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/usr/share/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/_pytest/runner.py:259: in <lambda>
lambda: ihook(item=item, **kwds), when=when, reraise=reraise
/usr/share/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/pluggy/_hooks.py:265: in __call__
return self._hookexec(self.name, self.get_hookimpls(), kwargs, firstresult)
/usr/share/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/pluggy/_manager.py:80: in _hookexec
return self._inner_hookexec(hook_name, methods, kwargs, firstresult)
/usr/share/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/_pytest/unraisableexception.py:93: in pytest_runtest_teardown
yield from unraisable_exception_runtest_hook()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
def unraisable_exception_runtest_hook() -> Generator[None, None, None]:
with catch_unraisable_exception() as cm:
yield
if cm.unraisable:
if cm.unraisable.err_msg is not None:
                    err_msg = cm.unraisable.err_msg
else:
                    err_msg = "Exception ignored in"
                msg = f"{err_msg}: {cm.unraisable.object!r}\n\n"
                msg += "".join(
                    traceback.format_exception(
                        cm.unraisable.exc_type,
                        cm.unraisable.exc_value,
                        cm.unraisable.exc_traceback,
                    )
                )
>               warnings.warn(pytest.PytestUnraisableExceptionWarning(msg))
E               pytest.PytestUnraisableExceptionWarning: Exception ignored in: <coroutine object InProc.write at 0x7f52cdae1240>
E               
E               Traceback (most recent call last):
E                 File "/usr/share/miniconda3/envs/dask-distributed/lib/python3.8/warnings.py", line 506, in _warn_unawaited_coroutine
E                   warn(msg, category=RuntimeWarning, stacklevel=2, source=coro)
E               RuntimeWarning: coroutine 'InProc.write' was never awaited
/usr/share/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/_pytest/unraisableexception.py:78: PytestUnraisableExceptionWarning
----------------------------- Captured stderr call -----------------------------
No module named 'my_nonexistent_library'
Traceback (most recent call last):
  File "/home/runner/work/distributed/distributed/distributed/utils.py", line 778, in wrapper
    return await func(*args, **kwargs)
  File "/home/runner/work/distributed/distributed/distributed/worker.py", line 1794, in plugin_add
    result = plugin.setup(worker=self)
  File "/home/runner/work/distributed/distributed/distributed/deploy/tests/test_local.py", line 1256, in setup
    import my_nonexistent_library  # noqa
ModuleNotFoundError: No module named 'my_nonexistent_library'
2022-07-08 13:18:29,757 - distributed.nanny - ERROR - Failed to start worker
Traceback (most recent call last):
  File "/home/runner/work/distributed/distributed/distributed/core.py", line 480, in start
    await asyncio.wait_for(self.start_unsafe(), timeout=timeout)
  File "/usr/share/miniconda3/envs/dask-distributed/lib/python3.8/asyncio/tasks.py", line 455, in wait_for
    return await fut
  File "/home/runner/work/distributed/distributed/distributed/worker.py", line 1408, in start_unsafe
    raise plugins_exceptions[0]
  File "/home/runner/work/distributed/distributed/distributed/utils.py", line 778, in wrapper
    return await func(*args, **kwargs)
  File "/home/runner/work/distributed/distributed/distributed/worker.py", line 1794, in plugin_add
    result = plugin.setup(worker=self)
  File "/home/runner/work/distributed/distributed/distributed/deploy/tests/test_local.py", line 1256, in setup
    import my_nonexistent_library  # noqa
ModuleNotFoundError: No module named 'my_nonexistent_library'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
  File "/home/runner/work/distributed/distributed/distributed/nanny.py", line 886, in run
    await worker
  File "/home/runner/work/distributed/distributed/distributed/core.py", line 488, in start
    raise RuntimeError(f"{type(self).__name__} failed to start.") from exc
RuntimeError: Worker failed to start.
2022-07-08 13:18:29,807 - distributed.nanny - ERROR - Failed while trying to start worker process: Worker failed to start.
2022-07-08 13:18:29,809 - distributed.nanny - ERROR - Failed to connect to process
Traceback (most recent call last):
  File "/home/runner/work/distributed/distributed/distributed/core.py", line 480, in start
    await asyncio.wait_for(self.start_unsafe(), timeout=timeout)
  File "/usr/share/miniconda3/envs/dask-distributed/lib/python3.8/asyncio/tasks.py", line 455, in wait_for
    return await fut
  File "/home/runner/work/distributed/distributed/distributed/worker.py", line 1408, in start_unsafe
    raise plugins_exceptions[0]
  File "/home/runner/work/distributed/distributed/distributed/utils.py", line 778, in wrapper
    return await func(*args, **kwargs)
  File "/home/runner/work/distributed/distributed/distributed/worker.py", line 1794, in plugin_add
    result = plugin.setup(worker=self)
  File "/home/runner/work/distributed/distributed/distributed/deploy/tests/test_local.py", line 1256, in setup
    import my_nonexistent_library  # noqa
ModuleNotFoundError: No module named 'my_nonexistent_library'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
  File "/home/runner/work/distributed/distributed/distributed/nanny.py", line 694, in start
    msg = await self._wait_until_connected(uid)
  File "/home/runner/work/distributed/distributed/distributed/nanny.py", line 814, in _wait_until_connected
    raise msg["exception"]
  File "/home/runner/work/distributed/distributed/distributed/nanny.py", line 886, in run
    await worker
  File "/home/runner/work/distributed/distributed/distributed/core.py", line 488, in start
    raise RuntimeError(f"{type(self).__name__} failed to start.") from exc
RuntimeError: Worker failed to start.
2022-07-08 13:18:29,809 - distributed.nanny - ERROR - Failed to start process
Traceback (most recent call last):
  File "/home/runner/work/distributed/distributed/distributed/core.py", line 480, in start
    await asyncio.wait_for(self.start_unsafe(), timeout=timeout)
  File "/usr/share/miniconda3/envs/dask-distributed/lib/python3.8/asyncio/tasks.py", line 455, in wait_for
    return await fut
  File "/home/runner/work/distributed/distributed/distributed/worker.py", line 1408, in start_unsafe
    raise plugins_exceptions[0]
  File "/home/runner/work/distributed/distributed/distributed/utils.py", line 778, in wrapper
    return await func(*args, **kwargs)
  File "/home/runner/work/distributed/distributed/distributed/worker.py", line 1794, in plugin_add
    result = plugin.setup(worker=self)
  File "/home/runner/work/distributed/distributed/distributed/deploy/tests/test_local.py", line 1256, in setup
    import my_nonexistent_library  # noqa
ModuleNotFoundError: No module named 'my_nonexistent_library'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
  File "/home/runner/work/distributed/distributed/distributed/nanny.py", line 444, in instantiate
    result = await self.process.start()
  File "/home/runner/work/distributed/distributed/distributed/nanny.py", line 694, in start
    msg = await self._wait_until_connected(uid)
  File "/home/runner/work/distributed/distributed/distributed/nanny.py", line 814, in _wait_until_connected
    raise msg["exception"]
  File "/home/runner/work/distributed/distributed/distributed/nanny.py", line 886, in run
    await worker
  File "/home/runner/work/distributed/distributed/distributed/core.py", line 488, in start
    raise RuntimeError(f"{type(self).__name__} failed to start.") from exc
RuntimeError: Worker failed to start.
------------------------------ Captured log call -------------------------------
ERROR    asyncio:base_events.py:1707 Task exception was never retrieved
future: <Task finished name='Task-42370' coro=<_wrap_awaitable() done, defined at /usr/share/miniconda3/envs/dask-distributed/lib/python3.8/asyncio/tasks.py:688> exception=RuntimeError('Nanny failed to start.')>
Traceback (most recent call last):
  File "/home/runner/work/distributed/distributed/distributed/core.py", line 480, in start
    await asyncio.wait_for(self.start_unsafe(), timeout=timeout)
  File "/usr/share/miniconda3/envs/dask-distributed/lib/python3.8/asyncio/tasks.py", line 455, in wait_for
    return await fut
  File "/home/runner/work/distributed/distributed/distributed/worker.py", line 1408, in start_unsafe
    raise plugins_exceptions[0]
  File "/home/runner/work/distributed/distributed/distributed/utils.py", line 778, in wrapper
    return await func(*args, **kwargs)
  File "/home/runner/work/distributed/distributed/distributed/worker.py", line 1794, in plugin_add
    result = plugin.setup(worker=self)
  File "/home/runner/work/distributed/distributed/distributed/deploy/tests/test_local.py", line 1256, in setup
    import my_nonexistent_library  # noqa
ModuleNotFoundError: No module named 'my_nonexistent_library'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
  File "/home/runner/work/distributed/distributed/distributed/core.py", line 480, in start
    await asyncio.wait_for(self.start_unsafe(), timeout=timeout)
  File "/usr/share/miniconda3/envs/dask-distributed/lib/python3.8/asyncio/tasks.py", line 455, in wait_for
    return await fut
  File "/home/runner/work/distributed/distributed/distributed/nanny.py", line 371, in start_unsafe
    response = await self.instantiate()
  File "/home/runner/work/distributed/distributed/distributed/nanny.py", line 444, in instantiate
    result = await self.process.start()
  File "/home/runner/work/distributed/distributed/distributed/nanny.py", line 694, in start
    msg = await self._wait_until_connected(uid)
  File "/home/runner/work/distributed/distributed/distributed/nanny.py", line 814, in _wait_until_connected
    raise msg["exception"]
  File "/home/runner/work/distributed/distributed/distributed/nanny.py", line 886, in run
    await worker
  File "/home/runner/work/distributed/distributed/distributed/core.py", line 488, in start
    raise RuntimeError(f"{type(self).__name__} failed to start.") from exc
RuntimeError: Worker failed to start.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
  File "/usr/share/miniconda3/envs/dask-distributed/lib/python3.8/asyncio/tasks.py", line 695, in _wrap_awaitable
    return (yield from awaitable.__await__())
  File "/home/runner/work/distributed/distributed/distributed/core.py", line 488, in start
    raise RuntimeError(f"{type(self).__name__} failed to start.") from exc
RuntimeError: Nanny failed to start.
---------------------------- Captured log teardown -----------------------------
ERROR    asyncio:base_events.py:1707 Task was destroyed but it is pending!
task: <Task pending name='Task-41944' coro=<InProc.write() running at /home/runner/work/distributed/distributed/distributed/comm/inproc.py:215> cb=[IOLoop.add_future.<locals>.<lambda>() at /usr/share/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/tornado/ioloop.py:688]>

https://github.com/dask/distributed/runs/7251966582?check_suite_focus=true#step:11:1809

Metadata

Metadata

Assignees

No one assigned

    Labels

    flaky test: Intermittent failures on CI.

    Type

    No type

    Fields

    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions