From 2dbc52d5f62f36e9b11204c8cef5cca9db3790b4 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 2 Apr 2024 01:21:11 +0200 Subject: [PATCH] Invalidate cache to check whether remote files exist. --- docs/source/changes.md | 3 +++ src/_pytask/nodes.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/docs/source/changes.md b/docs/source/changes.md index d8e84e3d9..e9c9038f8 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -30,6 +30,9 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and - {pull}`587` improves typing of `capture.py`. - {pull}`588` resets class variables of `ExecutionReport` and `Traceback`. - {pull}`590` fixes an error introduced in {pull}`588`. +- {pull}`591` invalidates the cache of fsspec when checking whether a remote file + exists. Otherwise, a remote file might be reported as missing although it was just + created. See https://github.com/fsspec/s3fs/issues/851 for more info. ## 0.4.6 - 2024-03-13 diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index e7674e0fc..9133eebba 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -14,6 +14,7 @@ from attrs import define from attrs import field +from upath import UPath from upath._stat import UPathStatResult from _pytask._hashlib import hash_value @@ -376,6 +377,12 @@ def _get_state(path: Path) -> str | None: A simple function to handle local and remote files. """ + # Invalidate the cache of the path if it is a UPath because it might have changed in + # a different process with pytask-parallel and the main process does not know about + # it and relies on the cache. + if isinstance(path, UPath): + path.fs.invalidate_cache() + try: stat = path.stat() except FileNotFoundError: