diff --git a/dandi/conftest.py b/dandi/conftest.py index 924a32d14..347f31f1b 100644 --- a/dandi/conftest.py +++ b/dandi/conftest.py @@ -1,7 +1,13 @@ +from typing import List + +from _pytest.config import Config +from _pytest.config.argparsing import Parser +from _pytest.nodes import Item + from .tests.fixtures import * # noqa: F401, F403 # lgtm [py/polluting-import] -def pytest_addoption(parser): +def pytest_addoption(parser: Parser) -> None: parser.addoption( "--dandi-api", action="store_true", @@ -10,7 +16,7 @@ def pytest_addoption(parser): ) -def pytest_collection_modifyitems(items, config): +def pytest_collection_modifyitems(items: List[Item], config: Config) -> None: # Based on if config.getoption("--dandi-api"): diff --git a/dandi/dandiarchive.py b/dandi/dandiarchive.py index abe0e68a4..a9aa3a27d 100644 --- a/dandi/dandiarchive.py +++ b/dandi/dandiarchive.py @@ -581,7 +581,10 @@ def parse(cls, url: str, *, map_instance: bool = True) -> ParsedDandiURL: ) ) known_instance = get_instance(settings["map_instance"]) - parsed_url.api_url = known_instance.api + assert known_instance.api is not None + parsed_url.api_url = cast( + AnyHttpUrl, parse_obj_as(AnyHttpUrl, known_instance.api) + ) continue # in this run we ignore and match further elif "instance_name" in groups: try: diff --git a/dandi/dandiset.py b/dandi/dandiset.py index c2254349d..8c9aceaf0 100644 --- a/dandi/dandiset.py +++ b/dandi/dandiset.py @@ -1,5 +1,6 @@ """Classes/utilities for support of a dandiset""" from pathlib import Path +from typing import Optional, Type, TypeVar, Union from dandischema.models import get_schema_version @@ -9,32 +10,34 @@ lgr = get_logger() +D = TypeVar("D", bound="Dandiset") -class Dandiset(object): + +class Dandiset: """A prototype class for all things dandiset""" __slots__ = ["metadata", "path", "path_obj", "_metadata_file_obj"] - def __init__(self, path, allow_empty=False): + def __init__(self, path: Union[str, Path], allow_empty: bool = False) -> None: self.path = str(path) self.path_obj = Path(path) if not allow_empty and not (self.path_obj / dandiset_metadata_file).exists(): raise ValueError(f"No dandiset at {path}") - self.metadata = None + self.metadata: Optional[dict] = None self._metadata_file_obj = self.path_obj / dandiset_metadata_file self._load_metadata() @classmethod - def find(cls, path): + def find(cls: Type[D], path: Union[str, Path, None]) -> Optional[D]: """Find a dandiset possibly pointing to a directory within it""" dandiset_path = find_parent_directory_containing(dandiset_metadata_file, path) # TODO?: identify "class" for the dandiset to use (this one or APIDandiset) - if dandiset_path: + if dandiset_path is not None: return cls(dandiset_path) return None - def _load_metadata(self): + def _load_metadata(self) -> None: if self._metadata_file_obj.exists(): with open(self._metadata_file_obj) as f: # TODO it would cast 000001 if not explicitly string into @@ -44,7 +47,7 @@ def _load_metadata(self): self.metadata = None @classmethod - def get_dandiset_record(cls, meta): + def get_dandiset_record(cls, meta: dict) -> str: dandiset_identifier = cls._get_identifier(meta) if not dandiset_identifier: lgr.warning("No identifier for a dandiset was provided in %s", str(meta)) @@ -61,7 +64,7 @@ def get_dandiset_record(cls, meta): yaml_rec = yaml_dump(meta) return header + yaml_rec - def update_metadata(self, meta): + def update_metadata(self, meta: dict) -> None: """Update existing metadata record in dandiset.yaml""" if not meta: lgr.debug("No updates to metadata, returning") @@ -84,7 +87,7 @@ def update_metadata(self, meta): self._load_metadata() @classmethod - def _get_identifier(cls, metadata): + def _get_identifier(cls, metadata: dict) -> Optional[str]: """Given a metadata record, determine identifier""" # ATM since we have dichotomy in dandiset metadata schema from drafts # and from published versions, we will just test both locations @@ -108,14 +111,19 @@ def _get_identifier(cls, metadata): f"with 'propertyID: DANDI': {id_}" ) id_ = str(id_.get("value", "")) - elif id_.startswith("DANDI:"): - # result of https://github.com/dandi/dandi-cli/pull/348 which - id_ = id_[len("DANDI:") :] + elif id_ is not None: + assert isinstance(id_, str) + if id_.startswith("DANDI:"): + # result of https://github.com/dandi/dandi-cli/pull/348 which + id_ = id_[len("DANDI:") :] + assert id_ is None or isinstance(id_, str) return id_ @property - def identifier(self): + def identifier(self) -> str: + if self.metadata is None: + raise ValueError("No metadata record found in Dandiset") id_ = self._get_identifier(self.metadata) if not id_: raise ValueError( @@ -127,7 +135,12 @@ def identifier(self): class APIDandiset(Dandiset): """A dandiset to replace "classical" Dandiset whenever we migrate to new API based server""" - def __init__(self, path, allow_empty=False, schema_version=None): + def __init__( + self, + path: Union[str, Path], + allow_empty: bool = False, + schema_version: Optional[str] = None, + ) -> None: if schema_version is not None: current_version = get_schema_version() if schema_version != current_version: diff --git a/dandi/delete.py b/dandi/delete.py index 8b4acf50b..3f40dcd49 100644 --- a/dandi/delete.py +++ b/dandi/delete.py @@ -129,6 +129,7 @@ def register_url(self, url: str) -> None: def register_local_path_equivalent(self, instance_name: str, filepath: str) -> None: instance = get_instance(instance_name) api_url = instance.api + assert api_url is not None dandiset_id, asset_path = find_local_asset(filepath) if not self.set_dandiset(api_url, dandiset_id): return diff --git a/dandi/download.py b/dandi/download.py index 33ab159d9..0c5753d23 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -15,21 +15,43 @@ import sys from threading import Lock import time -from typing import Any, Callable, Dict, Iterator, Optional, Tuple +from types import TracebackType +from typing import ( + IO, + Any, + Callable, + Dict, + Iterable, + Iterator, + List, + Optional, + Sequence, + Tuple, + Type, + Union, +) from dandischema.models import DigestType +from fasteners import InterProcessLock import humanize from interleave import FINISH_CURRENT, interleave import requests from . import get_logger from .consts import RETRY_STATUSES, dandiset_metadata_file -from .dandiapi import AssetType, RemoteZarrAsset -from .dandiarchive import DandisetURL, MultiAssetURL, SingleAssetURL, parse_dandi_url +from .dandiapi import AssetType, RemoteAsset, RemoteDandiset, RemoteZarrAsset +from .dandiarchive import ( + DandisetURL, + MultiAssetURL, + ParsedDandiURL, + SingleAssetURL, + parse_dandi_url, +) from .dandiset import Dandiset from .exceptions import NotFoundError -from .files import DandisetMetadataFile, find_dandi_files +from .files import LocalAsset, find_dandi_files from .support.digests import get_digest, get_zarr_checksum +from .support.iterators import IteratorWithAggregation from .support.pyout import naturalsize from .utils import ( abbrev_prompt, @@ -46,17 +68,17 @@ def download( - urls, - output_dir, + urls: Union[str, Sequence[str]], + output_dir: Union[str, Path], *, - format="pyout", - existing="error", - jobs=1, - jobs_per_zarr=None, - get_metadata=True, - get_assets=True, - sync=False, -): + format: str = "pyout", + existing: str = "error", + jobs: int = 1, + jobs_per_zarr: Optional[int] = None, + get_metadata: bool = True, + get_assets: bool = True, + sync: bool = False, +) -> None: # TODO: unduplicate with upload. For now stole from that one # We will again use pyout to provide a neat table summarizing our progress # with upload etc @@ -75,7 +97,9 @@ def download( # TODO: if we are ALREADY in a dandiset - we can validate that it is the # same dandiset and use that dandiset path as the one to download under + output_path: Union[str, Path] if isinstance(parsed_url, DandisetURL): + assert parsed_url.dandiset_id is not None output_path = op.join(output_dir, parsed_url.dandiset_id) else: output_path = output_dir @@ -144,7 +168,7 @@ def download( for df in find_dandi_files( download_dir, dandiset_path=download_dir, allow_all=True ): - if isinstance(df, DandisetMetadataFile): + if not isinstance(df, LocalAsset): continue a_path = op.normpath(op.join(prefix, df.path)) if on_windows: @@ -174,16 +198,16 @@ def download( def download_generator( - parsed_url, - output_path, + parsed_url: ParsedDandiURL, + output_path: Union[str, Path], *, - assets_it=None, - yield_generator_for_fields=None, - existing="error", - get_metadata=True, - get_assets=True, - jobs_per_zarr=None, -): + assets_it: Optional[IteratorWithAggregation] = None, + yield_generator_for_fields: Optional[Tuple[str, ...]] = None, + existing: str = "error", + get_metadata: bool = True, + get_assets: bool = True, + jobs_per_zarr: Optional[int] = None, +) -> Iterator[dict]: """A generator for downloads of files, folders, or entire dandiset from DANDI (as identified by URL) @@ -206,6 +230,7 @@ def download_generator( assets = assets_it if isinstance(parsed_url, DandisetURL) and get_metadata: + assert dandiset is not None for resp in _populate_dandiset_yaml(output_path, dandiset, existing): yield dict(path=dandiset_metadata_file, **resp) @@ -261,7 +286,10 @@ def download_generator( "Asset %s is missing blobDateModified metadata field", asset.path, ) - mtime = asset.modified + if isinstance(asset, RemoteAsset): + # TODO: Remove ^^ guard ^^ once dandi-archive#681 is + # supported + mtime = asset.modified _download_generator = _download_file( asset.get_download_file_iter(), download_path, @@ -306,17 +334,22 @@ class ItemsSummary: To be used as a callback to IteratorWithAggregation """ - def __init__(self): + def __init__(self) -> None: self.files = 0 # TODO: get rid of needing it - self.t0 = None # when first record is seen + self.t0: Optional[float] = None # when first record is seen self.size = 0 self.has_unknown_sizes = False - def as_dict(self): - return {a: getattr(self, a) for a in ("files", "size", "has_unknown_sizes")} + def as_dict(self) -> dict: + return { + "files": self.files, + "size": self.size, + "has_unknown_sizes": self.has_unknown_sizes, + } - def __call__(self, rec, prior=None): + # TODO: Determine the proper annotation for `rec` + def __call__(self, rec: Any, prior: Optional[ItemsSummary] = None) -> ItemsSummary: assert prior in (None, self) if not self.files: self.t0 = time.time() @@ -335,8 +368,6 @@ class PYOUTHelper: def __init__(self): # Establish "fancy" download while still possibly traversing the dandiset # functionality. - from .support.iterators import IteratorWithAggregation - self.items_summary = ItemsSummary() self.it = IteratorWithAggregation( # unfortunately Yarik missed the point that we need to wrap @@ -347,13 +378,13 @@ def __init__(self): self.items_summary, ) - def agg_files(self, *ignored): + def agg_files(self, *ignored: Any) -> str: ret = str(self.items_summary.files) if not self.it.finished: ret += "+" return ret - def agg_size(self, sizes): + def agg_size(self, sizes: Iterable[int]) -> Union[str, List[str]]: """Formatter for "size" column where it would show how much is "active" (or done) @@ -378,7 +409,7 @@ def agg_size(self, sizes): v.append(extra_str) return v - def agg_done(self, done_sizes): + def agg_done(self, done_sizes: Iterator[int]) -> List[str]: """Formatter for "DONE" column""" done = sum(done_sizes) if self.it.finished and done == 0 and self.items_summary.size == 0: @@ -412,11 +443,13 @@ def agg_done(self, done_sizes): return v -def _skip_file(msg): +def _skip_file(msg: Any) -> dict: return {"status": "skipped", "message": str(msg)} -def _populate_dandiset_yaml(dandiset_path, dandiset, existing): +def _populate_dandiset_yaml( + dandiset_path: Union[str, Path], dandiset: RemoteDandiset, existing: str +) -> Iterator[dict]: metadata = dandiset.get_raw_metadata() if not metadata: lgr.warning( @@ -459,7 +492,7 @@ def _populate_dandiset_yaml(dandiset_path, dandiset, existing): def _download_file( downloader: Callable[[int], Iterator[bytes]], path: str, - toplevel_path: str, + toplevel_path: Union[str, Path], lock: Lock, size: Optional[int] = None, mtime: Optional[datetime] = None, @@ -607,7 +640,8 @@ def _download_file( downloaded_digest = digester() # start empty warned = False # I wonder if we could make writing async with downloader - with DownloadDirectory(path, digests) as dldir: + with DownloadDirectory(path, digests or {}) as dldir: + assert dldir.offset is not None downloaded = dldir.offset resuming = downloaded > 0 if size is not None and downloaded == size: @@ -622,7 +656,7 @@ def _download_file( downloaded_digest.update(block) downloaded += len(block) # TODO: yield progress etc - out = {"done": downloaded} + out: Dict[str, Any] = {"done": downloaded} if size: if downloaded > size and not warned: warned = True @@ -632,7 +666,7 @@ def _download_file( downloaded, size, ) - out["done%"] = 100 * downloaded / size if size else "100" + out["done%"] = 100 * downloaded / size # TODO: ETA etc yield out dldir.append(block) @@ -688,7 +722,7 @@ def _download_file( class DownloadDirectory: - def __init__(self, filepath, digests): + def __init__(self, filepath: Union[str, Path], digests: Dict[str, str]) -> None: #: The path to which to save the file after downloading self.filepath = Path(filepath) #: Expected hashes of the downloaded data, as a mapping from algorithm @@ -701,15 +735,13 @@ def __init__(self, filepath, digests): #: received self.writefile = self.dirpath / "file" #: A `fasteners.InterProcessLock` on `dirpath` - self.lock = None + self.lock: Optional[InterProcessLock] = None #: An open filehandle to `writefile` - self.fp = None + self.fp: Optional[IO[bytes]] = None #: How much of the data has been downloaded so far - self.offset = None - - def __enter__(self): - from fasteners import InterProcessLock + self.offset: Optional[int] = None + def __enter__(self) -> DownloadDirectory: self.dirpath.mkdir(parents=True, exist_ok=True) self.lock = InterProcessLock(str(self.dirpath / "lock")) if not self.lock.acquire(blocking=False): @@ -747,7 +779,13 @@ def __enter__(self): self.offset = self.fp.tell() return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: + assert self.fp is not None self.fp.close() try: if exc_type is None: @@ -757,22 +795,24 @@ def __exit__(self, exc_type, exc_value, traceback): rmtree(self.filepath) self.writefile.replace(self.filepath) finally: + assert self.lock is not None self.lock.release() if exc_type is None: rmtree(self.dirpath, ignore_errors=True) self.lock = None self.fp = None self.offset = None - return False - def append(self, blob): + def append(self, blob: bytes) -> None: + if self.fp is None: + raise ValueError("DownloadDirectory.append() called outside of context manager") self.fp.write(blob) def _download_zarr( asset: RemoteZarrAsset, download_path: str, - toplevel_path: str, + toplevel_path: Union[str, Path], existing: str, lock: Lock, jobs: Optional[int] = None, diff --git a/dandi/exceptions.py b/dandi/exceptions.py index c23c5c94b..15a246734 100644 --- a/dandi/exceptions.py +++ b/dandi/exceptions.py @@ -1,3 +1,8 @@ +from typing import List + +from semantic_version import Version + + class OrganizeImpossibleError(ValueError): """Exception to be raised if given current list of files it is impossible @@ -34,12 +39,14 @@ class LockingError(RuntimeError): class CliVersionError(RuntimeError): """Base class for `CliVersionTooOldError` and `BadCliVersionError`""" - def __init__(self, our_version, minversion, bad_versions): + def __init__( + self, our_version: Version, minversion: Version, bad_versions: List[Version] + ) -> None: self.our_version = our_version self.minversion = minversion self.bad_versions = bad_versions - def server_requirements(self): + def server_requirements(self) -> str: s = f"Server requires at least version {self.minversion}" if self.bad_versions: s += f" (but not {', '.join(map(str, self.bad_versions))})" @@ -47,7 +54,7 @@ def server_requirements(self): class CliVersionTooOldError(CliVersionError): - def __str__(self): + def __str__(self) -> str: return ( f"Client version {self.our_version} is too old! " + self.server_requirements() @@ -55,7 +62,7 @@ def __str__(self): class BadCliVersionError(CliVersionError): - def __str__(self): + def __str__(self) -> str: return ( f"Client version {self.our_version} is rejected by server! " + self.server_requirements() diff --git a/dandi/keyring.py b/dandi/keyring.py index bae5db4ef..e488e3996 100644 --- a/dandi/keyring.py +++ b/dandi/keyring.py @@ -1,8 +1,9 @@ import os.path as op from pathlib import Path +from typing import Optional, Tuple import click -from keyring.backend import get_all_keyring +from keyring.backend import KeyringBackend, get_all_keyring from keyring.core import get_keyring, load_config, load_env from keyring.errors import KeyringError from keyring.util.platform_ import config_root @@ -13,7 +14,9 @@ lgr = get_logger() -def keyring_lookup(service_name, username): +def keyring_lookup( + service_name: str, username: str +) -> Tuple[KeyringBackend, Optional[str]]: """ Determine a keyring backend to use for storing & retrieving credentials as follows: @@ -94,5 +97,5 @@ def keyring_lookup(service_name, username): return (kb, password) -def keyringrc_file(): +def keyringrc_file() -> Path: return Path(config_root(), "keyringrc.cfg") diff --git a/dandi/metadata.py b/dandi/metadata.py index 8d4e5a1d3..4a86a2f82 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -1,10 +1,26 @@ -from datetime import datetime +from __future__ import annotations + +from datetime import datetime, timedelta from functools import lru_cache import os import os.path as op from pathlib import Path import re -from typing import Dict, List, Optional, Tuple, Union +import sys +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Iterable, + List, + Optional, + Tuple, + Type, + TypeVar, + Union, + cast, +) from uuid import uuid4 from xml.dom.minidom import parseString @@ -29,7 +45,7 @@ @metadata_cache.memoize_path -def get_metadata(path): +def get_metadata(path: Union[str, Path]) -> Optional[dict]: """Get selected metadata from a .nwb file or a dandiset directory If a directory given and it is not a Dandiset, None is returned @@ -50,7 +66,7 @@ def get_metadata(path): if op.isdir(path): try: dandiset = Dandiset(path) - return dandiset.metadata + return cast(dict, dandiset.metadata) except ValueError as exc: lgr.debug("Failed to get metadata for %s: %s", path, exc) return None @@ -103,7 +119,7 @@ def get_metadata(path): return meta -def _parse_iso8601(age): +def _parse_iso8601(age: str) -> List[str]: """checking if age is proper iso8601, additional formatting""" # allowing for comma instead of ., e.g. P1,5D age = age.replace(",", ".") @@ -118,7 +134,7 @@ def _parse_iso8601(age): raise ValueError(f"ISO 8601 expected, but {age!r} was received") -def _parse_age_re(age, unit, tp="date"): +def _parse_age_re(age: str, unit: str, tp: str = "date") -> Tuple[str, Optional[str]]: """finding parts that have in various forms""" if unit == "Y": @@ -135,6 +151,8 @@ def _parse_age_re(age, unit, tp="date"): pat_un = "(min|m(inute)?)" elif unit == "S": pat_un = "(sec|s(econd)?)" + else: + raise AssertionError(f"Unexpected unit: {unit!r}") m = re.match(rf"(\d+\.?\d*)\s*({pat_un}s?)", age, flags=re.I) swap_flag = False @@ -157,7 +175,7 @@ def _parse_age_re(age, unit, tp="date"): return (age[: m.start()] + age[m.end() :]).strip(), f"{qty}{unit}" -def _parse_hours_format(age): +def _parse_hours_format(age: str) -> Tuple[str, List[str]]: """parsing format 0:30:10""" m = re.match(r"\s*(\d\d?):(\d\d):(\d\d)", age) if m: @@ -167,7 +185,7 @@ def _parse_hours_format(age): return age, [] -def _check_decimal_parts(age_parts): +def _check_decimal_parts(age_parts: List[str]) -> bool: """checking if decimal parts are only in the lowest order component""" # if the last part is the T component I have to separate the parts if "T" in age_parts[-1]: @@ -176,12 +194,14 @@ def _check_decimal_parts(age_parts): age_parts[-1], flags=re.I, ) + if m is None: + raise ValueError(f"Failed to parse the trailing part of age {age_parts[-1]!r}") age_parts = age_parts[:-1] + [m[i] for i in range(1, 3) if m[i]] decim_part = ["." in el for el in age_parts] return not (any(decim_part) and any(decim_part[:-1])) -def parse_age(age): +def parse_age(age: Optional[str]) -> Tuple[str, str]: """ Parsing age field and converting into an ISO 8601 duration @@ -191,7 +211,7 @@ def parse_age(age): Returns ------- - str + Tuple[str, str] """ if not age: @@ -200,8 +220,7 @@ def parse_age(age): age_orig = age if age.lower().startswith("gestation"): - m = re.match("^gest[a-z]*", age, flags=re.I) - age = age[: m.start()] + age[m.end() :] + age = re.sub("^gest[a-z]*", "", age, flags=re.I) ref = "Gestational" else: ref = "Birth" @@ -218,7 +237,7 @@ def parse_age(age): if not age: raise ValueError("Age doesn't have any information") - date_f = [] + date_f: List[str] = [] for unit in ["Y", "M", "W", "D"]: if not age: break @@ -229,7 +248,7 @@ def parse_age(age): date_f = ["P", part_f] if ref == "Birth": - time_f = [] + time_f: List[str] = [] for un in ["H", "M", "S"]: if not age: break @@ -260,7 +279,7 @@ def parse_age(age): return "".join(age_f), ref -def extract_age(metadata): +def extract_age(metadata: dict) -> Optional[models.PropertyValue]: try: dob = ensure_datetime(metadata["date_of_birth"]) start = ensure_datetime(metadata["session_start_time"]) @@ -268,7 +287,7 @@ def extract_age(metadata): if metadata.get("age") is not None: duration, ref = parse_age(metadata["age"]) else: - return ... + return None else: duration, ref = timedelta2duration(start - dob), "Birth" return models.PropertyValue( @@ -280,7 +299,7 @@ def extract_age(metadata): ) -def timedelta2duration(delta): +def timedelta2duration(delta: timedelta) -> str: """ Convert a datetime.timedelta to ISO 8601 duration format @@ -296,7 +315,7 @@ def timedelta2duration(delta): if delta.days: s += f"{delta.days}D" if delta.seconds or delta.microseconds: - sec = delta.seconds + sec: Union[int, float] = delta.seconds if delta.microseconds: # Don't add when microseconds is 0, so that sec will be an int then sec += delta.microseconds / 1e6 @@ -306,7 +325,7 @@ def timedelta2duration(delta): return s -def extract_sex(metadata): +def extract_sex(metadata: dict) -> Optional[models.SexType]: value = metadata.get("sex", None) if value is not None and value != "": value = value.lower() @@ -329,7 +348,7 @@ def extract_sex(metadata): raise ValueError(f"Cannot interpret sex field: {value}") return models.SexType(identifier=value_id, name=value) else: - return ... + return None species_map = [ @@ -422,7 +441,7 @@ def parse_purlobourl( return values -def extract_species(metadata): +def extract_species(metadata: dict) -> Optional[models.SpeciesType]: value_orig = metadata.get("species", None) value_id = None if value_orig is not None and value_orig != "": @@ -467,35 +486,40 @@ def extract_species(metadata): ) return models.SpeciesType(identifier=value_id, name=value) else: - return ... + return None -def extract_assay_type(metadata): +def extract_assay_type(metadata: dict) -> Optional[List[models.AssayType]]: if "assayType" in metadata: return [models.AssayType(identifier="assayType", name=metadata["assayType"])] else: - return ... + return None -def extract_anatomy(metadata): +def extract_anatomy(metadata: dict) -> Optional[List[models.Anatomy]]: if "anatomy" in metadata: return [models.Anatomy(identifier="anatomy", name=metadata["anatomy"])] else: - return ... + return None + + +M = TypeVar("M", bound=models.DandiBaseModel) -def extract_model(modelcls, metadata, **kwargs): - m = modelcls.unvalidated() +def extract_model(modelcls: Type[M], metadata: dict, **kwargs: Any) -> M: + m = cast(M, modelcls.unvalidated()) for field in m.__fields__.keys(): value = kwargs.get(field, extract_field(field, metadata)) - if value is not Ellipsis: + if value is not None: setattr(m, field, value) # return modelcls(**m.dict()) return m -def extract_model_list(modelcls, id_field, id_source, **kwargs): - def func(metadata): +def extract_model_list( + modelcls: Type[M], id_field: str, id_source: str, **kwargs: Any +) -> Callable[[dict], List[M]]: + def func(metadata: dict) -> List[M]: m = extract_model( modelcls, metadata, **{id_field: metadata.get(id_source)}, **kwargs ) @@ -507,8 +531,8 @@ def func(metadata): return func -def extract_wasDerivedFrom(metadata): - derived_from = None +def extract_wasDerivedFrom(metadata: dict) -> Optional[List[models.BioSample]]: + derived_from: Optional[List[models.BioSample]] = None for field, sample_name in [ ("tissue_sample_id", "tissuesample"), ("slice_id", "slice"), @@ -526,7 +550,7 @@ def extract_wasDerivedFrom(metadata): extract_wasAttributedTo = extract_model_list( - models.Participant, "identifier", "subject_id", id=... + models.Participant, "identifier", "subject_id", id=None ) @@ -554,14 +578,16 @@ def extract_session(metadata: dict) -> Optional[List[models.Session]]: ] -def extract_digest(metadata): +def extract_digest( + metadata: dict, +) -> Optional[Dict[models.DigestType, str]]: if "digest" in metadata: return {models.DigestType[metadata["digest_type"]]: metadata["digest"]} else: - return ... + return None -FIELD_EXTRACTORS = { +FIELD_EXTRACTORS: Dict[str, Callable[[dict], Any]] = { "wasDerivedFrom": extract_wasDerivedFrom, "wasAttributedTo": extract_wasAttributedTo, "wasGeneratedBy": extract_session, @@ -574,14 +600,27 @@ def extract_digest(metadata): } -def extract_field(field, metadata): +def extract_field(field: str, metadata: dict) -> Any: if field in FIELD_EXTRACTORS: return FIELD_EXTRACTORS[field](metadata) else: - return metadata.get(field, ...) + return metadata.get(field) -neurodata_typemap = { +if TYPE_CHECKING: + if sys.version_info >= (3, 8): + from typing import TypedDict + else: + from typing_extensions import TypedDict + + class Neurodatum(TypedDict): + module: str + neurodata_type: str + technique: Optional[str] + approach: Optional[str] + + +neurodata_typemap: Dict[str, Neurodatum] = { "ElectricalSeries": { "module": "ecephys", "neurodata_type": "ElectricalSeries", @@ -771,7 +810,9 @@ def extract_field(field, metadata): } -def process_ndtypes(asset, nd_types): +def process_ndtypes( + asset: models.BareAsset, nd_types: Iterable[str] +) -> models.BareAsset: approach = set() technique = set() variables = set() @@ -867,6 +908,6 @@ def get_generator(start_time: datetime, end_time: datetime) -> models.Activity: ) -def metadata2asset(metadata): +def metadata2asset(metadata: dict) -> models.BareAsset: bare_dict = extract_model(models.BareAsset, metadata).json_dict() return models.BareAsset(**bare_dict) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index d92af47f6..a9326a744 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -1,7 +1,9 @@ from collections import Counter import os import os.path as op +from pathlib import Path import re +from typing import Any, Dict, List, Tuple, TypeVar, Union import warnings import dandischema @@ -52,7 +54,7 @@ def _sanitize_nwb_version(v, filename=None, log=None): log = lgr.warning elif not log: - def log(v): # does nothing + def log(v: str) -> str: # does nothing return v if isinstance(v, str): @@ -104,10 +106,11 @@ def get_nwb_version(filepath, sanitize=False): try: return _sanitize(h5file["nwb_version"][...].tostring().decode()) except Exception: - lgr.debug("%s has no nwb_version" % filepath) + lgr.debug("%s has no nwb_version", filepath) + return None -def get_neurodata_types_to_modalities_map(): +def get_neurodata_types_to_modalities_map() -> Dict[str, str]: """Return a dict to map neurodata types known to pynwb to "modalities" It is an ugly hack, largely to check feasibility. @@ -116,7 +119,7 @@ def get_neurodata_types_to_modalities_map(): """ import inspect - ndtypes = {} + ndtypes: Dict[str, str] = {} # TODO: if there are extensions, they might have types subclassed from the base # types. There might be a map within pynwb (pynwb.get_type_map?) to return @@ -151,7 +154,7 @@ def get_neurodata_types_to_modalities_map(): @metadata_cache.memoize_path -def get_neurodata_types(filepath): +def get_neurodata_types(filepath: Union[str, Path]) -> List[str]: with h5py.File(filepath, "r") as h5file: all_pairs = _scan_neurodata_types(h5file) @@ -169,7 +172,7 @@ def get_neurodata_types(filepath): return out -def _scan_neurodata_types(grp): +def _scan_neurodata_types(grp: h5py.File) -> List[Tuple[Any, Any]]: out = [] if "neurodata_type" in grp.attrs: out.append((grp.attrs["neurodata_type"], grp.attrs.get("description", None))) @@ -179,7 +182,7 @@ def _scan_neurodata_types(grp): return out -def _get_pynwb_metadata(path): +def _get_pynwb_metadata(path: Union[str, Path]) -> Dict[str, Any]: out = {} with NWBHDF5IO(path, "r", load_namespaces=True) as io: nwb = io.read() @@ -231,7 +234,7 @@ def _get_pynwb_metadata(path): @validate_cache.memoize_path -def validate(path, devel_debug=False): +def validate(path: Union[str, Path], devel_debug: bool = False) -> List[str]: """Run validation on a file and return errors In case of an exception being thrown, an error message added to the @@ -242,6 +245,7 @@ def validate(path, devel_debug=False): path: str or Path """ path = str(path) # Might come in as pathlib's PATH + errors: List[str] try: with pynwb.NWBHDF5IO(path, "r", load_namespaces=True) as reader: errors = pynwb.validate(reader) @@ -299,7 +303,7 @@ def validate(path, devel_debug=False): _ignored_benign_pynwb_warnings = False -def ignore_benign_pynwb_warnings(): +def ignore_benign_pynwb_warnings() -> None: global _ignored_benign_pynwb_warnings if _ignored_benign_pynwb_warnings: return @@ -312,7 +316,7 @@ def ignore_benign_pynwb_warnings(): _ignored_benign_pynwb_warnings = True -def get_object_id(path): +def get_object_id(path: Union[str, Path]) -> Any: """Read, if present an object_id if not available -- would simply raise a corresponding exception @@ -321,7 +325,12 @@ def get_object_id(path): return f.attrs["object_id"] -def make_nwb_file(filename, *args, cache_spec=False, **kwargs): +StrPath = TypeVar("StrPath", str, Path) + + +def make_nwb_file( + filename: StrPath, *args: Any, cache_spec: bool = False, **kwargs: Any +) -> StrPath: """A little helper to produce an .nwb file in the path using NWBFile Note: it doesn't cache_spec by default @@ -332,7 +341,7 @@ def make_nwb_file(filename, *args, cache_spec=False, **kwargs): return filename -def copy_nwb_file(src, dest): +def copy_nwb_file(src: Union[str, Path], dest: Union[str, Path]) -> str: """ "Copy" .nwb file by opening and saving into a new path. New file (`dest`) then should have new `object_id` attribute, and thus be @@ -360,17 +369,17 @@ def copy_nwb_file(src, dest): data = ior.read() data.generate_new_id() iow.export(ior, nwbfile=data) - return dest + return str(dest) @metadata_cache.memoize_path -def nwb_has_external_links(filepath): +def nwb_has_external_links(filepath: Union[str, Path]) -> bool: with h5py.File(filepath, "r") as fp: visited = set() # cannot use `file.visititems` because it skips external links # (https://github.com/h5py/h5py/issues/671) - def visit(path="/"): + def visit(path: str = "/") -> bool: if isinstance(fp[path], h5py.Group): for key in fp[path].keys(): key_path = path + "/" + key diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 1dd85bbb9..af07fad86 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from dataclasses import dataclass, field from datetime import datetime import logging @@ -6,11 +8,13 @@ import re import shutil from subprocess import DEVNULL, check_output, run +import sys import tempfile from time import sleep -from typing import Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Callable, Dict, Iterator, List, Optional, Union from uuid import uuid4 +from _pytest.fixtures import FixtureRequest from click.testing import CliRunner from dandischema.consts import DANDI_SCHEMA_VERSION from dateutil.tz import tzutc @@ -32,16 +36,16 @@ @pytest.fixture(autouse=True) -def capture_all_logs(caplog): +def capture_all_logs(caplog: pytest.LogCaptureFixture) -> None: caplog.set_level(logging.DEBUG, logger="dandi") # TODO: move into some common fixtures. We might produce a number of files # and also carry some small ones directly in git for regression testing @pytest.fixture(scope="session") -def simple1_nwb_metadata(tmpdir_factory): +def simple1_nwb_metadata() -> Dict[str, Any]: # very simple assignment with the same values as the key with 1 as suffix - metadata = {f: "{}1".format(f) for f in metadata_nwb_file_fields} + metadata: Dict[str, Any] = {f: f"{f}1" for f in metadata_nwb_file_fields} metadata["identifier"] = uuid4().hex # subject_fields @@ -60,7 +64,9 @@ def simple1_nwb_metadata(tmpdir_factory): @pytest.fixture(scope="session") -def simple1_nwb(simple1_nwb_metadata, tmpdir_factory): +def simple1_nwb( + simple1_nwb_metadata: Dict[str, Any], tmpdir_factory: pytest.TempdirFactory +) -> str: return make_nwb_file( str(tmpdir_factory.mktemp("simple1").join("simple1.nwb")), **simple1_nwb_metadata, @@ -68,7 +74,9 @@ def simple1_nwb(simple1_nwb_metadata, tmpdir_factory): @pytest.fixture(scope="session") -def simple2_nwb(simple1_nwb_metadata, tmpdir_factory): +def simple2_nwb( + simple1_nwb_metadata: Dict[str, Any], tmpdir_factory: pytest.TempdirFactory +) -> str: """With a subject""" return make_nwb_file( str(tmpdir_factory.mktemp("simple2").join("simple2.nwb")), @@ -83,7 +91,9 @@ def simple2_nwb(simple1_nwb_metadata, tmpdir_factory): @pytest.fixture(scope="session") -def organized_nwb_dir(simple2_nwb, tmp_path_factory): +def organized_nwb_dir( + simple2_nwb: str, tmp_path_factory: pytest.TempPathFactory +) -> Path: tmp_path = tmp_path_factory.mktemp("organized_nwb_dir") (tmp_path / dandiset_metadata_file).write_text("{}\n") r = CliRunner().invoke( @@ -94,12 +104,16 @@ def organized_nwb_dir(simple2_nwb, tmp_path_factory): @pytest.fixture(scope="session") -def organized_nwb_dir2(simple1_nwb_metadata, simple2_nwb, tmp_path_factory): +def organized_nwb_dir2( + simple1_nwb_metadata: Dict[str, Any], + simple2_nwb: str, + tmp_path_factory: pytest.TempPathFactory, +) -> Path: tmp_path = tmp_path_factory.mktemp("organized_nwb_dir2") # need to copy first and then use -f move since we will create one more # file to be "organized" - shutil.copy(str(simple2_nwb), str(tmp_path)) + shutil.copy(simple2_nwb, tmp_path) make_nwb_file( str(tmp_path / "simple3.nwb"), subject=pynwb.file.Subject( @@ -117,13 +131,32 @@ def organized_nwb_dir2(simple1_nwb_metadata, simple2_nwb, tmp_path_factory): return tmp_path -def get_gitrepo_fixture(url, committish=None, scope="session"): +if TYPE_CHECKING: + if sys.version_info >= (3, 8): + from typing import Literal + else: + from typing_extensions import Literal + + Scope = Union[ + Literal["session"], + Literal["package"], + Literal["module"], + Literal["class"], + Literal["function"], + ] + + +def get_gitrepo_fixture( + url: str, + committish: Optional[str] = None, + scope: Scope = "session", +) -> Callable[[], Iterator[str]]: if committish: raise NotImplementedError() @pytest.fixture(scope=scope) - def fixture(): + def fixture() -> Iterator[str]: skipif.no_network() skipif.no_git() @@ -153,7 +186,7 @@ def fixture(): @pytest.fixture(scope="session") -def docker_compose_setup(): +def docker_compose_setup() -> Iterator[Dict[str, str]]: skipif.no_network() skipif.no_docker_engine() @@ -261,7 +294,7 @@ def api_url(self) -> str: @pytest.fixture(scope="session") -def local_dandi_api(docker_compose_setup): +def local_dandi_api(docker_compose_setup: Dict[str, str]) -> Iterator[DandiAPI]: instance_id = "dandi-api-local-docker-tests" instance = known_instances[instance_id] api_key = docker_compose_setup["django_api_key"] @@ -286,7 +319,9 @@ class SampleDandiset: def client(self) -> DandiAPIClient: return self.api.client - def upload(self, paths: Optional[List[str]] = None, **kwargs: Any) -> None: + def upload( + self, paths: Optional[List[Union[str, Path]]] = None, **kwargs: Any + ) -> None: with pytest.MonkeyPatch().context() as m: m.setenv("DANDI_API_KEY", self.api.api_key) upload( @@ -299,7 +334,11 @@ def upload(self, paths: Optional[List[str]] = None, **kwargs: Any) -> None: @pytest.fixture() -def new_dandiset(local_dandi_api, request, tmp_path_factory): +def new_dandiset( + local_dandi_api: DandiAPI, + request: FixtureRequest, + tmp_path_factory: pytest.TempPathFactory, +) -> SampleDandiset: d = local_dandi_api.client.create_dandiset( f"Sample Dandiset for {request.node.name}", # Minimal metadata needed to create a publishable Dandiset: @@ -330,7 +369,7 @@ def new_dandiset(local_dandi_api, request, tmp_path_factory): @pytest.fixture() -def text_dandiset(new_dandiset): +def text_dandiset(new_dandiset: SampleDandiset) -> SampleDandiset: (new_dandiset.dspath / "file.txt").write_text("This is test text.\n") (new_dandiset.dspath / "subdir1").mkdir() (new_dandiset.dspath / "subdir1" / "apple.txt").write_text("Apple\n") @@ -343,7 +382,7 @@ def text_dandiset(new_dandiset): @pytest.fixture() -def zarr_dandiset(new_dandiset): +def zarr_dandiset(new_dandiset: SampleDandiset) -> SampleDandiset: zarr.save( new_dandiset.dspath / "sample.zarr", np.arange(1000), np.arange(1000, 0, -1) ) diff --git a/dandi/tests/test_dandiapi.py b/dandi/tests/test_dandiapi.py index 82564c59a..69afec028 100644 --- a/dandi/tests/test_dandiapi.py +++ b/dandi/tests/test_dandiapi.py @@ -2,16 +2,20 @@ from datetime import datetime, timezone import logging import os.path +from pathlib import Path import random import re from shutil import rmtree +from typing import Union import anys import click from dandischema.models import UUID_PATTERN, DigestType, get_schema_version import pytest +from pytest_mock import MockerFixture import responses +from .fixtures import DandiAPI, SampleDandiset from .skip import mark from .. import dandiapi from ..consts import ( @@ -26,7 +30,7 @@ from ..utils import list_paths -def test_upload(new_dandiset, simple1_nwb, tmp_path): +def test_upload(new_dandiset: SampleDandiset, simple1_nwb: str, tmp_path: Path) -> None: d = new_dandiset.dandiset assert d.version_id == DRAFT d.upload_raw_asset(simple1_nwb, {"path": "testing/simple1.nwb"}) @@ -38,7 +42,7 @@ def test_upload(new_dandiset, simple1_nwb, tmp_path): assert paths[0].stat().st_size == os.path.getsize(simple1_nwb) -def test_publish_and_manipulate(new_dandiset, tmp_path): +def test_publish_and_manipulate(new_dandiset: SampleDandiset, tmp_path: Path) -> None: d = new_dandiset.dandiset dandiset_id = d.identifier dspath = new_dandiset.dspath @@ -102,7 +106,7 @@ def test_publish_and_manipulate(new_dandiset, tmp_path): assert file_in_version.read_text() == "This is test text.\n" -def test_get_asset_metadata(new_dandiset, simple1_nwb): +def test_get_asset_metadata(new_dandiset: SampleDandiset, simple1_nwb: str) -> None: d = new_dandiset.dandiset d.upload_raw_asset(simple1_nwb, {"path": "testing/simple1.nwb", "foo": "bar"}) (asset,) = d.get_assets() @@ -112,7 +116,7 @@ def test_get_asset_metadata(new_dandiset, simple1_nwb): assert metadata["foo"] == "bar" -def test_large_upload(new_dandiset, tmp_path): +def test_large_upload(new_dandiset: SampleDandiset, tmp_path: Path) -> None: asset_file = tmp_path / "asset.dat" meg = bytes(random.choices(range(256), k=1 << 20)) with asset_file.open("wb") as fp: @@ -121,7 +125,9 @@ def test_large_upload(new_dandiset, tmp_path): new_dandiset.dandiset.upload_raw_asset(asset_file, {"path": "testing/asset.dat"}) -def test_authenticate_bad_key_good_key_input(local_dandi_api, mocker, monkeypatch): +def test_authenticate_bad_key_good_key_input( + local_dandi_api: DandiAPI, mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: good_key = local_dandi_api.api_key bad_key = "1234567890" client_name = local_dandi_api.instance_id @@ -153,7 +159,9 @@ def test_authenticate_bad_key_good_key_input(local_dandi_api, mocker, monkeypatc confirm_mock.assert_called_once_with("API key is invalid; enter another?") -def test_authenticate_good_key_keyring(local_dandi_api, mocker, monkeypatch): +def test_authenticate_good_key_keyring( + local_dandi_api: DandiAPI, mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: good_key = local_dandi_api.api_key client_name = local_dandi_api.instance_id app_id = f"dandi-api-{client_name}" @@ -181,8 +189,8 @@ def test_authenticate_good_key_keyring(local_dandi_api, mocker, monkeypatch): def test_authenticate_bad_key_keyring_good_key_input( - local_dandi_api, mocker, monkeypatch -): + local_dandi_api: DandiAPI, mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: good_key = local_dandi_api.api_key bad_key = "1234567890" client_name = local_dandi_api.instance_id @@ -213,7 +221,7 @@ def test_authenticate_bad_key_keyring_good_key_input( @mark.skipif_no_network -def test_get_content_url(tmp_path): +def test_get_content_url(tmp_path: Path) -> None: with DandiAPIClient.for_dandi_instance("dandi") as client: asset = client.get_dandiset("000027", "draft").get_asset_by_path( "sub-RAT123/sub-RAT123.nwb" @@ -232,7 +240,7 @@ def test_get_content_url(tmp_path): @mark.skipif_no_network -def test_get_content_url_regex(tmp_path): +def test_get_content_url_regex(tmp_path: Path) -> None: with DandiAPIClient.for_dandi_instance("dandi") as client: asset = client.get_dandiset("000027", "draft").get_asset_by_path( "sub-RAT123/sub-RAT123.nwb" @@ -245,7 +253,7 @@ def test_get_content_url_regex(tmp_path): @mark.skipif_no_network -def test_get_content_url_follow_one_redirects_strip_query(): +def test_get_content_url_follow_one_redirects_strip_query() -> None: with DandiAPIClient.for_dandi_instance("dandi") as client: asset = client.get_dandiset("000027", "draft").get_asset_by_path( "sub-RAT123/sub-RAT123.nwb" @@ -257,7 +265,7 @@ def test_get_content_url_follow_one_redirects_strip_query(): ) -def test_remote_asset_json_dict(text_dandiset): +def test_remote_asset_json_dict(text_dandiset: SampleDandiset) -> None: asset = text_dandiset.dandiset.get_asset_by_path("file.txt") assert asset.json_dict() == { "asset_id": anys.ANY_STR, @@ -270,7 +278,7 @@ def test_remote_asset_json_dict(text_dandiset): @responses.activate -def test_check_schema_version_matches_default(): +def test_check_schema_version_matches_default() -> None: responses.add( responses.GET, "https://test.nil/api/info/", @@ -281,7 +289,7 @@ def test_check_schema_version_matches_default(): @responses.activate -def test_check_schema_version_mismatch(): +def test_check_schema_version_mismatch() -> None: responses.add( responses.GET, "https://test.nil/api/info/", json={"schema_version": "4.5.6"} ) @@ -295,12 +303,14 @@ def test_check_schema_version_mismatch(): ) -def test_get_dandisets(text_dandiset): +def test_get_dandisets(text_dandiset: SampleDandiset) -> None: dandisets = list(text_dandiset.client.get_dandisets()) assert sum(1 for d in dandisets if d.identifier == text_dandiset.dandiset_id) == 1 -def test_get_dandiset_lazy(mocker, text_dandiset): +def test_get_dandiset_lazy( + mocker: MockerFixture, text_dandiset: SampleDandiset +) -> None: client = text_dandiset.client get_spy = mocker.spy(client, "get") dandiset = client.get_dandiset(text_dandiset.dandiset_id, DRAFT, lazy=True) @@ -320,7 +330,9 @@ def test_get_dandiset_lazy(mocker, text_dandiset): get_spy.assert_not_called() -def test_get_dandiset_non_lazy(mocker, text_dandiset): +def test_get_dandiset_non_lazy( + mocker: MockerFixture, text_dandiset: SampleDandiset +) -> None: client = text_dandiset.client get_spy = mocker.spy(client, "get") dandiset = client.get_dandiset(text_dandiset.dandiset_id, DRAFT, lazy=False) @@ -341,7 +353,7 @@ def test_get_dandiset_non_lazy(mocker, text_dandiset): @pytest.mark.parametrize("lazy", [True, False]) -def test_get_dandiset_no_version_id(lazy, text_dandiset): +def test_get_dandiset_no_version_id(lazy: bool, text_dandiset: SampleDandiset) -> None: dandiset = text_dandiset.client.get_dandiset(text_dandiset.dandiset_id, lazy=lazy) assert dandiset.version_id == DRAFT assert isinstance(dandiset.created, datetime) @@ -358,7 +370,7 @@ def test_get_dandiset_no_version_id(lazy, text_dandiset): @pytest.mark.parametrize("lazy", [True, False]) -def test_get_dandiset_published(lazy, text_dandiset): +def test_get_dandiset_published(lazy: bool, text_dandiset: SampleDandiset) -> None: d = text_dandiset.dandiset d.wait_until_valid() v = d.publish().version.identifier @@ -379,7 +391,9 @@ def test_get_dandiset_published(lazy, text_dandiset): @pytest.mark.parametrize("lazy", [True, False]) -def test_get_dandiset_published_no_version_id(lazy, text_dandiset): +def test_get_dandiset_published_no_version_id( + lazy: bool, text_dandiset: SampleDandiset +) -> None: d = text_dandiset.dandiset d.wait_until_valid() v = d.publish().version.identifier @@ -400,7 +414,9 @@ def test_get_dandiset_published_no_version_id(lazy, text_dandiset): @pytest.mark.parametrize("lazy", [True, False]) -def test_get_dandiset_published_draft(lazy, text_dandiset): +def test_get_dandiset_published_draft( + lazy: bool, text_dandiset: SampleDandiset +) -> None: d = text_dandiset.dandiset d.wait_until_valid() v = d.publish().version.identifier @@ -421,7 +437,9 @@ def test_get_dandiset_published_draft(lazy, text_dandiset): @pytest.mark.parametrize("lazy", [True, False]) -def test_get_dandiset_published_other_version(lazy, text_dandiset): +def test_get_dandiset_published_other_version( + lazy: bool, text_dandiset: SampleDandiset +) -> None: d = text_dandiset.dandiset d.wait_until_valid() v1 = d.publish().version.identifier @@ -449,7 +467,7 @@ def test_get_dandiset_published_other_version(lazy, text_dandiset): assert sorted(vobj.identifier for vobj in versions) == [v1, v2, DRAFT] -def test_set_asset_metadata(text_dandiset): +def test_set_asset_metadata(text_dandiset: SampleDandiset) -> None: asset = text_dandiset.dandiset.get_asset_by_path("file.txt") md = asset.get_metadata() md.blobDateModified = datetime(2038, 1, 19, 3, 14, 7, tzinfo=timezone.utc) @@ -457,7 +475,7 @@ def test_set_asset_metadata(text_dandiset): assert asset.get_raw_metadata()["blobDateModified"] == "2038-01-19T03:14:07+00:00" -def test_remote_dandiset_json_dict(text_dandiset): +def test_remote_dandiset_json_dict(text_dandiset: SampleDandiset) -> None: data = text_dandiset.dandiset.json_dict() assert data == { "identifier": anys.AnyFullmatch(dandiset_identifier_regex), @@ -479,7 +497,7 @@ def test_remote_dandiset_json_dict(text_dandiset): assert data["draft_version"] == data["version"] -def test_set_dandiset_metadata(text_dandiset): +def test_set_dandiset_metadata(text_dandiset: SampleDandiset) -> None: dandiset = text_dandiset.dandiset md = dandiset.get_metadata() md.description = "A test Dandiset with altered metadata" @@ -498,19 +516,23 @@ def test_set_dandiset_metadata(text_dandiset): (None, r"[0-9a-f]{32}-\d{1,5}"), ], ) -def test_get_raw_digest(digest_type, digest_regex, text_dandiset): +def test_get_raw_digest( + digest_type: Union[str, DigestType, None], + digest_regex: str, + text_dandiset: SampleDandiset, +) -> None: asset = text_dandiset.dandiset.get_asset_by_path("file.txt") d = asset.get_raw_digest(digest_type) assert re.fullmatch(digest_regex, d) -def test_get_raw_digest_nonexistent(text_dandiset): +def test_get_raw_digest_nonexistent(text_dandiset: SampleDandiset) -> None: asset = text_dandiset.dandiset.get_asset_by_path("file.txt") with pytest.raises(NotFoundError): asset.get_raw_digest("md5") -def test_refresh(text_dandiset): +def test_refresh(text_dandiset: SampleDandiset) -> None: dandiset = text_dandiset.dandiset mtime = dandiset.version.modified md = dandiset.get_metadata() @@ -526,7 +548,9 @@ def test_refresh(text_dandiset): assert dandiset.most_recent_published_version is not None -def test_get_asset_with_and_without_metadata(mocker, text_dandiset): +def test_get_asset_with_and_without_metadata( + mocker: MockerFixture, text_dandiset: SampleDandiset +) -> None: path_asset = text_dandiset.dandiset.get_asset_by_path("file.txt") id_asset = text_dandiset.dandiset.get_asset(path_asset.identifier) assert path_asset == id_asset @@ -541,7 +565,7 @@ def test_get_asset_with_and_without_metadata(mocker, text_dandiset): @responses.activate -def test_retry_logging(caplog): +def test_retry_logging(caplog: pytest.LogCaptureFixture) -> None: responses.add(responses.GET, "https://test.nil/api/info/", status=503) responses.add(responses.GET, "https://test.nil/api/info/", status=503) responses.add(responses.GET, "https://test.nil/api/info/", json={"foo": "bar"}) @@ -566,7 +590,7 @@ def test_retry_logging(caplog): assert ("dandi", logging.DEBUG, "Response: 200") in caplog.record_tuples -def test_get_assets_order(text_dandiset): +def test_get_assets_order(text_dandiset: SampleDandiset) -> None: assert [ asset.path for asset in text_dandiset.dandiset.get_assets(order="path") ] == ["file.txt", "subdir1/apple.txt", "subdir2/banana.txt", "subdir2/coconut.txt"] @@ -575,7 +599,7 @@ def test_get_assets_order(text_dandiset): ] == ["subdir2/coconut.txt", "subdir2/banana.txt", "subdir1/apple.txt", "file.txt"] -def test_get_assets_with_path_prefix(text_dandiset): +def test_get_assets_with_path_prefix(text_dandiset: SampleDandiset) -> None: assert sorted( asset.path for asset in text_dandiset.dandiset.get_assets_with_path_prefix("subdir") diff --git a/dandi/tests/test_dandiarchive.py b/dandi/tests/test_dandiarchive.py index d5194c86b..a3292598a 100644 --- a/dandi/tests/test_dandiarchive.py +++ b/dandi/tests/test_dandiarchive.py @@ -9,12 +9,15 @@ AssetPathPrefixURL, BaseAssetIDURL, DandisetURL, + ParsedDandiURL, follow_redirect, parse_dandi_url, ) from dandi.exceptions import NotFoundError, UnknownURLError from dandi.tests.skip import mark +from .fixtures import DandiAPI, SampleDandiset + @pytest.mark.parametrize( "url,parsed_url", @@ -290,7 +293,7 @@ ), ], ) -def test_parse_api_url(url, parsed_url): +def test_parse_api_url(url: str, parsed_url: ParsedDandiURL) -> None: assert parse_dandi_url(url) == parsed_url @@ -303,12 +306,12 @@ def test_parse_api_url(url, parsed_url): # "https://identifiers.org/DANDI:000027/draft", ], ) -def test_parse_bad_api_url(url): +def test_parse_bad_api_url(url: str) -> None: with pytest.raises(UnknownURLError): parse_dandi_url(url) -def test_parse_dandi_url_unknown_instance(): +def test_parse_dandi_url_unknown_instance() -> None: with pytest.raises(UnknownURLError) as excinfo: parse_dandi_url("dandi://not-an-instance/000001") assert str(excinfo.value) == ( @@ -318,14 +321,14 @@ def test_parse_dandi_url_unknown_instance(): @mark.skipif_no_network -def test_parse_dandi_url_not_found(): +def test_parse_dandi_url_not_found() -> None: # Unlikely this one would ever come to existence with pytest.raises(NotFoundError): parse_dandi_url("https://dandiarchive.org/dandiset/999999") @mark.skipif_no_network -def test_follow_redirect(): +def test_follow_redirect() -> None: assert ( follow_redirect("https://bit.ly/dandi12") == "https://gui.dandiarchive.org/#/file-browser/folder/5e72b6ac3da50caa9adb0498" @@ -333,7 +336,7 @@ def test_follow_redirect(): @responses.activate -def test_parse_gui_new_redirect(): +def test_parse_gui_new_redirect() -> None: redirector_base = known_instances["dandi"].redirector responses.add( responses.GET, @@ -359,7 +362,9 @@ def test_parse_gui_new_redirect(): @pytest.mark.parametrize("version_suffix", ["", "@draft", "@0.999999.9999"]) -def test_get_nonexistent_dandiset(local_dandi_api, version_suffix): +def test_get_nonexistent_dandiset( + local_dandi_api: DandiAPI, version_suffix: str +) -> None: url = f"dandi://{local_dandi_api.instance_id}/999999{version_suffix}" parsed_url = parse_dandi_url(url) client = local_dandi_api.client @@ -374,7 +379,9 @@ def test_get_nonexistent_dandiset(local_dandi_api, version_suffix): @pytest.mark.parametrize("version", ["draft", "0.999999.9999"]) -def test_get_nonexistent_dandiset_asset_id(local_dandi_api, version): +def test_get_nonexistent_dandiset_asset_id( + local_dandi_api: DandiAPI, version: str +) -> None: url = ( f"{local_dandi_api.api_url}/dandisets/999999/versions/{version}" "/assets/00000000-0000-0000-0000-000000000000/" @@ -387,7 +394,7 @@ def test_get_nonexistent_dandiset_asset_id(local_dandi_api, version): assert str(excinfo.value) == "No such Dandiset: '999999'" -def test_get_dandiset_nonexistent_asset_id(text_dandiset): +def test_get_dandiset_nonexistent_asset_id(text_dandiset: SampleDandiset) -> None: url = ( f"{text_dandiset.api.api_url}/dandisets/" f"{text_dandiset.dandiset_id}/versions/draft/assets/" @@ -404,7 +411,7 @@ def test_get_dandiset_nonexistent_asset_id(text_dandiset): ) -def test_get_nonexistent_asset_id(local_dandi_api): +def test_get_nonexistent_asset_id(local_dandi_api: DandiAPI) -> None: url = f"{local_dandi_api.api_url}/assets/00000000-0000-0000-0000-000000000000/" parsed_url = parse_dandi_url(url) client = local_dandi_api.client @@ -415,7 +422,9 @@ def test_get_nonexistent_asset_id(local_dandi_api): @pytest.mark.parametrize("version_suffix", ["", "@draft", "@0.999999.9999"]) -def test_get_nonexistent_dandiset_asset_path(local_dandi_api, version_suffix): +def test_get_nonexistent_dandiset_asset_path( + local_dandi_api: DandiAPI, version_suffix: str +) -> None: url = f"dandi://{local_dandi_api.instance_id}/999999{version_suffix}/does/not/exist" parsed_url = parse_dandi_url(url) client = local_dandi_api.client @@ -425,7 +434,7 @@ def test_get_nonexistent_dandiset_asset_path(local_dandi_api, version_suffix): assert str(excinfo.value) == "No such Dandiset: '999999'" -def test_get_nonexistent_asset_path(text_dandiset): +def test_get_nonexistent_asset_path(text_dandiset: SampleDandiset) -> None: url = ( f"dandi://{text_dandiset.api.instance_id}/" f"{text_dandiset.dandiset_id}/does/not/exist" @@ -439,7 +448,9 @@ def test_get_nonexistent_asset_path(text_dandiset): @pytest.mark.parametrize("version_suffix", ["", "@draft", "@0.999999.9999"]) -def test_get_nonexistent_dandiset_asset_folder(local_dandi_api, version_suffix): +def test_get_nonexistent_dandiset_asset_folder( + local_dandi_api: DandiAPI, version_suffix: str +) -> None: url = ( f"dandi://{local_dandi_api.instance_id}/999999{version_suffix}" "/does/not/exist/" @@ -452,7 +463,7 @@ def test_get_nonexistent_dandiset_asset_folder(local_dandi_api, version_suffix): assert str(excinfo.value) == "No such Dandiset: '999999'" -def test_get_nonexistent_asset_folder(text_dandiset): +def test_get_nonexistent_asset_folder(text_dandiset: SampleDandiset) -> None: url = ( f"dandi://{text_dandiset.api.instance_id}/" f"{text_dandiset.dandiset_id}/does/not/exist/" @@ -464,7 +475,9 @@ def test_get_nonexistent_asset_folder(text_dandiset): @pytest.mark.parametrize("version", ["draft", "0.999999.9999"]) -def test_get_nonexistent_dandiset_asset_prefix(local_dandi_api, version): +def test_get_nonexistent_dandiset_asset_prefix( + local_dandi_api: DandiAPI, version: str +) -> None: url = ( f"{local_dandi_api.api_url}/dandisets/999999/versions/{version}" "/assets/?path=does/not/exist" @@ -477,7 +490,7 @@ def test_get_nonexistent_dandiset_asset_prefix(local_dandi_api, version): assert str(excinfo.value) == "No such Dandiset: '999999'" -def test_get_nonexistent_asset_prefix(text_dandiset): +def test_get_nonexistent_asset_prefix(text_dandiset: SampleDandiset) -> None: url = ( f"{text_dandiset.api.api_url}/dandisets/" f"{text_dandiset.dandiset_id}/versions/draft/assets/?path=does/not/exist" diff --git a/dandi/tests/test_dandiset.py b/dandi/tests/test_dandiset.py index 931f3de30..39653345a 100644 --- a/dandi/tests/test_dandiset.py +++ b/dandi/tests/test_dandiset.py @@ -1,7 +1,7 @@ from ..dandiset import Dandiset -def test_get_dandiset_record(): +def test_get_dandiset_record() -> None: out = Dandiset.get_dandiset_record({"identifier": "000000"}) # Should have only header with "DO NOT EDIT" assert out.startswith("# DO NOT EDIT") diff --git a/dandi/tests/test_delete.py b/dandi/tests/test_delete.py index 087e56a12..6aa690833 100644 --- a/dandi/tests/test_delete.py +++ b/dandi/tests/test_delete.py @@ -1,7 +1,10 @@ from pathlib import Path +from typing import List import pytest +from pytest_mock import MockerFixture +from .fixtures import DandiAPI, SampleDandiset from ..consts import DRAFT, dandiset_metadata_file from ..dandiapi import RESTFullAPIClient from ..delete import delete @@ -54,7 +57,14 @@ ), ], ) -def test_delete_paths(mocker, monkeypatch, text_dandiset, tmp_path, paths, remainder): +def test_delete_paths( + mocker: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + text_dandiset: SampleDandiset, + tmp_path: Path, + paths: List[str], + remainder: List[Path], +) -> None: monkeypatch.chdir(text_dandiset.dspath) monkeypatch.setenv("DANDI_API_KEY", text_dandiset.api.api_key) instance = text_dandiset.api.instance_id @@ -69,12 +79,17 @@ def test_delete_paths(mocker, monkeypatch, text_dandiset, tmp_path, paths, remai delete_spy.assert_called() download(text_dandiset.dandiset.version_api_url, tmp_path) assert list_paths(tmp_path) == [ - tmp_path / dandiset_id / f for f in ["dandiset.yaml"] + remainder + tmp_path / dandiset_id / f for f in [Path("dandiset.yaml")] + remainder ] @pytest.mark.parametrize("confirm", [True, False]) -def test_delete_path_confirm(confirm, mocker, monkeypatch, text_dandiset): +def test_delete_path_confirm( + confirm: bool, + mocker: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + text_dandiset: SampleDandiset, +) -> None: monkeypatch.chdir(text_dandiset.dspath) monkeypatch.setenv("DANDI_API_KEY", text_dandiset.api.api_key) instance = text_dandiset.api.instance_id @@ -91,7 +106,11 @@ def test_delete_path_confirm(confirm, mocker, monkeypatch, text_dandiset): delete_spy.assert_not_called() -def test_delete_path_pyout(mocker, monkeypatch, text_dandiset): +def test_delete_path_pyout( + mocker: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + text_dandiset: SampleDandiset, +) -> None: monkeypatch.chdir(text_dandiset.dspath) monkeypatch.setenv("DANDI_API_KEY", text_dandiset.api.api_key) instance = text_dandiset.api.instance_id @@ -116,7 +135,12 @@ def test_delete_path_pyout(mocker, monkeypatch, text_dandiset): ], ], ) -def test_delete_dandiset(mocker, monkeypatch, text_dandiset, paths): +def test_delete_dandiset( + mocker: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + text_dandiset: SampleDandiset, + paths: List[str], +) -> None: monkeypatch.chdir(text_dandiset.dspath) monkeypatch.setenv("DANDI_API_KEY", text_dandiset.api.api_key) instance = text_dandiset.api.instance_id @@ -134,7 +158,12 @@ def test_delete_dandiset(mocker, monkeypatch, text_dandiset, paths): @pytest.mark.parametrize("confirm", [True, False]) -def test_delete_dandiset_confirm(confirm, mocker, monkeypatch, text_dandiset): +def test_delete_dandiset_confirm( + confirm: bool, + mocker: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + text_dandiset: SampleDandiset, +) -> None: monkeypatch.chdir(text_dandiset.dspath) monkeypatch.setenv("DANDI_API_KEY", text_dandiset.api.api_key) instance = text_dandiset.api.instance_id @@ -151,7 +180,11 @@ def test_delete_dandiset_confirm(confirm, mocker, monkeypatch, text_dandiset): delete_spy.assert_not_called() -def test_delete_dandiset_mismatch(mocker, monkeypatch, text_dandiset): +def test_delete_dandiset_mismatch( + mocker: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + text_dandiset: SampleDandiset, +) -> None: monkeypatch.chdir(text_dandiset.dspath) monkeypatch.setenv("DANDI_API_KEY", text_dandiset.api.api_key) instance = text_dandiset.api.instance_id @@ -176,7 +209,11 @@ def test_delete_dandiset_mismatch(mocker, monkeypatch, text_dandiset): delete_spy.assert_not_called() -def test_delete_instance_mismatch(mocker, monkeypatch, text_dandiset): +def test_delete_instance_mismatch( + mocker: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + text_dandiset: SampleDandiset, +) -> None: monkeypatch.chdir(text_dandiset.dspath) monkeypatch.setenv("DANDI_API_KEY", text_dandiset.api.api_key) instance = text_dandiset.api.instance_id @@ -201,7 +238,9 @@ def test_delete_instance_mismatch(mocker, monkeypatch, text_dandiset): delete_spy.assert_not_called() -def test_delete_nonexistent_dandiset(local_dandi_api, mocker, monkeypatch): +def test_delete_nonexistent_dandiset( + local_dandi_api: DandiAPI, mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: monkeypatch.setenv("DANDI_API_KEY", local_dandi_api.api_key) instance = local_dandi_api.instance_id delete_spy = mocker.spy(RESTFullAPIClient, "delete") @@ -216,7 +255,9 @@ def test_delete_nonexistent_dandiset(local_dandi_api, mocker, monkeypatch): delete_spy.assert_not_called() -def test_delete_nonexistent_dandiset_skip_missing(local_dandi_api, mocker, monkeypatch): +def test_delete_nonexistent_dandiset_skip_missing( + local_dandi_api: DandiAPI, mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: monkeypatch.setenv("DANDI_API_KEY", local_dandi_api.api_key) instance = local_dandi_api.instance_id delete_spy = mocker.spy(RESTFullAPIClient, "delete") @@ -230,7 +271,11 @@ def test_delete_nonexistent_dandiset_skip_missing(local_dandi_api, mocker, monke delete_spy.assert_not_called() -def test_delete_nonexistent_asset(mocker, monkeypatch, text_dandiset): +def test_delete_nonexistent_asset( + mocker: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + text_dandiset: SampleDandiset, +) -> None: monkeypatch.setenv("DANDI_API_KEY", text_dandiset.api.api_key) instance = text_dandiset.api.instance_id dandiset_id = text_dandiset.dandiset_id @@ -253,8 +298,11 @@ def test_delete_nonexistent_asset(mocker, monkeypatch, text_dandiset): def test_delete_nonexistent_asset_skip_missing( - mocker, monkeypatch, text_dandiset, tmp_path -): + mocker: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + text_dandiset: SampleDandiset, + tmp_path: Path, +) -> None: monkeypatch.setenv("DANDI_API_KEY", text_dandiset.api.api_key) instance = text_dandiset.api.instance_id dandiset_id = text_dandiset.dandiset_id @@ -279,7 +327,11 @@ def test_delete_nonexistent_asset_skip_missing( ] -def test_delete_nonexistent_asset_folder(mocker, monkeypatch, text_dandiset): +def test_delete_nonexistent_asset_folder( + mocker: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + text_dandiset: SampleDandiset, +) -> None: monkeypatch.setenv("DANDI_API_KEY", text_dandiset.api.api_key) instance = text_dandiset.api.instance_id dandiset_id = text_dandiset.dandiset_id @@ -302,8 +354,11 @@ def test_delete_nonexistent_asset_folder(mocker, monkeypatch, text_dandiset): def test_delete_nonexistent_asset_folder_skip_missing( - mocker, monkeypatch, text_dandiset, tmp_path -): + mocker: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + text_dandiset: SampleDandiset, + tmp_path: Path, +) -> None: monkeypatch.setenv("DANDI_API_KEY", text_dandiset.api.api_key) instance = text_dandiset.api.instance_id dandiset_id = text_dandiset.dandiset_id @@ -328,7 +383,9 @@ def test_delete_nonexistent_asset_folder_skip_missing( ] -def test_delete_version(local_dandi_api, mocker, monkeypatch): +def test_delete_version( + local_dandi_api: DandiAPI, mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: monkeypatch.setenv("DANDI_API_KEY", local_dandi_api.api_key) instance = local_dandi_api.instance_id delete_spy = mocker.spy(RESTFullAPIClient, "delete") @@ -346,7 +403,9 @@ def test_delete_version(local_dandi_api, mocker, monkeypatch): delete_spy.assert_not_called() -def test_delete_no_dandiset(mocker, monkeypatch, tmp_path): +def test_delete_no_dandiset( + mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: monkeypatch.chdir(tmp_path) delete_spy = mocker.spy(RESTFullAPIClient, "delete") with pytest.raises(RuntimeError) as excinfo: diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index 8729b038e..8b031a102 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -1,15 +1,18 @@ import json import os import os.path as op +from pathlib import Path import re from shutil import rmtree -from typing import List, Tuple +from typing import Callable, List, Tuple import numpy as np import pytest +from pytest_mock import MockerFixture import responses import zarr +from .fixtures import SampleDandiset from .skip import mark from .test_helpers import assert_dirtrees_eq from ..consts import DRAFT, dandiset_metadata_file @@ -29,15 +32,14 @@ "https://dandiarchive.org/dandiset/000027/draft", ], ) -def test_download_000027(url, tmpdir): - ret = download(url, tmpdir) +def test_download_000027(url: str, tmp_path: Path) -> None: + ret = download(url, tmp_path) # type: ignore[func-returns-value] assert not ret # we return nothing ATM, might want to "generate" - dsdir = tmpdir / "000027" - downloads = (x.relto(dsdir) for x in dsdir.visit()) - assert sorted(downloads) == [ - "dandiset.yaml", - "sub-RAT123", - op.join("sub-RAT123", "sub-RAT123.nwb"), + dsdir = tmp_path / "000027" + assert list_paths(dsdir, dirs=True) == [ + dsdir / "dandiset.yaml", + dsdir / "sub-RAT123", + dsdir / "sub-RAT123" / "sub-RAT123.nwb", ] # and checksum should be correct as well from ..support.digests import Digester @@ -48,12 +50,12 @@ def test_download_000027(url, tmpdir): ) # redownload - since already exist there should be an exception with pytest.raises(FileExistsError): - download(url, tmpdir) + download(url, tmp_path) # TODO: somehow get that status report about what was downloaded and what not - download(url, tmpdir, existing="skip") # TODO: check that skipped - download(url, tmpdir, existing="overwrite") # TODO: check that redownloaded - download(url, tmpdir, existing="refresh") # TODO: check that skipped (the same) + download(url, tmp_path, existing="skip") # TODO: check that skipped + download(url, tmp_path, existing="overwrite") # TODO: check that redownloaded + download(url, tmp_path, existing="refresh") # TODO: check that skipped (the same) @mark.skipif_no_network @@ -65,12 +67,11 @@ def test_download_000027(url, tmpdir): "https://dandiarchive.org/dandiset/000027/draft", ], ) -def test_download_000027_metadata_only(url, tmpdir): - ret = download(url, tmpdir, get_assets=False) +def test_download_000027_metadata_only(url: str, tmp_path: Path) -> None: + ret = download(url, tmp_path, get_assets=False) # type: ignore[func-returns-value] assert not ret # we return nothing ATM, might want to "generate" - dsdir = tmpdir / "000027" - downloads = (x.relto(dsdir) for x in dsdir.visit()) - assert sorted(downloads) == ["dandiset.yaml"] + dsdir = tmp_path / "000027" + assert list_paths(dsdir, dirs=True) == [dsdir / "dandiset.yaml"] @mark.skipif_no_network @@ -82,18 +83,22 @@ def test_download_000027_metadata_only(url, tmpdir): "https://dandiarchive.org/dandiset/000027/draft", ], ) -def test_download_000027_assets_only(url, tmpdir): - ret = download(url, tmpdir, get_metadata=False) +def test_download_000027_assets_only(url: str, tmp_path: Path) -> None: + ret = download(url, tmp_path, get_metadata=False) # type: ignore[func-returns-value] assert not ret # we return nothing ATM, might want to "generate" - dsdir = tmpdir / "000027" - downloads = (x.relto(dsdir) for x in dsdir.visit()) - assert sorted(downloads) == ["sub-RAT123", op.join("sub-RAT123", "sub-RAT123.nwb")] + dsdir = tmp_path / "000027" + assert list_paths(dsdir, dirs=True) == [ + dsdir / "sub-RAT123", + dsdir / "sub-RAT123" / "sub-RAT123.nwb", + ] @mark.skipif_no_network @pytest.mark.parametrize("resizer", [lambda sz: 0, lambda sz: sz // 2, lambda sz: sz]) @pytest.mark.parametrize("version", ["0.210831.2033", DRAFT]) -def test_download_000027_resume(tmp_path, resizer, version): +def test_download_000027_resume( + tmp_path: Path, resizer: Callable[[int], int], version: str +) -> None: from ..support.digests import Digester url = f"https://dandiarchive.org/dandiset/000027/{version}" @@ -121,7 +126,7 @@ def test_download_000027_resume(tmp_path, resizer, version): assert digester(str(nwb)) == digests -def test_download_newest_version(text_dandiset, tmp_path): +def test_download_newest_version(text_dandiset: SampleDandiset, tmp_path: Path) -> None: dandiset = text_dandiset.dandiset dandiset_id = text_dandiset.dandiset_id download(dandiset.api_url, tmp_path) @@ -135,7 +140,7 @@ def test_download_newest_version(text_dandiset, tmp_path): assert (tmp_path / dandiset_id / "file.txt").read_text() == "This is test text.\n" -def test_download_folder(text_dandiset, tmp_path): +def test_download_folder(text_dandiset: SampleDandiset, tmp_path: Path) -> None: dandiset_id = text_dandiset.dandiset_id download( f"dandi://{text_dandiset.api.instance_id}/{dandiset_id}/subdir2/", tmp_path @@ -149,7 +154,7 @@ def test_download_folder(text_dandiset, tmp_path): assert (tmp_path / "subdir2" / "coconut.txt").read_text() == "Coconut\n" -def test_download_item(text_dandiset, tmp_path): +def test_download_item(text_dandiset: SampleDandiset, tmp_path: Path) -> None: dandiset_id = text_dandiset.dandiset_id download( f"dandi://{text_dandiset.api.instance_id}/{dandiset_id}/subdir2/coconut.txt", @@ -159,14 +164,14 @@ def test_download_item(text_dandiset, tmp_path): assert (tmp_path / "coconut.txt").read_text() == "Coconut\n" -def test_download_asset_id(text_dandiset, tmp_path): +def test_download_asset_id(text_dandiset: SampleDandiset, tmp_path: Path) -> None: asset = text_dandiset.dandiset.get_asset_by_path("subdir2/coconut.txt") download(asset.download_url, tmp_path) assert list_paths(tmp_path, dirs=True) == [tmp_path / "coconut.txt"] assert (tmp_path / "coconut.txt").read_text() == "Coconut\n" -def test_download_asset_id_only(text_dandiset, tmp_path): +def test_download_asset_id_only(text_dandiset: SampleDandiset, tmp_path: Path) -> None: asset = text_dandiset.dandiset.get_asset_by_path("subdir2/coconut.txt") download(asset.base_download_url, tmp_path) assert list_paths(tmp_path, dirs=True) == [tmp_path / "coconut.txt"] @@ -174,7 +179,9 @@ def test_download_asset_id_only(text_dandiset, tmp_path): @pytest.mark.parametrize("confirm", [True, False]) -def test_download_sync(confirm, mocker, text_dandiset, tmp_path): +def test_download_sync( + confirm: bool, mocker: MockerFixture, text_dandiset: SampleDandiset, tmp_path: Path +) -> None: text_dandiset.dandiset.get_asset_by_path("file.txt").delete() dspath = tmp_path / text_dandiset.dandiset_id os.rename(text_dandiset.dspath, dspath) @@ -194,7 +201,9 @@ def test_download_sync(confirm, mocker, text_dandiset, tmp_path): assert (dspath / "file.txt").exists() -def test_download_sync_folder(mocker, text_dandiset): +def test_download_sync_folder( + mocker: MockerFixture, text_dandiset: SampleDandiset +) -> None: text_dandiset.dandiset.get_asset_by_path("file.txt").delete() text_dandiset.dandiset.get_asset_by_path("subdir2/banana.txt").delete() confirm_mock = mocker.patch("dandi.download.abbrev_prompt", return_value="yes") @@ -209,7 +218,12 @@ def test_download_sync_folder(mocker, text_dandiset): assert not (text_dandiset.dspath / "subdir2" / "banana.txt").exists() -def test_download_sync_list(capsys, mocker, text_dandiset, tmp_path): +def test_download_sync_list( + capsys: pytest.CaptureFixture[str], + mocker: MockerFixture, + text_dandiset: SampleDandiset, + tmp_path: Path, +) -> None: text_dandiset.dandiset.get_asset_by_path("file.txt").delete() dspath = tmp_path / text_dandiset.dandiset_id os.rename(text_dandiset.dspath, dspath) @@ -228,7 +242,9 @@ def test_download_sync_list(capsys, mocker, text_dandiset, tmp_path): assert capsys.readouterr().out.splitlines()[-1] == str(dspath / "file.txt") -def test_download_sync_zarr(mocker, zarr_dandiset, tmp_path): +def test_download_sync_zarr( + mocker: MockerFixture, zarr_dandiset: SampleDandiset, tmp_path: Path +) -> None: zarr_dandiset.dandiset.get_asset_by_path("sample.zarr").delete() dspath = tmp_path / zarr_dandiset.dandiset_id os.rename(zarr_dandiset.dspath, dspath) @@ -244,7 +260,9 @@ def test_download_sync_zarr(mocker, zarr_dandiset, tmp_path): @responses.activate -def test_download_no_blobDateModified(text_dandiset, tmp_path): +def test_download_no_blobDateModified( + text_dandiset: SampleDandiset, tmp_path: Path +) -> None: # Regression test for #806 responses.add_passthru(re.compile("^http")) dandiset = text_dandiset.dandiset @@ -256,7 +274,7 @@ def test_download_no_blobDateModified(text_dandiset, tmp_path): @responses.activate -def test_download_metadata404(text_dandiset, tmp_path): +def test_download_metadata404(text_dandiset: SampleDandiset, tmp_path: Path) -> None: responses.add_passthru(re.compile("^http")) asset = text_dandiset.dandiset.get_asset_by_path("subdir1/apple.txt") responses.add(responses.GET, asset.api_url, status=404) @@ -287,7 +305,7 @@ def test_download_metadata404(text_dandiset, tmp_path): ] -def test_download_zarr(tmp_path, zarr_dandiset): +def test_download_zarr(tmp_path: Path, zarr_dandiset: SampleDandiset) -> None: download(zarr_dandiset.dandiset.version_api_url, tmp_path) assert_dirtrees_eq( zarr_dandiset.dspath / "sample.zarr", @@ -295,7 +313,7 @@ def test_download_zarr(tmp_path, zarr_dandiset): ) -def test_download_different_zarr(tmp_path, zarr_dandiset): +def test_download_different_zarr(tmp_path: Path, zarr_dandiset: SampleDandiset) -> None: dd = tmp_path / zarr_dandiset.dandiset_id dd.mkdir() zarr.save(dd / "sample.zarr", np.eye(5)) @@ -308,7 +326,9 @@ def test_download_different_zarr(tmp_path, zarr_dandiset): ) -def test_download_different_zarr_delete_dir(new_dandiset, tmp_path): +def test_download_different_zarr_delete_dir( + new_dandiset: SampleDandiset, tmp_path: Path +) -> None: d = new_dandiset.dandiset dspath = new_dandiset.dspath zarr.save(dspath / "sample.zarr", np.eye(5)) @@ -322,7 +342,9 @@ def test_download_different_zarr_delete_dir(new_dandiset, tmp_path): assert_dirtrees_eq(dspath / "sample.zarr", dd / "sample.zarr") -def test_download_zarr_to_nonzarr_path(tmp_path, zarr_dandiset): +def test_download_zarr_to_nonzarr_path( + tmp_path: Path, zarr_dandiset: SampleDandiset +) -> None: dd = tmp_path / zarr_dandiset.dandiset_id dd.mkdir() (dd / "sample.zarr").write_text("This is not a Zarr.\n") @@ -335,7 +357,9 @@ def test_download_zarr_to_nonzarr_path(tmp_path, zarr_dandiset): ) -def test_download_nonzarr_to_zarr_path(new_dandiset, tmp_path): +def test_download_nonzarr_to_zarr_path( + new_dandiset: SampleDandiset, tmp_path: Path +) -> None: d = new_dandiset.dandiset (new_dandiset.dspath / "sample.zarr").write_text("This is not a Zarr.\n") new_dandiset.upload(allow_any_path=True) diff --git a/dandi/tests/test_fixtures.py b/dandi/tests/test_fixtures.py index fba837190..bcfa9abfd 100644 --- a/dandi/tests/test_fixtures.py +++ b/dandi/tests/test_fixtures.py @@ -1,10 +1,12 @@ # Largely a helper to quickly trigger fixtures to smoke test them # and possibly go through their internal asserts +from pathlib import Path -def test_organized_nwb_dir(organized_nwb_dir): + +def test_organized_nwb_dir(organized_nwb_dir: Path) -> None: pass # Just a smoke test to trigger fixture's asserts -def test_organized_nwb_dir2(organized_nwb_dir2): +def test_organized_nwb_dir2(organized_nwb_dir2: Path) -> None: pass # Just a smoke test to trigger fixture's asserts diff --git a/dandi/tests/test_keyring.py b/dandi/tests/test_keyring.py index 50fea4912..6db9d01f5 100644 --- a/dandi/tests/test_keyring.py +++ b/dandi/tests/test_keyring.py @@ -1,17 +1,20 @@ from pathlib import Path +from typing import Callable, Optional from keyring.backend import get_all_keyring from keyring.backends import fail, null from keyring.errors import KeyringError from keyrings.alt import file as keyfile import pytest +from pytest_mock import MockerFixture +from .fixtures import DandiAPI from ..dandiapi import DandiAPIClient from ..keyring import keyring_lookup, keyringrc_file @pytest.fixture(scope="module", autouse=True) -def ensure_keyring_backends(): +def ensure_keyring_backends() -> None: # Ensure that keyring backends are initialized before running any tests get_all_keyring() # This function caches its results, so it's safe to call if the backends @@ -21,7 +24,9 @@ def ensure_keyring_backends(): # happens to have. -def test_dandi_authenticate_no_env_var(local_dandi_api, monkeypatch, mocker): +def test_dandi_authenticate_no_env_var( + local_dandi_api: DandiAPI, monkeypatch: pytest.MonkeyPatch, mocker: MockerFixture +) -> None: monkeypatch.delenv("DANDI_API_KEY", raising=False) monkeypatch.setenv("PYTHON_KEYRING_BACKEND", "keyring.backends.null.Keyring") inputmock = mocker.patch( @@ -33,7 +38,7 @@ def test_dandi_authenticate_no_env_var(local_dandi_api, monkeypatch, mocker): ) -def setup_keyringrc_no_password(): +def setup_keyringrc_no_password() -> None: rc = keyringrc_file() rc.parent.mkdir(parents=True, exist_ok=True) rc.write_text("[backend]\ndefault-keyring = keyring.backends.null.Keyring\n") @@ -48,7 +53,7 @@ def setup_keyringrc_password(): ) -def setup_keyringrc_fail(): +def setup_keyringrc_fail() -> None: rc = keyringrc_file() rc.parent.mkdir(parents=True, exist_ok=True) rc.write_text("[backend]\ndefault-keyring = keyring.backends.fail.Keyring\n") @@ -59,7 +64,10 @@ def setup_keyringrc_fail(): [None, setup_keyringrc_no_password, setup_keyringrc_password, setup_keyringrc_fail], ) @pytest.mark.usefixtures("tmp_home") -def test_keyring_lookup_envvar_no_password(monkeypatch, rcconfig): +def test_keyring_lookup_envvar_no_password( + monkeypatch: pytest.MonkeyPatch, + rcconfig: Optional[Callable[[], None]], +) -> None: monkeypatch.setenv("PYTHON_KEYRING_BACKEND", "keyring.backends.null.Keyring") if rcconfig is not None: rcconfig() @@ -72,7 +80,10 @@ def test_keyring_lookup_envvar_no_password(monkeypatch, rcconfig): "rcconfig", [None, setup_keyringrc_no_password, setup_keyringrc_fail] ) @pytest.mark.usefixtures("tmp_home") -def test_keyring_lookup_envvar_password(monkeypatch, rcconfig): +def test_keyring_lookup_envvar_password( + monkeypatch: pytest.MonkeyPatch, + rcconfig: Optional[Callable[[], None]], +) -> None: monkeypatch.setenv("PYTHON_KEYRING_BACKEND", "keyrings.alt.file.PlaintextKeyring") keyfile.PlaintextKeyring().set_password( "testservice", "testusername", "testpassword" @@ -89,7 +100,10 @@ def test_keyring_lookup_envvar_password(monkeypatch, rcconfig): [None, setup_keyringrc_no_password, setup_keyringrc_password, setup_keyringrc_fail], ) @pytest.mark.usefixtures("tmp_home") -def test_keyring_lookup_envvar_fail(monkeypatch, rcconfig): +def test_keyring_lookup_envvar_fail( + monkeypatch: pytest.MonkeyPatch, + rcconfig: Optional[Callable[[], None]], +) -> None: monkeypatch.setenv("PYTHON_KEYRING_BACKEND", "keyring.backends.fail.Keyring") if rcconfig is not None: rcconfig() @@ -98,7 +112,7 @@ def test_keyring_lookup_envvar_fail(monkeypatch, rcconfig): @pytest.mark.usefixtures("tmp_home") -def test_keyring_lookup_rccfg_no_password(monkeypatch): +def test_keyring_lookup_rccfg_no_password(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("PYTHON_KEYRING_BACKEND", raising=False) setup_keyringrc_no_password() kb, password = keyring_lookup("testservice", "testusername") @@ -107,7 +121,7 @@ def test_keyring_lookup_rccfg_no_password(monkeypatch): @pytest.mark.usefixtures("tmp_home") -def test_keyring_lookup_rccfg_password(monkeypatch): +def test_keyring_lookup_rccfg_password(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("PYTHON_KEYRING_BACKEND", raising=False) setup_keyringrc_password() kb, password = keyring_lookup("testservice", "testusername") @@ -116,7 +130,7 @@ def test_keyring_lookup_rccfg_password(monkeypatch): @pytest.mark.usefixtures("tmp_home") -def test_keyring_lookup_rccfg_fail(monkeypatch): +def test_keyring_lookup_rccfg_fail(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("PYTHON_KEYRING_BACKEND", raising=False) setup_keyringrc_fail() with pytest.raises(KeyringError): @@ -124,7 +138,9 @@ def test_keyring_lookup_rccfg_fail(monkeypatch): @pytest.mark.usefixtures("tmp_home") -def test_keyring_lookup_default_no_password(mocker, monkeypatch): +def test_keyring_lookup_default_no_password( + mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: monkeypatch.delenv("PYTHON_KEYRING_BACKEND", raising=False) kb0 = null.Keyring() get_keyring = mocker.patch("dandi.keyring.get_keyring", return_value=kb0) @@ -135,7 +151,9 @@ def test_keyring_lookup_default_no_password(mocker, monkeypatch): @pytest.mark.usefixtures("tmp_home") -def test_keyring_lookup_default_password(mocker, monkeypatch): +def test_keyring_lookup_default_password( + mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: monkeypatch.delenv("PYTHON_KEYRING_BACKEND", raising=False) kb0 = keyfile.PlaintextKeyring() kb0.set_password("testservice", "testusername", "testpassword") @@ -151,7 +169,9 @@ class EncryptedFailure(fail.Keyring, keyfile.EncryptedKeyring): @pytest.mark.usefixtures("tmp_home") -def test_keyring_lookup_fail_default_encrypted(mocker, monkeypatch): +def test_keyring_lookup_fail_default_encrypted( + mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: monkeypatch.delenv("PYTHON_KEYRING_BACKEND", raising=False) get_keyring = mocker.patch( "dandi.keyring.get_keyring", return_value=EncryptedFailure() @@ -162,7 +182,9 @@ def test_keyring_lookup_fail_default_encrypted(mocker, monkeypatch): @pytest.mark.usefixtures("tmp_home") -def test_keyring_lookup_encrypted_fallback_exists_no_password(mocker, monkeypatch): +def test_keyring_lookup_encrypted_fallback_exists_no_password( + mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: monkeypatch.delenv("PYTHON_KEYRING_BACKEND", raising=False) get_keyring = mocker.patch("dandi.keyring.get_keyring", return_value=fail.Keyring()) kf = Path(keyfile.EncryptedKeyring().file_path) @@ -175,7 +197,9 @@ def test_keyring_lookup_encrypted_fallback_exists_no_password(mocker, monkeypatc @pytest.mark.usefixtures("tmp_home") -def test_keyring_lookup_encrypted_fallback_exists_password(mocker, monkeypatch): +def test_keyring_lookup_encrypted_fallback_exists_password( + mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: monkeypatch.delenv("PYTHON_KEYRING_BACKEND", raising=False) get_keyring = mocker.patch("dandi.keyring.get_keyring", return_value=fail.Keyring()) kb0 = keyfile.EncryptedKeyring() @@ -191,7 +215,9 @@ def test_keyring_lookup_encrypted_fallback_exists_password(mocker, monkeypatch): @pytest.mark.usefixtures("tmp_home") -def test_keyring_lookup_encrypted_fallback_not_exists_no_create(mocker, monkeypatch): +def test_keyring_lookup_encrypted_fallback_not_exists_no_create( + mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: monkeypatch.delenv("PYTHON_KEYRING_BACKEND", raising=False) get_keyring = mocker.patch("dandi.keyring.get_keyring", return_value=fail.Keyring()) confirm = mocker.patch("click.confirm", return_value=False) @@ -205,8 +231,8 @@ def test_keyring_lookup_encrypted_fallback_not_exists_no_create(mocker, monkeypa @pytest.mark.usefixtures("tmp_home") def test_keyring_lookup_encrypted_fallback_not_exists_create_rcconf( - mocker, monkeypatch -): + mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: monkeypatch.delenv("PYTHON_KEYRING_BACKEND", raising=False) get_keyring = mocker.patch("dandi.keyring.get_keyring", return_value=fail.Keyring()) confirm = mocker.patch("click.confirm", return_value=True) @@ -225,8 +251,8 @@ def test_keyring_lookup_encrypted_fallback_not_exists_create_rcconf( @pytest.mark.usefixtures("tmp_home") def test_keyring_lookup_encrypted_fallback_not_exists_create_rcconf_exists( - mocker, monkeypatch -): + mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch +) -> None: monkeypatch.delenv("PYTHON_KEYRING_BACKEND", raising=False) get_keyring = mocker.patch("dandi.keyring.get_keyring", return_value=fail.Keyring()) confirm = mocker.patch("click.confirm", return_value=True) diff --git a/dandi/tests/test_metadata.py b/dandi/tests/test_metadata.py index a0a6bce19..502fa0b6d 100644 --- a/dandi/tests/test_metadata.py +++ b/dandi/tests/test_metadata.py @@ -2,6 +2,7 @@ from datetime import datetime, timedelta import json from pathlib import Path +from typing import Any, Dict, Optional, Tuple, Union from dandischema.consts import DANDI_SCHEMA_VERSION from dandischema.metadata import validate @@ -28,7 +29,7 @@ METADATA_DIR = Path(__file__).with_name("data") / "metadata" -def test_get_metadata(simple1_nwb, simple1_nwb_metadata): +def test_get_metadata(simple1_nwb: str, simple1_nwb_metadata: Dict[str, Any]) -> None: target_metadata = simple1_nwb_metadata.copy() # we will also get some counts target_metadata["number_of_electrodes"] = 0 @@ -91,7 +92,7 @@ def test_get_metadata(simple1_nwb, simple1_nwb_metadata): ("Gestational Week 19", ("P19W", "Gestational")), ], ) -def test_parse_age(age, duration): +def test_parse_age(age: str, duration: Union[str, Tuple[str, str]]) -> None: if isinstance(duration, tuple): duration, ref = duration else: # birth will be a default ref @@ -126,7 +127,7 @@ def test_parse_age(age, duration): ), ], ) -def test_parse_error(age, errmsg): +def test_parse_error(age: Optional[str], errmsg: str) -> None: with pytest.raises(ValueError) as excinfo: parse_age(age) assert str(excinfo.value) == errmsg @@ -141,7 +142,7 @@ def test_parse_error(age, errmsg): (timedelta(days=5, seconds=23, microseconds=2000), "P5DT23.002S"), ], ) -def test_timedelta2duration(td, duration): +def test_timedelta2duration(td: timedelta, duration: str) -> None: assert timedelta2duration(td) == duration @@ -263,7 +264,7 @@ def test_timedelta2duration(td, duration): ), ], ) -def test_metadata2asset(filename, metadata): +def test_metadata2asset(filename: str, metadata: Dict[str, Any]) -> None: data = metadata2asset(metadata) with (METADATA_DIR / filename).open() as fp: data_as_dict = json.load(fp) @@ -280,7 +281,7 @@ def test_metadata2asset(filename, metadata): validate(data_as_dict) -def test_dandimeta_migration(): +def test_dandimeta_migration() -> None: with (METADATA_DIR / "dandimeta_migration.new.json").open() as fp: data_as_dict = json.load(fp) data_as_dict["schemaVersion"] = DANDI_SCHEMA_VERSION @@ -288,7 +289,7 @@ def test_dandimeta_migration(): validate(data_as_dict) -def test_time_extract(): +def test_time_extract() -> None: # if metadata contains date_of_birth and session_start_time, # age will be calculated from the values meta_birth = { @@ -297,6 +298,7 @@ def test_time_extract(): "date_of_birth": "2020-07-31T12:20:00-04:00", } age_birth = extract_age(meta_birth) + assert age_birth is not None assert age_birth.value == "P31DT88S" assert age_birth.valueReference == PropertyValue( value=AgeReferenceType("dandi:BirthReference") @@ -305,19 +307,21 @@ def test_time_extract(): # if metadata doesn't contain date_of_birth, the age field will be used meta = {"session_start_time": "2020-08-31T12:21:28-04:00", "age": "31 days"} age = extract_age(meta) + assert age is not None assert age.value == "P31D" assert age.valueReference == PropertyValue( value=AgeReferenceType("dandi:BirthReference") ) -def test_time_extract_gest(): +def test_time_extract_gest() -> None: """extract age with Gestational ref""" meta_birth = { "session_start_time": "2020-08-31T12:21:28-04:00", "age": "Gestational week 3", } age_birth = extract_age(meta_birth) + assert age_birth is not None assert age_birth.value == "P3W" assert age_birth.valueReference == PropertyValue( value=AgeReferenceType("dandi:GestationalReference") diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index 38943f2d7..5031f3ad9 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -2,6 +2,7 @@ import os import os.path as op from pathlib import Path +from typing import Any, NoReturn from click.testing import CliRunner import pytest @@ -21,27 +22,27 @@ from ..utils import find_files, on_windows, yaml_load -def test_sanitize_value(): +def test_sanitize_value() -> None: # . is not sanitized in extension but elsewhere assert _sanitize_value("_.ext", "extension") == "-.ext" assert _sanitize_value("_.ext", "unrelated") == "--ext" -def test_populate_dataset_yml(tmpdir): +def test_populate_dataset_yml(tmp_path: Path) -> None: # should work even on an empty file - path = tmpdir / "blah.yaml" + path = tmp_path / "blah.yaml" - def c(): # shortcut + def c() -> Any: # shortcut with open(path) as f: return yaml_load(f, typ="safe") - path.write("") + path.write_text("") populate_dataset_yml(str(path), []) # doesn't crash - path.write("id: test1 # comment") # no ID assumptions, or querying + path.write_text("id: test1 # comment") # no ID assumptions, or querying populate_dataset_yml(str(path), []) # doesn't crash # even comments should be preserved and no changes if no relevant metadata - assert path.read().strip() == "id: test1 # comment" + assert path.read_text().strip() == "id: test1 # comment" metadata = [ # context for all the ids are dataset level ATM, so even when no @@ -62,7 +63,7 @@ def c(): # shortcut } # and if we set units and redo -- years should stay unchanged, while other fields change - m = yaml_load(path.read()) + m = yaml_load(path.read_text()) m["age"]["units"] = "years" with open(path, "w") as fp: ruamel.yaml.YAML().dump(m, fp) @@ -103,8 +104,8 @@ def c(): # shortcut @pytest.mark.integration @pytest.mark.parametrize("mode", no_move_modes) -def test_organize_nwb_test_data(nwb_test_data, tmpdir, mode): - outdir = str(tmpdir / "organized") +def test_organize_nwb_test_data(nwb_test_data: str, tmp_path: Path, mode: str) -> None: + outdir = str(tmp_path / "organized") relative = False if mode == "symlink-relative": @@ -118,9 +119,9 @@ def test_organize_nwb_test_data(nwb_test_data, tmpdir, mode): nwb_test_data = op.relpath(nwb_test_data, cwd) outdir = op.relpath(outdir, cwd) - src = Path(tmpdir, "src") + src = tmp_path / "src" src.touch() - dest = Path(tmpdir, "dest") + dest = tmp_path / "dest" try: dest.symlink_to(src) except OSError: @@ -179,7 +180,7 @@ def test_organize_nwb_test_data(nwb_test_data, tmpdir, mode): assert not any(op.islink(p) for p in produced_paths) -def test_ambiguous(simple2_nwb, tmp_path): +def test_ambiguous(simple2_nwb: str, tmp_path: Path) -> None: copy2 = copy_nwb_file(simple2_nwb, tmp_path) outdir = str(tmp_path / "organized") args = ["--files-mode", "copy", "-d", outdir, simple2_nwb, copy2] @@ -195,7 +196,7 @@ def test_ambiguous(simple2_nwb, tmp_path): ) -def test_ambiguous_probe1(): +def test_ambiguous_probe1() -> None: base = dict(subject_id="1", session="2", extension="nwb") # fake filenames should be ok since we never should get to reading them for object_id metadata = [ @@ -239,11 +240,17 @@ def test_ambiguous_probe1(): (False, False, "copy"), ], ) -def test_detect_link_type(monkeypatch, tmp_path, sym_success, hard_success, result): - def succeed_link(src, dest): +def test_detect_link_type( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + sym_success: bool, + hard_success: bool, + result: str, +) -> None: + def succeed_link(src: Any, dest: Any) -> None: pass - def error_link(src, dest): + def error_link(src: Any, dest: Any) -> NoReturn: raise OSError("Operation failed") monkeypatch.setattr(os, "symlink", succeed_link if sym_success else error_link) diff --git a/dandi/tests/test_pynwb_utils.py b/dandi/tests/test_pynwb_utils.py index 5d3fd705a..298d973ff 100644 --- a/dandi/tests/test_pynwb_utils.py +++ b/dandi/tests/test_pynwb_utils.py @@ -1,5 +1,6 @@ from datetime import datetime, timezone import re +from typing import Any, Callable, NoReturn import numpy as np from pynwb import NWBHDF5IO, NWBFile, TimeSeries @@ -7,7 +8,7 @@ from ..pynwb_utils import _sanitize_nwb_version, nwb_has_external_links -def test_pynwb_io(simple1_nwb): +def test_pynwb_io(simple1_nwb: str) -> None: # To verify that our dependencies spec is sufficient to avoid # stepping into known pynwb/hdmf issues with NWBHDF5IO(str(simple1_nwb), "r", load_namespaces=True) as reader: @@ -16,12 +17,12 @@ def test_pynwb_io(simple1_nwb): assert str(nwbfile) -def test_sanitize_nwb_version(): - def _nocall(*args): +def test_sanitize_nwb_version() -> None: + def _nocall(*args: Any) -> NoReturn: raise AssertionError(f"Should have not been called. Was called with {args}") - def assert_regex(regex): - def search(v): + def assert_regex(regex: str) -> Callable[[str], None]: + def search(v: str) -> None: assert re.search(regex, v) return search diff --git a/dandi/tests/test_upload.py b/dandi/tests/test_upload.py index 3e4646b58..56e62c577 100644 --- a/dandi/tests/test_upload.py +++ b/dandi/tests/test_upload.py @@ -1,11 +1,15 @@ import os +from pathlib import Path from shutil import copyfile, rmtree +from typing import Any, Dict import numpy as np import pynwb import pytest +from pytest_mock import MockerFixture import zarr +from .fixtures import SampleDandiset from .test_helpers import assert_dirtrees_eq from ..consts import ZARR_MIME_TYPE, dandiset_metadata_file from ..dandiapi import AssetType, RemoteBlobAsset, RemoteZarrAsset @@ -17,7 +21,9 @@ from ..utils import list_paths, yaml_dump -def test_upload_download(new_dandiset, organized_nwb_dir, tmp_path): +def test_upload_download( + new_dandiset: SampleDandiset, organized_nwb_dir: str, tmp_path: Path +) -> None: d = new_dandiset.dandiset dspath = new_dandiset.dspath (nwb_file,) = [ @@ -35,13 +41,14 @@ def test_upload_download(new_dandiset, organized_nwb_dir, tmp_path): ] -def test_upload_dandiset_metadata(new_dandiset): +def test_upload_dandiset_metadata(new_dandiset: SampleDandiset) -> None: # For now let's "manually" populate dandiset.yaml in that downloaded location # which is missing due to https://github.com/dandi/dandi-api/issues/63 d = new_dandiset.dandiset dspath = new_dandiset.dspath ds_orig = APIDandiset(dspath) ds_metadata = ds_orig.metadata + assert ds_metadata is not None ds_metadata["description"] = "very long" ds_metadata["name"] = "shorty" (dspath / dandiset_metadata_file).write_text(yaml_dump(ds_metadata)) @@ -52,28 +59,36 @@ def test_upload_dandiset_metadata(new_dandiset): assert d.version.name == "shorty" -def test_upload_extant_existing(mocker, text_dandiset): +def test_upload_extant_existing( + mocker: MockerFixture, text_dandiset: SampleDandiset +) -> None: iter_upload_spy = mocker.spy(LocalFileAsset, "iter_upload") with pytest.raises(FileExistsError): text_dandiset.upload(existing="error") iter_upload_spy.assert_not_called() -def test_upload_extant_skip(mocker, text_dandiset): +def test_upload_extant_skip( + mocker: MockerFixture, text_dandiset: SampleDandiset +) -> None: iter_upload_spy = mocker.spy(LocalFileAsset, "iter_upload") text_dandiset.upload(existing="skip") iter_upload_spy.assert_not_called() @pytest.mark.parametrize("existing", ["overwrite", "refresh"]) -def test_upload_extant_eq_overwrite(existing, mocker, text_dandiset): +def test_upload_extant_eq_overwrite( + existing: str, mocker: MockerFixture, text_dandiset: SampleDandiset +) -> None: iter_upload_spy = mocker.spy(LocalFileAsset, "iter_upload") text_dandiset.upload(existing=existing) iter_upload_spy.assert_not_called() @pytest.mark.parametrize("existing", ["overwrite", "refresh"]) -def test_upload_extant_neq_overwrite(existing, mocker, text_dandiset, tmp_path): +def test_upload_extant_neq_overwrite( + existing: str, mocker: MockerFixture, text_dandiset: SampleDandiset, tmp_path: Path +) -> None: (text_dandiset.dspath / "file.txt").write_text("This is different text.\n") iter_upload_spy = mocker.spy(LocalFileAsset, "iter_upload") text_dandiset.upload(existing=existing) @@ -84,7 +99,9 @@ def test_upload_extant_neq_overwrite(existing, mocker, text_dandiset, tmp_path): ).read_text() == "This is different text.\n" -def test_upload_extant_old_refresh(mocker, text_dandiset): +def test_upload_extant_old_refresh( + mocker: MockerFixture, text_dandiset: SampleDandiset +) -> None: (text_dandiset.dspath / "file.txt").write_text("This is different text.\n") os.utime(text_dandiset.dspath / "file.txt", times=(0, 0)) iter_upload_spy = mocker.spy(LocalFileAsset, "iter_upload") @@ -92,13 +109,17 @@ def test_upload_extant_old_refresh(mocker, text_dandiset): iter_upload_spy.assert_not_called() -def test_upload_extant_force(mocker, text_dandiset): +def test_upload_extant_force( + mocker: MockerFixture, text_dandiset: SampleDandiset +) -> None: iter_upload_spy = mocker.spy(LocalFileAsset, "iter_upload") text_dandiset.upload(existing="force") iter_upload_spy.assert_called() -def test_upload_extant_bad_existing(mocker, text_dandiset): +def test_upload_extant_bad_existing( + mocker: MockerFixture, text_dandiset: SampleDandiset +) -> None: iter_upload_spy = mocker.spy(LocalFileAsset, "iter_upload") text_dandiset.upload(existing="foobar") iter_upload_spy.assert_not_called() @@ -116,7 +137,9 @@ def test_upload_extant_bad_existing(mocker, text_dandiset): b"x", ], ) -def test_upload_download_small_file(contents, new_dandiset, tmp_path): +def test_upload_download_small_file( + contents: bytes, new_dandiset: SampleDandiset, tmp_path: Path +) -> None: d = new_dandiset.dandiset dandiset_id = d.identifier dspath = new_dandiset.dspath @@ -131,7 +154,9 @@ def test_upload_download_small_file(contents, new_dandiset, tmp_path): @pytest.mark.parametrize("confirm", [True, False]) -def test_upload_sync(confirm, mocker, text_dandiset): +def test_upload_sync( + confirm: bool, mocker: MockerFixture, text_dandiset: SampleDandiset +) -> None: (text_dandiset.dspath / "file.txt").unlink() confirm_mock = mocker.patch("click.confirm", return_value=confirm) text_dandiset.upload(sync=True) @@ -143,7 +168,9 @@ def test_upload_sync(confirm, mocker, text_dandiset): text_dandiset.dandiset.get_asset_by_path("file.txt") -def test_upload_sync_folder(mocker, text_dandiset): +def test_upload_sync_folder( + mocker: MockerFixture, text_dandiset: SampleDandiset +) -> None: (text_dandiset.dspath / "file.txt").unlink() (text_dandiset.dspath / "subdir2" / "banana.txt").unlink() confirm_mock = mocker.patch("click.confirm", return_value=True) @@ -165,7 +192,9 @@ def test_upload_sync_zarr(mocker, zarr_dandiset): zarr_dandiset.dandiset.get_asset_by_path("sample.zarr") -def test_upload_invalid_metadata(new_dandiset, simple1_nwb_metadata): +def test_upload_invalid_metadata( + new_dandiset: SampleDandiset, simple1_nwb_metadata: Dict[str, Any] +) -> None: make_nwb_file( new_dandiset.dspath / "broken.nwb", subject=pynwb.file.Subject( @@ -181,7 +210,7 @@ def test_upload_invalid_metadata(new_dandiset, simple1_nwb_metadata): new_dandiset.dandiset.get_asset_by_path("broken.nwb") -def test_upload_zarr(new_dandiset): +def test_upload_zarr(new_dandiset: SampleDandiset) -> None: zarr.save( new_dandiset.dspath / "sample.zarr", np.arange(1000), np.arange(1000, 0, -1) ) @@ -192,7 +221,7 @@ def test_upload_zarr(new_dandiset): assert asset.path == "sample.zarr" -def test_upload_different_zarr(tmp_path, zarr_dandiset): +def test_upload_different_zarr(tmp_path: Path, zarr_dandiset: SampleDandiset) -> None: rmtree(zarr_dandiset.dspath / "sample.zarr") zarr.save(zarr_dandiset.dspath / "sample.zarr", np.eye(5)) zarr_dandiset.upload() @@ -203,7 +232,9 @@ def test_upload_different_zarr(tmp_path, zarr_dandiset): ) -def test_upload_nonzarr_to_zarr_path(tmp_path, zarr_dandiset): +def test_upload_nonzarr_to_zarr_path( + tmp_path: Path, zarr_dandiset: SampleDandiset +) -> None: rmtree(zarr_dandiset.dspath / "sample.zarr") (zarr_dandiset.dspath / "sample.zarr").write_text("This is not a Zarr.\n") zarr_dandiset.upload(allow_any_path=True) @@ -218,7 +249,9 @@ def test_upload_nonzarr_to_zarr_path(tmp_path, zarr_dandiset): ).read_text() == "This is not a Zarr.\n" -def test_upload_zarr_to_nonzarr_path(new_dandiset, tmp_path): +def test_upload_zarr_to_nonzarr_path( + new_dandiset: SampleDandiset, tmp_path: Path +) -> None: d = new_dandiset.dandiset dspath = new_dandiset.dspath (dspath / "sample.zarr").write_text("This is not a Zarr.\n") @@ -244,7 +277,7 @@ def test_upload_zarr_to_nonzarr_path(new_dandiset, tmp_path): assert_dirtrees_eq(dspath / "sample.zarr", tmp_path / d.identifier / "sample.zarr") -def test_upload_zarr_with_empty_dir(new_dandiset): +def test_upload_zarr_with_empty_dir(new_dandiset: SampleDandiset) -> None: zarr.save( new_dandiset.dspath / "sample.zarr", np.arange(1000), np.arange(1000, 0, -1) ) diff --git a/dandi/tests/test_utils.py b/dandi/tests/test_utils.py index a95f0b0d8..2dece14a1 100644 --- a/dandi/tests/test_utils.py +++ b/dandi/tests/test_utils.py @@ -1,7 +1,9 @@ import inspect import os import os.path as op +from pathlib import Path import time +from typing import Iterable, List import pytest import requests @@ -27,7 +29,7 @@ ) -def test_find_files(): +def test_find_files() -> None: tests_dir = op.dirname(__file__) proj_dir = op.normpath(op.join(op.dirname(__file__), op.pardir)) @@ -57,36 +59,37 @@ def test_find_files(): # TODO: more tests -def test_find_files_dotfiles(tmpdir): - tmpsubdir = tmpdir.mkdir("subdir") +def test_find_files_dotfiles(tmp_path: Path) -> None: + tmpsubdir = tmp_path / "subdir" + tmpsubdir.mkdir() for p in (".dot.nwb", "regular", ".git"): - for f in (tmpdir / p, tmpsubdir / p): - f.write_text("", "utf-8") + for f in (tmp_path / p, tmpsubdir / p): + f.touch() - def relpaths(paths): - return sorted(op.relpath(p, tmpdir) for p in paths) + def relpaths(paths: Iterable[str]) -> List[str]: + return sorted(op.relpath(p, tmp_path) for p in paths) regular = ["regular", op.join("subdir", "regular")] dotfiles = [".dot.nwb", op.join("subdir", ".dot.nwb")] vcs = [".git", op.join("subdir", ".git")] - ff = find_files(".*", tmpdir) + ff = find_files(".*", tmp_path) assert relpaths(ff) == regular - ff = find_files(".*", tmpdir, exclude_dotfiles=False) + ff = find_files(".*", tmp_path, exclude_dotfiles=False) # we still exclude VCS assert relpaths(ff) == sorted(regular + dotfiles) # current VCS are also dot files - ff = find_files(".*", tmpdir, exclude_vcs=False) + ff = find_files(".*", tmp_path, exclude_vcs=False) assert relpaths(ff) == regular # current VCS are also dot files - ff = find_files(".*", tmpdir, exclude_vcs=False, exclude_dotfiles=False) + ff = find_files(".*", tmp_path, exclude_vcs=False, exclude_dotfiles=False) assert relpaths(ff) == sorted(regular + dotfiles + vcs) -def test_times_manipulations(): +def test_times_manipulations() -> None: t0 = get_utcnow_datetime() t0_isoformat = ensure_strtime(t0) t0_str = ensure_strtime(t0, isoformat=False) @@ -116,13 +119,13 @@ def test_times_manipulations(): @pytest.mark.parametrize( "t", ["2018-09-26 17:29:17.000000-07:00", "2018-09-26 17:29:17-07:00"] ) -def test_time_samples(t): +def test_time_samples(t: str) -> None: assert is_same_time( ensure_datetime(t), "2018-09-27 00:29:17-00:00", tolerance=0 ) # exactly the same -def test_flatten(): +def test_flatten() -> None: assert inspect.isgenerator(flatten([1])) # flattened is just a list() around flatten assert flattened([1, [2, 3, [4]], 5, (i for i in range(2))]) == [ @@ -140,7 +143,7 @@ def test_flatten(): @responses.activate -def test_get_instance_dandi_with_api(): +def test_get_instance_dandi_with_api() -> None: responses.add( responses.GET, f"{redirector_base}/server-info", @@ -163,7 +166,7 @@ def test_get_instance_dandi_with_api(): @responses.activate -def test_get_instance_url(): +def test_get_instance_url() -> None: responses.add( responses.GET, "https://example.dandi/server-info", @@ -186,7 +189,7 @@ def test_get_instance_url(): @responses.activate -def test_get_instance_cli_version_too_old(): +def test_get_instance_cli_version_too_old() -> None: responses.add( responses.GET, "https://example.dandi/server-info", @@ -210,7 +213,7 @@ def test_get_instance_cli_version_too_old(): @responses.activate -def test_get_instance_bad_cli_version(): +def test_get_instance_bad_cli_version() -> None: responses.add( responses.GET, "https://example.dandi/server-info", @@ -234,7 +237,7 @@ def test_get_instance_bad_cli_version(): @responses.activate -def test_get_instance_id_bad_response(): +def test_get_instance_id_bad_response() -> None: responses.add( responses.GET, f"{redirector_base}/server-info", @@ -245,7 +248,8 @@ def test_get_instance_id_bad_response(): @responses.activate -def test_get_instance_known_url_bad_response(): +def test_get_instance_known_url_bad_response() -> None: + assert redirector_base is not None responses.add( responses.GET, f"{redirector_base}/server-info", @@ -256,7 +260,7 @@ def test_get_instance_known_url_bad_response(): @responses.activate -def test_get_instance_unknown_url_bad_response(): +def test_get_instance_unknown_url_bad_response() -> None: responses.add( responses.GET, "https://dandi.nil/server-info", @@ -272,7 +276,7 @@ def test_get_instance_unknown_url_bad_response(): @responses.activate -def test_get_instance_bad_version_from_server(): +def test_get_instance_bad_version_from_server() -> None: responses.add( responses.GET, "https://example.dandi/server-info", @@ -296,7 +300,7 @@ def test_get_instance_bad_version_from_server(): assert "foobar" in str(excinfo.value) -def test_get_instance_actual_dandi(): +def test_get_instance_actual_dandi() -> None: inst = get_instance("dandi") assert inst.api is not None @@ -309,7 +313,7 @@ def test_get_instance_actual_dandi(): @pytest.mark.redirector @using_docker -def test_server_info(): +def test_server_info() -> None: r = requests.get(f"{redirector_base}/server-info") r.raise_for_status() data = r.json() @@ -320,7 +324,7 @@ def test_server_info(): assert "services" in data -def test_get_module_version(): +def test_get_module_version() -> None: import pynwb import dandi @@ -352,5 +356,5 @@ def test_get_module_version(): ("foo.txz", "application/x-xz"), ], ) -def test_get_mime_type(filename, mtype): +def test_get_mime_type(filename: str, mtype: str) -> None: assert get_mime_type(filename) == mtype diff --git a/dandi/upload.py b/dandi/upload.py index 580146949..1c9d31ccb 100644 --- a/dandi/upload.py +++ b/dandi/upload.py @@ -1,36 +1,49 @@ +from collections import defaultdict from functools import reduce import os.path from pathlib import Path import re +import sys import time +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Set, Union import click from . import lgr from .consts import DRAFT, dandiset_identifier_regex, dandiset_metadata_file from .exceptions import NotFoundError -from .files import DandisetMetadataFile, LocalAsset, find_dandi_files +from .files import DandiFile, DandisetMetadataFile, LocalAsset, find_dandi_files from .utils import ensure_datetime, get_instance, pluralize +if TYPE_CHECKING: + if sys.version_info >= (3, 8): + from typing import TypedDict + else: + from typing_extensions import TypedDict + + class Uploaded(TypedDict): + size: int + errors: List[str] + def upload( - paths, - existing="refresh", - validation="require", - dandiset_path=None, - dandi_instance="dandi", - allow_any_path=False, - upload_dandiset_metadata=False, - devel_debug=False, - jobs=None, - jobs_per_file=None, - sync=False, -): + paths: List[Union[str, Path]], + existing: str = "refresh", + validation: str = "require", + dandiset_path: Union[str, Path, None] = None, + dandi_instance: str = "dandi", + allow_any_path: bool = False, + upload_dandiset_metadata: bool = False, + devel_debug: bool = False, + jobs: Optional[int] = None, + jobs_per_file: Optional[int] = None, + sync: bool = False, +) -> None: from .dandiapi import DandiAPIClient from .dandiset import APIDandiset, Dandiset - dandiset = Dandiset.find(dandiset_path) - if not dandiset: + dandiset_ = Dandiset.find(dandiset_path) + if not dandiset_: raise RuntimeError( f"Found no {dandiset_metadata_file} anywhere. " "Use 'dandi download' or 'organize' first" @@ -44,7 +57,7 @@ def upload( client.check_schema_version() client.dandi_authenticate() - dandiset = APIDandiset(dandiset.path) # "cast" to a new API based dandiset + dandiset = APIDandiset(dandiset_.path) # "cast" to a new API based dandiset ds_identifier = dandiset.identifier remote_dandiset = client.get_dandiset(ds_identifier, DRAFT) @@ -84,17 +97,16 @@ def upload( # we could limit the number of them until # https://github.com/pyout/pyout/issues/87 # properly addressed - process_paths = set() - from collections import defaultdict + process_paths: Set[str] = set() - uploaded_paths = defaultdict(lambda: {"size": 0, "errors": []}) + uploaded_paths: Dict[str, Uploaded] = defaultdict(lambda: {"size": 0, "errors": []}) - def skip_file(msg): + def skip_file(msg: Any) -> dict: return {"status": "skipped", "message": str(msg)} # TODO: we might want to always yield a full record so no field is not # provided to pyout to cause it to halt - def process_path(dfile): + def process_path(dfile: DandiFile) -> Iterator[dict]: """ Parameters @@ -149,11 +161,13 @@ def process_path(dfile): # online. if upload_dandiset_metadata: yield {"status": "updating metadata"} + assert dandiset.metadata is not None remote_dandiset.set_raw_metadata(dandiset.metadata) yield {"status": "updated metadata"} else: yield skip_file("should be edited online") return + assert isinstance(dfile, LocalAsset) # # Compute checksums @@ -170,6 +184,7 @@ def process_path(dfile): except NotFoundError: extant = None else: + assert extant is not None metadata = extant.get_raw_metadata() local_mtime = dfile.modified remote_mtime_str = metadata.get("blobDateModified") @@ -274,7 +289,7 @@ def process_path(dfile): # for the upload speeds we need to provide a custom aggregate t0 = time.time() - def upload_agg(*ignored): + def upload_agg(*ignored: Any) -> str: dt = time.time() - t0 # to help avoiding dict length changes during upload # might be not a proper solution @@ -300,6 +315,7 @@ def upload_agg(*ignored): process_paths.add(str(dfile.filepath)) + rec: Dict[Any, Any] if isinstance(dfile, DandisetMetadataFile): rec = {"path": dandiset_metadata_file} else: @@ -318,11 +334,11 @@ def upload_agg(*ignored): out(rec) if sync: - relpaths = [] + relpaths: List[str] = [] for p in original_paths: rp = os.path.relpath(p, dandiset.path) relpaths.append("" if rp == "." else rp) - path_prefix = reduce(os.path.commonprefix, relpaths) + path_prefix = reduce(os.path.commonprefix, relpaths) # type: ignore[arg-type] to_delete = [] for asset in remote_dandiset.get_assets_with_path_prefix(path_prefix): if ( diff --git a/dandi/utils.py b/dandi/utils.py index d3de0c9db..47af06b03 100644 --- a/dandi/utils.py +++ b/dandi/utils.py @@ -1,11 +1,4 @@ import datetime - -try: - from importlib.metadata import version as importlib_version -except ImportError: - # TODO - remove whenever python >= 3.8 - from importlib_metadata import version as importlib_version - import inspect import io import itertools @@ -19,20 +12,34 @@ import subprocess import sys import types -from typing import Iterable, Iterator, List, Optional, TypeVar, Union +from typing import ( + Any, + Iterable, + Iterator, + List, + Optional, + Set, + TextIO, + Tuple, + Type, + TypeVar, + Union, +) import dateutil.parser import requests import ruamel.yaml from semantic_version import Version -# -# Additional handlers -# from . import __version__, get_logger from .consts import DandiInstance, known_instances, known_instances_rev from .exceptions import BadCliVersionError, CliVersionTooOldError +if sys.version_info >= (3, 8): + from importlib.metadata import version as importlib_version +else: + from importlib_metadata import version as importlib_version + lgr = get_logger() _sys_excepthook = sys.excepthook # Just in case we ever need original one @@ -58,21 +65,25 @@ ) -def is_interactive(): +def is_interactive() -> bool: """Return True if all in/outs are tty""" # TODO: check on windows if hasattr check would work correctly and add value: # return sys.stdin.isatty() and sys.stdout.isatty() and sys.stderr.isatty() -def setup_exceptionhook(ipython=False): +def setup_exceptionhook(ipython: bool = False) -> None: """Overloads default sys.excepthook with our exceptionhook handler. If interactive, our exceptionhook handler will invoke pdb.post_mortem; if not interactive, then invokes default handler. """ - def _pdb_excepthook(type, value, tb): + def _pdb_excepthook( + type: Type[BaseException], + value: BaseException, + tb: Optional[types.TracebackType], + ) -> None: import traceback traceback.print_exception(type, value, tb) @@ -94,7 +105,7 @@ def _pdb_excepthook(type, value, tb): sys.excepthook = _pdb_excepthook -def get_utcnow_datetime(microseconds=True): +def get_utcnow_datetime(microseconds: bool = True) -> datetime.datetime: """Return current time as datetime with time zone information. Microseconds are stripped away. @@ -108,7 +119,11 @@ def get_utcnow_datetime(microseconds=True): return ret.replace(microsecond=0) -def is_same_time(*times, tolerance=1e-6, strip_tzinfo=False): +def is_same_time( + *times: Union[datetime.datetime, int, float, str], + tolerance: float = 1e-6, + strip_tzinfo: bool = False, +) -> bool: """Helper to do comparison between time points Time zone information gets stripped @@ -137,7 +152,9 @@ def is_same_time(*times, tolerance=1e-6, strip_tzinfo=False): ) -def ensure_strtime(t, isoformat=True): +def ensure_strtime( + t: Union[str, int, float, datetime.datetime], isoformat: bool = True +) -> str: """Ensures that time is a string in iso format Note: if `t` is already a string, no conversion of any kind is done. @@ -160,7 +177,7 @@ def ensure_strtime(t, isoformat=True): raise TypeError(f"Do not know how to convert {t_orig!r} to string datetime") -def fromisoformat(t): +def fromisoformat(t: str) -> datetime.datetime: # datetime.fromisoformat "does not support parsing arbitrary ISO 8601 # strings" . In # particular, it does not parse the time zone suffixes recently @@ -169,7 +186,11 @@ def fromisoformat(t): return dateutil.parser.isoparse(t) -def ensure_datetime(t, strip_tzinfo=False, tz=None): +def ensure_datetime( + t: Union[datetime.datetime, int, float, str], + strip_tzinfo: bool = False, + tz: Optional[datetime.tzinfo] = None, +) -> datetime.datetime: """Ensures that time is a datetime strip_tzinfo applies only to str records passed in @@ -197,7 +218,7 @@ def ensure_datetime(t, strip_tzinfo=False, tz=None): # # Generic # -def flatten(it): +def flatten(it: Iterable) -> Iterator: """Yield items flattened if list, tuple or a generator""" for i in it: if isinstance(i, (list, tuple)) or inspect.isgenerator(i): @@ -206,7 +227,7 @@ def flatten(it): yield i -def flattened(it): +def flattened(it: Iterable) -> list: """Return list with items flattened if list, tuple or a generator""" return list(flatten(it)) @@ -216,7 +237,7 @@ def flattened(it): # -def load_jsonl(filename): +def load_jsonl(filename: Union[str, Path]) -> list: """Load json lines formatted file""" import json @@ -232,16 +253,19 @@ def load_jsonl(filename): _DATALAD_REGEX = r"%s\.(?:datalad)(?:%s|$)" % (_encoded_dirsep, _encoded_dirsep) +AnyPath = Union[str, Path] + + def find_files( - regex, - paths=os.curdir, - exclude=None, - exclude_dotfiles=True, - exclude_dotdirs=True, - exclude_vcs=True, - exclude_datalad=False, - dirs=False, -): + regex: str, + paths: Union[List[AnyPath], Tuple[AnyPath, ...], Set[AnyPath], AnyPath] = os.curdir, + exclude: Optional[str] = None, + exclude_dotfiles: bool = True, + exclude_dotdirs: bool = True, + exclude_vcs: bool = True, + exclude_datalad: bool = False, + dirs: bool = False, +) -> Iterator[str]: """Generator to find files matching regex Parameters @@ -264,7 +288,7 @@ def find_files( Whether to match directories as well as files """ - def exclude_path(path): + def exclude_path(path: str) -> bool: path = path.rstrip(op.sep) if exclude and re.search(exclude, path): return True @@ -274,8 +298,8 @@ def exclude_path(path): return True return False - def good_file(path): - return re.search(regex, path) and not exclude_path(path) + def good_file(path: str) -> bool: + return bool(re.search(regex, path)) and not exclude_path(path) if isinstance(paths, (list, tuple, set)): for path in paths: @@ -290,22 +314,22 @@ def good_file(path): exclude_datalad=exclude_datalad, dirs=dirs, ) - elif good_file(path): - yield path + elif good_file(str(path)): + yield str(path) else: # Provided path didn't match regex, thus excluded pass return elif op.isfile(paths): - if good_file(paths): - yield paths + if good_file(str(paths)): + yield str(paths) return for dirpath, dirnames, filenames in os.walk(paths): names = (dirnames + filenames) if dirs else filenames # TODO: might want to uniformize on windows to use '/' if exclude_dotfiles: - names = (n for n in names if not n.startswith(".")) + names = [n for n in names if not n.startswith(".")] if exclude_dotdirs: # and we should filter out directories from dirnames # Since we need to del which would change index, let's @@ -313,15 +337,15 @@ def good_file(path): for i in range(len(dirnames))[::-1]: if dirnames[i].startswith("."): del dirnames[i] - paths = (op.join(dirpath, name) for name in names) - for path in filter(re.compile(regex).search, paths): - if not exclude_path(path): - if op.islink(path) and op.isdir(path): + strpaths = [op.join(dirpath, name) for name in names] + for p in filter(re.compile(regex).search, strpaths): + if not exclude_path(p): + if op.islink(p) and op.isdir(p): lgr.warning( "%s: Ignoring unsupported symbolic link to directory", path ) else: - yield path + yield p def list_paths(dirpath: Union[str, Path], dirs: bool = False) -> List[Path]: @@ -331,7 +355,7 @@ def list_paths(dirpath: Union[str, Path], dirs: bool = False) -> List[Path]: _cp_supports_reflink: Optional[bool] = None -def copy_file(src, dst): +def copy_file(src: Union[str, Path], dst: Union[str, Path]) -> None: """Copy file from src to dst""" global _cp_supports_reflink if _cp_supports_reflink is None: @@ -349,15 +373,17 @@ def copy_file(src, dst): ["cp", "-f", "--reflink=auto", "--", str(src), str(dst)], check=True ) else: - return shutil.copy2(src, dst) + shutil.copy2(src, dst) -def move_file(src, dst): +def move_file(src: Union[str, Path], dst: Union[str, Path]) -> Any: """Move file from src to dst""" - return shutil.move(src, dst) + return shutil.move(str(src), str(dst)) -def find_parent_directory_containing(filename, path=None): +def find_parent_directory_containing( + filename: Union[str, Path], path: Union[str, Path, None] = None +) -> Optional[Path]: """Find a directory, on the path to 'path' containing filename if no 'path' - path from cwd @@ -376,7 +402,7 @@ def find_parent_directory_containing(filename, path=None): path = path.parent # go up -def yaml_dump(rec): +def yaml_dump(rec: Any) -> str: """Consistent dump into yaml Of primary importance is default_flow_style=False @@ -390,7 +416,7 @@ def yaml_dump(rec): return out.getvalue() -def yaml_load(f, typ=None): +def yaml_load(f: Union[str, TextIO], typ: Optional[str] = None) -> Any: """ Load YAML source from a file or string. @@ -415,12 +441,12 @@ def yaml_load(f, typ=None): # -def with_pathsep(path): +def with_pathsep(path: str) -> str: """Little helper to guarantee that path ends with /""" return path + op.sep if not path.endswith(op.sep) else path -def _get_normalized_paths(path, prefix): +def _get_normalized_paths(path: str, prefix: str) -> Tuple[str, str]: if op.isabs(path) != op.isabs(prefix): raise ValueError( "Both paths must either be absolute or relative. " @@ -431,7 +457,7 @@ def _get_normalized_paths(path, prefix): return path, prefix -def path_is_subpath(path, prefix): +def path_is_subpath(path: str, prefix: str) -> bool: """Return True if path is a subpath of prefix It will return False if path == prefix. @@ -445,7 +471,7 @@ def path_is_subpath(path, prefix): return (len(prefix) < len(path)) and path.startswith(prefix) -def shortened_repr(value, length=30): +def shortened_repr(value: Any, length: int = 30) -> str: try: if hasattr(value, "__repr__") and (value.__repr__ is not object.__repr__): value_repr = repr(value) @@ -468,8 +494,8 @@ def shortened_repr(value, length=30): return value_repr -def __auto_repr__(obj): - attr_names = tuple() +def __auto_repr__(obj: Any) -> str: + attr_names: Tuple[str, ...] = () if hasattr(obj, "__dict__"): attr_names += tuple(obj.__dict__.keys()) if hasattr(obj, "__slots__"): @@ -489,7 +515,10 @@ def __auto_repr__(obj): return "%s(%s)" % (obj.__class__.__name__, ", ".join(items)) -def auto_repr(cls): +TT = TypeVar("TT", bound=type) + + +def auto_repr(cls: TT) -> TT: """Decorator for a class to assign it an automagic quick and dirty __repr__ It uses public class attributes to prepare repr of a class @@ -497,11 +526,11 @@ def auto_repr(cls): Original idea: http://stackoverflow.com/a/27799004/1265472 """ - cls.__repr__ = __auto_repr__ + cls.__repr__ = __auto_repr__ # type: ignore[assignment] return cls -def Parallel(**kwargs): # TODO: disable lint complaint +def Parallel(**kwargs: Any) -> Any: # TODO: disable lint complaint """Adapter for joblib.Parallel so we could if desired, centralize control""" # ATM just a straight invocation import joblib @@ -517,7 +546,7 @@ def delayed(*args, **kwargs): return joblib.delayed(*args, **kwargs) -def get_instance(dandi_instance_id): +def get_instance(dandi_instance_id: str) -> DandiInstance: if dandi_instance_id.lower().startswith(("http://", "https://")): redirector_url = dandi_instance_id dandi_id = known_instances_rev.get(redirector_url) @@ -527,9 +556,10 @@ def get_instance(dandi_instance_id): instance = None else: instance = known_instances[dandi_instance_id] - redirector_url = instance.redirector - if redirector_url is None: + if instance.redirector is None: return instance + else: + redirector_url = instance.redirector try: r = requests.get(redirector_url.rstrip("/") + "/server-info") r.raise_for_status() @@ -579,7 +609,7 @@ def get_instance(dandi_instance_id): ) -def is_url(s): +def is_url(s: str) -> bool: """Very primitive url detection for now TODO: redo @@ -677,7 +707,7 @@ def get_mime_type(filename: str, strict: bool = False) -> str: return "application/x-" + encoding -def check_dandi_version(): +def check_dandi_version() -> None: if os.environ.get("DANDI_NO_ET"): return try: diff --git a/dandi/validate.py b/dandi/validate.py index 8f49c6ddb..34736873a 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -1,8 +1,15 @@ +from typing import Iterator, List, Optional, Tuple + from .files import find_dandi_files # TODO: provide our own "errors" records, which would also include warnings etc -def validate(*paths, schema_version=None, devel_debug=False, allow_any_path=False): +def validate( + *paths: str, + schema_version: Optional[str] = None, + devel_debug: bool = False, + allow_any_path: bool = False, +) -> Iterator[Tuple[str, List[str]]]: """Validate content Parameters @@ -17,7 +24,7 @@ def validate(*paths, schema_version=None, devel_debug=False, allow_any_path=Fals """ for df in find_dandi_files(*paths, dandiset_path=None, allow_all=allow_any_path): yield ( - df.filepath, + str(df.filepath), df.get_validation_errors( schema_version=schema_version, devel_debug=devel_debug ),