From 719c2f1f5d0d59c2d854fb4f33bd3b63d0bdc8bc Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 25 Jan 2022 09:49:46 -0500 Subject: [PATCH 1/4] Add type-checking with mypy --- setup.cfg | 31 +++++++++++++++++++++++++++++++ tox.ini | 11 ++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 73f0290c1..6474fade5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -126,3 +126,34 @@ parentdir_prefix = skip = dandi/_version.py,dandi/due.py,versioneer.py # Don't warn about "[l]ist" in the abbrev_prompt() docstring: ignore-regex = \[\w\]\w+ + +[mypy] +# TODO: Eventually uncomment these: +#allow_untyped_defs = False +#implicit_reexport = False +allow_incomplete_defs = False +ignore_missing_imports = True +no_implicit_optional = True +local_partial_types = True +pretty = True +show_error_codes = True +show_traceback = True +strict_equality = True +warn_redundant_casts = True +warn_return_any = True +warn_unreachable = True +plugins = pydantic.mypy +exclude = _version\.py|due\.py + +[mypy-dandi._version] +follow_imports = skip + +[mypy-dandi.due] +follow_imports = skip + +[mypy-ruamel.yaml] +implicit_reexport = True + +[pydantic-mypy] +init_forbid_extra = True +warn_untypes_fields = True diff --git a/tox.ini b/tox.ini index 9c6850357..a87a4831d 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = lint,py3 +envlist = lint,typing,py3 [testenv] setenv = @@ -23,6 +23,15 @@ commands = codespell dandi setup.py flake8 --config=setup.cfg {posargs} dandi setup.py +[testenv:typing] +deps = + mypy~=0.900 + types-appdirs + types-python-dateutil + types-requests +commands = + mypy dandi + [testenv:docs] basepython = python3 deps = -rdocs/requirements.txt From c90e062435b82d9e7088ebd60ae8f18f0f6683de Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 25 Jan 2022 10:41:52 -0500 Subject: [PATCH 2/4] Get the annotations so far to type-check --- dandi/cli/base.py | 4 ++- dandi/consts.py | 9 +++++-- dandi/dandiapi.py | 41 +++++++++++++++++++----------- dandi/dandiarchive.py | 31 +++++++++++++---------- dandi/delete.py | 23 +++++++++++++---- dandi/download.py | 43 ++++++++++++++++++------------- dandi/files.py | 49 +++++++++++++++++++++++------------- dandi/metadata.py | 29 +++++++++++++-------- dandi/misctypes.py | 2 +- dandi/support/digests.py | 20 ++++++++------- dandi/tests/fixtures.py | 9 ++++--- dandi/tests/test_download.py | 2 +- dandi/utils.py | 10 +++++--- 13 files changed, 172 insertions(+), 100 deletions(-) diff --git a/dandi/cli/base.py b/dandi/cli/base.py index bf2b890d5..4d55b22f6 100644 --- a/dandi/cli/base.py +++ b/dandi/cli/base.py @@ -120,4 +120,6 @@ def wrapper(obj, *args, **kwargs): return wrapper -map_to_click_exceptions._do_map = not bool(os.environ.get("DANDI_DEVEL", None)) +map_to_click_exceptions._do_map = not bool( # type: ignore[attr-defined] + os.environ.get("DANDI_DEVEL", None) +) diff --git a/dandi/consts.py b/dandi/consts.py index 13642b3ef..9d2d87d47 100644 --- a/dandi/consts.py +++ b/dandi/consts.py @@ -1,6 +1,6 @@ -from collections import namedtuple from enum import Enum import os +from typing import NamedTuple, Optional #: A list of metadata fields which dandi extracts from .nwb files. #: Additional fields (such as ``number_of_*``) might be added by @@ -87,7 +87,12 @@ class EmbargoStatus(Enum): dandiset_metadata_file = "dandiset.yaml" dandiset_identifier_regex = f"^{DANDISET_ID_REGEX}$" -DandiInstance = namedtuple("DandiInstance", ("gui", "redirector", "api")) + +class DandiInstance(NamedTuple): + gui: Optional[str] + redirector: Optional[str] + api: Optional[str] + # So it could be easily mapped to external IP (e.g. from within VM) # to test against instance running outside of current environment diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index ed8daf62c..5082d699b 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -569,7 +569,10 @@ def json_dict(self) -> Dict[str, Any]: Convert to a JSONable `dict`, omitting the ``client`` attribute and using the same field names as in the API """ - return json.loads(self.json(exclude=self.JSON_EXCLUDE, by_alias=True)) + return cast( + Dict[str, Any], + json.loads(self.json(exclude=self.JSON_EXCLUDE, by_alias=True)), + ) class Config: allow_population_by_field_name = True @@ -640,7 +643,7 @@ def __init__( client: DandiAPIClient, identifier: str, version: Union[str, Version, None] = None, - data: Optional[Dict[str, Any]] = None, + data: Union[Dict[str, Any], RemoteDandisetData, None] = None, ) -> None: #: The `DandiAPIClient` instance that returned this `RemoteDandiset` #: and which the latter will use for API requests @@ -1076,9 +1079,12 @@ def upload_raw_asset( :param RemoteAsset replace_asset: If set, replace the given asset, which must have the same path as the new asset """ - from .files import dandi_file + from .files import LocalAsset, dandi_file - return dandi_file(filepath).upload( + df = dandi_file(filepath) + if not isinstance(df, LocalAsset): + raise ValueError(f"{filepath}: not an asset file") + return df.upload( self, metadata=asset_metadata, jobs=jobs, replacing=replace_asset ) @@ -1114,9 +1120,12 @@ def iter_upload_raw_asset( ``"done"`` and an ``"asset"`` key containing the resulting `RemoteAsset`. """ - from .files import dandi_file + from .files import LocalAsset, dandi_file - return dandi_file(filepath).iter_upload( + df = dandi_file(filepath) + if not isinstance(df, LocalAsset): + raise ValueError(f"{filepath}: not an asset file") + return df.iter_upload( self, metadata=asset_metadata, jobs=jobs, replacing=replace_asset ) @@ -1147,7 +1156,7 @@ class BaseRemoteAsset(APIBase): #: instead of performing an API call _metadata: Optional[Dict[str, Any]] = PrivateAttr(default_factory=None) - def __init__(self, **data: Any) -> None: + def __init__(self, **data: Any) -> None: # type: ignore[no-redef] super().__init__(**data) # Pydantic insists on not initializing any attributes that start with # underscores, so we have to do it ourselves. @@ -1172,7 +1181,7 @@ def from_metadata( identifier=metadata["identifier"], path=metadata["path"], size=metadata["contentSize"], - _metadata=metadata, + _metadata=metadata, # type: ignore[call-arg] ) @property @@ -1240,7 +1249,7 @@ def get_raw_digest( digest_type = digest_type.value metadata = self.get_raw_metadata() try: - return metadata["digest"][digest_type] + return cast(str, metadata["digest"][digest_type]) except KeyError: raise NotFoundError(f"No {digest_type} digest found in metadata") @@ -1277,6 +1286,7 @@ def get_content_url( If ``strip_query`` is true, any query parameters are removed from the final URL before returning it. """ + url: str for url in self.get_raw_metadata().get("contentUrl", []): if re.search(regex, url): break @@ -1303,7 +1313,7 @@ def get_content_url( def get_download_file_iter( self, chunk_size: int = MAX_CHUNK_SIZE - ) -> Callable[..., Iterator[bytes]]: + ) -> Callable[[int], Iterator[bytes]]: """ Returns a function that when called (optionally with an offset into the asset to start downloading at) returns a generator of chunks of the @@ -1346,7 +1356,7 @@ def download( """ downloader = self.get_download_file_iter(chunk_size=chunk_size) with open(filepath, "wb") as fp: - for chunk in downloader(): + for chunk in downloader(0): fp.write(chunk) @property @@ -1413,6 +1423,7 @@ def from_data( This is a low-level method that non-developers would normally only use when acquiring data using means outside of this library. """ + klass: Type[RemoteAsset] if data.get("blob") is not None: klass = RemoteBlobAsset if data.pop("zarr", None) is not None: @@ -1428,7 +1439,7 @@ def from_data( dandiset_id=dandiset.identifier, version_id=dandiset.version_id, **data, - _metadata=metadata, + _metadata=metadata, # type: ignore[call-arg] ) @property @@ -1589,12 +1600,12 @@ class ZarrListing(BaseModel): @property def dirnames(self) -> List[str]: """The basenames of the directory URLs in `directories`""" - return [PurePosixPath(unquote(url.path)).name for url in self.directories] + return [PurePosixPath(unquote(url.path or "")).name for url in self.directories] @property def filenames(self) -> List[str]: """The basenames of the file URLs in `files`""" - return [PurePosixPath(unquote(url.path)).name for url in self.files] + return [PurePosixPath(unquote(url.path or "")).name for url in self.files] @dataclass @@ -1794,7 +1805,7 @@ def get_listing(self) -> ZarrListing: def get_download_file_iter( self, chunk_size: int = MAX_CHUNK_SIZE - ) -> Callable[..., Iterator[bytes]]: + ) -> Callable[[int], Iterator[bytes]]: """ Returns a function that when called (optionally with an offset into the file to start downloading at) returns a generator of chunks of the file diff --git a/dandi/dandiarchive.py b/dandi/dandiarchive.py index f89464b0a..abe0e68a4 100644 --- a/dandi/dandiarchive.py +++ b/dandi/dandiarchive.py @@ -25,11 +25,13 @@ using the `navigate_url()` function. """ +from __future__ import annotations + from abc import ABC, abstractmethod from contextlib import contextmanager import re from time import sleep -from typing import Iterable, Iterator, Optional, Tuple +from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, cast from urllib.parse import unquote as urlunquote from pydantic import AnyHttpUrl, BaseModel, parse_obj_as, validator @@ -72,7 +74,7 @@ class ParsedDandiURL(ABC, BaseModel): @validator("api_url") def _validate_api_url(cls, v: AnyHttpUrl) -> AnyHttpUrl: - return parse_obj_as(AnyHttpUrl, v.rstrip("/")) + return cast(AnyHttpUrl, parse_obj_as(AnyHttpUrl, v.rstrip("/"))) def get_client(self) -> DandiAPIClient: """ @@ -178,9 +180,9 @@ def get_assets( ) -> Iterator[BaseRemoteAsset]: """Returns all assets in the Dandiset""" with _maybe_strict(strict): - yield from self.get_dandiset(client, lazy=not strict).get_assets( - order=order - ) + d = self.get_dandiset(client, lazy=not strict) + assert d is not None + yield from d.get_assets(order=order) class SingleAssetURL(ParsedDandiURL): @@ -256,7 +258,9 @@ def get_assets( nothing is yielded if ``strict`` is false. """ with _maybe_strict(strict): - yield self.get_dandiset(client, lazy=not strict).get_asset(self.asset_id) + d = self.get_dandiset(client, lazy=not strict) + assert d is not None + yield d.get_asset(self.asset_id) def get_asset_ids(self, client: DandiAPIClient) -> Iterator[str]: """Yields the ID of the asset (regardless of whether it exists)""" @@ -273,9 +277,9 @@ def get_assets( ) -> Iterator[BaseRemoteAsset]: """Returns the assets whose paths start with `path`""" with _maybe_strict(strict): - yield from self.get_dandiset( - client, lazy=not strict - ).get_assets_with_path_prefix(self.path, order=order) + d = self.get_dandiset(client, lazy=not strict) + assert d is not None + yield from d.get_assets_with_path_prefix(self.path, order=order) class AssetItemURL(SingleAssetURL): @@ -297,6 +301,7 @@ def get_assets( """ try: dandiset = self.get_dandiset(client, lazy=not strict) + assert dandiset is not None # Force evaluation of the version here instead of when # get_asset_by_path() is called so we don't get nonexistent # dandisets with unspecified versions mixed up with nonexistent @@ -343,9 +348,9 @@ def get_assets( if not path.endswith("/"): path += "/" with _maybe_strict(strict): - yield from self.get_dandiset( - client, lazy=not strict - ).get_assets_with_path_prefix(path, order=order) + d = self.get_dandiset(client, lazy=not strict) + assert d is not None + yield from d.get_assets_with_path_prefix(path, order=order) @contextmanager @@ -403,7 +408,7 @@ class _dandi_url_parser: dandiset_id_grp = f"(?P{DANDISET_ID_REGEX})" # Should absorb port and "api/": server_grp = "(?P(?Phttps?)://(?P[^/]+)/(api/)?)" - known_urls = [ + known_urls: List[Tuple[re.Pattern[str], Dict[str, Any], str]] = [ # List of (regex, settings, display string) triples # # Settings: diff --git a/dandi/delete.py b/dandi/delete.py index 856e48be9..8b4acf50b 100644 --- a/dandi/delete.py +++ b/dandi/delete.py @@ -7,7 +7,7 @@ from .consts import DRAFT, dandiset_metadata_file from .dandiapi import DandiAPIClient, RemoteAsset, RemoteDandiset -from .dandiarchive import DandisetURL, ParsedDandiURL, parse_dandi_url +from .dandiarchive import BaseAssetIDURL, DandisetURL, ParsedDandiURL, parse_dandi_url from .exceptions import NotFoundError from .utils import get_instance, is_url @@ -47,8 +47,10 @@ def set_dandiset(self, api_url: str, dandiset_id: str) -> bool: raise elif not is_same_url(self.client.api_url, api_url): raise ValueError("Cannot delete assets from multiple API instances at once") - elif self.dandiset.identifier != dandiset_id: - raise ValueError("Cannot delete assets from multiple Dandisets at once") + else: + assert self.dandiset is not None + if self.dandiset.identifier != dandiset_id: + raise ValueError("Cannot delete assets from multiple Dandisets at once") return True def add_asset(self, asset: RemoteAsset) -> None: @@ -67,6 +69,7 @@ def register_asset( ) -> None: if not self.set_dandiset(api_url, dandiset_id): return + assert self.dandiset is not None try: asset = self.dandiset.get_asset_by_path(asset_path) except NotFoundError: @@ -84,6 +87,7 @@ def register_asset_folder( if not self.set_dandiset(api_url, dandiset_id): return any_assets = False + assert self.dandiset is not None for asset in self.dandiset.get_assets_with_path_prefix(folder_path): self.add_asset(asset) any_assets = True @@ -93,10 +97,15 @@ def register_asset_folder( ) def register_assets_url(self, url: str, parsed_url: ParsedDandiURL) -> None: + if isinstance(parsed_url, BaseAssetIDURL): + raise ValueError("Cannot delete an asset identified by just an ID") + assert parsed_url.dandiset_id is not None if not self.set_dandiset(parsed_url.api_url, parsed_url.dandiset_id): return any_assets = False + assert self.client is not None for a in parsed_url.get_assets(self.client): + assert isinstance(a, RemoteAsset) self.add_asset(a) any_assets = True if not any_assets and not self.skip_missing: @@ -110,6 +119,7 @@ def register_url(self, url: str) -> None: "Dandi API server does not support deletion of individual" " versions of a dandiset" ) + assert parsed_url.dandiset_id is not None self.register_dandiset(parsed_url.api_url, parsed_url.dandiset_id) else: if parsed_url.version_id is None: @@ -128,6 +138,8 @@ def register_local_path_equivalent(self, instance_name: str, filepath: str) -> N self.register_asset(api_url, dandiset_id, DRAFT, asset_path) def confirm(self) -> bool: + if self.dandiset is None: + raise ValueError("confirm() called before registering anything to delete") if self.deleting_dandiset: msg = f"Delete Dandiset {self.dandiset.identifier}?" else: @@ -139,6 +151,7 @@ def confirm(self) -> bool: def delete_dandiset(self) -> None: if self.deleting_dandiset: + assert self.dandiset is not None self.dandiset.delete() else: raise RuntimeError( @@ -196,8 +209,8 @@ def delete( style=pyout_style, columns=rec_fields, max_workers=jobs ) with out: - for gen in deleter.process_assets_pyout(): - out(gen) + for r in deleter.process_assets_pyout(): + out(r) def find_local_asset(filepath: str) -> Tuple[str, str]: diff --git a/dandi/download.py b/dandi/download.py index fd8c68996..33ab159d9 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -1,5 +1,8 @@ +from __future__ import annotations + from collections import Counter, deque from dataclasses import dataclass, field +from datetime import datetime from enum import Enum from functools import partial import hashlib @@ -454,16 +457,16 @@ def _populate_dandiset_yaml(dandiset_path, dandiset, existing): def _download_file( - downloader, - path, - toplevel_path, - lock, - size=None, - mtime=None, - existing="error", - digests=None, + downloader: Callable[[int], Iterator[bytes]], + path: str, + toplevel_path: str, + lock: Lock, + size: Optional[int] = None, + mtime: Optional[datetime] = None, + existing: str = "error", + digests: Optional[Dict[str, str]] = None, digest_callback: Optional[Callable[[str, str], Any]] = None, -): +) -> Iterator[dict]: """ Common logic for downloading a single file. @@ -492,10 +495,9 @@ def _download_file( will be used to verify download """ if op.lexists(path): - block = f"File {path!r} already exists" annex_path = op.join(toplevel_path, ".git", "annex") if existing == "error": - raise FileExistsError(block) + raise FileExistsError(f"File {path!r} already exists") elif existing == "skip": yield _skip_file("already exists") return @@ -525,13 +527,15 @@ def _download_file( path, ) elif ( - "dandi-etag" in digests + digests is not None + and "dandi-etag" in digests and get_digest(path, "dandi-etag") == digests["dandi-etag"] ): yield _skip_file("already exists") return elif ( - "dandi-etag" not in digests + digests is not None + and "dandi-etag" not in digests and "md5" in digests and get_digest(path, "md5") == digests["md5"] ): @@ -612,12 +616,13 @@ def _download_file( # Problems will result if `size` is None but we've already # downloaded everything. break - for block in downloader(start_at=dldir.offset): + for block in downloader(dldir.offset): if digester: + assert downloaded_digest is not None downloaded_digest.update(block) downloaded += len(block) # TODO: yield progress etc - msg = {"done": downloaded} + out = {"done": downloaded} if size: if downloaded > size and not warned: warned = True @@ -627,9 +632,9 @@ def _download_file( downloaded, size, ) - msg["done%"] = 100 * downloaded / size if size else "100" + out["done%"] = 100 * downloaded / size if size else "100" # TODO: ETA etc - yield msg + yield out dldir.append(block) break except requests.exceptions.HTTPError as exc: @@ -654,8 +659,10 @@ def _download_file( time.sleep(random.random() * 5) if downloaded_digest and not resuming: + assert downloaded_digest is not None downloaded_digest = downloaded_digest.hexdigest() # we care only about hex if digest_callback is not None: + assert isinstance(algo, str) digest_callback(algo, downloaded_digest) if digest != downloaded_digest: msg = f"{algo}: downloaded {downloaded_digest} != {digest}" @@ -816,7 +823,7 @@ def digest_callback(path: str, algoname: str, d: str) -> None: remote_paths = set(map(str, entries)) zarr_basepath = Path(download_path) dirs = deque([zarr_basepath]) - empty_dirs = deque() + empty_dirs: deque[Path] = deque() while dirs: d = dirs.popleft() is_empty = True diff --git a/dandi/files.py b/dandi/files.py index 56bedf224..7746b185e 100644 --- a/dandi/files.py +++ b/dandi/files.py @@ -20,7 +20,17 @@ from pathlib import Path import re from threading import Lock -from typing import Any, BinaryIO, Dict, Generic, Iterator, List, Optional, Union +from typing import ( + Any, + BinaryIO, + ClassVar, + Dict, + Generic, + Iterator, + List, + Optional, + Union, +) from xml.etree.ElementTree import fromstring from dandischema.digests.dandietag import DandiETag @@ -46,7 +56,7 @@ from .misctypes import DUMMY_DIGEST, BasePath, Digest, P from .pynwb_utils import validate as pynwb_validate from .support.digests import get_dandietag, get_digest, get_zarr_checksum -from .utils import chunked, ensure_datetime, pluralize, yaml_load +from .utils import chunked, pluralize, yaml_load lgr = get_logger() @@ -55,7 +65,7 @@ _required_nwb_metadata_fields = ["subject_id"] -@dataclass +@dataclass # type: ignore[misc] # class DandiFile(ABC): """Abstract base class for local files & directories of interest to DANDI""" @@ -71,7 +81,7 @@ def size(self) -> int: def modified(self) -> datetime: """The time at which the file was last modified""" # TODO: Should this be overridden for LocalDirectoryAsset? - return ensure_datetime(self.filepath.stat().st_mtime) + return datetime.fromtimestamp(self.filepath.stat().st_mtime).astimezone() @abstractmethod def get_metadata( @@ -150,7 +160,7 @@ def get_validation_errors( return [] -@dataclass +@dataclass # type: ignore[misc] # class LocalAsset(DandiFile): """ Representation of a file or directory that can be uploaded to a DANDI @@ -246,7 +256,9 @@ def upload( dandiset, metadata, jobs=jobs, replacing=replacing ): if status["status"] == "done": - return status["asset"] + a = status["asset"] + assert isinstance(a, RemoteAsset) + return a raise AssertionError("iter_upload() finished without returning 'done'") @abstractmethod @@ -286,6 +298,8 @@ class LocalFileAsset(LocalAsset): an asset of a Dandiset """ + EXTENSIONS: ClassVar[List[str]] = [] + def get_digest(self) -> Digest: """Calculate a dandi-etag digest for the asset""" value = get_digest(self.filepath, digest="dandi-etag") @@ -441,7 +455,7 @@ def iter_upload( class NWBAsset(LocalFileAsset): """Representation of a local NWB file""" - EXTENSIONS = [".nwb"] + EXTENSIONS: ClassVar[List[str]] = [".nwb"] def get_metadata( self, @@ -470,7 +484,7 @@ def get_validation_errors( schema_version: Optional[str] = None, devel_debug: bool = False, ) -> List[str]: - errors = pynwb_validate(self.filepath, devel_debug=devel_debug) + errors: List[str] = pynwb_validate(self.filepath, devel_debug=devel_debug) if schema_version is not None: errors.extend( super().get_validation_errors( @@ -503,7 +517,7 @@ class GenericAsset(LocalFileAsset): Representation of a generic regular file, one that is not of any known type """ - EXTENSIONS = [] + EXTENSIONS: ClassVar[List[str]] = [] def get_metadata( self, @@ -522,6 +536,8 @@ class LocalDirectoryAsset(LocalAsset, Generic[P]): `dandi.misctypes.BasePath`. """ + EXTENSIONS: ClassVar[List[str]] = [] + @property @abstractmethod def filetree(self) -> P: @@ -623,7 +639,7 @@ def size(self) -> int: def modified(self) -> datetime: """The time at which the entry was last modified""" # TODO: Should this be overridden for directories? - return ensure_datetime(self.filepath.stat().st_mtime) + return datetime.fromtimestamp(self.filepath.stat().st_mtime).astimezone() @dataclass @@ -641,7 +657,7 @@ class ZarrStat: class ZarrAsset(LocalDirectoryAsset[LocalZarrEntry]): """Representation of a local Zarr directory""" - EXTENSIONS = [".ngff", ".zarr"] + EXTENSIONS: ClassVar[List[str]] = [".ngff", ".zarr"] @property def filetree(self) -> LocalZarrEntry: @@ -873,9 +889,8 @@ def find_dandi_files( (unless ``allow_all`` is true). """ - path_queue = deque() - for p in paths: - p = Path(p) + path_queue: deque[Path] = deque() + for p in map(Path, paths): if dandiset_path is not None: try: p.relative_to(dandiset_path) @@ -943,7 +958,7 @@ def dandi_file( raise UnknownAssetError("Empty directories cannot be assets") for dirclass in LocalDirectoryAsset.__subclasses__(): if filepath.suffix in dirclass.EXTENSIONS: - return dirclass(filepath=filepath, path=path) + return dirclass(filepath=filepath, path=path) # type: ignore[abstract] raise UnknownAssetError( f"Directory has unrecognized suffix {filepath.suffix!r}" ) @@ -952,8 +967,8 @@ def dandi_file( else: for fileclass in LocalFileAsset.__subclasses__(): if filepath.suffix in fileclass.EXTENSIONS: - return fileclass(filepath=filepath, path=path) - return GenericAsset(filepath=filepath, path=path) + return fileclass(filepath=filepath, path=path) # type: ignore[abstract] + return GenericAsset(filepath=filepath, path=path) def _upload_blob_part( diff --git a/dandi/metadata.py b/dandi/metadata.py index 9ccfc378c..8d4e5a1d3 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -2,8 +2,9 @@ from functools import lru_cache import os import os.path as op +from pathlib import Path import re -from typing import Optional, Tuple +from typing import Dict, List, Optional, Tuple, Union from uuid import uuid4 from xml.dom.minidom import parseString @@ -389,7 +390,9 @@ def extract_sex(metadata): stop=tenacity.stop_after_attempt(3), wait=tenacity.wait_exponential(exp_base=1.25, multiplier=1.25), ) -def parse_purlobourl(url: str, lookup: Optional[Tuple[str, ...]] = None): +def parse_purlobourl( + url: str, lookup: Optional[Tuple[str, ...]] = None +) -> Optional[Dict[str, str]]: """Parse an Ontobee URL to return properties of a Class node :param url: Ontobee URL @@ -527,14 +530,14 @@ def extract_wasDerivedFrom(metadata): ) -def extract_session(metadata: dict) -> list: +def extract_session(metadata: dict) -> Optional[List[models.Session]]: probe_ids = metadata.get("probe_ids", []) if isinstance(probe_ids, str): probe_ids = [probe_ids] - probes = [] - for val in probe_ids: - probes.append(models.Equipment(identifier=f"probe:{val}", name="Ecephys Probe")) - probes = probes or None + probes = [ + models.Equipment(identifier=f"probe:{val}", name="Ecephys Probe") + for val in probe_ids + ] or None session_id = None if metadata.get("session_id") is not None: session_id = str(metadata["session_id"]) @@ -789,7 +792,9 @@ def process_ndtypes(asset, nd_types): def nwb2asset( - nwb_path, digest: Optional[Digest] = None, schema_version=None + nwb_path: Union[str, Path], + digest: Optional[Digest] = None, + schema_version: Optional[str] = None, ) -> models.BareAsset: if schema_version is not None: current_version = models.get_schema_version() @@ -806,7 +811,7 @@ def nwb2asset( metadata["encodingFormat"] = "application/x-nwb" metadata["dateModified"] = get_utcnow_datetime() metadata["blobDateModified"] = ensure_datetime(os.stat(nwb_path).st_mtime) - metadata["path"] = nwb_path + metadata["path"] = str(nwb_path) if metadata["blobDateModified"] > metadata["dateModified"]: lgr.warning( "mtime %s of %s is in the future", metadata["blobDateModified"], nwb_path @@ -820,7 +825,9 @@ def nwb2asset( return asset -def get_default_metadata(path, digest: Optional[Digest] = None) -> models.BareAsset: +def get_default_metadata( + path: Union[str, Path], digest: Optional[Digest] = None +) -> models.BareAsset: start_time = datetime.now().astimezone() if digest is not None: digest_model = digest.asdict() @@ -837,7 +844,7 @@ def get_default_metadata(path, digest: Optional[Digest] = None) -> models.BareAs dateModified=dateModified, blobDateModified=blobDateModified, wasGeneratedBy=[get_generator(start_time, end_time)], - encodingFormat=get_mime_type(path), + encodingFormat=get_mime_type(str(path)), ) diff --git a/dandi/misctypes.py b/dandi/misctypes.py index 03f9d040d..ea546d5a7 100644 --- a/dandi/misctypes.py +++ b/dandi/misctypes.py @@ -55,7 +55,7 @@ def asdict(self) -> Dict[DigestType, str]: P = TypeVar("P", bound="BasePath") -@dataclass +@dataclass # type: ignore[misc] # class BasePath(ABC): """ An abstract base class for path-like objects that can be traversed with the diff --git a/dandi/support/digests.py b/dandi/support/digests.py index 504ad755d..3286b814a 100644 --- a/dandi/support/digests.py +++ b/dandi/support/digests.py @@ -12,7 +12,7 @@ import hashlib import logging from pathlib import Path -from typing import Dict, Optional +from typing import Dict, List, Optional, Union, cast from dandischema.digests.dandietag import DandiETag from dandischema.digests.zarr import get_checksum @@ -24,7 +24,7 @@ @auto_repr -class Digester(object): +class Digester: """Helper to compute multiple digests in one pass for a file""" # Loosely based on snippet by PM 2Ring 2014.10.23 @@ -35,7 +35,9 @@ class Digester(object): DEFAULT_DIGESTS = ["md5", "sha1", "sha256", "sha512"] - def __init__(self, digests=None, blocksize=1 << 16): + def __init__( + self, digests: Optional[List[str]] = None, blocksize: int = 1 << 16 + ) -> None: """ Parameters ---------- @@ -51,10 +53,10 @@ def __init__(self, digests=None, blocksize=1 << 16): self.blocksize = blocksize @property - def digests(self): + def digests(self) -> List[str]: return self._digests - def __call__(self, fpath): + def __call__(self, fpath: Union[str, Path]) -> Dict[str, str]: """ fpath : str File path for which a checksum shall be computed. @@ -80,15 +82,15 @@ def __call__(self, fpath): @checksums.memoize_path -def get_digest(filepath, digest="sha256") -> str: +def get_digest(filepath: Union[str, Path], digest: str = "sha256") -> str: if digest == "dandi-etag": - return get_dandietag(filepath).as_str() + return cast(str, get_dandietag(filepath).as_str()) else: return Digester([digest])(filepath)[digest] @checksums.memoize_path -def get_dandietag(filepath) -> DandiETag: +def get_dandietag(filepath: Union[str, Path]) -> DandiETag: return DandiETag.from_file(filepath) @@ -115,4 +117,4 @@ def get_zarr_checksum( dirs[path] = known[path] except KeyError: dirs[path] = get_zarr_checksum(p, basepath) - return get_checksum(files, dirs) + return cast(str, get_checksum(files, dirs)) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 89a1d81e2..1dd85bbb9 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -8,7 +8,7 @@ from subprocess import DEVNULL, check_output, run import tempfile from time import sleep -from typing import Any, Dict +from typing import Any, Dict, List, Optional from uuid import uuid4 from click.testing import CliRunner @@ -255,7 +255,9 @@ class DandiAPI: @property def api_url(self) -> str: - return self.instance.api + url = self.instance.api + assert isinstance(url, str) + return url @pytest.fixture(scope="session") @@ -284,7 +286,7 @@ class SampleDandiset: def client(self) -> DandiAPIClient: return self.api.client - def upload(self, paths=None, **kwargs) -> None: + def upload(self, paths: Optional[List[str]] = None, **kwargs: Any) -> None: with pytest.MonkeyPatch().context() as m: m.setenv("DANDI_API_KEY", self.api.api_key) upload( @@ -364,3 +366,4 @@ def tmp_home( monkeypatch.delenv("XDG_STATE_HOME", raising=False) monkeypatch.setenv("USERPROFILE", str(home)) monkeypatch.setenv("LOCALAPPDATA", str(home)) + return home diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index fdafdfd76..8729b038e 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -649,7 +649,7 @@ def test_progress_combiner( file_qty: int, inputs: List[Tuple[str, dict]], expected: List[dict] ) -> None: pc = ProgressCombiner(zarr_size=69105, file_qty=file_qty) - outputs = [] + outputs: List[dict] = [] for path, status in inputs: outputs.extend(pc.feed(path, status)) assert outputs == expected diff --git a/dandi/utils.py b/dandi/utils.py index dfe8b7dc6..d3de0c9db 100644 --- a/dandi/utils.py +++ b/dandi/utils.py @@ -328,7 +328,7 @@ def list_paths(dirpath: Union[str, Path], dirs: bool = False) -> List[Path]: return sorted(map(Path, find_files(r".*", [dirpath], dirs=dirs))) -_cp_supports_reflink = None +_cp_supports_reflink: Optional[bool] = None def copy_file(src, dst): @@ -598,14 +598,16 @@ def get_module_version(module: Union[str, types.ModuleType]) -> Optional[str]: ------- object """ + modobj: Optional[types.ModuleType] if isinstance(module, str): + modobj = sys.modules.get(module) mod_name = module - module = sys.modules.get(module) else: + modobj = module mod_name = module.__name__.split(".", 1)[0] - if module is not None: - version = getattr(module, "__version__", None) + if modobj is not None: + version = getattr(modobj, "__version__", None) else: version = None if version is None: From f634c31bc06c9c8edf83e2ba97c74bae059fb439 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 25 Jan 2022 10:45:32 -0500 Subject: [PATCH 3/4] Add a type-checking GitHub Action --- .github/workflows/typing.yml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/workflows/typing.yml diff --git a/.github/workflows/typing.yml b/.github/workflows/typing.yml new file mode 100644 index 000000000..ed9ea5224 --- /dev/null +++ b/.github/workflows/typing.yml @@ -0,0 +1,27 @@ +name: Type-check + +on: + - push + - pull_request + +jobs: + typing: + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.7' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade tox + + - name: Run type checker + run: tox -e typing From 830373ba0991fec5f33c85c27749f6726a363546 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 25 Jan 2022 11:46:44 -0500 Subject: [PATCH 4/4] Move "type: ignore" comments around to get them to work across Python versions --- dandi/dandiapi.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index 5082d699b..bc9407775 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -1176,12 +1176,12 @@ def from_metadata( This is a low-level method that non-developers would normally only use when acquiring data using means outside of this library. """ - return BaseRemoteAsset( + return BaseRemoteAsset( # type: ignore[call-arg] client=client, identifier=metadata["identifier"], path=metadata["path"], size=metadata["contentSize"], - _metadata=metadata, # type: ignore[call-arg] + _metadata=metadata, ) @property @@ -1434,12 +1434,12 @@ def from_data( raise ValueError("Asset data contains both `blob` and `zarr`'") else: raise ValueError("Asset data contains neither `blob` nor `zarr`") - return klass( + return klass( # type: ignore[call-arg] client=dandiset.client, dandiset_id=dandiset.identifier, version_id=dandiset.version_id, **data, - _metadata=metadata, # type: ignore[call-arg] + _metadata=metadata, ) @property