From ba51aad98b9cb689a6fd6a20183baa7dd56b1c44 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 1 Aug 2022 11:32:51 -0400 Subject: [PATCH] Factor out common fields in nwb2asset() and get_default_metadata() --- dandi/metadata.py | 86 +++++++++++++++++------------------- dandi/tests/test_metadata.py | 32 ++++++-------- 2 files changed, 54 insertions(+), 64 deletions(-) diff --git a/dandi/metadata.py b/dandi/metadata.py index 0d9b645cc..c1a13a60c 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -636,9 +636,7 @@ def extract_session(metadata: dict) -> Optional[List[models.Session]]: ] -def extract_digest( - metadata: dict, -) -> Optional[Dict[models.DigestType, str]]: +def extract_digest(metadata: dict) -> Optional[Dict[models.DigestType, str]]: if "digest" in metadata: return {models.DigestType[metadata["digest_type"]]: metadata["digest"]} else: @@ -868,9 +866,7 @@ class Neurodatum(TypedDict): } -def process_ndtypes( - asset: models.BareAsset, nd_types: Iterable[str] -) -> models.BareAsset: +def process_ndtypes(metadata: Dict[str, Any], nd_types: Iterable[str]) -> None: approach = set() technique = set() variables = set() @@ -883,12 +879,13 @@ def process_ndtypes( if neurodata_typemap[val]["technique"]: technique.add(neurodata_typemap[val]["technique"]) variables.add(val) - asset.approach = [models.ApproachType(name=val) for val in approach] - asset.measurementTechnique = [ + metadata["approach"] = [models.ApproachType(name=val) for val in approach] + metadata["measurementTechnique"] = [ models.MeasurementTechniqueType(name=val) for val in technique ] - asset.variableMeasured = [models.PropertyValue(value=val) for val in variables] - return asset + metadata["variableMeasured"] = [ + models.PropertyValue(value=val) for val in variables + ] def nwb2asset( @@ -904,48 +901,46 @@ def nwb2asset( ) start_time = datetime.now().astimezone() metadata = get_metadata(nwb_path) - if digest is not None: - metadata["digest"] = digest.value - metadata["digest_type"] = digest.algorithm.name - metadata["contentSize"] = op.getsize(nwb_path) - metadata["encodingFormat"] = "application/x-nwb" - metadata["dateModified"] = get_utcnow_datetime() - metadata["blobDateModified"] = ensure_datetime(os.stat(nwb_path).st_mtime) - metadata["path"] = str(nwb_path) - if metadata["blobDateModified"] > metadata["dateModified"]: - lgr.warning( - "mtime %s of %s is in the future", metadata["blobDateModified"], nwb_path - ) - asset = metadata2asset(metadata) - asset = process_ndtypes(asset, metadata["nd_types"]) + asset_md = prepare_metadata(metadata) + process_ndtypes(asset_md, metadata["nd_types"]) end_time = datetime.now().astimezone() - if asset.wasGeneratedBy is None: - asset.wasGeneratedBy = [] - asset.wasGeneratedBy.append(get_generator(start_time, end_time)) - return asset + add_common_metadata(asset_md, nwb_path, start_time, end_time, digest) + asset_md["encodingFormat"] = "application/x-nwb" + asset_md["path"] = str(nwb_path) + return models.BareAsset(**asset_md) def get_default_metadata( path: Union[str, Path], digest: Optional[Digest] = None ) -> models.BareAsset: - start_time = datetime.now().astimezone() + metadata: Dict[str, Any] = {} + start_time = end_time = datetime.now().astimezone() + add_common_metadata(metadata, path, start_time, end_time, digest) + return models.BareAsset.unvalidated(**metadata) + + +def add_common_metadata( + metadata: Dict[str, Any], + path: Union[str, Path], + start_time: datetime, + end_time: datetime, + digest: Optional[Digest] = None, +) -> None: if digest is not None: - digest_model = digest.asdict() + metadata["digest"] = digest.asdict() else: - digest_model = {} - dateModified = get_utcnow_datetime() - blobDateModified = ensure_datetime(os.stat(path).st_mtime) - if blobDateModified > dateModified: - lgr.warning("mtime %s of %s is in the future", blobDateModified, path) - end_time = datetime.now().astimezone() - return models.BareAsset.unvalidated( - contentSize=os.path.getsize(path), - digest=digest_model, - dateModified=dateModified, - blobDateModified=blobDateModified, - wasGeneratedBy=[get_generator(start_time, end_time)], - encodingFormat=get_mime_type(str(path)), + metadata["digest"] = {} + metadata["dateModified"] = get_utcnow_datetime() + metadata["blobDateModified"] = ensure_datetime(os.stat(path).st_mtime) + if metadata["blobDateModified"] > metadata["dateModified"]: + lgr.warning( + "mtime %s of %s is in the future", metadata["blobDateModified"], path + ) + metadata["contentSize"] = os.path.getsize(path) + metadata.setdefault("wasGeneratedBy", []).append( + get_generator(start_time, end_time) ) + metadata["encodingFormat"] = get_mime_type(str(path)) def get_generator(start_time: datetime, end_time: datetime) -> models.Activity: @@ -967,6 +962,5 @@ def get_generator(start_time: datetime, end_time: datetime) -> models.Activity: ) -def metadata2asset(metadata: dict) -> models.BareAsset: - bare_dict = extract_model(models.BareAsset, metadata).json_dict() - return models.BareAsset(**bare_dict) +def prepare_metadata(metadata: dict) -> Dict[str, Any]: + return cast(Dict[str, Any], extract_model(models.BareAsset, metadata).json_dict()) diff --git a/dandi/tests/test_metadata.py b/dandi/tests/test_metadata.py index 7346219eb..d8e7e7e39 100644 --- a/dandi/tests/test_metadata.py +++ b/dandi/tests/test_metadata.py @@ -1,4 +1,3 @@ -from copy import deepcopy from datetime import datetime, timedelta import json from pathlib import Path @@ -7,7 +6,6 @@ from dandischema.consts import DANDI_SCHEMA_VERSION from dandischema.metadata import validate from dandischema.models import AgeReferenceType -from dandischema.models import BareAsset as BareAssetMeta from dandischema.models import Dandiset as DandisetMeta from dandischema.models import PropertyValue from dateutil.tz import tzutc @@ -19,9 +17,9 @@ extract_age, extract_species, get_metadata, - metadata2asset, parse_age, parse_purlobourl, + prepare_metadata, process_ndtypes, timedelta2duration, ) @@ -266,14 +264,12 @@ def test_timedelta2duration(td: timedelta, duration: str) -> None: ), ], ) -def test_metadata2asset(filename: str, metadata: Dict[str, Any]) -> None: - data = metadata2asset(metadata) +def test_prepare_metadata(filename: str, metadata: Dict[str, Any]) -> None: + data = prepare_metadata(metadata) with (METADATA_DIR / filename).open() as fp: data_as_dict = json.load(fp) data_as_dict["schemaVersion"] = DANDI_SCHEMA_VERSION - assert data == BareAssetMeta(**data_as_dict) - bare_dict = deepcopy(data_as_dict) - assert data.json_dict() == bare_dict + assert data == data_as_dict data_as_dict["identifier"] = "0b0a1a0b-e3ea-4cf6-be94-e02c830d54be" # as of schema-0.5.0 (https://github.com/dandi/dandischema/pull/52) # contentUrl is required, and validate below would map into Asset, @@ -633,17 +629,17 @@ def test_species(): ], ) def test_ndtypes(ndtypes, asset_dict): - asset = BareAssetMeta( - contentSize=1, - encodingFormat="application/x-nwb", - digest={"dandi:dandi-etag": "0" * 32 + "-1"}, - path="test.nwb", - ) - asset = process_ndtypes(asset, ndtypes) + metadata = { + "contentSize": 1, + "encodingFormat": "application/x-nwb", + "digest": {"dandi:dandi-etag": "0" * 32 + "-1"}, + "path": "test.nwb", + } + process_ndtypes(metadata, ndtypes) for key in ["approach", "measurementTechnique"]: if asset_dict.get(key) is None: - assert getattr(asset, key) == [] + assert metadata[key] == [] else: - assert getattr(asset, key)[0].name == asset_dict.get(key)[0] + assert metadata[key][0].name == asset_dict.get(key)[0] key = "variableMeasured" - assert getattr(asset, key)[0].value == asset_dict.get(key)[0] + assert metadata[key][0].value == asset_dict.get(key)[0]