Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 40 additions & 46 deletions dandi/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,9 +636,7 @@ def extract_session(metadata: dict) -> Optional[List[models.Session]]:
]


def extract_digest(
metadata: dict,
) -> Optional[Dict[models.DigestType, str]]:
def extract_digest(metadata: dict) -> Optional[Dict[models.DigestType, str]]:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see no content difference -- so why is the signature being reformatted here? Did we change the Black settings since the last time this file was modified?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I just noticed that the signature could fit on one line and felt compelled to fix it.

if "digest" in metadata:
return {models.DigestType[metadata["digest_type"]]: metadata["digest"]}
else:
Expand Down Expand Up @@ -868,9 +866,7 @@ class Neurodatum(TypedDict):
}


def process_ndtypes(
asset: models.BareAsset, nd_types: Iterable[str]
) -> models.BareAsset:
def process_ndtypes(metadata: Dict[str, Any], nd_types: Iterable[str]) -> None:
approach = set()
technique = set()
variables = set()
Expand All @@ -883,12 +879,13 @@ def process_ndtypes(
if neurodata_typemap[val]["technique"]:
technique.add(neurodata_typemap[val]["technique"])
variables.add(val)
asset.approach = [models.ApproachType(name=val) for val in approach]
asset.measurementTechnique = [
metadata["approach"] = [models.ApproachType(name=val) for val in approach]
metadata["measurementTechnique"] = [
models.MeasurementTechniqueType(name=val) for val in technique
]
asset.variableMeasured = [models.PropertyValue(value=val) for val in variables]
return asset
metadata["variableMeasured"] = [
models.PropertyValue(value=val) for val in variables
]


def nwb2asset(
Expand All @@ -904,48 +901,46 @@ def nwb2asset(
)
start_time = datetime.now().astimezone()
metadata = get_metadata(nwb_path)
if digest is not None:
metadata["digest"] = digest.value
metadata["digest_type"] = digest.algorithm.name
metadata["contentSize"] = op.getsize(nwb_path)
metadata["encodingFormat"] = "application/x-nwb"
metadata["dateModified"] = get_utcnow_datetime()
metadata["blobDateModified"] = ensure_datetime(os.stat(nwb_path).st_mtime)
metadata["path"] = str(nwb_path)
if metadata["blobDateModified"] > metadata["dateModified"]:
lgr.warning(
"mtime %s of %s is in the future", metadata["blobDateModified"], nwb_path
)
asset = metadata2asset(metadata)
asset = process_ndtypes(asset, metadata["nd_types"])
asset_md = prepare_metadata(metadata)
process_ndtypes(asset_md, metadata["nd_types"])
end_time = datetime.now().astimezone()
if asset.wasGeneratedBy is None:
asset.wasGeneratedBy = []
asset.wasGeneratedBy.append(get_generator(start_time, end_time))
return asset
add_common_metadata(asset_md, nwb_path, start_time, end_time, digest)
asset_md["encodingFormat"] = "application/x-nwb"
asset_md["path"] = str(nwb_path)
return models.BareAsset(**asset_md)


def get_default_metadata(
path: Union[str, Path], digest: Optional[Digest] = None
) -> models.BareAsset:
start_time = datetime.now().astimezone()
metadata: Dict[str, Any] = {}
start_time = end_time = datetime.now().astimezone()
add_common_metadata(metadata, path, start_time, end_time, digest)
return models.BareAsset.unvalidated(**metadata)


def add_common_metadata(
metadata: Dict[str, Any],
path: Union[str, Path],
start_time: datetime,
end_time: datetime,
digest: Optional[Digest] = None,
) -> None:
if digest is not None:
digest_model = digest.asdict()
metadata["digest"] = digest.asdict()
else:
digest_model = {}
dateModified = get_utcnow_datetime()
blobDateModified = ensure_datetime(os.stat(path).st_mtime)
if blobDateModified > dateModified:
lgr.warning("mtime %s of %s is in the future", blobDateModified, path)
end_time = datetime.now().astimezone()
return models.BareAsset.unvalidated(
contentSize=os.path.getsize(path),
digest=digest_model,
dateModified=dateModified,
blobDateModified=blobDateModified,
wasGeneratedBy=[get_generator(start_time, end_time)],
encodingFormat=get_mime_type(str(path)),
metadata["digest"] = {}
metadata["dateModified"] = get_utcnow_datetime()
metadata["blobDateModified"] = ensure_datetime(os.stat(path).st_mtime)
if metadata["blobDateModified"] > metadata["dateModified"]:
lgr.warning(
"mtime %s of %s is in the future", metadata["blobDateModified"], path
)
metadata["contentSize"] = os.path.getsize(path)
metadata.setdefault("wasGeneratedBy", []).append(
get_generator(start_time, end_time)
)
metadata["encodingFormat"] = get_mime_type(str(path))


def get_generator(start_time: datetime, end_time: datetime) -> models.Activity:
Expand All @@ -967,6 +962,5 @@ def get_generator(start_time: datetime, end_time: datetime) -> models.Activity:
)


def metadata2asset(metadata: dict) -> models.BareAsset:
bare_dict = extract_model(models.BareAsset, metadata).json_dict()
return models.BareAsset(**bare_dict)
def prepare_metadata(metadata: dict) -> Dict[str, Any]:
return cast(Dict[str, Any], extract_model(models.BareAsset, metadata).json_dict())
32 changes: 14 additions & 18 deletions dandi/tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from copy import deepcopy
from datetime import datetime, timedelta
import json
from pathlib import Path
Expand All @@ -7,7 +6,6 @@
from dandischema.consts import DANDI_SCHEMA_VERSION
from dandischema.metadata import validate
from dandischema.models import AgeReferenceType
from dandischema.models import BareAsset as BareAssetMeta
from dandischema.models import Dandiset as DandisetMeta
from dandischema.models import PropertyValue
from dateutil.tz import tzutc
Expand All @@ -19,9 +17,9 @@
extract_age,
extract_species,
get_metadata,
metadata2asset,
parse_age,
parse_purlobourl,
prepare_metadata,
process_ndtypes,
timedelta2duration,
)
Expand Down Expand Up @@ -266,14 +264,12 @@ def test_timedelta2duration(td: timedelta, duration: str) -> None:
),
],
)
def test_metadata2asset(filename: str, metadata: Dict[str, Any]) -> None:
data = metadata2asset(metadata)
def test_prepare_metadata(filename: str, metadata: Dict[str, Any]) -> None:
data = prepare_metadata(metadata)
with (METADATA_DIR / filename).open() as fp:
data_as_dict = json.load(fp)
data_as_dict["schemaVersion"] = DANDI_SCHEMA_VERSION
assert data == BareAssetMeta(**data_as_dict)
bare_dict = deepcopy(data_as_dict)
assert data.json_dict() == bare_dict
assert data == data_as_dict
data_as_dict["identifier"] = "0b0a1a0b-e3ea-4cf6-be94-e02c830d54be"
# as of schema-0.5.0 (https://github.com/dandi/dandischema/pull/52)
# contentUrl is required, and validate below would map into Asset,
Expand Down Expand Up @@ -633,17 +629,17 @@ def test_species():
],
)
def test_ndtypes(ndtypes, asset_dict):
asset = BareAssetMeta(
contentSize=1,
encodingFormat="application/x-nwb",
digest={"dandi:dandi-etag": "0" * 32 + "-1"},
path="test.nwb",
)
asset = process_ndtypes(asset, ndtypes)
metadata = {
"contentSize": 1,
"encodingFormat": "application/x-nwb",
"digest": {"dandi:dandi-etag": "0" * 32 + "-1"},
"path": "test.nwb",
}
process_ndtypes(metadata, ndtypes)
for key in ["approach", "measurementTechnique"]:
if asset_dict.get(key) is None:
assert getattr(asset, key) == []
assert metadata[key] == []
else:
assert getattr(asset, key)[0].name == asset_dict.get(key)[0]
assert metadata[key][0].name == asset_dict.get(key)[0]
key = "variableMeasured"
assert getattr(asset, key)[0].value == asset_dict.get(key)[0]
assert metadata[key][0].value == asset_dict.get(key)[0]