Skip to content
Merged
44 changes: 38 additions & 6 deletions dandi/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ def extract_anatomy(metadata):
def extract_model(modelcls, metadata, **kwargs):
m = modelcls.unvalidated()
for field in m.__fields__.keys():
if modelcls == models.BioSample and field == "wasDerivedFrom":
continue
value = kwargs.get(field, extract_field(field, metadata))
if value is not Ellipsis:
setattr(m, field, value)
Expand All @@ -237,7 +239,17 @@ def extract_model(modelcls, metadata, **kwargs):

def extract_wasDerivedFrom(metadata):
    """Build the ``wasDerivedFrom`` value from extracted metadata.

    Returns a single-element list holding a ``models.BioSample`` produced by
    ``extract_model``. The sample is identified by the ``tissue_sample_id``
    entry of *metadata* (``None`` when that key is absent); ``subject_id`` is
    not used here because it identifies the participant, not the sample.
    """
    return [
        extract_model(
            models.BioSample, metadata, identifier=metadata.get("tissue_sample_id")
        )
    ]


def extract_wasAttributedTo(metadata):
    """Build the ``wasAttributedTo`` value from extracted metadata.

    Returns a single-element list holding a ``models.Participant`` produced by
    ``extract_model``, identified by the ``subject_id`` entry of *metadata*
    (``None`` when that key is absent).
    """
    participant = extract_model(
        models.Participant,
        metadata,
        identifier=metadata.get("subject_id"),
    )
    return [participant]


Expand All @@ -253,6 +265,7 @@ def extract_digest(metadata):

FIELD_EXTRACTORS = {
"wasDerivedFrom": extract_wasDerivedFrom,
"wasAttributedTo": extract_wasAttributedTo,
"age": extract_age,
"sex": extract_sex,
"assayType": extract_assay_type,
Expand Down Expand Up @@ -348,7 +361,7 @@ def toContributor(value):
roles.append("".join([val.capitalize() for val in tmp]))
else:
roles.append(tmp.pop())
contrib["roleName"] = roles
contrib["roleName"] = [getattr(models.RoleType, role) for role in roles]
del item["roles"]
if "awardNumber" in item:
contrib["awardNumber"] = item["awardNumber"]
Expand All @@ -362,9 +375,17 @@ def toContributor(value):
contrib["identifier"] = models.PropertyValue()
del item["orcid"]
if "affiliations" in item:
item["affiliation"] = item["affiliations"]
item["affiliation"] = [
models.Organization.unvalidated(**{"name": affiliate})
for affiliate in item["affiliations"]
]

del item["affiliations"]
contrib.update(**{f"{k}": v for k, v in item.items()})
if "awardNumber" in contrib:
contrib = models.Organization.unvalidated(**contrib)
else:
contrib = models.Person.unvalidated(**contrib)
out.append(contrib)
return out

Expand Down Expand Up @@ -404,14 +425,18 @@ def convertv1(data):
out = []
for item in value:
if isinstance(item, dict):
out.append({k: v for k, v in item.items()})
out.append(
models.Resource.unvalidated(
**{k: v for k, v in item.items()}
)
)
else:
present = False
for val in out:
if item in val.values():
present = True
if not present:
out.append({"url": item})
out.append(models.Resource.unvalidated(**{"url": item}))
value = out
if oldkey in [
"number_of_subjects",
Expand All @@ -423,7 +448,14 @@ def convertv1(data):
if isinstance(value, list):
for val in value:
if extrakey:
val[extrakey] = extra
if extrakey == "relation":
val.relation = getattr(models.RelationType, extra)
elif extrakey == "roleName":
val.roleName = [
getattr(models.RoleType, role) for role in extra
]
else:
val[extrakey] = extra
if isinstance(value, dict):
if extrakey:
value[extrakey] = extra
Expand Down
52 changes: 41 additions & 11 deletions dandi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,17 +226,17 @@ class ContactPoint(DandiBaseModel):


class Contributor(DandiBaseModel):
identifier: Identifier = Field(
identifier: Optional[Identifier] = Field(
None,
title="A Common Identifier",
description="Use a common identifier such as ORCID for people or ROR for institutions",
nskey="schema",
)
name: str = Field(None, nskey="schema")
email: EmailStr = Field(None, nskey="schema")
url: AnyUrl = Field(None, nskey="schema")
roleName: List[RoleType] = Field(
title="Role", description="Role of the contributor", nskey="schema"
name: Optional[str] = Field(None, nskey="schema")
email: Optional[EmailStr] = Field(None, nskey="schema")
url: Optional[AnyUrl] = Field(None, nskey="schema")
roleName: Optional[List[RoleType]] = Field(
None, title="Role", description="Role of the contributor", nskey="schema"
)
includeInCitation: bool = Field(
True,
Expand All @@ -245,7 +245,7 @@ class Contributor(DandiBaseModel):
"when generating a citation for the item",
nskey="dandi",
)
awardNumber: Identifier = Field(
awardNumber: Optional[Identifier] = Field(
None,
title="Identifier for an award",
description="Identifier associated with a sponsored or gidt award",
Expand All @@ -254,8 +254,15 @@ class Contributor(DandiBaseModel):


class Organization(Contributor):
contactPoint: List[ContactPoint] = Field(
description="Contact for the organization", nskey="schema"
includeInCitation: bool = Field(
False,
title="Include Contributor in Citation",
description="A flag to indicate whether a contributor should be included "
"when generating a citation for the item",
nskey="dandi",
)
contactPoint: Optional[List[ContactPoint]] = Field(
None, description="Contact for the organization", nskey="schema"
)
_ldmeta = {
"rdfs:subClassOf": ["schema:Organization", "prov:Organization"],
Expand Down Expand Up @@ -396,9 +403,29 @@ class BioSample(DandiBaseModel):
)
anatomy: Optional[List[Anatomy]] = Field(
None,
description="UBERON based identifier for the location of the sample",
description="UBERON based identifier for what organ the sample belongs "
"to. Use the most specific descriptor.",
nskey="dandi",
)

wasDerivedFrom: Optional[List["BioSample"]] = Field(None, nskey="prov")
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This recursive formulation should allow us to represent cell IDs, slice IDs, and tissue IDs.


_ldmeta = {
"rdfs:subClassOf": ["schema:Thing", "prov:Entity"],
"rdfs:label": "Information about the biosample.",
"nskey": "dandi",
}


# BioSample.wasDerivedFrom is annotated with the string "BioSample" (a
# reference to the class from inside its own body); resolve that forward
# reference now that the class name is bound.
BioSample.update_forward_refs()


class Participant(DandiBaseModel):
"""Description about the sample that was studied"""
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Separating out the participant details.


identifier: Identifier = Field(nskey="schema")
source_id: Optional[Identifier] = Field(None, nskey="dandi")

strain: Optional[StrainType] = Field(
None, description="Identifier for the strain of the sample", nskey="dandi"
)
Expand Down Expand Up @@ -435,7 +462,7 @@ class BioSample(DandiBaseModel):

_ldmeta = {
"rdfs:subClassOf": ["schema:Thing", "prov:Entity"],
"rdfs:label": "Information about the biosample.",
"rdfs:label": "Information about the participant.",
"nskey": "dandi",
}

Expand Down Expand Up @@ -632,6 +659,9 @@ class AssetMeta(CommonModel):
)

wasDerivedFrom: Optional[List[BioSample]] = Field(None, nskey="prov")
wasAttributedTo: List[Participant] = Field(
None, description="Participant(s) to which this file belongs to", nskey="prov"
)
Comment on lines +662 to +664
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A given asset may potentially have multiple participants. How we deal with this in the name is something we will have to figure out.


# on publish or set by server
contentUrl: Optional[List[AnyUrl]] = Field(None, readOnly=True, nskey="schema")
Expand Down
Loading