Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ dependencies = [
"biopython",
"tqdm",
"click",
"cool-seq-tool>=0.4.0.dev1",
"ga4gh.vrs~=2.0.0-a6",
"cool-seq-tool==0.4.0.dev3",
"ga4gh.vrs==2.0.0-a6",
"gene_normalizer[etl,pg]==0.3.0-dev2",
"pydantic>=2",
"python-dotenv",
Expand Down
53 changes: 28 additions & 25 deletions src/dcd_mapping/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
ScoresetMetadata,
TargetSequenceType,
TxSelectResult,
VrsVersion,
)

_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -243,8 +244,9 @@ def _annotate_allele_mapping(
mapped_score: MappedScore,
tx_results: TxSelectResult | None,
metadata: ScoresetMetadata,
vrs_version: VrsVersion = VrsVersion.V_2,
) -> ScoreAnnotationWithLayer:
"""Perform annotations and create VRS 1.3 equivalents for allele mappings."""
"""Perform annotations and, if necessary, create VRS 1.3 equivalents for allele mappings."""
pre_mapped: Allele = mapped_score.pre_mapped
post_mapped: Allele = mapped_score.post_mapped

Expand Down Expand Up @@ -274,24 +276,27 @@ def _annotate_allele_mapping(
hgvs_string, syntax = _get_hgvs_string(post_mapped, accession)
post_mapped.expressions = [Expression(syntax=syntax, value=hgvs_string)]

pre_mapped_vod = _allele_to_vod(pre_mapped)
post_mapped_vod = _allele_to_vod(post_mapped)
if vrs_version == VrsVersion.V_1_3:
pre_mapped = _allele_to_vod(pre_mapped)
post_mapped = _allele_to_vod(post_mapped)

return ScoreAnnotationWithLayer(
pre_mapped=pre_mapped_vod,
post_mapped=post_mapped_vod,
pre_mapped_2_0=pre_mapped,
post_mapped_2_0=post_mapped,
pre_mapped=pre_mapped,
post_mapped=post_mapped,
vrs_version=vrs_version,
mavedb_id=mapped_score.accession_id,
score=float(mapped_score.score) if mapped_score.score else None,
annotation_layer=mapped_score.annotation_layer,
)


def _annotate_haplotype_mapping(
mapping: MappedScore, tx_results: TxSelectResult | None, metadata: ScoresetMetadata
mapping: MappedScore,
tx_results: TxSelectResult | None,
metadata: ScoresetMetadata,
vrs_version: VrsVersion = VrsVersion.V_2,
) -> ScoreAnnotationWithLayer:
"""Perform annotations and create VRS 1.3 equivalents for haplotype mappings."""
"""Perform annotations and, if necessary, create VRS 1.3 equivalents for haplotype mappings."""
pre_mapped: Haplotype = mapping.pre_mapped # type: ignore
post_mapped: Haplotype = mapping.post_mapped # type: ignore
# get vrs_ref_allele_seq for pre-mapped variants
Expand Down Expand Up @@ -324,14 +329,14 @@ def _annotate_haplotype_mapping(
hgvs, syntax = _get_hgvs_string(allele, accession)
allele.expressions = [Expression(syntax=syntax, value=hgvs)]

pre_mapped_converted = _haplotype_to_haplotype_1_3(pre_mapped)
post_mapped_converted = _haplotype_to_haplotype_1_3(post_mapped)
if vrs_version == VrsVersion.V_1_3:
pre_mapped = _haplotype_to_haplotype_1_3(pre_mapped)
post_mapped = _haplotype_to_haplotype_1_3(post_mapped)

return ScoreAnnotationWithLayer(
pre_mapped=pre_mapped_converted,
post_mapped=post_mapped_converted,
pre_mapped_2_0=pre_mapped,
post_mapped_2_0=post_mapped,
pre_mapped=pre_mapped,
post_mapped=post_mapped,
vrs_version=vrs_version,
mavedb_id=mapping.accession_id,
score=float(mapping.score) if mapping.score is not None else None,
annotation_layer=mapping.annotation_layer,
Expand All @@ -342,6 +347,7 @@ def annotate(
mapped_scores: list[MappedScore],
tx_results: TxSelectResult | None,
metadata: ScoresetMetadata,
vrs_version: VrsVersion = VrsVersion.V_2,
) -> list[ScoreAnnotationWithLayer]:
"""Given a list of mappings, add additional contextual data:

Expand All @@ -365,13 +371,17 @@ def annotate(
mapped_score.post_mapped, Haplotype
):
score_annotations.append(
_annotate_haplotype_mapping(mapped_score, tx_results, metadata)
_annotate_haplotype_mapping(
mapped_score, tx_results, metadata, vrs_version
)
)
elif isinstance(mapped_score.pre_mapped, Allele) and isinstance(
mapped_score.post_mapped, Allele
):
score_annotations.append(
_annotate_allele_mapping(mapped_score, tx_results, metadata)
_annotate_allele_mapping(
mapped_score, tx_results, metadata, vrs_version
)
)
else:
ValueError("inconsistent variant structure")
Expand Down Expand Up @@ -464,7 +474,6 @@ def save_mapped_output_json(
mappings: list[ScoreAnnotationWithLayer],
align_result: AlignmentResult,
tx_output: TxSelectResult | None,
include_vrs_2: bool = False,
preferred_layer_only: bool = False,
output_path: Path | None = None,
) -> Path:
Expand All @@ -474,7 +483,6 @@ def save_mapped_output_json(
:param mave_vrs_mappings: A dictionary of VrsObject1_x objects
:param align_result: Alignment information for a score set
:param tx_output: Transcript output for a score set
:param include_vrs_2: if true, also include VRS 2.0 mappings
:param output_path: specific location to save output to. Default to
<dcd_mapping_data_dir>/urn:mavedb:00000XXX-X-X_mapping_<ISO8601 datetime>.json
:return: output location
Expand Down Expand Up @@ -523,19 +531,14 @@ def save_mapped_output_json(
mapped_scores=mapped_scores,
)

if not include_vrs_2:
for m in output.mapped_scores:
m.pre_mapped_2_0 = None
m.post_mapped_2_0 = None

if not output_path:
now = datetime.datetime.now(tz=datetime.UTC).isoformat()
output_path = LOCAL_STORE_PATH / f"{urn}_mapping_{now}.json"

_logger.info("Saving mapping output to %s", output_path)
with output_path.open("w") as file:
json.dump(
json.loads(output.model_dump_json(exclude_unset=True, exclude_none=True)),
output.model_dump(exclude_unset=True, exclude_none=True),
file,
indent=4,
)
Expand Down
14 changes: 8 additions & 6 deletions src/dcd_mapping/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from dcd_mapping.align import AlignmentError
from dcd_mapping.main import map_scoreset_urn
from dcd_mapping.resource_utils import ResourceAcquisitionError
from dcd_mapping.schemas import VrsVersion
from dcd_mapping.transcripts import TxSelectError
from dcd_mapping.vrs_map import VrsMapError

Expand All @@ -33,11 +34,12 @@
help="Desired location at which output file should be saved",
)
@click.option(
"--include_vrs_2",
"--vrs_version",
"-v",
is_flag=True,
default=False,
help="Include VRS 2.0 mappings",
type=click.Choice(["1.3", "2"]),
default="2",
show_default=True,
help="Version to use for output VRS objects",
)
@click.option(
"--prefer_genomic",
Expand All @@ -49,7 +51,7 @@ def cli(
urn: str,
debug: bool,
output: Path | None,
include_vrs_2: bool,
vrs_version: VrsVersion,
prefer_genomic: bool,
) -> None:
"""Get VRS mapping on preferred transcript for URN.
Expand All @@ -72,7 +74,7 @@ def cli(
_logger.debug("debug logging enabled")
try:
asyncio.run(
map_scoreset_urn(urn, output, include_vrs_2, prefer_genomic, silent=False)
map_scoreset_urn(urn, output, vrs_version, prefer_genomic, silent=False)
)
except (
LookupError,
Expand Down
12 changes: 6 additions & 6 deletions src/dcd_mapping/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from dcd_mapping.schemas import (
ScoreRow,
ScoresetMetadata,
VrsVersion,
)
from dcd_mapping.transcripts import TxSelectError, select_transcript
from dcd_mapping.vrs_map import VrsMapError, vrs_map
Expand Down Expand Up @@ -123,7 +124,7 @@ async def map_scoreset(
metadata: ScoresetMetadata,
records: list[ScoreRow],
output_path: Path | None = None,
include_vrs_2: bool = False,
vrs_version: VrsVersion = VrsVersion.V_2,
prefer_genomic: bool = False,
silent: bool = True,
) -> None:
Expand Down Expand Up @@ -177,13 +178,12 @@ async def map_scoreset(
_emit_info("VRS mapping complete.", silent)

_emit_info("Annotating metadata and saving to file...", silent)
vrs_results = annotate(vrs_results, transcript, metadata)
vrs_results = annotate(vrs_results, transcript, metadata, vrs_version)
final_output = save_mapped_output_json(
metadata.urn,
vrs_results,
alignment_result,
transcript,
include_vrs_2,
prefer_genomic,
output_path,
)
Expand All @@ -193,15 +193,15 @@ async def map_scoreset(
async def map_scoreset_urn(
urn: str,
output_path: Path | None = None,
include_vrs_2: bool = False,
vrs_version: VrsVersion = VrsVersion.V_2,
prefer_genomic: bool = False,
silent: bool = True,
) -> None:
"""Perform end-to-end mapping for a scoreset.

:param urn: identifier for a scoreset.
:param output_path: optional path to save output at
:param include_vrs_2: if true, include VRS 2.0 mappings in output JSON
:param vrs_version: version of VRS objects to output (1.3 or 2)
:param silent: if True, suppress console information output
"""
try:
Expand All @@ -213,5 +213,5 @@ async def map_scoreset_urn(
click.echo(f"Error: {msg}")
raise e
await map_scoreset(
metadata, records, output_path, include_vrs_2, prefer_genomic, silent
metadata, records, output_path, vrs_version, prefer_genomic, silent
)
14 changes: 10 additions & 4 deletions src/dcd_mapping/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ class TargetType(str, Enum):
OTHER_NC = "Other noncoding"


class VrsVersion(str, Enum):
"""Define the VRS versions that can be selected for output VRS objects.

Because this enum mixes in ``str``, each member compares equal to its plain
string value (for example, the ``"1.3"`` / ``"2"`` choices offered by the
``--vrs_version`` command-line option).
"""

# Output VRS 1.3-style objects.
V_1_3 = "1.3"
# Output VRS 2 objects; used as the default by the annotation functions.
V_2 = "2"


class UniProtRef(BaseModel):
"""Store metadata associated with MaveDB UniProt reference"""

Expand Down Expand Up @@ -157,10 +164,9 @@ class ScoreAnnotation(BaseModel):
This model defines what an individual mapping instance looks like in the final JSON.
"""

pre_mapped: vrs_v1_schemas.VariationDescriptor | vrs_v1_schemas.Haplotype
post_mapped: vrs_v1_schemas.VariationDescriptor | vrs_v1_schemas.Haplotype
pre_mapped_2_0: Allele | Haplotype | None = None
post_mapped_2_0: Allele | Haplotype | None = None
pre_mapped: vrs_v1_schemas.VariationDescriptor | vrs_v1_schemas.Haplotype | Allele | Haplotype
post_mapped: vrs_v1_schemas.VariationDescriptor | vrs_v1_schemas.Haplotype | Allele | Haplotype
vrs_version: VrsVersion
mavedb_id: StrictStr
relation: Literal["SO:is_homologous_to"] = "SO:is_homologous_to"
score: float | None
Expand Down