Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/api/routers/map.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
get_raw_scoreset_metadata,
get_scoreset_metadata,
get_scoreset_records,
patch_target_sequence_type,
with_mavedb_score_set,
)
from dcd_mapping.resource_utils import ResourceAcquisitionError
Expand Down Expand Up @@ -48,6 +49,7 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse
try:
metadata = get_scoreset_metadata(urn, store_path)
records = get_scoreset_records(metadata, True, store_path)
metadata = patch_target_sequence_type(metadata, records)
except ScoresetNotSupportedError as e:
return JSONResponse(
content=ScoresetMapping(
Expand Down
5 changes: 5 additions & 0 deletions src/dcd_mapping/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,11 @@ def align(
msg = f"BLAT result {target_label} matches multiple target gene names in scoreset {scoreset_metadata.urn}"
target_gene = scoreset_metadata.target_genes[target_label]
alignment_results[target_label] = _get_best_match(blat_result, target_gene)
# confirm that there is an alignment result for each target gene
for target_gene in scoreset_metadata.target_genes:
if target_gene not in alignment_results:
msg = f"No BLAT result found for target gene {target_gene} in scoreset {scoreset_metadata.urn}"
raise AlignmentError(msg)
return alignment_results


Expand Down
2 changes: 2 additions & 0 deletions src/dcd_mapping/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
ScoresetNotSupportedError,
get_scoreset_metadata,
get_scoreset_records,
patch_target_sequence_type,
with_mavedb_score_set,
)
from dcd_mapping.resource_utils import ResourceAcquisitionError
Expand Down Expand Up @@ -332,6 +333,7 @@ async def map_scoreset_urn(
try:
metadata = get_scoreset_metadata(urn, store_path)
records = get_scoreset_records(metadata, silent, store_path)
metadata = patch_target_sequence_type(metadata, records)
except ScoresetNotSupportedError as e:
_emit_info(f"Score set not supported: {e}", silent, logging.ERROR)
final_output = write_scoreset_mapping_to_json(
Expand Down
24 changes: 24 additions & 0 deletions src/dcd_mapping/mavedb_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@
ScoresetMapping,
ScoresetMetadata,
TargetGene,
TargetSequenceType,
UniProtRef,
)
from dcd_mapping.transcripts import _get_protein_sequence

__all__ = [
"get_scoreset_urns",
Expand Down Expand Up @@ -324,6 +326,28 @@ def get_scoreset_records(
return _load_scoreset_records(scores_csv, metadata)


def patch_target_sequence_type(
    metadata: ScoresetMetadata, records: dict
) -> ScoresetMetadata:
    """Relabel DNA targets as protein when every record carries a protein HGVS string.

    Each target gene whose sequence type is DNA is inspected. If none of the
    records stored under that target's label has a missing, empty, or ``"NA"``
    ``hgvs_pro`` value, the target's sequence is replaced with the result of
    ``_get_protein_sequence`` and its sequence type is set to protein. The
    stated motivation is to avoid BLAT errors when the target sequence was
    codon-optimized for a non-human organism.

    :param metadata: score set metadata; its target genes are modified in place
    :param records: mapping from target label to that target's score records
    :return: the same ``metadata`` object that was passed in

    NOTE(review): a DNA target with no entry (or an empty entry) in ``records``
    also satisfies the check, because no record lacks a protein HGVS string, and
    is therefore converted as well -- confirm this is intended.
    """
    for label, gene in metadata.target_genes.items():
        # Only DNA targets are candidates for relabeling.
        if gene.target_sequence_type != TargetSequenceType.DNA:
            continue

        # A single record without a usable protein HGVS string vetoes the change.
        lacks_protein_hgvs = any(
            row.hgvs_pro == "NA" or not row.hgvs_pro
            for row in records.get(label, [])
        )
        if lacks_protein_hgvs:
            continue

        notice = f"Changing target sequence type for {metadata.urn} target {label} from DNA to protein because all variants are protein-level"
        _logger.info(notice)
        # Replace the sequence before flipping the type so the helper still
        # receives the original DNA sequence.
        gene.target_sequence = _get_protein_sequence(gene.target_sequence)
        gene.target_sequence_type = TargetSequenceType.PROTEIN
    return metadata


def with_mavedb_score_set(fn: Callable) -> Callable:
@wraps(fn)
async def wrapper(*args, **kwargs) -> ScoresetMapping: # noqa: ANN002
Expand Down
2 changes: 1 addition & 1 deletion src/dcd_mapping/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Provide dcd mapping version"""

dcd_mapping_version = "2025.1.0"
dcd_mapping_version = "2025.2.0"