-
Notifications
You must be signed in to change notification settings - Fork 0
Allow pre-map variant only #17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
39f1d7d
07a9e9c
6a499fc
d1b1ce8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,6 +42,7 @@ | |
| ScoresetMetadata, | ||
| TargetSequenceType, | ||
| TxSelectResult, | ||
| VrsVersion, | ||
| ) | ||
|
|
||
| _logger = logging.getLogger(__name__) | ||
|
|
@@ -243,55 +244,60 @@ def _annotate_allele_mapping( | |
| mapped_score: MappedScore, | ||
| tx_results: TxSelectResult | None, | ||
| metadata: ScoresetMetadata, | ||
| vrs_version: VrsVersion = VrsVersion.V_2, | ||
| ) -> ScoreAnnotationWithLayer: | ||
| """Perform annotations and create VRS 1.3 equivalents for allele mappings.""" | ||
| """Perform annotations and, if necessary, create VRS 1.3 equivalents for allele mappings.""" | ||
| pre_mapped: Allele = mapped_score.pre_mapped | ||
| post_mapped: Allele = mapped_score.post_mapped | ||
|
|
||
| # get vrs_ref_allele_seq for pre-mapped variants | ||
| pre_mapped.extensions = [_get_vrs_ref_allele_seq(post_mapped, metadata, tx_results)] | ||
|
|
||
| # Determine reference sequence | ||
| if mapped_score.annotation_layer == AnnotationLayer.GENOMIC: | ||
| sequence_id = f"ga4gh:{mapped_score.post_mapped.location.sequenceReference.refgetAccession}" | ||
| accession = get_chromosome_identifier_from_vrs_id(sequence_id) | ||
| if accession is None: | ||
| raise ValueError | ||
| if accession.startswith("refseq:"): | ||
| accession = accession[7:] | ||
| else: | ||
| if tx_results is None: | ||
| raise ValueError # impossible by definition | ||
| accession = tx_results.np | ||
| pre_mapped.extensions = [_get_vrs_ref_allele_seq(pre_mapped, metadata, tx_results)] | ||
|
|
||
| if post_mapped: | ||
| # Determine reference sequence | ||
| if mapped_score.annotation_layer == AnnotationLayer.GENOMIC: | ||
| sequence_id = f"ga4gh:{mapped_score.post_mapped.location.sequenceReference.refgetAccession}" | ||
| accession = get_chromosome_identifier_from_vrs_id(sequence_id) | ||
| if accession is None: | ||
| raise ValueError | ||
| if accession.startswith("refseq:"): | ||
| accession = accession[7:] | ||
| else: | ||
| if tx_results is None: | ||
| raise ValueError # impossible by definition | ||
|
Comment on lines
+266
to
+267
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Same as above |
||
| accession = tx_results.np | ||
|
|
||
| sr = get_seqrepo() | ||
| loc = mapped_score.post_mapped.location | ||
| sequence_id = f"ga4gh:{loc.sequenceReference.refgetAccession}" | ||
| ref = sr.get_sequence(sequence_id, loc.start, loc.end) | ||
| post_mapped.extensions = [ | ||
| Extension(type="Extension", name="vrs_ref_allele_seq", value=ref) | ||
| ] | ||
| hgvs_string, syntax = _get_hgvs_string(post_mapped, accession) | ||
| post_mapped.expressions = [Expression(syntax=syntax, value=hgvs_string)] | ||
| sr = get_seqrepo() | ||
| loc = mapped_score.post_mapped.location | ||
| sequence_id = f"ga4gh:{loc.sequenceReference.refgetAccession}" | ||
| ref = sr.get_sequence(sequence_id, loc.start, loc.end) | ||
| post_mapped.extensions = [ | ||
| Extension(type="Extension", name="vrs_ref_allele_seq", value=ref) | ||
| ] | ||
| hgvs_string, syntax = _get_hgvs_string(post_mapped, accession) | ||
| post_mapped.expressions = [Expression(syntax=syntax, value=hgvs_string)] | ||
|
|
||
| pre_mapped_vod = _allele_to_vod(pre_mapped) | ||
| post_mapped_vod = _allele_to_vod(post_mapped) | ||
| if vrs_version == VrsVersion.V_1_3: | ||
| pre_mapped = _allele_to_vod(pre_mapped) | ||
| post_mapped = _allele_to_vod(post_mapped) if post_mapped else None | ||
|
|
||
| return ScoreAnnotationWithLayer( | ||
| pre_mapped=pre_mapped_vod, | ||
| post_mapped=post_mapped_vod, | ||
| pre_mapped_2_0=pre_mapped, | ||
| post_mapped_2_0=post_mapped, | ||
| pre_mapped=pre_mapped, | ||
| post_mapped=post_mapped, | ||
| vrs_version=vrs_version, | ||
| mavedb_id=mapped_score.accession_id, | ||
| score=float(mapped_score.score) if mapped_score.score else None, | ||
| annotation_layer=mapped_score.annotation_layer, | ||
| ) | ||
|
|
||
|
|
||
| def _annotate_haplotype_mapping( | ||
| mapping: MappedScore, tx_results: TxSelectResult | None, metadata: ScoresetMetadata | ||
| mapping: MappedScore, | ||
| tx_results: TxSelectResult | None, | ||
| metadata: ScoresetMetadata, | ||
| vrs_version: VrsVersion = VrsVersion.V_2, | ||
| ) -> ScoreAnnotationWithLayer: | ||
| """Perform annotations and create VRS 1.3 equivalents for haplotype mappings.""" | ||
| """Perform annotations and, if necessary, create VRS 1.3 equivalents for haplotype mappings.""" | ||
| pre_mapped: Haplotype = mapping.pre_mapped # type: ignore | ||
| post_mapped: Haplotype = mapping.post_mapped # type: ignore | ||
| # get vrs_ref_allele_seq for pre-mapped variants | ||
|
|
@@ -313,25 +319,26 @@ def _annotate_haplotype_mapping( | |
| raise ValueError # impossible by definition | ||
| accession = tx_results.np | ||
|
|
||
| sr = get_seqrepo() | ||
| for allele in post_mapped.members: | ||
| loc = allele.location | ||
| sequence_id = f"ga4gh:{loc.sequenceReference.refgetAccession}" | ||
| ref = sr.get_sequence(sequence_id, loc.start, loc.end) # TODO type issues?? | ||
| allele.extensions = [ | ||
| Extension(type="Extension", name="vrs_ref_allele_seq", value=ref) | ||
| ] | ||
| hgvs, syntax = _get_hgvs_string(allele, accession) | ||
| allele.expressions = [Expression(syntax=syntax, value=hgvs)] | ||
|
|
||
| pre_mapped_converted = _haplotype_to_haplotype_1_3(pre_mapped) | ||
| post_mapped_converted = _haplotype_to_haplotype_1_3(post_mapped) | ||
| if post_mapped: | ||
| sr = get_seqrepo() | ||
| for allele in post_mapped.members: | ||
| loc = allele.location | ||
| sequence_id = f"ga4gh:{loc.sequenceReference.refgetAccession}" | ||
| ref = sr.get_sequence(sequence_id, loc.start, loc.end) # TODO type issues?? | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this TODO still relevant? |
||
| allele.extensions = [ | ||
| Extension(type="Extension", name="vrs_ref_allele_seq", value=ref) | ||
| ] | ||
| hgvs, syntax = _get_hgvs_string(allele, accession) | ||
| allele.expressions = [Expression(syntax=syntax, value=hgvs)] | ||
|
|
||
| if vrs_version == VrsVersion.V_1_3: | ||
| pre_mapped = _haplotype_to_haplotype_1_3(pre_mapped) | ||
| post_mapped = _haplotype_to_haplotype_1_3(post_mapped) if post_mapped else None | ||
|
|
||
| return ScoreAnnotationWithLayer( | ||
| pre_mapped=pre_mapped_converted, | ||
| post_mapped=post_mapped_converted, | ||
| pre_mapped_2_0=pre_mapped, | ||
| post_mapped_2_0=post_mapped, | ||
| pre_mapped=pre_mapped, | ||
| post_mapped=post_mapped, | ||
| vrs_version=vrs_version, # TODO figure out how inserting an enum value works | ||
| mavedb_id=mapping.accession_id, | ||
| score=float(mapping.score) if mapping.score is not None else None, | ||
| annotation_layer=mapping.annotation_layer, | ||
|
|
@@ -342,6 +349,7 @@ def annotate( | |
| mapped_scores: list[MappedScore], | ||
| tx_results: TxSelectResult | None, | ||
| metadata: ScoresetMetadata, | ||
| vrs_version: VrsVersion = VrsVersion.V_2, | ||
| ) -> list[ScoreAnnotationWithLayer]: | ||
| """Given a list of mappings, add additional contextual data: | ||
|
|
||
|
|
@@ -361,17 +369,23 @@ def annotate( | |
| """ | ||
| score_annotations = [] | ||
| for mapped_score in mapped_scores: | ||
| if isinstance(mapped_score.pre_mapped, Haplotype) and isinstance( | ||
| mapped_score.post_mapped, Haplotype | ||
| if isinstance(mapped_score.pre_mapped, Haplotype) and ( | ||
| isinstance(mapped_score.post_mapped, Haplotype) | ||
| or mapped_score.post_mapped is None | ||
| ): | ||
| score_annotations.append( | ||
| _annotate_haplotype_mapping(mapped_score, tx_results, metadata) | ||
| _annotate_haplotype_mapping( | ||
| mapped_score, tx_results, metadata, vrs_version | ||
| ) | ||
| ) | ||
| elif isinstance(mapped_score.pre_mapped, Allele) and isinstance( | ||
| mapped_score.post_mapped, Allele | ||
| elif isinstance(mapped_score.pre_mapped, Allele) and ( | ||
| isinstance(mapped_score.post_mapped, Allele) | ||
| or mapped_score.post_mapped is None | ||
| ): | ||
| score_annotations.append( | ||
| _annotate_allele_mapping(mapped_score, tx_results, metadata) | ||
| _annotate_allele_mapping( | ||
| mapped_score, tx_results, metadata, vrs_version | ||
| ) | ||
| ) | ||
| else: | ||
| ValueError("inconsistent variant structure") | ||
|
|
@@ -464,7 +478,6 @@ def save_mapped_output_json( | |
| mappings: list[ScoreAnnotationWithLayer], | ||
| align_result: AlignmentResult, | ||
| tx_output: TxSelectResult | None, | ||
| include_vrs_2: bool = False, | ||
| preferred_layer_only: bool = False, | ||
| output_path: Path | None = None, | ||
| ) -> Path: | ||
|
|
@@ -474,7 +487,6 @@ def save_mapped_output_json( | |
| :param mave_vrs_mappings: A dictionary of VrsObject1_x objects | ||
| :param align_result: Alignment information for a score set | ||
| :param tx_output: Transcript output for a score set | ||
| :param include_vrs_2: if true, also include VRS 2.0 mappings | ||
| :param output_path: specific location to save output to. Default to | ||
| <dcd_mapping_data_dir>/urn:mavedb:00000XXX-X-X_mapping_<ISO8601 datetime>.json | ||
| :return: output location | ||
|
|
@@ -523,11 +535,6 @@ def save_mapped_output_json( | |
| mapped_scores=mapped_scores, | ||
| ) | ||
|
|
||
| if not include_vrs_2: | ||
| for m in output.mapped_scores: | ||
| m.pre_mapped_2_0 = None | ||
| m.post_mapped_2_0 = None | ||
|
|
||
| if not output_path: | ||
| now = datetime.datetime.now(tz=datetime.UTC).isoformat() | ||
| output_path = LOCAL_STORE_PATH / f"{urn}_mapping_{now}.json" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we add a message to these raised ValueErrors? Even if we don't expect them to occur, a message serves the same purpose as a comment while also giving useful output if the error ever does surface for a user.