diff --git a/src/genophenocorr/preprocessing/_test_variant.py b/src/genophenocorr/preprocessing/_test_variant.py index db3c276a..a8dd1652 100644 --- a/src/genophenocorr/preprocessing/_test_variant.py +++ b/src/genophenocorr/preprocessing/_test_variant.py @@ -1,3 +1,6 @@ +import json +import os + import pytest from pkg_resources import resource_filename @@ -126,3 +129,40 @@ def test_cache_from_older_file(oldfile_cache_annotator, pp_vc_finder, variant_an cached_file_results = oldfile_cache_annotator.annotate(var_coords) assert var_anno_results == cached_file_results + +class TestVepFunctionalAnnotator: + + TEST_DATA_DIR = resource_filename(__name__, os.path.join('test_data', 'vep_response')) + + def test__process_item_missense(self, variant_annotator: VepFunctionalAnnotator): + response = self._load_response_json('missense.json') + first = response[0] + + annotations = list(filter(lambda i: i is not None, map(lambda item: variant_annotator._process_item(item), first['transcript_consequences']))) + + ann_by_tx = {ann.transcript_id: ann for ann in annotations} + + assert {'NM_013275.6', 'NM_001256183.2', 'NM_001256182.2'} == set(ann_by_tx.keys()) + + preferred = ann_by_tx['NM_013275.6'] + assert preferred.transcript_id == 'NM_013275.6' + assert preferred.is_preferred == True + assert preferred.hgvsc_id == 'NM_013275.6:c.7407C>G' + assert preferred.variant_effects == ('stop_gained',) + + def test__process_item_deletion(self, variant_annotator: VepFunctionalAnnotator): + response = self._load_response_json('deletion.json') + first = response[0] + + annotations = [variant_annotator._process_item(item) for item in first['transcript_consequences']] + + # TODO - finish + for a in annotations: + if a is not None: + print(a) + print('Done') + + def _load_response_json(self, test_name: str): + response_fpath = os.path.join(self.TEST_DATA_DIR, test_name) + with open(response_fpath) as fh: + return json.load(fh) diff --git a/src/genophenocorr/preprocessing/_variant.py 
b/src/genophenocorr/preprocessing/_variant.py index a5c6400e..81f8af20 100644 --- a/src/genophenocorr/preprocessing/_variant.py +++ b/src/genophenocorr/preprocessing/_variant.py @@ -6,9 +6,8 @@ import hpotk import requests - -from genophenocorr.model import VariantCoordinates, TranscriptAnnotation, Variant, TranscriptInfoAware, TranscriptCoordinates - +from genophenocorr.model import VariantCoordinates, TranscriptAnnotation, Variant, TranscriptInfoAware, \ + TranscriptCoordinates from ._api import FunctionalAnnotator, ProteinMetadataService, TranscriptCoordinateService @@ -50,22 +49,25 @@ def verify_start_end_coordinates(vc: VariantCoordinates): class VepFunctionalAnnotator(FunctionalAnnotator): - """A class that peforms functional annotation of variant coordinates using Variant Effect Predictor (VEP) REST API. + """A class that performs functional annotation of variant coordinates using Variant Effect Predictor (VEP) REST API. Methods: annotate(variant_coordinates: VariantCoordinates): the variant to annotate. """ - def __init__(self, protein_annotator: ProteinMetadataService): + def __init__(self, protein_annotator: ProteinMetadataService, + include_computational_txs: bool = False): """Constructs all necessary attributes for a VepFunctionalAnnotator object Args: protein_annotator (ProteinMetadataService): A ProteinMetadataService object for ProteinMetadata creation + include_computational_txs (bool): Include computational transcripts, such as RefSeq `XM_`. 
""" self._logging = logging.getLogger(__name__) self._protein_annotator = protein_annotator self._url = 'https://rest.ensembl.org/vep/human/region/%s?LoF=1&canonical=1&domains=1&hgvs=1' \ '&mutfunc=1&numbers=1&protein=1&refseq=1&mane=1&transcript_version=1&variant_class=1' + self._include_computational_txs = include_computational_txs def annotate(self, variant_coordinates: VariantCoordinates) -> typing.Sequence[TranscriptAnnotation]: """Perform functional annotation using Variant Effect Predictor (VEP) REST API. @@ -79,46 +81,50 @@ def annotate(self, variant_coordinates: VariantCoordinates) -> typing.Sequence[T # canon_tx = None annotations = [] for trans in variant.get('transcript_consequences'): - trans_id = trans.get('transcript_id') - if not trans_id.startswith('NM'): - continue - # TODO - implement - is_preferred = False - # if trans.get('canonical') == 1: - # canon_tx = trans_id - hgvsc_id = trans.get('hgvsc') - consequences = trans.get('consequence_terms') - gene_name = trans.get('gene_symbol') - protein_id = trans.get('protein_id') - protein = self._protein_annotator.annotate(protein_id) - protein_effect_start = trans.get('protein_start') - protein_effect_end = trans.get('protein_end') - if protein_effect_start is None and protein_effect_end is not None: - protein_effect_start = 1 - if protein_effect_end is not None: - protein_effect_end = int(protein_effect_end) - if protein_effect_start is not None: - protein_effect_start = int(protein_effect_start) - exons_effected = trans.get('exon') - if exons_effected is not None: - exons_effected = exons_effected.split('/')[0].split('-') - if len(exons_effected) == 2: - exons_effected = range(int(exons_effected[0]), int(exons_effected[1]) + 1) - exons_effected = [int(x) for x in exons_effected] - annotations.append( - TranscriptAnnotation(gene_name, - trans_id, - hgvsc_id, - is_preferred, - consequences, - exons_effected, - protein, - protein_effect_start, - protein_effect_end) - ) + annotation = 
self._process_item(trans) + if annotation is not None: + annotations.append(annotation) return annotations + def _process_item(self, item) -> typing.Optional[TranscriptAnnotation]: + """ + Parse one transcript annotation from the JSON response. + """ + trans_id = item.get('transcript_id') + if not self._include_computational_txs and not trans_id.startswith('NM_'): + # Skipping a computational transcript + return None + is_preferred = True if 'canonical' in item and item['canonical'] else False + hgvsc_id = item.get('hgvsc') + consequences = item.get('consequence_terms') + gene_name = item.get('gene_symbol') + protein_id = item.get('protein_id') + protein = self._protein_annotator.annotate(protein_id) + protein_effect_start = item.get('protein_start') + protein_effect_end = item.get('protein_end') + if protein_effect_start is None and protein_effect_end is not None: + protein_effect_start = 1 + if protein_effect_end is not None: + protein_effect_end = int(protein_effect_end) + if protein_effect_start is not None: + protein_effect_start = int(protein_effect_start) + exons_effected = item.get('exon') + if exons_effected is not None: + exons_effected = exons_effected.split('/')[0].split('-') + if len(exons_effected) == 2: + exons_effected = range(int(exons_effected[0]), int(exons_effected[1]) + 1) + exons_effected = [int(x) for x in exons_effected] + return TranscriptAnnotation(gene_name, + trans_id, + hgvsc_id, + is_preferred, + consequences, + exons_effected, + protein, + protein_effect_start, + protein_effect_end) + def _query_vep(self, variant_coordinates) -> dict: api_url = self._url % (verify_start_end_coordinates(variant_coordinates)) r = requests.get(api_url, headers={'Content-Type': 'application/json'}) diff --git a/src/genophenocorr/preprocessing/test_data/vep_response/README.md b/src/genophenocorr/preprocessing/test_data/vep_response/README.md new file mode 100644 index 00000000..c18b1692 --- /dev/null +++ 
b/src/genophenocorr/preprocessing/test_data/vep_response/README.md @@ -0,0 +1,18 @@ +# VEP responses + +The folder contains JSON files with example VEP responses. + +## `missense.json` + +QUERY: +```shell +curl 'https://rest.ensembl.org/vep/human/region/16:89279135-89279135/C?LoF=1&canonical=1&domains=1&hgvs=1&mutfunc=1&numbers=1&protein=1&refseq=1&mane=1&transcript_version=1&variant_class=1' \ +-H 'Content-type:application/json' | python3 -m json.tool > missense.json +``` + +## `deletion.json` + +```shell +curl 'https://rest.ensembl.org/vep/human/region/16:89279135-89279135/C?LoF=1&canonical=1&domains=1&hgvs=1&mutfunc=1&numbers=1&protein=1&refseq=1&mane=1&transcript_version=1&variant_class=1' \ +-H 'Content-type:application/json' | python3 -m json.tool > deletion.json +``` \ No newline at end of file diff --git a/src/genophenocorr/preprocessing/test_data/vep_response/deletion.json b/src/genophenocorr/preprocessing/test_data/vep_response/deletion.json new file mode 100644 index 00000000..c8944040 --- /dev/null +++ b/src/genophenocorr/preprocessing/test_data/vep_response/deletion.json @@ -0,0 +1,876 @@ +[ + { + "end": 89279135, + "strand": 1, + "transcript_consequences": [ + { + "protein_start": 2469, + "amino_acids": "Y/*", + "impact": "HIGH", + "cdna_start": 7939, + "transcript_id": "NM_001256182.2", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "exon": "10/14", + "cds_start": 7407, + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "NM_001256182.2:c.7407C>G", + "protein_end": 2469, + "biotype": "protein_coding", + "variant_allele": "C", + "gene_symbol": "ANKRD11", + "codons": "taC/taG", + "cdna_end": 7939, + "strand": -1, + "used_ref": "G", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "NP_001243111.1", + "gene_id": "29123", + "cds_end": 7407, + "lof": "HC", + "hgvsp": "NP_001243111.1:p.Tyr2469Ter" + }, + { + "impact": "HIGH", + 
"protein_start": 2469, + "amino_acids": "Y/*", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "transcript_id": "NM_001256183.2", + "cdna_start": 7865, + "cds_start": 7407, + "exon": "9/13", + "biotype": "protein_coding", + "variant_allele": "C", + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "NM_001256183.2:c.7407C>G", + "protein_end": 2469, + "codons": "taC/taG", + "cdna_end": 7865, + "gene_symbol": "ANKRD11", + "used_ref": "G", + "strand": -1, + "gene_id": "29123", + "protein_id": "NP_001243112.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "lof": "HC", + "hgvsp": "NP_001243112.1:p.Tyr2469Ter", + "cds_end": 7407 + }, + { + "variant_allele": "C", + "biotype": "protein_coding", + "protein_end": 2469, + "hgvsc": "NM_013275.6:c.7407C>G", + "consequence_terms": [ + "stop_gained" + ], + "canonical": 1, + "cds_start": 7407, + "exon": "9/13", + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "transcript_id": "NM_013275.6", + "cdna_start": 7868, + "impact": "HIGH", + "amino_acids": "Y/*", + "protein_start": 2469, + "hgvsp": "NP_037407.4:p.Tyr2469Ter", + "lof": "HC", + "cds_end": 7407, + "mane_select": "ENST00000301030.10", + "gene_id": "29123", + "protein_id": "NP_037407.4", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "used_ref": "G", + "strand": -1, + "cdna_end": 7868, + "codons": "taC/taG", + "gene_symbol": "ANKRD11" + }, + { + "cdna_end": 8051, + "codons": "taC/taG", + "gene_symbol": "ANKRD11", + "used_ref": "G", + "strand": -1, + "gene_id": "29123", + "protein_id": "XP_011521355.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "hgvsp": "XP_011521355.1:p.Tyr2469Ter", + "lof": "HC", + "cds_end": 7407, + "impact": "HIGH", + 
"amino_acids": "Y/*", + "protein_start": 2469, + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "transcript_id": "XM_011523053.3", + "cdna_start": 8051, + "cds_start": 7407, + "exon": "10/14", + "variant_allele": "C", + "biotype": "protein_coding", + "protein_end": 2469, + "hgvsc": "XM_011523053.3:c.7407C>G", + "consequence_terms": [ + "stop_gained" + ] + }, + { + "cds_end": 7407, + "lof": "HC", + "hgvsp": "XP_011521359.1:p.Tyr2469Ter", + "lof_info": "PERCENTILE:0.944529456771232,GERP_DIST:62.6038060903549,BP_DIST:431,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "gene_id": "29123", + "protein_id": "XP_011521359.1", + "strand": -1, + "used_ref": "G", + "gene_symbol": "ANKRD11", + "codons": "taC/taG", + "cdna_end": 7868, + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_011523057.3:c.7407C>G", + "protein_end": 2469, + "biotype": "protein_coding", + "variant_allele": "C", + "exon": "9/13", + "cds_start": 7407, + "transcript_id": "XM_011523057.3", + "cdna_start": 7868, + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "protein_start": 2469, + "amino_acids": "Y/*", + "impact": "HIGH" + }, + { + "impact": "HIGH", + "protein_start": 2469, + "amino_acids": "Y/*", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "transcript_id": "XM_017023184.2", + "cdna_start": 8051, + "cds_start": 7407, + "exon": "10/14", + "biotype": "protein_coding", + "variant_allele": "C", + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_017023184.2:c.7407C>G", + "protein_end": 2469, + "codons": "taC/taG", + "cdna_end": 8051, + "gene_symbol": "ANKRD11", + "strand": -1, + "used_ref": "G", + "gene_id": "29123", + "protein_id": "XP_016878673.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "lof": "HC", + "hgvsp": "XP_016878673.1:p.Tyr2469Ter", + "cds_end": 7407 + }, + { + "gene_symbol": "ANKRD11", + "cdna_end": 7780, + "codons": 
"taC/taG", + "used_ref": "G", + "strand": -1, + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "gene_id": "29123", + "protein_id": "XP_016878676.1", + "cds_end": 7407, + "hgvsp": "XP_016878676.1:p.Tyr2469Ter", + "lof": "HC", + "amino_acids": "Y/*", + "protein_start": 2469, + "impact": "HIGH", + "transcript_id": "XM_017023187.2", + "cdna_start": 7780, + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "exon": "8/12", + "cds_start": 7407, + "protein_end": 2469, + "hgvsc": "XM_017023187.2:c.7407C>G", + "consequence_terms": [ + "stop_gained" + ], + "variant_allele": "C", + "biotype": "protein_coding" + }, + { + "gene_id": "29123", + "protein_id": "XP_047289966.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "hgvsp": "XP_047289966.1:p.Tyr2469Ter", + "lof": "HC", + "cds_end": 7407, + "cdna_end": 7936, + "codons": "taC/taG", + "gene_symbol": "ANKRD11", + "used_ref": "G", + "strand": -1, + "cds_start": 7407, + "exon": "10/14", + "variant_allele": "C", + "biotype": "protein_coding", + "protein_end": 2469, + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434010.1:c.7407C>G", + "impact": "HIGH", + "amino_acids": "Y/*", + "protein_start": 2469, + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "transcript_id": "XM_047434010.1", + "cdna_start": 7936 + }, + { + "used_ref": "G", + "strand": -1, + "codons": "taC/taG", + "cdna_end": 7687, + "gene_symbol": "ANKRD11", + "lof": "HC", + "hgvsp": "XP_047289967.1:p.Tyr2469Ter", + "cds_end": 7407, + "gene_id": "29123", + "protein_id": "XP_047289967.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "transcript_id": "XM_047434011.1", + "cdna_start": 
7687, + "impact": "HIGH", + "protein_start": 2469, + "amino_acids": "Y/*", + "biotype": "protein_coding", + "variant_allele": "C", + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434011.1:c.7407C>G", + "protein_end": 2469, + "cds_start": 7407, + "exon": "9/13" + }, + { + "strand": -1, + "used_ref": "G", + "gene_symbol": "ANKRD11", + "codons": "taC/taG", + "cdna_end": 8077, + "cds_end": 7407, + "lof": "HC", + "hgvsp": "XP_047289968.1:p.Tyr2469Ter", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289968.1", + "gene_id": "29123", + "cdna_start": 8077, + "transcript_id": "XM_047434012.1", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "protein_start": 2469, + "amino_acids": "Y/*", + "impact": "HIGH", + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434012.1:c.7407C>G", + "protein_end": 2469, + "biotype": "protein_coding", + "variant_allele": "C", + "exon": "10/14", + "cds_start": 7407 + }, + { + "cds_end": 7407, + "lof": "HC", + "hgvsp": "XP_047289970.1:p.Tyr2469Ter", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289970.1", + "gene_id": "29123", + "strand": -1, + "used_ref": "G", + "gene_symbol": "ANKRD11", + "codons": "taC/taG", + "cdna_end": 8028, + "hgvsc": "XM_047434014.1:c.7407C>G", + "consequence_terms": [ + "stop_gained" + ], + "protein_end": 2469, + "biotype": "protein_coding", + "variant_allele": "C", + "exon": "10/14", + "cds_start": 7407, + "transcript_id": "XM_047434014.1", + "cdna_start": 8028, + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "protein_start": 2469, + "amino_acids": "Y/*", + "impact": "HIGH" + }, + { + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "cdna_start": 8150, + "transcript_id": "XM_047434015.1", + "impact": "HIGH", + 
"protein_start": 2469, + "amino_acids": "Y/*", + "biotype": "protein_coding", + "variant_allele": "C", + "hgvsc": "XM_047434015.1:c.7407C>G", + "consequence_terms": [ + "stop_gained" + ], + "protein_end": 2469, + "cds_start": 7407, + "exon": "10/14", + "strand": -1, + "used_ref": "G", + "codons": "taC/taG", + "cdna_end": 8150, + "gene_symbol": "ANKRD11", + "lof": "HC", + "hgvsp": "XP_047289971.1:p.Tyr2469Ter", + "cds_end": 7407, + "protein_id": "XP_047289971.1", + "gene_id": "29123", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT" + }, + { + "cds_end": 7407, + "hgvsp": "XP_047289972.1:p.Tyr2469Ter", + "lof": "HC", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289972.1", + "gene_id": "29123", + "used_ref": "G", + "strand": -1, + "gene_symbol": "ANKRD11", + "cdna_end": 7870, + "codons": "taC/taG", + "protein_end": 2469, + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434016.1:c.7407C>G", + "variant_allele": "C", + "biotype": "protein_coding", + "exon": "10/14", + "cds_start": 7407, + "transcript_id": "XM_047434016.1", + "cdna_start": 7870, + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "amino_acids": "Y/*", + "protein_start": 2469, + "impact": "HIGH" + }, + { + "protein_start": 2469, + "amino_acids": "Y/*", + "impact": "HIGH", + "cdna_start": 7854, + "transcript_id": "XM_047434017.1", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "exon": "8/12", + "cds_start": 7407, + "hgvsc": "XM_047434017.1:c.7407C>G", + "consequence_terms": [ + "stop_gained" + ], + "protein_end": 2469, + "biotype": "protein_coding", + "variant_allele": "C", + "gene_symbol": "ANKRD11", + "codons": "taC/taG", + "cdna_end": 7854, + "used_ref": "G", + "strand": -1, + "lof_info": 
"PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "gene_id": "29123", + "protein_id": "XP_047289973.1", + "cds_end": 7407, + "lof": "HC", + "hgvsp": "XP_047289973.1:p.Tyr2469Ter" + }, + { + "impact": "HIGH", + "amino_acids": "Y/*", + "protein_start": 2469, + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "transcript_id": "XM_047434018.1", + "cdna_start": 12847, + "cds_start": 7407, + "exon": "11/15", + "variant_allele": "C", + "biotype": "protein_coding", + "protein_end": 2469, + "hgvsc": "XM_047434018.1:c.7407C>G", + "consequence_terms": [ + "stop_gained" + ], + "cdna_end": 12847, + "codons": "taC/taG", + "gene_symbol": "ANKRD11", + "strand": -1, + "used_ref": "G", + "protein_id": "XP_047289974.1", + "gene_id": "29123", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "hgvsp": "XP_047289974.1:p.Tyr2469Ter", + "lof": "HC", + "cds_end": 7407 + }, + { + "gene_id": "29123", + "protein_id": "XP_047289975.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "hgvsp": "XP_047289975.1:p.Tyr2469Ter", + "lof": "HC", + "cds_end": 7407, + "cdna_end": 12851, + "codons": "taC/taG", + "gene_symbol": "ANKRD11", + "strand": -1, + "used_ref": "G", + "cds_start": 7407, + "exon": "11/15", + "variant_allele": "C", + "biotype": "protein_coding", + "protein_end": 2469, + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434019.1:c.7407C>G", + "impact": "HIGH", + "amino_acids": "Y/*", + "protein_start": 2469, + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "cdna_start": 12851, + "transcript_id": "XM_047434019.1" + }, + { + "cds_start": 7407, + "exon": "12/16", + "biotype": "protein_coding", + "variant_allele": "C", + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": 
"XM_047434020.1:c.7407C>G", + "protein_end": 2469, + "impact": "HIGH", + "protein_start": 2469, + "amino_acids": "Y/*", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "transcript_id": "XM_047434020.1", + "cdna_start": 12918, + "gene_id": "29123", + "protein_id": "XP_047289976.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "lof": "HC", + "hgvsp": "XP_047289976.1:p.Tyr2469Ter", + "cds_end": 7407, + "codons": "taC/taG", + "cdna_end": 12918, + "gene_symbol": "ANKRD11", + "strand": -1, + "used_ref": "G" + }, + { + "codons": "taC/taG", + "cdna_end": 11486, + "gene_symbol": "ANKRD11", + "used_ref": "G", + "strand": -1, + "gene_id": "29123", + "protein_id": "XP_047289977.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "lof": "HC", + "hgvsp": "XP_047289977.1:p.Tyr2469Ter", + "cds_end": 7407, + "impact": "HIGH", + "protein_start": 2469, + "amino_acids": "Y/*", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "cdna_start": 11486, + "transcript_id": "XM_047434021.1", + "cds_start": 7407, + "exon": "11/15", + "biotype": "protein_coding", + "variant_allele": "C", + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434021.1:c.7407C>G", + "protein_end": 2469 + }, + { + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "gene_id": "29123", + "protein_id": "XP_047289978.1", + "cds_end": 7407, + "hgvsp": "XP_047289978.1:p.Tyr2469Ter", + "lof": "HC", + "gene_symbol": "ANKRD11", + "cdna_end": 11483, + "codons": "taC/taG", + "strand": -1, + "used_ref": "G", + "exon": "11/15", + "cds_start": 7407, + "protein_end": 2469, + "hgvsc": "XM_047434022.1:c.7407C>G", + "consequence_terms": [ + "stop_gained" + ], + "variant_allele": "C", + "biotype": 
"protein_coding", + "amino_acids": "Y/*", + "protein_start": 2469, + "impact": "HIGH", + "transcript_id": "XM_047434022.1", + "cdna_start": 11483, + "gene_symbol_source": "EntrezGene", + "given_ref": "G" + }, + { + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434023.1:c.7407C>G", + "protein_end": 2469, + "biotype": "protein_coding", + "variant_allele": "C", + "exon": "10/14", + "cds_start": 7407, + "cdna_start": 7982, + "transcript_id": "XM_047434023.1", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "protein_start": 2469, + "amino_acids": "Y/*", + "impact": "HIGH", + "cds_end": 7407, + "lof": "HC", + "hgvsp": "XP_047289979.1:p.Tyr2469Ter", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "gene_id": "29123", + "protein_id": "XP_047289979.1", + "used_ref": "G", + "strand": -1, + "gene_symbol": "ANKRD11", + "codons": "taC/taG", + "cdna_end": 7982 + }, + { + "impact": "HIGH", + "amino_acids": "Y/*", + "protein_start": 2469, + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "cdna_start": 8180, + "transcript_id": "XM_047434024.1", + "cds_start": 7407, + "exon": "11/15", + "variant_allele": "C", + "biotype": "protein_coding", + "protein_end": 2469, + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434024.1:c.7407C>G", + "cdna_end": 8180, + "codons": "taC/taG", + "gene_symbol": "ANKRD11", + "used_ref": "G", + "strand": -1, + "gene_id": "29123", + "protein_id": "XP_047289980.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "hgvsp": "XP_047289980.1:p.Tyr2469Ter", + "lof": "HC", + "cds_end": 7407 + }, + { + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289981.1", + "gene_id": "29123", + "cds_end": 7407, + 
"lof": "HC", + "hgvsp": "XP_047289981.1:p.Tyr2469Ter", + "gene_symbol": "ANKRD11", + "codons": "taC/taG", + "cdna_end": 7979, + "used_ref": "G", + "strand": -1, + "exon": "10/14", + "cds_start": 7407, + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434025.1:c.7407C>G", + "protein_end": 2469, + "biotype": "protein_coding", + "variant_allele": "C", + "protein_start": 2469, + "amino_acids": "Y/*", + "impact": "HIGH", + "cdna_start": 7979, + "transcript_id": "XM_047434025.1", + "given_ref": "G", + "gene_symbol_source": "EntrezGene" + }, + { + "protein_id": "XP_047289982.1", + "gene_id": "29123", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "hgvsp": "XP_047289982.1:p.Tyr2469Ter", + "lof": "HC", + "cds_end": 7407, + "cdna_end": 11860, + "codons": "taC/taG", + "gene_symbol": "ANKRD11", + "strand": -1, + "used_ref": "G", + "cds_start": 7407, + "exon": "13/17", + "variant_allele": "C", + "biotype": "protein_coding", + "protein_end": 2469, + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434026.1:c.7407C>G", + "impact": "HIGH", + "amino_acids": "Y/*", + "protein_start": 2469, + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "transcript_id": "XM_047434026.1", + "cdna_start": 11860 + }, + { + "cds_start": 7407, + "exon": "10/14", + "biotype": "protein_coding", + "variant_allele": "C", + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434027.1:c.7407C>G", + "protein_end": 2469, + "impact": "HIGH", + "protein_start": 2469, + "amino_acids": "Y/*", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "cdna_start": 8080, + "transcript_id": "XM_047434027.1", + "protein_id": "XP_047289983.1", + "gene_id": "29123", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "lof": "HC", + "hgvsp": "XP_047289983.1:p.Tyr2469Ter", + 
"cds_end": 7407, + "codons": "taC/taG", + "cdna_end": 8080, + "gene_symbol": "ANKRD11", + "used_ref": "G", + "strand": -1 + }, + { + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434028.1:c.7407C>G", + "protein_end": 2469, + "biotype": "protein_coding", + "variant_allele": "C", + "exon": "9/13", + "cds_start": 7407, + "transcript_id": "XM_047434028.1", + "cdna_start": 8970, + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "protein_start": 2469, + "amino_acids": "Y/*", + "impact": "HIGH", + "cds_end": 7407, + "lof": "HC", + "hgvsp": "XP_047289984.1:p.Tyr2469Ter", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289984.1", + "gene_id": "29123", + "strand": -1, + "used_ref": "G", + "gene_symbol": "ANKRD11", + "codons": "taC/taG", + "cdna_end": 8970 + }, + { + "lof": "HC", + "hgvsp": "XP_047289985.1:p.Tyr2435Ter", + "cds_end": 7305, + "protein_id": "XP_047289985.1", + "gene_id": "29123", + "lof_info": "PERCENTILE:0.925855513307985,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "used_ref": "G", + "strand": -1, + "codons": "taC/taG", + "cdna_end": 7780, + "gene_symbol": "ANKRD11", + "biotype": "protein_coding", + "variant_allele": "C", + "hgvsc": "XM_047434029.1:c.7305C>G", + "consequence_terms": [ + "stop_gained" + ], + "protein_end": 2435, + "cds_start": 7305, + "exon": "9/13", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "transcript_id": "XM_047434029.1", + "cdna_start": 7780, + "impact": "HIGH", + "protein_start": 2435, + "amino_acids": "Y/*" + }, + { + "impact": "HIGH", + "protein_start": 2435, + "amino_acids": "Y/*", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "cdna_start": 7895, + "transcript_id": "XM_047434030.1", + "cds_start": 7305, + "exon": "10/14", + "biotype": "protein_coding", + "variant_allele": "C", + "consequence_terms": [ + 
"stop_gained" + ], + "hgvsc": "XM_047434030.1:c.7305C>G", + "protein_end": 2435, + "codons": "taC/taG", + "cdna_end": 7895, + "gene_symbol": "ANKRD11", + "strand": -1, + "used_ref": "G", + "protein_id": "XP_047289986.1", + "gene_id": "29123", + "lof_info": "PERCENTILE:0.925855513307985,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "lof": "HC", + "hgvsp": "XP_047289986.1:p.Tyr2435Ter", + "cds_end": 7305 + }, + { + "cds_start": 7305, + "exon": "9/13", + "variant_allele": "C", + "biotype": "protein_coding", + "protein_end": 2435, + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434031.1:c.7305C>G", + "impact": "HIGH", + "amino_acids": "Y/*", + "protein_start": 2435, + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "cdna_start": 7593, + "transcript_id": "XM_047434031.1", + "gene_id": "29123", + "protein_id": "XP_047289987.1", + "lof_info": "PERCENTILE:0.925855513307985,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "hgvsp": "XP_047289987.1:p.Tyr2435Ter", + "lof": "HC", + "cds_end": 7305, + "cdna_end": 7593, + "codons": "taC/taG", + "gene_symbol": "ANKRD11", + "used_ref": "G", + "strand": -1 + }, + { + "hgvsp": "XP_047289988.1:p.Tyr2426Ter", + "lof": "HC", + "cds_end": 7278, + "protein_id": "XP_047289988.1", + "gene_id": "29123", + "lof_info": "PERCENTILE:0.925600915681038,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "strand": -1, + "used_ref": "G", + "cdna_end": 7802, + "codons": "taC/taG", + "gene_symbol": "ANKRD11", + "variant_allele": "C", + "biotype": "protein_coding", + "protein_end": 2426, + "consequence_terms": [ + "stop_gained" + ], + "hgvsc": "XM_047434032.1:c.7278C>G", + "cds_start": 7278, + "exon": "9/13", + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "cdna_start": 7802, + "transcript_id": "XM_047434032.1", + "impact": "HIGH", + "amino_acids": "Y/*", 
+ "protein_start": 2426 + } + ], + "start": 89279135, + "colocated_variants": [ + { + "somatic": 1, + "end": 89279135, + "start": 89279135, + "var_synonyms": { + "COSMIC": [ + "COSM9359179" + ] + }, + "id": "COSV104615518", + "phenotype_or_disease": 1, + "seq_region_name": "16", + "allele_string": "COSMIC_MUTATION", + "strand": 1 + } + ], + "most_severe_consequence": "stop_gained", + "seq_region_name": "16", + "variant_class": "SNV", + "id": "16_89279135_G/C", + "assembly_name": "GRCh38", + "input": "16 89279135 89279135 G/C 1", + "allele_string": "G/C" + } +] diff --git a/src/genophenocorr/preprocessing/test_data/vep_response/missense.json b/src/genophenocorr/preprocessing/test_data/vep_response/missense.json new file mode 100644 index 00000000..85b3df22 --- /dev/null +++ b/src/genophenocorr/preprocessing/test_data/vep_response/missense.json @@ -0,0 +1,876 @@ +[ + { + "variant_class": "SNV", + "end": 89279135, + "input": "16 89279135 89279135 G/C 1", + "id": "16_89279135_G/C", + "start": 89279135, + "transcript_consequences": [ + { + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ], + "variant_allele": "C", + "cds_end": 7407, + "given_ref": "G", + "strand": -1, + "cdna_start": 7939, + "protein_end": 2469, + "cds_start": 7407, + "impact": "HIGH", + "hgvsc": "NM_001256182.2:c.7407C>G", + "gene_symbol": "ANKRD11", + "amino_acids": "Y/*", + "protein_start": 2469, + "exon": "10/14", + "biotype": "protein_coding", + "gene_id": "29123", + "transcript_id": "NM_001256182.2", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "NP_001243111.1", + "gene_symbol_source": "EntrezGene", + "hgvsp": "NP_001243111.1:p.Tyr2469Ter", + "lof": "HC", + "cdna_end": 7939, + "codons": "taC/taG" + }, + { + "biotype": "protein_coding", + "exon": "9/13", + "transcript_id": "NM_001256183.2", + "gene_id": "29123", + "protein_start": 2469, + "gene_symbol_source": 
"EntrezGene", + "hgvsp": "NP_001243112.1:p.Tyr2469Ter", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "NP_001243112.1", + "lof": "HC", + "codons": "taC/taG", + "cdna_end": 7865, + "variant_allele": "C", + "cds_end": 7407, + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ], + "cdna_start": 7865, + "strand": -1, + "given_ref": "G", + "protein_end": 2469, + "impact": "HIGH", + "cds_start": 7407, + "gene_symbol": "ANKRD11", + "amino_acids": "Y/*", + "hgvsc": "NM_001256183.2:c.7407C>G" + }, + { + "biotype": "protein_coding", + "exon": "9/13", + "transcript_id": "NM_013275.6", + "gene_id": "29123", + "protein_start": 2469, + "hgvsp": "NP_037407.4:p.Tyr2469Ter", + "gene_symbol_source": "EntrezGene", + "mane_select": "ENST00000301030.10", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "NP_037407.4", + "lof": "HC", + "codons": "taC/taG", + "cdna_end": 7868, + "variant_allele": "C", + "cds_end": 7407, + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "cdna_start": 7868, + "strand": -1, + "given_ref": "G", + "protein_end": 2469, + "impact": "HIGH", + "cds_start": 7407, + "gene_symbol": "ANKRD11", + "canonical": 1, + "amino_acids": "Y/*", + "hgvsc": "NM_013275.6:c.7407C>G" + }, + { + "protein_start": 2469, + "transcript_id": "XM_011523053.3", + "gene_id": "29123", + "biotype": "protein_coding", + "exon": "10/14", + "protein_id": "XP_011521355.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_011521355.1:p.Tyr2469Ter", + "lof": "HC", + "cdna_end": 8051, + "codons": "taC/taG", + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "cds_end": 7407, + 
"variant_allele": "C", + "given_ref": "G", + "cdna_start": 8051, + "strand": -1, + "impact": "HIGH", + "cds_start": 7407, + "protein_end": 2469, + "hgvsc": "XM_011523053.3:c.7407C>G", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11" + }, + { + "biotype": "protein_coding", + "exon": "9/13", + "gene_id": "29123", + "transcript_id": "XM_011523057.3", + "protein_start": 2469, + "hgvsp": "XP_011521359.1:p.Tyr2469Ter", + "gene_symbol_source": "EntrezGene", + "lof_info": "PERCENTILE:0.944529456771232,GERP_DIST:62.6038060903549,BP_DIST:431,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_011521359.1", + "lof": "HC", + "codons": "taC/taG", + "cdna_end": 7868, + "variant_allele": "C", + "cds_end": 7407, + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "strand": -1, + "cdna_start": 7868, + "given_ref": "G", + "protein_end": 2469, + "cds_start": 7407, + "impact": "HIGH", + "gene_symbol": "ANKRD11", + "amino_acids": "Y/*", + "hgvsc": "XM_011523057.3:c.7407C>G" + }, + { + "codons": "taC/taG", + "cdna_end": 8051, + "lof": "HC", + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_016878673.1:p.Tyr2469Ter", + "protein_id": "XP_016878673.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "transcript_id": "XM_017023184.2", + "gene_id": "29123", + "exon": "10/14", + "biotype": "protein_coding", + "protein_start": 2469, + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "hgvsc": "XM_017023184.2:c.7407C>G", + "impact": "HIGH", + "cds_start": 7407, + "protein_end": 2469, + "strand": -1, + "cdna_start": 8051, + "given_ref": "G", + "cds_end": 7407, + "variant_allele": "C", + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ] + }, + { + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_016878676.1", + 
"gene_symbol_source": "EntrezGene", + "hgvsp": "XP_016878676.1:p.Tyr2469Ter", + "protein_start": 2469, + "exon": "8/12", + "biotype": "protein_coding", + "gene_id": "29123", + "transcript_id": "XM_017023187.2", + "cdna_end": 7780, + "codons": "taC/taG", + "lof": "HC", + "given_ref": "G", + "cdna_start": 7780, + "strand": -1, + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "variant_allele": "C", + "cds_end": 7407, + "hgvsc": "XM_017023187.2:c.7407C>G", + "gene_symbol": "ANKRD11", + "amino_acids": "Y/*", + "protein_end": 2469, + "cds_start": 7407, + "impact": "HIGH" + }, + { + "variant_allele": "C", + "cds_end": 7407, + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ], + "strand": -1, + "cdna_start": 7936, + "given_ref": "G", + "protein_end": 2469, + "cds_start": 7407, + "impact": "HIGH", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "hgvsc": "XM_047434010.1:c.7407C>G", + "biotype": "protein_coding", + "exon": "10/14", + "transcript_id": "XM_047434010.1", + "gene_id": "29123", + "protein_start": 2469, + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289966.1:p.Tyr2469Ter", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289966.1", + "lof": "HC", + "codons": "taC/taG", + "cdna_end": 7936 + }, + { + "protein_id": "XP_047289967.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "hgvsp": "XP_047289967.1:p.Tyr2469Ter", + "gene_symbol_source": "EntrezGene", + "protein_start": 2469, + "transcript_id": "XM_047434011.1", + "gene_id": "29123", + "exon": "9/13", + "biotype": "protein_coding", + "cdna_end": 7687, + "codons": "taC/taG", + "lof": "HC", + "given_ref": "G", + "strand": -1, + "cdna_start": 7687, + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ], + "cds_end": 7407, + "variant_allele": "C", + 
"hgvsc": "XM_047434011.1:c.7407C>G", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "impact": "HIGH", + "cds_start": 7407, + "protein_end": 2469 + }, + { + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289968.1:p.Tyr2469Ter", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289968.1", + "exon": "10/14", + "biotype": "protein_coding", + "gene_id": "29123", + "transcript_id": "XM_047434012.1", + "protein_start": 2469, + "codons": "taC/taG", + "cdna_end": 8077, + "lof": "HC", + "strand": -1, + "cdna_start": 8077, + "given_ref": "G", + "variant_allele": "C", + "cds_end": 7407, + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "hgvsc": "XM_047434012.1:c.7407C>G", + "protein_end": 2469, + "cds_start": 7407, + "impact": "HIGH" + }, + { + "transcript_id": "XM_047434014.1", + "gene_id": "29123", + "exon": "10/14", + "biotype": "protein_coding", + "protein_start": 2469, + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289970.1:p.Tyr2469Ter", + "protein_id": "XP_047289970.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "lof": "HC", + "codons": "taC/taG", + "cdna_end": 8028, + "cds_end": 7407, + "variant_allele": "C", + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "cdna_start": 8028, + "strand": -1, + "given_ref": "G", + "cds_start": 7407, + "impact": "HIGH", + "protein_end": 2469, + "gene_symbol": "ANKRD11", + "amino_acids": "Y/*", + "hgvsc": "XM_047434014.1:c.7407C>G" + }, + { + "hgvsc": "XM_047434015.1:c.7407C>G", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "impact": "HIGH", + "cds_start": 7407, + "protein_end": 2469, + "given_ref": "G", + "cdna_start": 8150, + "strand": -1, + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + 
], + "cds_end": 7407, + "variant_allele": "C", + "cdna_end": 8150, + "codons": "taC/taG", + "lof": "HC", + "protein_id": "XP_047289971.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289971.1:p.Tyr2469Ter", + "protein_start": 2469, + "gene_id": "29123", + "transcript_id": "XM_047434015.1", + "exon": "10/14", + "biotype": "protein_coding" + }, + { + "hgvsc": "XM_047434016.1:c.7407C>G", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "protein_end": 2469, + "impact": "HIGH", + "cds_start": 7407, + "given_ref": "G", + "strand": -1, + "cdna_start": 7870, + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "variant_allele": "C", + "cds_end": 7407, + "cdna_end": 7870, + "codons": "taC/taG", + "lof": "HC", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289972.1", + "hgvsp": "XP_047289972.1:p.Tyr2469Ter", + "gene_symbol_source": "EntrezGene", + "protein_start": 2469, + "exon": "10/14", + "biotype": "protein_coding", + "gene_id": "29123", + "transcript_id": "XM_047434016.1" + }, + { + "lof": "HC", + "cdna_end": 7854, + "codons": "taC/taG", + "protein_start": 2469, + "biotype": "protein_coding", + "exon": "8/12", + "transcript_id": "XM_047434017.1", + "gene_id": "29123", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289973.1", + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289973.1:p.Tyr2469Ter", + "protein_end": 2469, + "impact": "HIGH", + "cds_start": 7407, + "hgvsc": "XM_047434017.1:c.7407C>G", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ], + "variant_allele": "C", + "cds_end": 
7407, + "given_ref": "G", + "cdna_start": 7854, + "strand": -1 + }, + { + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ], + "variant_allele": "C", + "cds_end": 7407, + "given_ref": "G", + "cdna_start": 12847, + "strand": -1, + "protein_end": 2469, + "impact": "HIGH", + "cds_start": 7407, + "hgvsc": "XM_047434018.1:c.7407C>G", + "gene_symbol": "ANKRD11", + "amino_acids": "Y/*", + "protein_start": 2469, + "biotype": "protein_coding", + "exon": "11/15", + "transcript_id": "XM_047434018.1", + "gene_id": "29123", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289974.1", + "hgvsp": "XP_047289974.1:p.Tyr2469Ter", + "gene_symbol_source": "EntrezGene", + "lof": "HC", + "cdna_end": 12847, + "codons": "taC/taG" + }, + { + "strand": -1, + "cdna_start": 12851, + "given_ref": "G", + "cds_end": 7407, + "variant_allele": "C", + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "gene_symbol": "ANKRD11", + "amino_acids": "Y/*", + "hgvsc": "XM_047434019.1:c.7407C>G", + "cds_start": 7407, + "impact": "HIGH", + "protein_end": 2469, + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289975.1:p.Tyr2469Ter", + "protein_id": "XP_047289975.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "transcript_id": "XM_047434019.1", + "gene_id": "29123", + "exon": "11/15", + "biotype": "protein_coding", + "protein_start": 2469, + "codons": "taC/taG", + "cdna_end": 12851, + "lof": "HC" + }, + { + "given_ref": "G", + "cdna_start": 12918, + "strand": -1, + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "cds_end": 7407, + "variant_allele": "C", + "hgvsc": "XM_047434020.1:c.7407C>G", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "cds_start": 7407, + "impact": "HIGH", + "protein_end": 2469, + "protein_id": "XP_047289976.1", + 
"lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289976.1:p.Tyr2469Ter", + "protein_start": 2469, + "transcript_id": "XM_047434020.1", + "gene_id": "29123", + "exon": "12/16", + "biotype": "protein_coding", + "cdna_end": 12918, + "codons": "taC/taG", + "lof": "HC" + }, + { + "codons": "taC/taG", + "cdna_end": 11486, + "lof": "HC", + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289977.1:p.Tyr2469Ter", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289977.1", + "exon": "11/15", + "biotype": "protein_coding", + "transcript_id": "XM_047434021.1", + "gene_id": "29123", + "protein_start": 2469, + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "hgvsc": "XM_047434021.1:c.7407C>G", + "protein_end": 2469, + "cds_start": 7407, + "impact": "HIGH", + "strand": -1, + "cdna_start": 11486, + "given_ref": "G", + "variant_allele": "C", + "cds_end": 7407, + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ] + }, + { + "cdna_end": 11483, + "codons": "taC/taG", + "lof": "HC", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289978.1", + "hgvsp": "XP_047289978.1:p.Tyr2469Ter", + "gene_symbol_source": "EntrezGene", + "protein_start": 2469, + "biotype": "protein_coding", + "exon": "11/15", + "gene_id": "29123", + "transcript_id": "XM_047434022.1", + "hgvsc": "XM_047434022.1:c.7407C>G", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "protein_end": 2469, + "impact": "HIGH", + "cds_start": 7407, + "given_ref": "G", + "strand": -1, + "cdna_start": 11483, + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ], + "variant_allele": "C", + "cds_end": 7407 + }, + { + 
"given_ref": "G", + "strand": -1, + "cdna_start": 7982, + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ], + "variant_allele": "C", + "cds_end": 7407, + "hgvsc": "XM_047434023.1:c.7407C>G", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "protein_end": 2469, + "impact": "HIGH", + "cds_start": 7407, + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289979.1", + "hgvsp": "XP_047289979.1:p.Tyr2469Ter", + "gene_symbol_source": "EntrezGene", + "protein_start": 2469, + "exon": "10/14", + "biotype": "protein_coding", + "gene_id": "29123", + "transcript_id": "XM_047434023.1", + "cdna_end": 7982, + "codons": "taC/taG", + "lof": "HC" + }, + { + "hgvsc": "XM_047434024.1:c.7407C>G", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "impact": "HIGH", + "cds_start": 7407, + "protein_end": 2469, + "given_ref": "G", + "cdna_start": 8180, + "strand": -1, + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "cds_end": 7407, + "variant_allele": "C", + "cdna_end": 8180, + "codons": "taC/taG", + "lof": "HC", + "protein_id": "XP_047289980.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "hgvsp": "XP_047289980.1:p.Tyr2469Ter", + "gene_symbol_source": "EntrezGene", + "protein_start": 2469, + "transcript_id": "XM_047434024.1", + "gene_id": "29123", + "exon": "11/15", + "biotype": "protein_coding" + }, + { + "cds_start": 7407, + "impact": "HIGH", + "protein_end": 2469, + "gene_symbol": "ANKRD11", + "amino_acids": "Y/*", + "hgvsc": "XM_047434025.1:c.7407C>G", + "cds_end": 7407, + "variant_allele": "C", + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "strand": -1, + "cdna_start": 7979, + "given_ref": "G", + "lof": "HC", + "codons": "taC/taG", + "cdna_end": 7979, + "gene_id": "29123", + "transcript_id": "XM_047434025.1", 
+ "biotype": "protein_coding", + "exon": "10/14", + "protein_start": 2469, + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289981.1:p.Tyr2469Ter", + "protein_id": "XP_047289981.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT" + }, + { + "protein_start": 2469, + "biotype": "protein_coding", + "exon": "13/17", + "transcript_id": "XM_047434026.1", + "gene_id": "29123", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289982.1", + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289982.1:p.Tyr2469Ter", + "lof": "HC", + "cdna_end": 11860, + "codons": "taC/taG", + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ], + "variant_allele": "C", + "cds_end": 7407, + "given_ref": "G", + "strand": -1, + "cdna_start": 11860, + "protein_end": 2469, + "cds_start": 7407, + "impact": "HIGH", + "hgvsc": "XM_047434026.1:c.7407C>G", + "gene_symbol": "ANKRD11", + "amino_acids": "Y/*" + }, + { + "protein_id": "XP_047289983.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "hgvsp": "XP_047289983.1:p.Tyr2469Ter", + "gene_symbol_source": "EntrezGene", + "protein_start": 2469, + "gene_id": "29123", + "transcript_id": "XM_047434027.1", + "biotype": "protein_coding", + "exon": "10/14", + "cdna_end": 8080, + "codons": "taC/taG", + "lof": "HC", + "given_ref": "G", + "cdna_start": 8080, + "strand": -1, + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "cds_end": 7407, + "variant_allele": "C", + "hgvsc": "XM_047434027.1:c.7407C>G", + "gene_symbol": "ANKRD11", + "amino_acids": "Y/*", + "cds_start": 7407, + "impact": "HIGH", + "protein_end": 2469 + }, + { + "lof": "HC", + "cdna_end": 8970, + "codons": "taC/taG", + "protein_start": 2469, + 
"transcript_id": "XM_047434028.1", + "gene_id": "29123", + "exon": "9/13", + "biotype": "protein_coding", + "protein_id": "XP_047289984.1", + "lof_info": "PERCENTILE:0.926801801801802,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "hgvsp": "XP_047289984.1:p.Tyr2469Ter", + "gene_symbol_source": "EntrezGene", + "cds_start": 7407, + "impact": "HIGH", + "protein_end": 2469, + "hgvsc": "XM_047434028.1:c.7407C>G", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ], + "cds_end": 7407, + "variant_allele": "C", + "given_ref": "G", + "cdna_start": 8970, + "strand": -1 + }, + { + "cdna_end": 7780, + "codons": "taC/taG", + "lof": "HC", + "lof_info": "PERCENTILE:0.925855513307985,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289985.1", + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289985.1:p.Tyr2435Ter", + "protein_start": 2435, + "exon": "9/13", + "biotype": "protein_coding", + "gene_id": "29123", + "transcript_id": "XM_047434029.1", + "hgvsc": "XM_047434029.1:c.7305C>G", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "protein_end": 2435, + "cds_start": 7305, + "impact": "HIGH", + "given_ref": "G", + "strand": -1, + "cdna_start": 7780, + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ], + "variant_allele": "C", + "cds_end": 7305 + }, + { + "hgvsc": "XM_047434030.1:c.7305C>G", + "gene_symbol": "ANKRD11", + "amino_acids": "Y/*", + "protein_end": 2435, + "cds_start": 7305, + "impact": "HIGH", + "given_ref": "G", + "strand": -1, + "cdna_start": 7895, + "used_ref": "G", + "consequence_terms": [ + "stop_gained" + ], + "variant_allele": "C", + "cds_end": 7305, + "cdna_end": 7895, + "codons": "taC/taG", + "lof": "HC", + "lof_info": "PERCENTILE:0.925855513307985,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", 
+ "protein_id": "XP_047289986.1", + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289986.1:p.Tyr2435Ter", + "protein_start": 2435, + "biotype": "protein_coding", + "exon": "10/14", + "gene_id": "29123", + "transcript_id": "XM_047434030.1" + }, + { + "hgvsc": "XM_047434031.1:c.7305C>G", + "amino_acids": "Y/*", + "gene_symbol": "ANKRD11", + "protein_end": 2435, + "cds_start": 7305, + "impact": "HIGH", + "given_ref": "G", + "cdna_start": 7593, + "strand": -1, + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "variant_allele": "C", + "cds_end": 7305, + "cdna_end": 7593, + "codons": "taC/taG", + "lof": "HC", + "lof_info": "PERCENTILE:0.925855513307985,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289987.1", + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289987.1:p.Tyr2435Ter", + "protein_start": 2435, + "exon": "9/13", + "biotype": "protein_coding", + "gene_id": "29123", + "transcript_id": "XM_047434031.1" + }, + { + "variant_allele": "C", + "cds_end": 7278, + "consequence_terms": [ + "stop_gained" + ], + "used_ref": "G", + "strand": -1, + "cdna_start": 7802, + "given_ref": "G", + "protein_end": 2426, + "impact": "HIGH", + "cds_start": 7278, + "gene_symbol": "ANKRD11", + "amino_acids": "Y/*", + "hgvsc": "XM_047434032.1:c.7278C>G", + "exon": "9/13", + "biotype": "protein_coding", + "gene_id": "29123", + "transcript_id": "XM_047434032.1", + "protein_start": 2426, + "gene_symbol_source": "EntrezGene", + "hgvsp": "XP_047289988.1:p.Tyr2426Ter", + "lof_info": "PERCENTILE:0.925600915681038,GERP_DIST:-4.79979183673861,BP_DIST:581,DIST_FROM_LAST_EXON:396,50_BP_RULE:PASS,PHYLOCSF_TOO_SHORT", + "protein_id": "XP_047289988.1", + "lof": "HC", + "codons": "taC/taG", + "cdna_end": 7802 + } + ], + "assembly_name": "GRCh38", + "colocated_variants": [ + { + "phenotype_or_disease": 1, + "var_synonyms": { + "COSMIC": [ + "COSM9359179" + ] + }, + "start": 89279135, + "id": 
"COSV104615518", + "strand": 1, + "seq_region_name": "16", + "allele_string": "COSMIC_MUTATION", + "somatic": 1, + "end": 89279135 + } + ], + "seq_region_name": "16", + "strand": 1, + "allele_string": "G/C", + "most_severe_consequence": "stop_gained" + } +]