Skip to content

Commit

Permalink
added variant check routs & heredicare consensus classifications are …
Browse files Browse the repository at this point in the history
…now shown in classification symbol
  • Loading branch information
MarvinDo committed Oct 18, 2023
1 parent c4ff099 commit b5d14f8
Show file tree
Hide file tree
Showing 12 changed files with 548 additions and 102 deletions.
133 changes: 111 additions & 22 deletions src/common/db_IO.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,14 +729,14 @@ def get_variants_page_merged(self, page, page_size, sort_by, include_hidden, use
if page_size != 'unlimited':
command = command + " LIMIT %s, %s"
actual_information += (offset, page_size)
print(command % actual_information)
#print(command % actual_information)
self.cursor.execute(command, actual_information)
variants_raw = self.cursor.fetchall()

# get variant objects
variants = []
for variant_raw in variants_raw:
variant = self.get_variant(variant_id=variant_raw[0], include_annotations = False, include_heredicare_classifications = False, include_clinvar = False, include_assays = False, include_literature = False)
variant = self.get_variant(variant_id=variant_raw[0], include_annotations = False, include_heredicare_classifications = True, include_clinvar = False, include_assays = False, include_literature = False)
variants.append(variant)

if page_size == 'unlimited':
Expand Down Expand Up @@ -1708,7 +1708,6 @@ def get_variant(self, variant_id,
is_hidden = True if variant_raw[5] == 1 else False

annotations = None
all_heredicare_annotations = None
if include_annotations:
annotations = models.AllAnnotations()
annotations_raw = self.get_recent_annotations(variant_id)
Expand All @@ -1729,23 +1728,6 @@ def get_variant(self, variant_id,

annotations.flag_linked_annotations()

heredicare_annotations_raw = self.get_heredicare_annotations(variant_id)

all_heredicare_annotations = []
for annot in heredicare_annotations_raw:
#id, vid, n_fam, n_pat, consensus_class, comment, date
heredicare_annotation_id = annot[0]
vid = annot[1]
n_fam = annot[2]
n_pat = annot[3]
consensus_class = annot[4]
comment = annot[5]
classification_date = annot[6]

classification = models.HeredicareClassification(id = heredicare_annotation_id, selected_class = consensus_class, comment = comment, classification_date = classification_date, center = "VUSTF", vid = vid)
new_heredicare_annotation = models.HeredicareAnnotation(id = heredicare_annotation_id, vid = vid, n_fam = n_fam, n_pat = n_pat, vustf_classification = classification)
all_heredicare_annotations.append(new_heredicare_annotation)


# add all consensus classifications
consensus_classifications = None
Expand Down Expand Up @@ -1857,6 +1839,7 @@ def get_variant(self, variant_id,
user_classifications.append(new_user_classification)

heredicare_classifications = None
all_heredicare_annotations = None
if include_heredicare_classifications:
heredicare_classifications_raw = self.get_heredicare_center_classifications(variant_id)
if heredicare_classifications_raw is not None:
Expand All @@ -1869,6 +1852,23 @@ def get_variant(self, variant_id,
date = cl_raw[5].strftime('%Y-%m-%d')
new_heredicare_classification = models.HeredicareClassification(id = id, selected_class = selected_class, comment = comment, center = center, classification_date = date, vid="")
heredicare_classifications.append(new_heredicare_classification)


heredicare_annotations_raw = self.get_heredicare_annotations(variant_id)
all_heredicare_annotations = []
for annot in heredicare_annotations_raw:
#id, vid, n_fam, n_pat, consensus_class, comment, date
heredicare_annotation_id = annot[0]
vid = annot[1]
n_fam = annot[2]
n_pat = annot[3]
consensus_class = annot[4]
comment = annot[5]
classification_date = annot[6]

classification = models.HeredicareClassification(id = heredicare_annotation_id, selected_class = consensus_class, comment = comment, classification_date = classification_date, center = "VUSTF", vid = vid)
new_heredicare_annotation = models.HeredicareAnnotation(id = heredicare_annotation_id, vid = vid, n_fam = n_fam, n_pat = n_pat, vustf_classification = classification)
all_heredicare_annotations.append(new_heredicare_annotation)

# add clinvar annotation
clinvar = None
Expand Down Expand Up @@ -2270,7 +2270,7 @@ def clear_heredicare_annotation(self, variant_id):
self.conn.commit()

def get_enumtypes(self, tablename, columnname):
allowed_tablenames = ["consensus_classification", "user_classification"]
allowed_tablenames = ["consensus_classification", "user_classification", "variant"]
if tablename in allowed_tablenames:
command = "SHOW COLUMNS FROM " + tablename + " WHERE FIELD = %s"
else:
Expand All @@ -2281,4 +2281,93 @@ def get_enumtypes(self, tablename, columnname):
column_type = column_type.strip('enum()')
allowed_enum = column_type.split(',')
allowed_enum = [x.strip('\'') for x in allowed_enum]
return allowed_enum
return allowed_enum



def get_gencode_basic_transcripts(self, gene_id):
if gene_id is None:
return None
command = "SELECT name FROM transcript WHERE gene_id = %s AND (is_gencode_basic=1 or is_mane_select=1 or is_mane_plus_clinical=1)"
self.cursor.execute(command, (gene_id, ))
result = self.cursor.fetchall()
return [x[0] for x in result if x[0].startswith("ENST")]



# this function returns a list of consequence objects of the preferred transcripts
# (can be multiple if there are eg. 2 mane select transcripts for this variant)
def get_preferred_transcripts(self, gene_id, return_all=False):
result = []
command = "SELECT name, biotype, length, is_gencode_basic, is_mane_select, is_mane_plus_clinical, is_ensembl_canonical FROM transcript WHERE gene_id = %s"
self.cursor.execute(command, (gene_id, ))
result_raw = self.cursor.fetchall()
transcripts = []
for elem in result_raw:
if elem[0].startswith("ENST"):
source = "ensembl" if elem[0].startswith("ENST") else "refseq"
new_elem = {"name": elem[0],
"biotype": elem[1],
"length": elem[2],
"is_gencode_basic": elem[3],
"is_mane_select": elem[4],
"is_mane_plus_clinical": elem[5],
"is_ensembl_canonical": elem[6],
"source": source
}
transcripts.append(new_elem)

if len(transcripts) > 0:
transcripts = self.order_transcripts(transcripts)

if not return_all:
result.append(transcripts.pop(0)) # always append the first one

for transcript in transcripts: # scan for all mane select transcripts
if transcript["is_mane_select"]:
result.append(transcript)
else:
break # we can do this because the list is sorted
else:
result = transcripts
else: # the variant does not have any consequences
return None
return result

def order_transcripts(self, consequences):
keyfunc = cmp_to_key(mycmp = self.sort_transcripts)
consequences.sort(key = keyfunc) # sort by preferred transcript
return consequences

def sort_transcripts(self, a, b):
# sort by ensembl/refseq
if a["source"] == 'ensembl' and b["source"] == 'refseq':
return -1
elif a["source"] == 'refseq' and b["source"] == 'ensembl':
return 1
elif a["source"] == b["source"]:

# sort by mane select
if a["is_mane_select"] is None or b["is_mane_select"] is None:
return 1
elif a["is_mane_select"] and not b["is_mane_select"]:
return -1
elif not a["is_mane_select"] and b["is_mane_select"]:
return 1
elif a["is_mane_select"] == b["is_mane_select"]:

# sort by biotype
if a["biotype"] == 'protein coding' and b["biotype"] != 'protein coding':
return -1
elif a["biotype"] != 'protein coding' and b["biotype"] == 'protein coding':
return 1
elif (a["biotype"] != 'protein coding' and b["biotype"] != 'protein coding') or (a["biotype"] == 'protein coding' and b["biotype"] == 'protein coding'):

# sort by length
if a["length"] > b["length"]:
return -1
elif a["length"] < b["length"]:
return 1
else:
return 0

68 changes: 59 additions & 9 deletions src/common/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,15 +276,55 @@ def left_align_vcf(infile, outfile, ref_genome = 'GRCh38'):

return returncode, err_msg, command_output


def hgvsc_to_vcf(hgvs, reference = None):
## DEPRECATED
#def hgvsc_to_vcf(hgvs, reference = None):
# #tmp_file_path = tempfile.gettempdir() + "/hgvs_to_vcf"
# tmp_file_path = get_random_temp_file("_hgvs2vcf")
# tmp_file = open(tmp_file_path + ".tsv", "w")
# tmp_file.write("#reference hgvs_c\n")
# if reference is None:
# reference, hgvs = split_hgvs(hgvs)
# tmp_file.write(reference + "\t" + hgvs + "\n")
# tmp_file.close()
#
# command = [os.path.join(paths.ngs_bits_path, "HgvsToVcf")]
# command.extend(['-in', tmp_file_path + '.tsv', '-ref', paths.ref_genome_path, '-out', tmp_file_path + '.vcf'])
# returncode, err_msg, command_output = execute_command(command, "HgvsToVcf", use_prefix_error_log=False)
#
# chr = None
# pos = None
# ref = None
# alt = None
# tmp_file = open(tmp_file_path + '.vcf', "r")
# for line in tmp_file: # this assumes a single-entry vcf
# if line.strip() == '' or line.startswith('#'):
# continue
# parts = line.split('\t')
# chr = parts[0]
# pos = parts[1]
# ref = parts[3]
# alt = parts[4]
#
#
# rm(tmp_file_path + ".tsv")
# rm(tmp_file_path + ".vcf")
# return chr, pos, ref, alt, err_msg


def hgvsc_to_vcf(hgvs_strings, references = None):
#tmp_file_path = tempfile.gettempdir() + "/hgvs_to_vcf"
tmp_file_path = get_random_temp_file("_hgvs2vcf")
tmp_file = open(tmp_file_path + ".tsv", "w")
tmp_file.write("#reference hgvs_c\n")
if reference is None:
reference, hgvs = split_hgvs(hgvs)
tmp_file.write(reference + "\t" + hgvs + "\n")
if references is None:
references = []
for hgvs in hgvs_strings:
reference, hgvs = split_hgvs(hgvs)
references.append(reference)
tmp_file.write(reference + "\t" + hgvs + "\n")
else:
for hgvs, reference in zip(hgvs_strings, references):
tmp_file.write(reference + "\t" + hgvs + "\n")
tmp_file.close()

command = [os.path.join(paths.ngs_bits_path, "HgvsToVcf")]
Expand All @@ -295,21 +335,31 @@ def hgvsc_to_vcf(hgvs, reference = None):
pos = None
ref = None
alt = None
first_iter = True
tmp_file = open(tmp_file_path + '.vcf', "r")
for line in tmp_file: # this assumes a single-entry vcf
if line.strip() == '' or line.startswith('#'):
continue
parts = line.split('\t')
chr = parts[0]
pos = parts[1]
ref = parts[3]
alt = parts[4]

#print(parts)

if first_iter:
chr = parts[0]
pos = parts[1]
ref = parts[3]
alt = parts[4]
first_iter = False
else:
if chr != parts[0] or pos != parts[1] or ref != parts[3] or alt != parts[4]: # check that all are equal
return None, None, None, None, "HGVSc recovered vcf-style variant among transcripts is unequal: " + str(references)


rm(tmp_file_path + ".tsv")
rm(tmp_file_path + ".vcf")
return chr, pos, ref, alt, err_msg


# function for splitting hgvs in refrence transcript and variant
def split_hgvs(hgvs):
hgvs = hgvs.strip()
Expand Down
42 changes: 33 additions & 9 deletions src/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,15 +627,6 @@ def get_heredicare_consensus_classifications(self):
if heredicare_annotation.vustf_classification.selected_class is not None:
result.append(heredicare_annotation.vustf_classification)
return result

def get_heredicare_consensus_classification_severeity(self):
result = []
for heredicare_annotation in self.heredicare_annotations:
current_classification = heredicare_annotation.vustf_classification
if current_classification.selected_class is not None:
class_num = current_classification.selected_class_to_num()
result.append(class_num)
return list(set(result))


def get_total_heredicare_counts(self):
Expand Down Expand Up @@ -718,6 +709,38 @@ def get_user_classifications(self, user_id):
def to_json(self):
return json.dumps(asdict(self))


def get_most_recent_heredicare_consensus_classification(self):
result = None
if self.heredicare_annotations is None:
return None
for heredicare_annotation in self.heredicare_annotations:
current_classification = heredicare_annotation.vustf_classification
if current_classification.selected_class is not None:
if result is None:
result = current_classification
elif current_classification.classification_date > result.classification_date:
result = current_classification
return result

# the most recent consensus class or if that does not exist the most recent heredicare consensus classification
def get_consensus_class(self):
the_class = "-"
source = "heredivar"
most_recent_consensus_classification = self.get_recent_consensus_classification()
if most_recent_consensus_classification is None:
heredicare_classification = self.get_most_recent_heredicare_consensus_classification()
if heredicare_classification is not None:
the_class = heredicare_classification.selected_class_to_num()
source = "heredicare"
else:
the_class = most_recent_consensus_classification.selected_class
source = "heredivar"

return the_class, source


# the most recent conensus classification independent of schemes
def get_recent_consensus_classification(self):
result = None
if self.consensus_classifications is not None:
Expand All @@ -729,6 +752,7 @@ def get_recent_consensus_classification(self):
result = classification
return result

# the most recent consensus classification for each scheme
def get_recent_consensus_classification_all_schemes(self, convert_to_dict = False):
result = None
if self.consensus_classifications is not None:
Expand Down
9 changes: 9 additions & 0 deletions src/frontend_celery/webapp/static/css/colors.css
Original file line number Diff line number Diff line change
Expand Up @@ -210,4 +210,13 @@ input.invalid {
.classification-gradient {
background: linear-gradient(90deg, rgba(255,255,255,0) 0%, rgba(255,255,255,0.499019676229867) 20%, rgba(255,255,255,0.633473457742472) 40%, rgba(255,255,255,1) 100%);
background-color: currentColor;
}




.white_border {
stroke: #d4d4d4;
stroke-width: 0.5px;
stroke-linejoin: round;
}
4 changes: 4 additions & 0 deletions src/frontend_celery/webapp/static/css/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -437,4 +437,8 @@ footer {
.class_label .the_c {
position: absolute;
z-index:1;
}
.classM_text {
white-space: pre;
font-size: 7px;
}
Loading

0 comments on commit b5d14f8

Please sign in to comment.