added variant check routs & heredicare consensus classifications are …

…now shown in classification symbol
GC-HBOC · Oct 18, 2023 · b5d14f8 · b5d14f8
1 parent c4ff099
commit b5d14f8
Show file tree

Hide file tree

Showing 12 changed files with 548 additions and 102 deletions.
diff --git a/src/common/db_IO.py b/src/common/db_IO.py
@@ -729,14 +729,14 @@ def get_variants_page_merged(self, page, page_size, sort_by, include_hidden, use
         if page_size != 'unlimited':
             command = command + " LIMIT %s, %s"
             actual_information += (offset, page_size)
-        print(command % actual_information)
+        #print(command % actual_information)
         self.cursor.execute(command, actual_information)
         variants_raw = self.cursor.fetchall()
 
         # get variant objects
         variants = []
         for variant_raw in variants_raw:
-            variant = self.get_variant(variant_id=variant_raw[0], include_annotations = False, include_heredicare_classifications = False, include_clinvar = False, include_assays = False, include_literature = False)
+            variant = self.get_variant(variant_id=variant_raw[0], include_annotations = False, include_heredicare_classifications = True, include_clinvar = False, include_assays = False, include_literature = False)
             variants.append(variant)
 
         if page_size == 'unlimited':
@@ -1708,7 +1708,6 @@ def get_variant(self, variant_id,
         is_hidden = True if variant_raw[5] == 1 else False
 
         annotations = None
-        all_heredicare_annotations = None
         if include_annotations:
             annotations = models.AllAnnotations()
             annotations_raw = self.get_recent_annotations(variant_id)
@@ -1729,23 +1728,6 @@ def get_variant(self, variant_id,
 
             annotations.flag_linked_annotations()
 
-            heredicare_annotations_raw = self.get_heredicare_annotations(variant_id)
-
-            all_heredicare_annotations = []
-            for annot in heredicare_annotations_raw:
-                #id, vid, n_fam, n_pat, consensus_class, comment, date
-                heredicare_annotation_id = annot[0]
-                vid = annot[1]
-                n_fam = annot[2]
-                n_pat = annot[3]
-                consensus_class = annot[4]
-                comment = annot[5]
-                classification_date = annot[6]
-
-                classification = models.HeredicareClassification(id = heredicare_annotation_id, selected_class = consensus_class, comment = comment, classification_date = classification_date, center = "VUSTF", vid = vid)
-                new_heredicare_annotation = models.HeredicareAnnotation(id = heredicare_annotation_id, vid = vid, n_fam = n_fam, n_pat = n_pat, vustf_classification = classification)
-                all_heredicare_annotations.append(new_heredicare_annotation)
-
 
         # add all consensus classifications
         consensus_classifications = None
@@ -1857,6 +1839,7 @@ def get_variant(self, variant_id,
                     user_classifications.append(new_user_classification)
 
         heredicare_classifications = None
+        all_heredicare_annotations = None
         if include_heredicare_classifications:
             heredicare_classifications_raw = self.get_heredicare_center_classifications(variant_id)
             if heredicare_classifications_raw is not None:
@@ -1869,6 +1852,23 @@ def get_variant(self, variant_id,
                     date = cl_raw[5].strftime('%Y-%m-%d')
                     new_heredicare_classification = models.HeredicareClassification(id = id, selected_class = selected_class, comment = comment, center = center, classification_date = date, vid="")
                     heredicare_classifications.append(new_heredicare_classification)
+
+
+            heredicare_annotations_raw = self.get_heredicare_annotations(variant_id)
+            all_heredicare_annotations = []
+            for annot in heredicare_annotations_raw:
+                #id, vid, n_fam, n_pat, consensus_class, comment, date
+                heredicare_annotation_id = annot[0]
+                vid = annot[1]
+                n_fam = annot[2]
+                n_pat = annot[3]
+                consensus_class = annot[4]
+                comment = annot[5]
+                classification_date = annot[6]
+
+                classification = models.HeredicareClassification(id = heredicare_annotation_id, selected_class = consensus_class, comment = comment, classification_date = classification_date, center = "VUSTF", vid = vid)
+                new_heredicare_annotation = models.HeredicareAnnotation(id = heredicare_annotation_id, vid = vid, n_fam = n_fam, n_pat = n_pat, vustf_classification = classification)
+                all_heredicare_annotations.append(new_heredicare_annotation)
 
         # add clinvar annotation
         clinvar = None
@@ -2270,7 +2270,7 @@ def clear_heredicare_annotation(self, variant_id):
         self.conn.commit()
 
     def get_enumtypes(self, tablename, columnname):
-        allowed_tablenames = ["consensus_classification", "user_classification"]
+        allowed_tablenames = ["consensus_classification", "user_classification", "variant"]
         if tablename in allowed_tablenames:
             command = "SHOW COLUMNS FROM " + tablename + " WHERE FIELD = %s"
         else:
@@ -2281,4 +2281,93 @@ def get_enumtypes(self, tablename, columnname):
         column_type = column_type.strip('enum()')
         allowed_enum = column_type.split(',')
         allowed_enum = [x.strip('\'') for x in allowed_enum]
-        return allowed_enum
+        return allowed_enum
+
+
+
+    def get_gencode_basic_transcripts(self, gene_id):
+        if gene_id is None:
+            return None
+        command = "SELECT name FROM transcript WHERE gene_id = %s AND (is_gencode_basic=1 or is_mane_select=1 or is_mane_plus_clinical=1)"
+        self.cursor.execute(command, (gene_id, ))
+        result = self.cursor.fetchall()
+        return [x[0] for x in result if x[0].startswith("ENST")]
+
+
+
+    # this function returns a list of consequence objects of the preferred transcripts 
+    # (can be multiple if there are eg. 2 mane select transcripts for this variant)
+    def get_preferred_transcripts(self, gene_id, return_all=False):
+        result = []
+        command = "SELECT name, biotype, length, is_gencode_basic, is_mane_select, is_mane_plus_clinical, is_ensembl_canonical FROM transcript WHERE gene_id = %s"
+        self.cursor.execute(command, (gene_id, ))
+        result_raw = self.cursor.fetchall()
+        transcripts = []
+        for elem in result_raw:
+            if elem[0].startswith("ENST"):
+                source = "ensembl" if elem[0].startswith("ENST") else "refseq"
+                new_elem = {"name": elem[0],
+                            "biotype": elem[1],
+                            "length": elem[2],
+                            "is_gencode_basic": elem[3],
+                            "is_mane_select": elem[4],
+                            "is_mane_plus_clinical": elem[5],
+                            "is_ensembl_canonical": elem[6],
+                            "source": source
+                        }
+                transcripts.append(new_elem)
+
+        if len(transcripts) > 0:
+            transcripts = self.order_transcripts(transcripts)
+
+            if not return_all:
+                result.append(transcripts.pop(0)) # always append the first one
+
+                for transcript in transcripts: # scan for all mane select transcripts
+                    if transcript["is_mane_select"]:
+                        result.append(transcript)
+                    else:
+                        break # we can do this because the list is sorted
+            else:
+                result = transcripts
+        else: # the variant does not have any consequences
+            return None
+        return result
+
+    def order_transcripts(self, consequences):
+        keyfunc = cmp_to_key(mycmp = self.sort_transcripts)
+        consequences.sort(key = keyfunc) # sort by preferred transcript
+        return consequences
+
+    def sort_transcripts(self, a, b):
+        # sort by ensembl/refseq
+        if a["source"] == 'ensembl' and b["source"] == 'refseq':
+            return -1
+        elif a["source"] == 'refseq' and b["source"] == 'ensembl':
+            return 1
+        elif a["source"] == b["source"]:
+
+            # sort by mane select
+            if a["is_mane_select"] is None or b["is_mane_select"] is None:
+                return 1
+            elif a["is_mane_select"] and not b["is_mane_select"]:
+                return -1
+            elif not a["is_mane_select"] and b["is_mane_select"]:
+                return 1
+            elif a["is_mane_select"] == b["is_mane_select"]:
+
+                # sort by biotype
+                if a["biotype"] == 'protein coding' and b["biotype"] != 'protein coding':
+                    return -1
+                elif a["biotype"] != 'protein coding' and b["biotype"] == 'protein coding':
+                    return 1
+                elif (a["biotype"] != 'protein coding' and b["biotype"] != 'protein coding') or (a["biotype"] == 'protein coding' and b["biotype"] == 'protein coding'):
+
+                    # sort by length
+                    if a["length"] > b["length"]:
+                        return -1
+                    elif a["length"] < b["length"]:
+                        return 1
+                    else:
+                        return 0
+
diff --git a/src/common/functions.py b/src/common/functions.py
@@ -276,15 +276,55 @@ def left_align_vcf(infile, outfile, ref_genome = 'GRCh38'):
 
     return returncode, err_msg, command_output
 
-
-def hgvsc_to_vcf(hgvs, reference = None):
+## DEPRECATED
+#def hgvsc_to_vcf(hgvs, reference = None):
+#    #tmp_file_path = tempfile.gettempdir() + "/hgvs_to_vcf"
+#    tmp_file_path = get_random_temp_file("_hgvs2vcf")
+#    tmp_file = open(tmp_file_path + ".tsv", "w")
+#    tmp_file.write("#reference	hgvs_c\n")
+#    if reference is None:
+#        reference, hgvs = split_hgvs(hgvs)
+#    tmp_file.write(reference + "\t" + hgvs + "\n")
+#    tmp_file.close()
+#
+#    command = [os.path.join(paths.ngs_bits_path, "HgvsToVcf")]
+#    command.extend(['-in', tmp_file_path + '.tsv', '-ref', paths.ref_genome_path, '-out', tmp_file_path + '.vcf'])
+#    returncode, err_msg, command_output = execute_command(command, "HgvsToVcf", use_prefix_error_log=False)
+#    
+#    chr = None
+#    pos = None
+#    ref = None
+#    alt = None
+#    tmp_file = open(tmp_file_path + '.vcf', "r")
+#    for line in tmp_file: # this assumes a single-entry vcf
+#        if line.strip() == '' or line.startswith('#'):
+#            continue
+#        parts = line.split('\t')
+#        chr = parts[0]
+#        pos = parts[1]
+#        ref = parts[3]
+#        alt = parts[4]
+#    
+#
+#    rm(tmp_file_path + ".tsv")
+#    rm(tmp_file_path + ".vcf")
+#    return chr, pos, ref, alt, err_msg
+
+
+def hgvsc_to_vcf(hgvs_strings, references = None):
     #tmp_file_path = tempfile.gettempdir() + "/hgvs_to_vcf"
     tmp_file_path = get_random_temp_file("_hgvs2vcf")
     tmp_file = open(tmp_file_path + ".tsv", "w")
     tmp_file.write("#reference	hgvs_c\n")
-    if reference is None:
-        reference, hgvs = split_hgvs(hgvs)
-    tmp_file.write(reference + "\t" + hgvs + "\n")
+    if references is None:
+        references = []
+        for hgvs in hgvs_strings:
+            reference, hgvs = split_hgvs(hgvs)
+            references.append(reference)
+            tmp_file.write(reference + "\t" + hgvs + "\n")
+    else:
+        for hgvs, reference in zip(hgvs_strings, references):
+            tmp_file.write(reference + "\t" + hgvs + "\n")
     tmp_file.close()
 
     command = [os.path.join(paths.ngs_bits_path, "HgvsToVcf")]
@@ -295,21 +335,31 @@ def hgvsc_to_vcf(hgvs, reference = None):
     pos = None
     ref = None
     alt = None
+    first_iter = True
     tmp_file = open(tmp_file_path + '.vcf', "r")
     for line in tmp_file: # this assumes a single-entry vcf
         if line.strip() == '' or line.startswith('#'):
             continue
         parts = line.split('\t')
-        chr = parts[0]
-        pos = parts[1]
-        ref = parts[3]
-        alt = parts[4]
+
+        #print(parts)
+
+        if first_iter:
+            chr = parts[0]
+            pos = parts[1]
+            ref = parts[3]
+            alt = parts[4]
+            first_iter = False
+        else:
+            if chr != parts[0] or pos != parts[1] or ref != parts[3] or alt != parts[4]: # check that all are equal
+                return None, None, None, None, "HGVSc recovered vcf-style variant among transcripts is unequal: " + str(references)
 
 
     rm(tmp_file_path + ".tsv")
     rm(tmp_file_path + ".vcf")
     return chr, pos, ref, alt, err_msg
 
+
 # function for splitting hgvs in refrence transcript and variant
 def split_hgvs(hgvs):
     hgvs = hgvs.strip()

diff --git a/src/common/models.py b/src/common/models.py
@@ -627,15 +627,6 @@ def get_heredicare_consensus_classifications(self):
             if heredicare_annotation.vustf_classification.selected_class is not None:
                 result.append(heredicare_annotation.vustf_classification)
         return result
-
-    def get_heredicare_consensus_classification_severeity(self):
-        result = []
-        for heredicare_annotation in self.heredicare_annotations:
-            current_classification = heredicare_annotation.vustf_classification
-            if current_classification.selected_class is not None:
-                class_num = current_classification.selected_class_to_num()
-                result.append(class_num)
-        return list(set(result))
 
 
     def get_total_heredicare_counts(self):
@@ -718,6 +709,38 @@ def get_user_classifications(self, user_id):
     def to_json(self):
         return json.dumps(asdict(self))
 
+
+    def get_most_recent_heredicare_consensus_classification(self):
+        result = None
+        if self.heredicare_annotations is None:
+            return None
+        for heredicare_annotation in self.heredicare_annotations:
+            current_classification = heredicare_annotation.vustf_classification
+            if current_classification.selected_class is not None:
+                if result is None:
+                    result = current_classification
+                elif current_classification.classification_date > result.classification_date:
+                    result = current_classification
+        return result
+
+    # the most recent consensus class or if that does not exist the most recent heredicare consensus classification
+    def get_consensus_class(self):
+        the_class = "-"
+        source = "heredivar"
+        most_recent_consensus_classification  = self.get_recent_consensus_classification()
+        if most_recent_consensus_classification is None:
+            heredicare_classification = self.get_most_recent_heredicare_consensus_classification()
+            if heredicare_classification is not None:
+                the_class = heredicare_classification.selected_class_to_num()
+                source = "heredicare"
+        else:
+            the_class = most_recent_consensus_classification.selected_class
+            source = "heredivar"
+
+        return the_class, source
+
+
+    # the most recent conensus classification independent of schemes
     def get_recent_consensus_classification(self):
         result = None
         if self.consensus_classifications is not None:
@@ -729,6 +752,7 @@ def get_recent_consensus_classification(self):
                         result = classification
         return result
 
+    # the most recent consensus classification for each scheme
     def get_recent_consensus_classification_all_schemes(self, convert_to_dict = False):
         result = None
         if self.consensus_classifications is not None:

diff --git a/src/frontend_celery/webapp/static/css/colors.css b/src/frontend_celery/webapp/static/css/colors.css
@@ -210,4 +210,13 @@ input.invalid {
 .classification-gradient {
     background: linear-gradient(90deg, rgba(255,255,255,0) 0%, rgba(255,255,255,0.499019676229867) 20%, rgba(255,255,255,0.633473457742472) 40%, rgba(255,255,255,1) 100%);
     background-color: currentColor;
+}
+
+
+
+
+.white_border {
+    stroke: #d4d4d4;
+    stroke-width: 0.5px;
+    stroke-linejoin: round;
 }
diff --git a/src/frontend_celery/webapp/static/css/styles.css b/src/frontend_celery/webapp/static/css/styles.css
@@ -437,4 +437,8 @@ footer {
 .class_label .the_c {
     position: absolute; 
     z-index:1;
+}
+.classM_text {
+    white-space: pre;
+    font-size: 7px;
 }