Skip to content

Commit

Permalink
changelog update & minor improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
MarvinDo committed Jun 17, 2024
1 parent 85471c9 commit 739fae4
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 113 deletions.
9 changes: 7 additions & 2 deletions src/annotation_service/annotation_jobs/heredicare_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,17 @@ def save_to_db(self, info, variant_id, conn):
err_msg += str(message)
conn.delete_external_id(vid, heredicare_vid_annotation_type_id, variant_id)
conn.delete_unknown_heredicare_annotations()
else:
# we need to check the length of the heredicare variant
# if it is 0 the variant is unknown to heredicare
# this might happen when a variant was just submitted to heredicare and it is not processed yet. Then, the vid is already known by heredivar but heredicare doesnt have information about this variant
# In this case we simply skip the heredicare annotation
# It is however good have the vid in heredivar right after insert because then we do not need a reimport after every heredicare insert
elif len(heredicare_variant) > 0:
#print(heredicare_variant)
n_fam = heredicare_variant["N_FAM"]
n_pat = heredicare_variant["N_PAT"]
consensus_class = heredicare_variant["PATH_TF"] if heredicare_variant["PATH_TF"] != "-1" else None
comment = heredicare_variant.get("VUSTF_21", heredicare_variant["VUSTF_15"])
comment = heredicare_variant.get("VUSTF_21", heredicare_variant["VUSTF_15"]) # use vustf21, but if it is missing fallback to vustf15 - fallback can be removed later once the production heredicare api has the vustf21 field
comment = comment.strip() if comment is not None else None
classification_date = heredicare_variant["VUSTF_DATUM"] if heredicare_variant["VUSTF_DATUM"] != '' else None
lr_cooc = heredicare_variant["LR_COOC"]
Expand Down
5 changes: 4 additions & 1 deletion src/common/db_IO.py
Original file line number Diff line number Diff line change
Expand Up @@ -3512,7 +3512,7 @@ def update_publish_heredicare_queue_celery_task_id(self, publish_heredicare_queu
self.cursor.execute(command, (celery_task_id, publish_heredicare_queue_id))
self.conn.commit()

def update_publish_heredicare_queue_status(self, publish_heredicare_queue_id, status, message, finished_at = None, submission_id = None, consensus_classification_id = None):
def update_publish_heredicare_queue_status(self, publish_heredicare_queue_id, status, message, finished_at = None, submission_id = None, consensus_classification_id = None, vid = None):
command = "UPDATE publish_heredicare_queue SET status = %s, message = %s"
actual_information = (status, message)
if finished_at is not None:
Expand All @@ -3524,6 +3524,9 @@ def update_publish_heredicare_queue_status(self, publish_heredicare_queue_id, st
if consensus_classification_id is not None:
command += ", consensus_classification_id = %s"
actual_information += (consensus_classification_id, )
if vid is not None:
command += ", vid = %s"
actual_information += (vid, )
command += " WHERE id = %s"
actual_information += (publish_heredicare_queue_id, )
self.cursor.execute(command, actual_information)
Expand Down
91 changes: 30 additions & 61 deletions src/common/heredicare_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,28 +136,12 @@ def introspect_token(self, project_type):
def get_vid_list(self):
status = "success"
message = ""
vids = []
project_type = "download"

status, message = self.introspect_token(project_type) # checks validity of the token and updates it if neccessary
if status == 'error':
return vids, status, message

url = self.get_url(project_type, "vid_list")
bearer, timestamp = self.get_saved_bearer(project_type)
header = {"Authorization": "Bearer " + bearer}

resp = requests.get(url, headers=header)
if resp.status_code == 401: # unauthorized
message = "ERROR: HerediCare API get vid list endpoint returned an HTTP 401, unauthorized error. Attempting retry."
status = "retry"
elif resp.status_code != 200: # any other kind of error
message = "ERROR: HerediCare API get vid list endpoint returned an HTTP " + str(resp.status_code) + " error: " + + self.extract_error_message(resp.text)
status = "error"
else: # request was successful
vids = resp.json()['items']
status, message, all_vids = self.iterate_pagination(url, project_type)

return vids, status, message
return all_vids, status, message

def filter_vid_list(self, vids, min_date):
all_vids = []
Expand Down Expand Up @@ -189,32 +173,7 @@ def get_variant(self, vid):
project_type = "download"

url = self.get_url(project_type, "variant", path_args = [str(vid)])
has_next = True
all_items = []
while has_next:
status, message = self.introspect_token(project_type) # checks validity of the token and updates it if neccessary
if status == 'error':
return [], status, message
bearer, timestamp = self.get_saved_bearer(project_type)
header = {"Authorization": "Bearer " + bearer}
resp = requests.get(url, headers=header)
if resp.status_code == 401: # unauthorized
message = "ERROR: HerediCare API get variant info endpoint returned an HTTP 401, unauthorized error. Attempting retry."
status = "retry"
break
elif resp.status_code != 200: # any other kind of error
message = "ERROR: HerediCare API get variant info endpoint returned an HTTP " + str(resp.status_code) + " error: " + + self.extract_error_message(resp.text)
status = "error"
break
else: # request was successful
resp = resp.json()
new_items = resp["items"]
all_items.extend(new_items)
has_next = resp["hasMore"]
for link in resp["links"]:
if link["rel"] == "next":
url = link["href"]

status, message, all_items = self.iterate_pagination(url, project_type)
variant = self.convert_heredicare_variant_raw(all_items)

return variant, status, message
Expand All @@ -228,20 +187,16 @@ def convert_heredicare_variant_raw(self, variant_items):
variant[item_name] = item_value
return variant


def get_post_regexes(self):
def iterate_pagination(self, start_url, project_type, items_key = "items"):
status = "success"
message = ""
all_items = []
project_type = "upload"

url = self.get_url(project_type, "post_info") # first url
result = []
url = start_url
has_next = True

while has_next:
status, message = self.introspect_token(project_type) # checks validity of the token and updates it if neccessary
if status == 'error':
return [], status, message
break
bearer, timestamp = self.get_saved_bearer(project_type)
header = {"Authorization": "Bearer " + bearer}
resp = requests.get(url, headers=header)
Expand All @@ -255,12 +210,30 @@ def get_post_regexes(self):
break
else: # request was successful
resp = resp.json()
new_items = resp["items"]
all_items.extend(new_items)
new_items = resp[items_key]
result.extend(new_items)
has_next = resp["hasMore"]
for link in resp["links"]:
if link["rel"] == "next":
url = link["href"]
if has_next:
found_next = False
for link in resp["links"]:
if link["rel"] == "next":
url = link["href"]
found_next = True
if not found_next:
message = "ERROR: response 'hasMore' attribute is true but no 'next' url found."
status = "error"
has_next = False # prevent infinite loop in case the next url is missing for some reason
break
return status, message, result

def get_post_regexes(self):
status = "success"
message = ""
all_items = []
project_type = "upload"

url = self.get_url(project_type, "post_info") # first url
status, message, all_items = self.iterate_pagination(url, project_type)

#with open("/mnt/storage2/users/ahdoebm1/HerediVar/src/common/heredicare_interface_debug/post_fields.json", "w") as f:
# functions.prettyprint_json(all_items, func = f.write)
Expand All @@ -269,10 +242,6 @@ def get_post_regexes(self):
for item in all_items:
item_name = item["item_name"]
regex = item["item_regex_format"]
#if item_name == "PATH_TF":
# regex = r"^(-1|[1-4]|1[1-5]|2[01]|32|34)$"
#elif item_name == "VUSTF_DATUM":
# regex = r"^(0[1-9]|[12]\d|3[01])\.(0[1-9]|1[012])\.20\d\d$"
result[item_name] = regex

return result, status, message
Expand Down
7 changes: 4 additions & 3 deletions src/frontend_celery/webapp/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def start_variant_import(user_id, user_roles, conn: Connection): # starts the ce
@celery.task(bind=True, retry_backoff=5, max_retries=3, time_limit=600)
def heredicare_variant_import(self, user_id, user_roles, min_date, import_queue_id):
"""Background task for fetching variants from HerediCare"""
#from frontend_celery.webapp.utils.variant_importer import import_variants
self.update_state(state='PROGRESS')

try:
Expand Down Expand Up @@ -96,6 +95,7 @@ def import_variants(conn: Connection, user_id, user_roles, min_date, import_queu

heredicare_interface = Heredicare()
vids_heredicare, status, message = heredicare_interface.get_vid_list()
print(len(vids_heredicare))
if status == "success":
vids_heredicare, all_vids_heredicare, status, message = heredicare_interface.filter_vid_list(vids_heredicare, min_date)
#all_vids_heredicare, status, message = heredicare_interface.get_vid_list()
Expand All @@ -109,6 +109,7 @@ def import_variants(conn: Connection, user_id, user_roles, min_date, import_queu

print("Total HerediCare: " + str(len(all_vids_heredicare)))
print("Filtered HerediCare: " + str(len(vids_heredicare)))
print("Total vids HerediVar: " + str(len(vids_heredivar)))

print("Intersection of filtered heredicare and heredivar vids: " + str(len(intersection)))
print("Deleted vids (unknown to heredicare but known to heredivar): " + str(len(heredivar_exclusive_vids)))
Expand Down Expand Up @@ -922,8 +923,8 @@ def abort_annotation_task(annotation_queue_id, celery_task_id, conn:Connection):
if annotation_queue_id is not None:
celery.control.revoke(celery_task_id, terminate = True)

#row_id, status, error_msg
conn.update_annotation_queue(annotation_queue_id, "aborted", "")
#row_id, status, error_msg
conn.update_annotation_queue(annotation_queue_id, "aborted", "")


def purge_celery():
Expand Down
42 changes: 1 addition & 41 deletions src/frontend_celery/webapp/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ <h4>Overview</h4>
<h4>Changelog</h4>


<div class="underline">v 1.11.1 ()</div>
<div class="underline">v 1.12 (17.06.2024)</div>
<div class="btn-pvs">!! Scores from likelihood ratio tests are currently not available from HerediCaRe.</div>
<div class="btn-pvs">!! HerediCaRe upload is currently disabled.</div>
<div>
Expand Down Expand Up @@ -124,46 +124,6 @@ <h4>Changelog</h4>
</div>


<div class="underline">v 1.11 (31.05.2024)</div>
<div class="btn-pvs">!! Scores from likelihood ratio tests are currently not available from HerediCaRe.</div>
<div class="btn-pvs">!! HerediCaRe upload is currently disabled.</div>
<div>
General changes:
<ul>
<li>Update downloads page. Added full data download and database versioning. A new HerediVar full VCF version is calculated every month.</li>
<li>Added download archive for previous full VCF downloads</li>
<li>
Added HerediCaRe upload and improved CliVar upload:
<ul>
<li>Single variants from variant details page</li>
<li>Multiple variants from user variant lists page</li>
<li>All variants which recieved an update since the last upload from the admin dashboard</li>
</ul>
</li>
<li>Updated gene specific PTEN ACMG scheme to v3.1.0</li>
<li>The consequences in downloaded vcf files and the variant details page does not contain the pfam protein domain anymore. Instead it was moved to a transcript specific variant annotation field (pfam_domains= in vcf downloads).</li>
<li>Moved consequence calculation from VEP to VcfAnnotateConsequence tool from the ngs-bits library <a href="https://github.com/imgag/ngs-bits/tree/master">GitHub</a>. VcfAnnotateConsequence improves the quality of consequences.</li>
<li>Improved the comment that is uploaded to external databases. It now also includes unselected criteria and selected literature.</li>
<li>4M classifications are now uploaded to ClinVar without scheme. Reason: 4M classification is not specified in ACMG spec.</li>
<li>Enabled consensus classifications without scheme and their upload to ClinVar.</li>
<li>Implemented variant upload to HerediCaRe.</li>
<li>Increased security by hardening Keycloak Database and removing popovers with enabled HTML content</li>
<li>Added class R (established risk allele) for selection in classifications without scheme</li>
</ul>
Bugfixes:
<ul>
<li>The variant type search would only show small variants even though both, small and structural variants are selected after querying the second search within a user defined list. This issue was resolved.</li>
<li>Previously, the variant search would result in an error when searching for genomic ranges and ticking at least one variant type.</li>
<li>Improved genomic range search and its documentation</li>
<li>The variant list search was not working properly. This issue was resolved.</li>
<li>When a variant did not have an annotation, fetching the annotation status would result in an error.</li>
<li>Fixed a bug where the assay quantification was inserted to HerediClassify as a percentage instead of a decimal number.</li>
<li>Fixed a bug that caused a variant to not be found when the 'chr' prefix was omitted in the variant string search</li>
<li>Fixed a bug that caused variants with multiple genes to not automatically select the best classification scheme</li>
<li>Criterium PM5 was previously only moderate strength although the specifications suggest only supporting strength for this criterium in ATM gene specific ACMG scheme. This issue was resolved.</li>
</ul>
</div>


<a class="btn btn-outline-primary" href="{{url_for('main.changelog')}}">previous changelog</a>
</div>
Expand Down
42 changes: 42 additions & 0 deletions src/frontend_celery/webapp/templates/main/changelog.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,48 @@
<h1 class="bst"> {% block title %} Changelog {% endblock %} </h1>



<div class="underline">v 1.11 (31.05.2024)</div>
<div class="btn-pvs">!! Scores from likelihood ratio tests are currently not available from HerediCaRe.</div>
<div class="btn-pvs">!! HerediCaRe upload is currently disabled.</div>
<div>
General changes:
<ul>
<li>Update downloads page. Added full data download and database versioning. A new HerediVar full VCF version is calculated every month.</li>
<li>Added download archive for previous full VCF downloads</li>
<li>
Added HerediCaRe upload and improved CliVar upload:
<ul>
<li>Single variants from variant details page</li>
<li>Multiple variants from user variant lists page</li>
<li>All variants which recieved an update since the last upload from the admin dashboard</li>
</ul>
</li>
<li>Updated gene specific PTEN ACMG scheme to v3.1.0</li>
<li>The consequences in downloaded vcf files and the variant details page does not contain the pfam protein domain anymore. Instead it was moved to a transcript specific variant annotation field (pfam_domains= in vcf downloads).</li>
<li>Moved consequence calculation from VEP to VcfAnnotateConsequence tool from the ngs-bits library <a href="https://github.com/imgag/ngs-bits/tree/master">GitHub</a>. VcfAnnotateConsequence improves the quality of consequences.</li>
<li>Improved the comment that is uploaded to external databases. It now also includes unselected criteria and selected literature.</li>
<li>4M classifications are now uploaded to ClinVar without scheme. Reason: 4M classification is not specified in ACMG spec.</li>
<li>Enabled consensus classifications without scheme and their upload to ClinVar.</li>
<li>Implemented variant upload to HerediCaRe.</li>
<li>Increased security by hardening Keycloak Database and removing popovers with enabled HTML content</li>
<li>Added class R (established risk allele) for selection in classifications without scheme</li>
</ul>
Bugfixes:
<ul>
<li>The variant type search would only show small variants even though both, small and structural variants are selected after querying the second search within a user defined list. This issue was resolved.</li>
<li>Previously, the variant search would result in an error when searching for genomic ranges and ticking at least one variant type.</li>
<li>Improved genomic range search and its documentation</li>
<li>The variant list search was not working properly. This issue was resolved.</li>
<li>When a variant did not have an annotation, fetching the annotation status would result in an error.</li>
<li>Fixed a bug where the assay quantification was inserted to HerediClassify as a percentage instead of a decimal number.</li>
<li>Fixed a bug that caused a variant to not be found when the 'chr' prefix was omitted in the variant string search</li>
<li>Fixed a bug that caused variants with multiple genes to not automatically select the best classification scheme</li>
<li>Criterium PM5 was previously only moderate strength although the specifications suggest only supporting strength for this criterium in ATM gene specific ACMG scheme. This issue was resolved.</li>
</ul>
</div>


<div class="underline">v 1.10.2 (15.04.2024)</div>
<div>
Bugfixes:
Expand Down
2 changes: 1 addition & 1 deletion src/frontend_celery/webapp/templates/upload/publish.html
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ <h4>ClinVar</h4>
<div class="bst">
<h4>HerediCaRe</h4>
<div class="form-check bsl">
<input class="form-check-input" type="checkbox" value="on" name="publish_heredicare" id="publish_heredicare_check" data-bs-toggle="collapse" href="#publish_heredicare_collapse" role="button" aria-expanded="false" aria-controls="publish_heredicare_collapse" autocomplete="off">
<input class="form-check-input" type="checkbox" value="on" name="publish_heredicare" id="publish_heredicare_check" data-bs-toggle="collapse" href="#publish_heredicare_collapse" role="button" aria-expanded="false" aria-controls="publish_heredicare_collapse" autocomplete="off" disabled>
<label class="form-check-label" for="publish_heredicare_check">Publish to HerediCaRe</label>
</div>

Expand Down
Loading

0 comments on commit 739fae4

Please sign in to comment.