Skip to content

Commit

Permalink
Merge pull request #19 from TranslatorSRI/gscore
Browse files: browse the repository at this point in the history
Updating g-score to use product method
  • Loading branch information
maximusunc authored Feb 16, 2024
2 parents bb04828 + 08ca8ff commit a5017cb
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 35 deletions.
1 change: 1 addition & 0 deletions app/clinical_evidence/compute_clinical_evidence.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Clinical Evidence Scoring."""

import json
import logging
import numpy as np
Expand Down
19 changes: 10 additions & 9 deletions app/clinical_evidence/merge.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Script for combining multiple KGX node and edge files."""

import glob
import json
import jsonlines
Expand Down Expand Up @@ -63,9 +64,9 @@
}
if "association" in edge:
# from ehr
save_edge[
"predicate"
] = f"biolink:{edge['association']['predicate']}"
save_edge["predicate"] = (
f"biolink:{edge['association']['predicate']}"
)
for attribute in edge["association"]["edge_attributes"]:
if (
attribute["attribute_type_id"]
Expand All @@ -91,16 +92,16 @@
== "biolink:log_odds_ratio_95_ci"
and type(sub_attribute["value"]) != str
):
save_edge[
"log_odds_ratio_95_ci"
] = sub_attribute["value"]
save_edge["log_odds_ratio_95_ci"] = (
sub_attribute["value"]
)
if (
sub_attribute["attribute_type_id"]
== "biolink:total_sample_size"
):
save_edge[
"total_sample_size"
] = sub_attribute["value"]
save_edge["total_sample_size"] = (
sub_attribute["value"]
)

elif "biolink:supporting_data_source" in edge:
# from icees
Expand Down
1 change: 1 addition & 0 deletions app/clinical_evidence/normalize.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Simple script for normalizing kgx node and edge files."""

import httpx
import jsonlines

Expand Down
24 changes: 15 additions & 9 deletions app/novelty/compute_novelty.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ def novelty_score(fda_status, recency, similarity):
score = score * 0.85
else:
score = 0

return score


Expand Down Expand Up @@ -420,9 +420,11 @@ async def compute_novelty(message, logger):
try:
similarity_map = await molecular_sim(known, unknown, message)
df["similarity"] = df.apply(
lambda row: similarity_map[row["drug"]][0][1]
if row["drug"] in similarity_map.keys()
else np.nan,
lambda row: (
similarity_map[row["drug"]][0][1]
if row["drug"] in similarity_map.keys()
else np.nan
),
axis=1,
)
except Exception as e:
Expand All @@ -433,11 +435,15 @@ async def compute_novelty(message, logger):
# Step 3:
# calculating the recency
df["recency"] = df.apply(
lambda row: recency_function_exp(
row["number_of_publ"], row["age_oldest_pub"], 100, 50
)
if not (np.isnan(row["number_of_publ"]) or np.isnan(row["age_oldest_pub"]))
else np.nan,
lambda row: (
recency_function_exp(
row["number_of_publ"], row["age_oldest_pub"], 100, 50
)
if not (
np.isnan(row["number_of_publ"]) or np.isnan(row["age_oldest_pub"])
)
else np.nan
),
axis=1,
)
#
Expand Down
25 changes: 8 additions & 17 deletions app/ordering_components.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Compute scores for each result in the given message."""

import redis
from tqdm import tqdm
import traceback
Expand All @@ -18,26 +19,16 @@

def get_confidence(result, message, logger):
"""
This function iterates through the results from multiple ARAs,
If only a single score is non-zero the result is thresholded to be in [0,1-eps]
If a result has non-zero scores from multiple ARAs,
then all the scores are added together and thresholded to be in [0,1]
eps is set to 0.001
This function iterates through the answers from multiple ARAs,
It multiplies values of (1- score(ara[i])) for each ara
Finally this product value is subtracted from 1
"""
score_sum = 0.0
non_zero_count = 0
eps = 0.001
score_product = 1
for analysis in result.get("analyses") or []:
if analysis.get("score") is not None:
score_sum += analysis["score"]
if analysis["score"] > 0:
non_zero_count += 1
if non_zero_count == 1 and score_sum > 1 - eps:
score_sum = 1 - eps
elif non_zero_count > 1 and score_sum > 1:
score_sum = 1
return score_sum
score_product = score_product * (1 - analysis["score"])
confidence_score = 1 - score_product
return confidence_score


def get_clinical_evidence(result, message, logger, db_conn):
Expand Down
1 change: 1 addition & 0 deletions tests/clinical_response.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Mock Redis."""

import fakeredis
import json

Expand Down
1 change: 1 addition & 0 deletions tests/test_clinical_evidence.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Test Clinical Evidence function."""

import logging

from app.clinical_evidence.compute_clinical_evidence import compute_clinical_evidence
Expand Down

0 comments on commit a5017cb

Please sign in to comment.