Merge pull request #19 from TranslatorSRI/gscore

Updating g-score to use product method
TranslatorSRI · Feb 16, 2024 · a5017cb
2 parents bb04828 + 08ca8ff
commit a5017cb
Show file tree

Hide file tree

Showing 7 changed files with 37 additions and 35 deletions.
diff --git a/app/clinical_evidence/compute_clinical_evidence.py b/app/clinical_evidence/compute_clinical_evidence.py
@@ -1,4 +1,5 @@
 """Clinical Evidence Scoring."""
+
 import json
 import logging
 import numpy as np

diff --git a/app/clinical_evidence/merge.py b/app/clinical_evidence/merge.py
@@ -1,4 +1,5 @@
 """Script for combining multiple KGX node and edge files."""
+
 import glob
 import json
 import jsonlines
@@ -63,9 +64,9 @@
                         }
                         if "association" in edge:
                             # from ehr
-                            save_edge[
-                                "predicate"
-                            ] = f"biolink:{edge['association']['predicate']}"
+                            save_edge["predicate"] = (
+                                f"biolink:{edge['association']['predicate']}"
+                            )
                             for attribute in edge["association"]["edge_attributes"]:
                                 if (
                                     attribute["attribute_type_id"]
@@ -91,16 +92,16 @@
                                             == "biolink:log_odds_ratio_95_ci"
                                             and type(sub_attribute["value"]) != str
                                         ):
-                                            save_edge[
-                                                "log_odds_ratio_95_ci"
-                                            ] = sub_attribute["value"]
+                                            save_edge["log_odds_ratio_95_ci"] = (
+                                                sub_attribute["value"]
+                                            )
                                         if (
                                             sub_attribute["attribute_type_id"]
                                             == "biolink:total_sample_size"
                                         ):
-                                            save_edge[
-                                                "total_sample_size"
-                                            ] = sub_attribute["value"]
+                                            save_edge["total_sample_size"] = (
+                                                sub_attribute["value"]
+                                            )
 
                         elif "biolink:supporting_data_source" in edge:
                             # from icees

diff --git a/app/clinical_evidence/normalize.py b/app/clinical_evidence/normalize.py
@@ -1,4 +1,5 @@
 """Simple script for normalizing kgx node and edge files."""
+
 import httpx
 import jsonlines
 

diff --git a/app/novelty/compute_novelty.py b/app/novelty/compute_novelty.py
@@ -374,7 +374,7 @@ def novelty_score(fda_status, recency, similarity):
                 score = score * 0.85
     else:
         score = 0
-     
+
     return score
 
 
@@ -420,9 +420,11 @@ async def compute_novelty(message, logger):
         try:
             similarity_map = await molecular_sim(known, unknown, message)
             df["similarity"] = df.apply(
-                lambda row: similarity_map[row["drug"]][0][1]
-                if row["drug"] in similarity_map.keys()
-                else np.nan,
+                lambda row: (
+                    similarity_map[row["drug"]][0][1]
+                    if row["drug"] in similarity_map.keys()
+                    else np.nan
+                ),
                 axis=1,
             )
         except Exception as e:
@@ -433,11 +435,15 @@ async def compute_novelty(message, logger):
         # Step 3:
         # calculating the recency
         df["recency"] = df.apply(
-            lambda row: recency_function_exp(
-                row["number_of_publ"], row["age_oldest_pub"], 100, 50
-            )
-            if not (np.isnan(row["number_of_publ"]) or np.isnan(row["age_oldest_pub"]))
-            else np.nan,
+            lambda row: (
+                recency_function_exp(
+                    row["number_of_publ"], row["age_oldest_pub"], 100, 50
+                )
+                if not (
+                    np.isnan(row["number_of_publ"]) or np.isnan(row["age_oldest_pub"])
+                )
+                else np.nan
+            ),
             axis=1,
         )
         #

diff --git a/app/ordering_components.py b/app/ordering_components.py
@@ -1,4 +1,5 @@
 """Compute scores for each result in the given message."""
+
 import redis
 from tqdm import tqdm
 import traceback
@@ -18,26 +19,16 @@
 
 def get_confidence(result, message, logger):
     """
-    This function iterates through the results from multiple ARAs,
-    If only a single score is non-zero the result is thresholded to be in [0,1-eps]
-    If a result has non-zero scores from multiple ARAs,
-    then all the scores are added together and thresholded to be in [0,1]
-
-    eps is set to 0.001
+    This function iterates through the answers from multiple ARAs and
+    multiplies the values of (1 - score(ara[i])) for each ARA with a score.
+    Finally, this product is subtracted from 1 to give the confidence score.
     """
-    score_sum = 0.0
-    non_zero_count = 0
-    eps = 0.001
+    score_product = 1
     for analysis in result.get("analyses") or []:
         if analysis.get("score") is not None:
-            score_sum += analysis["score"]
-            if analysis["score"] > 0:
-                non_zero_count += 1
-    if non_zero_count == 1 and score_sum > 1 - eps:
-        score_sum = 1 - eps
-    elif non_zero_count > 1 and score_sum > 1:
-        score_sum = 1
-    return score_sum
+            score_product = score_product * (1 - analysis["score"])
+    confidence_score = 1 - score_product
+    return confidence_score
 
 
 def get_clinical_evidence(result, message, logger, db_conn):

diff --git a/tests/clinical_response.py b/tests/clinical_response.py
@@ -1,4 +1,5 @@
 """Mock Redis."""
+
 import fakeredis
 import json
 

diff --git a/tests/test_clinical_evidence.py b/tests/test_clinical_evidence.py
@@ -1,4 +1,5 @@
 """Test Clinical Evidence function."""
+
 import logging
 
 from app.clinical_evidence.compute_clinical_evidence import compute_clinical_evidence