reworked t8df_to_task_map,
* reworked t8df_to_task_map
* added more tests
* switched over to the key provider as in the latest prepare branch from MELLODDY Tuner
AnsgarSchuffenhauer committed Feb 22, 2023
1 parent 9926350 commit 760616b
Showing 11 changed files with 316 additions and 144 deletions.
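For orientation, the pieces named in the commit message can be combined into the following usage sketch. It is inferred from the updated test fixtures in tests/test_single_predictor.py shown further down, not taken from this commit's own documentation, and the file paths used here are illustrative placeholders.

```python
import pandas as pd

from melloddy_tuner.utils.single_row_prep2pred import SingleRowPreparator, KeyProviderFromJsonFile
from melloddy_predictor.predictor_single import PredictorSingle, t8df_to_task_map

# Key material is now supplied through a key provider instead of the old `secret=` argument.
# "config/example_key.json" and "config/example_parameters.json" are placeholder paths.
kprovider = KeyProviderFromJsonFile("config/example_key.json")
srprep = SingleRowPreparator(key_provider=kprovider, params="config/example_parameters.json")

# Build a task map for a classification model from its T8c task metadata (placeholder path).
T8c = pd.read_csv("models/example_cls_model/T8c.csv")
class_task_map = t8df_to_task_map(T8c, task_type="classification", threshold_multi_ix=True)

predictor = PredictorSingle(
    model="models/example_cls_model/model.pth",
    conf="models/example_cls_model/hyperparameters.json",
    class_task_map=class_task_map,
)

x = srprep.process_smiles("c1ccccc1")                   # descriptor tensor for one molecule
y = predictor.predict_decorated_series_from_tensor(x)   # pd.Series labelled by the task map
```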
345 changes: 212 additions & 133 deletions melloddy_predictor/predictor_single.py

Large diffs are not rendered by default.
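The reworked t8df_to_task_map itself sits in the collapsed diff above. The rough sketch below only mirrors the output shape that the new tests in tests/test_single_predictor.py expect for the classification case; the T8c column names input_assay_id and threshold and the label format are assumptions, and this is not the shipped implementation.

```python
import pandas as pd

def t8df_to_task_map_sketch(t8df: pd.DataFrame, task_type: str = "classification",
                            threshold_multi_ix: bool = False) -> pd.Series:
    """Approximates the expected return value of the reworked helper (classification case only)."""
    id_col = f"cont_{task_type}_task_id"
    labels = "assay_" + t8df["input_assay_id"].astype(str) + "_class"      # assumed column name
    task_ids = t8df[id_col].astype("int64").to_numpy()
    if threshold_multi_ix:
        # MultiIndex of (task label, activity threshold), e.g. ("assay_924_class", 6.5) -> 1
        index = pd.MultiIndex.from_arrays(
            [labels, t8df["threshold"].astype(float)],                     # assumed column name
            names=["task_labels", "threshold"],
        )
    else:
        # Flat labels such as "assay_924_class_6.50" -> 1
        index = pd.Index(labels + "_" + t8df["threshold"].map("{:.2f}".format),
                         name="task_labels")
    return pd.Series(task_ids, index=index, name=id_col)
```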

Binary file added tests/begin_to_end_test/sc_output/trunk_cls.npy
Binary file not shown.
Binary file added tests/begin_to_end_test/sc_output/trunk_hyb.npy
Binary file not shown.
Binary file added tests/begin_to_end_test/sc_output/trunk_reg.npy
Binary file not shown.
115 changes: 104 additions & 11 deletions tests/test_single_predictor.py
@@ -7,12 +7,12 @@
import pytest


from melloddy_tuner.utils.single_row_prep2pred import SingleRowPreparator
from melloddy_predictor.predictor_single import PredictorSingle
from melloddy_tuner.utils.single_row_prep2pred import SingleRowPreparator, KeyProviderFromJsonFile
from melloddy_predictor.predictor_single import PredictorSingle, ScModelType, t8df_to_task_map



from pandas._testing import assert_frame_equal
from pandas._testing import assert_frame_equal, assert_series_equal
from scipy.sparse import save_npz, load_npz

TEST_FILE_DIR = os.path.dirname(__file__)
@@ -33,8 +33,12 @@ def ref_row_mapping_table():
return pd.read_csv(os.path.join(TEST_FILE_DIR,"begin_to_end_test/mt_output/mapping_table.csv"))

@pytest.fixture
def srprep():
return SingleRowPreparator(secret = ENCRYPTION_KEY, params = PREPARATION_PARAMETER)
def kprovider():
return KeyProviderFromJsonFile(ENCRYPTION_KEY)

@pytest.fixture
def srprep(kprovider):
return SingleRowPreparator(key_provider = kprovider, params = PREPARATION_PARAMETER)

@pytest.fixture
def ref_output_ydata():
@@ -45,20 +49,37 @@ def ref_output_ydata():
"hyb": np.load(os.path.join(TEST_FILE_DIR,"begin_to_end_test/sc_output/hyb_model-regr.npy"))}
return {"class" : y_refs_class, "regr" : y_refs_regr}

@pytest.fixture
def ref_output_trunk():
return {mtype : np.load(os.path.join(TEST_FILE_DIR,"begin_to_end_test/sc_output/trunk_{}.npy".format(mtype))) for mtype in ["cls","clsaux","reg","hyb"]}


@pytest.fixture
def class_task_map():
return {'class_570':570,'class_581':581,'class_2276':2276}
return {"class_570":570,"class_581":581,"class_2276":2276}

@pytest.fixture
def regr_task_map():
return {"regr_633":633,"regr_740":740,"regr_2":2}

@pytest.fixture
def ref_name_arrays(class_task_map,regr_task_map):
return {"cls" : pd.Series(class_task_map).index.values,\
"clsaux" : pd.Series(class_task_map).index.values,\
"reg" : pd.Series(regr_task_map).index.values,\
"hyb" : np.concatenate([pd.Series(class_task_map).index.values,pd.Series(regr_task_map).index.values])
}

@pytest.fixture
def ref_model_types():
return {"cls": ScModelType.classification, "clsaux": ScModelType.classification, "reg": ScModelType.regression, "hyb": ScModelType.hybrid}

@pytest.fixture
def test_preds(class_task_map, regr_task_map):
return {'cls' : PredictorSingle(model= os.path.join(MODELS_PATH,"example_cls_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_cls_model/hyperparameters.json"), class_task_map = class_task_map),\
'clsaux' : PredictorSingle(model= os.path.join(MODELS_PATH,"example_clsaux_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_clsaux_model/hyperparameters.json"), class_task_map = class_task_map),\
'reg' : PredictorSingle(model= os.path.join(MODELS_PATH,"example_reg_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_reg_model/hyperparameters.json"), regr_task_map = regr_task_map),\
'hyb' : PredictorSingle(model= os.path.join(MODELS_PATH,"example_hyb_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_hyb_model/hyperparameters.json"), class_task_map = class_task_map, regr_task_map = regr_task_map)
return {"cls" : PredictorSingle(model= os.path.join(MODELS_PATH,"example_cls_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_cls_model/hyperparameters.json"), class_task_map = class_task_map),\
"clsaux" : PredictorSingle(model= os.path.join(MODELS_PATH,"example_clsaux_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_clsaux_model/hyperparameters.json"), class_task_map = class_task_map),\
"reg" : PredictorSingle(model= os.path.join(MODELS_PATH,"example_reg_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_reg_model/hyperparameters.json"), regr_task_map = regr_task_map),\
"hyb" : PredictorSingle(model= os.path.join(MODELS_PATH,"example_hyb_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_hyb_model/hyperparameters.json"), class_task_map = class_task_map, regr_task_map = regr_task_map)
}

@pytest.fixture
@@ -73,6 +94,28 @@ def input_failing_smiles_df():
def ix_rename_map(ref_row_mapping_table):
return ref_row_mapping_table.set_index("cont_descriptor_vector_id")["input_compound_id"]

@pytest.fixture
def get_benzene_x_csr(srprep):
return srprep.descriptor_calc.calculate_single_csr('c1ccccc1')

@pytest.fixture
def get_benzene_y_ref():
return {"cls":pd.Series({"class_570" : 0.516933, "class_581" : 0.433307, "class_2276" : 0.565609},dtype="float32"),
"clsaux": pd.Series({"class_570" : 0.412029, "class_581" : 0.489868, "class_2276" : 0.504993},dtype="float32"),
"reg": pd.Series({"regr_633" : 5.097863, "regr_740" : 5.743073, "regr_2" : 7.306094},dtype="float64"),
"hyb": pd.Series({"class_570" : 0.821179, "class_581" : 0.209964, "class_2276" : 0.560037, "regr_633" : 5.118069, "regr_740" : 5.721944, "regr_2" : 7.383655},dtype="float64")}

@pytest.fixture
def cls_t8df_head():
int_cols = ['cont_classification_task_id', 'classification_task_id', 'num_total_actives', 'num_fold_min_actives', 'num_total_inactives', 'num_fold_min_inactives', 'n_tasks', 'retained_tasks']
T8c = pd.read_csv(os.path.join(MODELS_PATH,"example_cls_model/T8c.csv"))
T8c[int_cols] = T8c[int_cols].astype("Int64")
return T8c[T8c["cont_classification_task_id"] <10]

@pytest.fixture
def test_pred_multi_ix(cls_t8df_head):
multi_ix_task_map = t8df_to_task_map(cls_t8df_head,task_type = "classification",threshold_multi_ix=True)
return PredictorSingle(model= os.path.join(MODELS_PATH,"example_cls_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_cls_model/hyperparameters.json"), class_task_map = multi_ix_task_map)

def test_dense_tasks_prediction(srprep, input_smiles_df, ref_output_xdata, ref_output_ydata, ix_rename_map, test_preds):
#generate x-data
@@ -127,7 +170,7 @@ def test_named_task_predictions(srprep, input_smiles_df, test_preds, class_task_
y_refs_select_class_df = pd.DataFrame(y_refs_selected_class_tasks, columns = list(class_task_map.keys())).rename(index=ix_rename_map)
y_refs_select_regr_df = pd.DataFrame(y_refs_selected_regr_tasks, columns = list(regr_task_map.keys())).rename(index=ix_rename_map)
ref_hyb_res_slice_df_reconstructed = pd.concat([y_refs_select_class_df, y_refs_select_regr_df],axis=1)
ref_hyb_res_slice_df_reconstructed.index.names = ['input_compound_id']
ref_hyb_res_slice_df_reconstructed.index.names = ["input_compound_id"]
assert_frame_equal(test_hyb_res_slice_df.sort_index().astype("float32"), ref_hyb_res_slice_df_reconstructed.sort_index().astype("float32"))

def test_failing_predictions(srprep, input_failing_smiles_df, test_preds):
@@ -137,3 +180,53 @@ def test_failing_predictions(srprep, input_failing_smiles_df, test_preds):
x = srprep.process_smiles(smi)
y = test_preds["hyb"].predict_decorated_series_from_tensor(x)
y_res_slice[k] = y

def test_get_mapped_task_names(test_preds, ref_name_arrays):
for mtype, my_pred in test_preds.items():
assert (my_pred.get_mapped_task_names() == ref_name_arrays[mtype]).all()

def test_get_model_type(test_preds, ref_model_types):
for mtype, my_pred in test_preds.items():
assert my_pred.get_model_type() == ref_model_types[mtype]

def test_limit_to_type(srprep, test_preds):
x = srprep.process_smiles('c1ccccc1')
#provoke failure with invalid type
with pytest.raises(ValueError):
y = test_preds["hyb"].predict_decorated_series_from_tensor(x,limit_to_type=5)
#now test a valid type
y = test_preds["hyb"].predict_decorated_series_from_tensor(x,limit_to_type=ScModelType.regression)
y_ref = pd.Series({"regr_633": 5.118069, "regr_740" : 5.721944, "regr_2" : 7.383655})
assert_series_equal(y, y_ref)

def test_csr_predictions(get_benzene_x_csr, get_benzene_y_ref, test_preds):
for mtype, my_pred in test_preds.items():
y_test = my_pred.predict_decorated_series_from_csr(get_benzene_x_csr)
assert_series_equal(y_test, get_benzene_y_ref[mtype])

def test_trunk_output(test_preds, srprep, input_smiles_df, ref_output_trunk):
for mtype, my_pred in test_preds.items():
assert np.allclose(np.concatenate([my_pred.predict_trunk_from_tensor(srprep.process_smiles(smi)) for k,smi in input_smiles_df.set_index("input_compound_id")["smiles"].items()]),ref_output_trunk[mtype])


def test_task_map_generator(cls_t8df_head):
task_map_test1 = t8df_to_task_map(cls_t8df_head,task_type = "classification")
labels = {"assay_517_class_7.00": 0, "assay_924_class_6.50": 1, "assay_924_class_7.00": 2, "assay_924_class_7.50": 3, "assay_1160_class_6.50": 4,\
"assay_1160_class_7.00": 5, "assay_1512_class_7.50": 6, "assay_1512_class_8.00": 7, "assay_1512_class_8.50": 8, "assay_1520_class_8.00": 9}
task_map_ref1 = pd.Series(labels ,name='cont_classification_task_id',dtype="int64").rename_axis("task_labels")
assert_series_equal(task_map_test1, task_map_ref1)

task_map_test2 = t8df_to_task_map(cls_t8df_head,task_type = "classification",threshold_multi_ix=True)
labels2 = {"assay_517_class":{7.0 : 0},"assay_924_class":{6.5 : 1, 7.0 : 2, 7.5 : 3}, "assay_1160_class" : {6.5 : 4, 7.0 : 5},\
"assay_1512_class" : {7.5 : 6, 8.0 : 7, 8.5 : 8}, "assay_1520_class" :{ 8.0 : 9}}
task_map_ref2 = pd.concat({key:pd.Series(val,name='cont_classification_task_id',dtype="int64") for key, val in labels2.items()}).rename_axis(["task_labels","threshold"])
assert_series_equal(task_map_test2, task_map_ref2)

def test_multi_ix_predictions(srprep,test_pred_multi_ix):
x = srprep.process_smiles("c1ccccc1")
y_multi_ix_test = test_pred_multi_ix.predict_decorated_series_from_tensor(x)
values_multi_ix = {"assay_517_class":{7.0 : 0.531071},"assay_924_class":{6.5 : 0.583757, 7.0 : 0.542668, 7.5 : 0.474523}, "assay_1160_class" : {6.5 : 0.530777, 7.0 : 0.428757},\
"assay_1512_class" : {7.5 : 0.472368, 8.0 : 0.367206, 8.5 : 0.306637}, "assay_1520_class" :{ 8.0 : 0.499579}}
y_multi_ix_ref = pd.concat({key:pd.Series(val,dtype="float32") for key, val in values_multi_ix.items()}).rename_axis(["task_labels","threshold"])
assert_series_equal(y_multi_ix_test,y_multi_ix_ref)
