reworked t8df_to_task_map,
* reworked t8df_to_task_map
* added more tests
* switched over to the key provider as in the latest prepare branch from MELLODDY Tuner
AnsgarSchuffenhauer committed Feb 22, 2023
1 parent 9926350 commit 760616b
Showing 11 changed files with 316 additions and 144 deletions.
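For orientation, the pieces named in the commit message can be combined into the following usage sketch. It is inferred from the updated test fixtures in tests/test_single_predictor.py shown further down, not taken from this commit's own documentation, and the file paths used here are illustrative placeholders.

```python
import pandas as pd

from melloddy_tuner.utils.single_row_prep2pred import SingleRowPreparator, KeyProviderFromJsonFile
from melloddy_predictor.predictor_single import PredictorSingle, t8df_to_task_map

# Key material is now supplied through a key provider instead of the old `secret=` argument.
# "config/example_key.json" and "config/example_parameters.json" are placeholder paths.
kprovider = KeyProviderFromJsonFile("config/example_key.json")
srprep = SingleRowPreparator(key_provider=kprovider, params="config/example_parameters.json")

# Build a task map for a classification model from its T8c task metadata (placeholder path).
T8c = pd.read_csv("models/example_cls_model/T8c.csv")
class_task_map = t8df_to_task_map(T8c, task_type="classification", threshold_multi_ix=True)

predictor = PredictorSingle(
    model="models/example_cls_model/model.pth",
    conf="models/example_cls_model/hyperparameters.json",
    class_task_map=class_task_map,
)

x = srprep.process_smiles("c1ccccc1")                   # descriptor tensor for one molecule
y = predictor.predict_decorated_series_from_tensor(x)   # pd.Series labelled by the task map
```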
345 changes: 212 additions & 133 deletions melloddy_predictor/predictor_single.py

Large diffs are not rendered by default.
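The reworked t8df_to_task_map itself sits in the collapsed diff above. The rough sketch below only mirrors the output shape that the new tests in tests/test_single_predictor.py expect for the classification case; the T8c column names input_assay_id and threshold and the label format are assumptions, and this is not the shipped implementation.

```python
import pandas as pd

def t8df_to_task_map_sketch(t8df: pd.DataFrame, task_type: str = "classification",
                            threshold_multi_ix: bool = False) -> pd.Series:
    """Approximates the expected return value of the reworked helper (classification case only)."""
    id_col = f"cont_{task_type}_task_id"
    labels = "assay_" + t8df["input_assay_id"].astype(str) + "_class"      # assumed column name
    task_ids = t8df[id_col].astype("int64").to_numpy()
    if threshold_multi_ix:
        # MultiIndex of (task label, activity threshold), e.g. ("assay_924_class", 6.5) -> 1
        index = pd.MultiIndex.from_arrays(
            [labels, t8df["threshold"].astype(float)],                     # assumed column name
            names=["task_labels", "threshold"],
        )
    else:
        # Flat labels such as "assay_924_class_6.50" -> 1
        index = pd.Index(labels + "_" + t8df["threshold"].map("{:.2f}".format),
                         name="task_labels")
    return pd.Series(task_ids, index=index, name=id_col)
```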

Binary file added tests/begin_to_end_test/sc_output/trunk_cls.npy
Binary file not shown.
Binary file added tests/begin_to_end_test/sc_output/trunk_hyb.npy
Binary file not shown.
Binary file added tests/begin_to_end_test/sc_output/trunk_reg.npy
Binary file not shown.
115 changes: 104 additions & 11 deletions tests/test_single_predictor.py
@@ -7,12 +7,12 @@
import pytest


from melloddy_tuner.utils.single_row_prep2pred import SingleRowPreparator
from melloddy_predictor.predictor_single import PredictorSingle
from melloddy_tuner.utils.single_row_prep2pred import SingleRowPreparator, KeyProviderFromJsonFile
from melloddy_predictor.predictor_single import PredictorSingle, ScModelType, t8df_to_task_map



from pandas._testing import assert_frame_equal
from pandas._testing import assert_frame_equal, assert_series_equal
from scipy.sparse import save_npz, load_npz

TEST_FILE_DIR = os.path.dirname(__file__)
@@ -33,8 +33,12 @@ def ref_row_mapping_table():
return pd.read_csv(os.path.join(TEST_FILE_DIR,"begin_to_end_test/mt_output/mapping_table.csv"))

@pytest.fixture
def srprep():
return SingleRowPreparator(secret = ENCRYPTION_KEY, params = PREPARATION_PARAMETER)
def kprovider():
return KeyProviderFromJsonFile(ENCRYPTION_KEY)

@pytest.fixture
def srprep(kprovider):
return SingleRowPreparator(key_provider = kprovider, params = PREPARATION_PARAMETER)

@pytest.fixture
def ref_output_ydata():
@@ -45,20 +49,37 @@ def ref_output_ydata():
"hyb": np.load(os.path.join(TEST_FILE_DIR,"begin_to_end_test/sc_output/hyb_model-regr.npy"))}
return {"class" : y_refs_class, "regr" : y_refs_regr}

@pytest.fixture
def ref_output_trunk():
return {mtype : np.load(os.path.join(TEST_FILE_DIR,"begin_to_end_test/sc_output/trunk_{}.npy".format(mtype))) for mtype in ["cls","clsaux","reg","hyb"]}


@pytest.fixture
def class_task_map():
return {'class_570':570,'class_581':581,'class_2276':2276}
return {"class_570":570,"class_581":581,"class_2276":2276}

@pytest.fixture
def regr_task_map():
return {"regr_633":633,"regr_740":740,"regr_2":2}

@pytest.fixture
def ref_name_arrays(class_task_map,regr_task_map):
return {"cls" : pd.Series(class_task_map).index.values,\
"clsaux" : pd.Series(class_task_map).index.values,\
"reg" : pd.Series(regr_task_map).index.values,\
"hyb" : np.concatenate([pd.Series(class_task_map).index.values,pd.Series(regr_task_map).index.values])
}

@pytest.fixture
def ref_model_types():
return {"cls": ScModelType.classification, "clsaux": ScModelType.classification, "reg": ScModelType.regression, "hyb": ScModelType.hybrid}

@pytest.fixture
def test_preds(class_task_map, regr_task_map):
return {'cls' : PredictorSingle(model= os.path.join(MODELS_PATH,"example_cls_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_cls_model/hyperparameters.json"), class_task_map = class_task_map),\
'clsaux' : PredictorSingle(model= os.path.join(MODELS_PATH,"example_clsaux_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_clsaux_model/hyperparameters.json"), class_task_map = class_task_map),\
'reg' : PredictorSingle(model= os.path.join(MODELS_PATH,"example_reg_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_reg_model/hyperparameters.json"), regr_task_map = regr_task_map),\
'hyb' : PredictorSingle(model= os.path.join(MODELS_PATH,"example_hyb_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_hyb_model/hyperparameters.json"), class_task_map = class_task_map, regr_task_map = regr_task_map)
return {"cls" : PredictorSingle(model= os.path.join(MODELS_PATH,"example_cls_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_cls_model/hyperparameters.json"), class_task_map = class_task_map),\
"clsaux" : PredictorSingle(model= os.path.join(MODELS_PATH,"example_clsaux_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_clsaux_model/hyperparameters.json"), class_task_map = class_task_map),\
"reg" : PredictorSingle(model= os.path.join(MODELS_PATH,"example_reg_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_reg_model/hyperparameters.json"), regr_task_map = regr_task_map),\
"hyb" : PredictorSingle(model= os.path.join(MODELS_PATH,"example_hyb_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_hyb_model/hyperparameters.json"), class_task_map = class_task_map, regr_task_map = regr_task_map)
}

@pytest.fixture
@@ -73,6 +94,28 @@ def input_failing_smiles_df():
def ix_rename_map(ref_row_mapping_table):
return ref_row_mapping_table.set_index("cont_descriptor_vector_id")["input_compound_id"]

@pytest.fixture
def get_benzene_x_csr(srprep):
return srprep.descriptor_calc.calculate_single_csr('c1ccccc1')

@pytest.fixture
def get_benzene_y_ref():
return {"cls":pd.Series({"class_570" : 0.516933, "class_581" : 0.433307, "class_2276" : 0.565609},dtype="float32"),
"clsaux": pd.Series({"class_570" : 0.412029, "class_581" : 0.489868, "class_2276" : 0.504993},dtype="float32"),
"reg": pd.Series({"regr_633" : 5.097863, "regr_740" : 5.743073, "regr_2" : 7.306094},dtype="float64"),
"hyb": pd.Series({"class_570" : 0.821179, "class_581" : 0.209964, "class_2276" : 0.560037, "regr_633" : 5.118069, "regr_740" : 5.721944, "regr_2" : 7.383655},dtype="float64")}

@pytest.fixture
def cls_t8df_head():
int_cols = ['cont_classification_task_id', 'classification_task_id', 'num_total_actives', 'num_fold_min_actives', 'num_total_inactives', 'num_fold_min_inactives', 'n_tasks', 'retained_tasks']
T8c = pd.read_csv(os.path.join(MODELS_PATH,"example_cls_model/T8c.csv"))
T8c[int_cols] = T8c[int_cols].astype("Int64")
return T8c[T8c["cont_classification_task_id"] <10]

@pytest.fixture
def test_pred_multi_ix(cls_t8df_head):
multi_ix_task_map = t8df_to_task_map(cls_t8df_head,task_type = "classification",threshold_multi_ix=True)
return PredictorSingle(model= os.path.join(MODELS_PATH,"example_cls_model/model.pth"), conf=os.path.join(MODELS_PATH,"example_cls_model/hyperparameters.json"), class_task_map = multi_ix_task_map)

def test_dense_tasks_prediction(srprep, input_smiles_df, ref_output_xdata, ref_output_ydata, ix_rename_map, test_preds):
#generate x-data
@@ -127,7 +170,7 @@ def test_named_task_predictions(srprep, input_smiles_df, test_preds, class_task_
y_refs_select_class_df = pd.DataFrame(y_refs_selected_class_tasks, columns = list(class_task_map.keys())).rename(index=ix_rename_map)
y_refs_select_regr_df = pd.DataFrame(y_refs_selected_regr_tasks, columns = list(regr_task_map.keys())).rename(index=ix_rename_map)
ref_hyb_res_slice_df_reconstructed = pd.concat([y_refs_select_class_df, y_refs_select_regr_df],axis=1)
ref_hyb_res_slice_df_reconstructed.index.names = ['input_compound_id']
ref_hyb_res_slice_df_reconstructed.index.names = ["input_compound_id"]
assert_frame_equal(test_hyb_res_slice_df.sort_index().astype("float32"), ref_hyb_res_slice_df_reconstructed.sort_index().astype("float32"))

def test_failing_predictions(srprep, input_failing_smiles_df, test_preds):
@@ -137,3 +180,53 @@ def test_failing_predictions(srprep, input_failing_smiles_df, test_preds):
x = srprep.process_smiles(smi)
y = test_preds["hyb"].predict_decorated_series_from_tensor(x)
y_res_slice[k] = y

def test_get_mapped_task_names(test_preds, ref_name_arrays):
for mtype, my_pred in test_preds.items():
assert (my_pred.get_mapped_task_names() == ref_name_arrays[mtype]).all()

def test_get_model_type(test_preds, ref_model_types):
for mtype, my_pred in test_preds.items():
assert my_pred.get_model_type() == ref_model_types[mtype]

def test_limit_to_type(srprep, test_preds):
x = srprep.process_smiles('c1ccccc1')
#provoke failure with invalid type
with pytest.raises(ValueError):
y = test_preds["hyb"].predict_decorated_series_from_tensor(x,limit_to_type=5)
#now test a valid type
y = test_preds["hyb"].predict_decorated_series_from_tensor(x,limit_to_type=ScModelType.regression)
y_ref = pd.Series({"regr_633": 5.118069, "regr_740" : 5.721944, "regr_2" : 7.383655})
assert_series_equal(y, y_ref)

def test_csr_predictions(get_benzene_x_csr, get_benzene_y_ref, test_preds):
for mtype, my_pred in test_preds.items():
y_test = my_pred.predict_decorated_series_from_csr(get_benzene_x_csr)
assert_series_equal(y_test, get_benzene_y_ref[mtype])

def test_trunk_output(test_preds, srprep, input_smiles_df, ref_output_trunk):
for mtype, my_pred in test_preds.items():
assert np.allclose(np.concatenate([my_pred.predict_trunk_from_tensor(srprep.process_smiles(smi)) for k,smi in input_smiles_df.set_index("input_compound_id")["smiles"].items()]),ref_output_trunk[mtype])


def test_task_map_generator(cls_t8df_head):
task_map_test1 = t8df_to_task_map(cls_t8df_head,task_type = "classification")
labels = {"assay_517_class_7.00": 0, "assay_924_class_6.50": 1, "assay_924_class_7.00": 2, "assay_924_class_7.50": 3, "assay_1160_class_6.50": 4,\
"assay_1160_class_7.00": 5, "assay_1512_class_7.50": 6, "assay_1512_class_8.00": 7, "assay_1512_class_8.50": 8, "assay_1520_class_8.00": 9}
task_map_ref1 = pd.Series(labels ,name='cont_classification_task_id',dtype="int64").rename_axis("task_labels")
assert_series_equal(task_map_test1, task_map_ref1)

task_map_test2 = t8df_to_task_map(cls_t8df_head,task_type = "classification",threshold_multi_ix=True)
labels2 = {"assay_517_class":{7.0 : 0},"assay_924_class":{6.5 : 1, 7.0 : 2, 7.5 : 3}, "assay_1160_class" : {6.5 : 4, 7.0 : 5},\
"assay_1512_class" : {7.5 : 6, 8.0 : 7, 8.5 : 8}, "assay_1520_class" :{ 8.0 : 9}}
task_map_ref2 = pd.concat({key:pd.Series(val,name='cont_classification_task_id',dtype="int64") for key, val in labels2.items()}).rename_axis(["task_labels","threshold"])
assert_series_equal(task_map_test2, task_map_ref2)

def test_multi_ix_predictions(srprep,test_pred_multi_ix):
x = srprep.process_smiles("c1ccccc1")
y_multi_ix_test = test_pred_multi_ix.predict_decorated_series_from_tensor(x)
values_multi_ix = {"assay_517_class":{7.0 : 0.531071},"assay_924_class":{6.5 : 0.583757, 7.0 : 0.542668, 7.5 : 0.474523}, "assay_1160_class" : {6.5 : 0.530777, 7.0 : 0.428757},\
"assay_1512_class" : {7.5 : 0.472368, 8.0 : 0.367206, 8.5 : 0.306637}, "assay_1520_class" :{ 8.0 : 0.499579}}
y_multi_ix_ref = pd.concat({key:pd.Series(val,dtype="float32") for key, val in values_multi_ix.items()}).rename_axis(["task_labels","threshold"])
assert_series_equal(y_multi_ix_test,y_multi_ix_ref)
