-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
132 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import numpy as np | ||
import pandas as pd | ||
import os | ||
import ast | ||
|
||
from data_loaders.dataset import DataLoader | ||
|
||
class Ames(DataLoader):
    """Loader for the Ames dataset backed by precomputed Mordred fingerprints.

    Reads ``Fingerprints/Ames_Mordred.csv`` (resolved relative to this
    module's directory), parses the ``fingerprint_Mordred`` column into
    arrays and uses the ``Y`` column as the target values.
    """

    def __init__(self):
        # The CSV was generated with Mordred.py; every fingerprint cell holds
        # the text of a Python literal that is parsed back into an array.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        csv_path = module_dir + "/../Fingerprints/Ames_Mordred.csv"
        frame = pd.read_csv(csv_path)

        parsed_rows = [
            np.array(ast.literal_eval(cell))
            for cell in frame["fingerprint_Mordred"]
        ]

        self.name = "Ames"
        # One entry per record, stacked along a new leading axis.
        self.x_values = np.stack(parsed_rows)
        self.y_values = frame["Y"].to_numpy()

    def size(self):
        """Return the number of records held by the loader."""
        return len(self.x_values)

    def x(self, dataset_slice_indices: np.ndarray) -> np.ndarray:
        """Return the feature rows selected by ``dataset_slice_indices``."""
        return self.x_values[dataset_slice_indices]

    def y(self, dataset_slice_indices):
        """Return the target values selected by ``dataset_slice_indices``."""
        return self.y_values[dataset_slice_indices]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import numpy as np | ||
|
||
class DataLoader:
    """Base interface for loading a dataset and extracting its features.

    During Bayesian optimization the loader is asked for the x and y values
    of individual dataset slices. Subclasses load and clean their data in
    ``__init__`` and override ``size``, ``x`` and ``y``.
    """

    def __init__(self):
        # Subclasses should perform feature extraction and data cleaning here
        # and set a descriptive name.
        self.name = ""

    def size(self) -> int:
        """Return the number of samples in the dataset.

        Raises:
            NotImplementedError: always, until a subclass overrides it.
        """
        # Fail loudly instead of silently returning None from an "-> int" method.
        raise NotImplementedError(
            f"{type(self).__name__} must implement size()"
        )

    def x(self, dataset_slice: np.ndarray) -> np.ndarray:
        """Return the x values (features) for a slice of the dataset.

        ``dataset_slice`` holds indexes into the original dataset, e.g.
        passing ``[1, 2, 9]`` returns the features at index 1, 2 and 9.

        Raises:
            NotImplementedError: always, until a subclass overrides it.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement x()"
        )

    def y(self, dataset_slice: np.ndarray) -> np.ndarray:
        """Return the y values for a slice of the dataset.

        ``dataset_slice`` holds indexes into the original dataset.

        Raises:
            NotImplementedError: always, until a subclass overrides it.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement y()"
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import numpy as np | ||
import pandas as pd | ||
from data_loaders.dataset import DataLoader | ||
from transformer.fingerprints import FingerprintsTransformer | ||
import os | ||
|
||
class halflife(DataLoader):
    """Loader for the cleaned half-life dataset with ECFP fingerprint features.

    Reads ``datasets/cleaned_datasets/halflife_dataset.csv`` (resolved
    relative to this module's directory), builds the features from the
    ``Drug`` column via ``FingerprintsTransformer`` and uses the ``Y`` column
    as the target values.
    """

    def __init__(self):
        module_dir = os.path.dirname(os.path.abspath(__file__))
        csv_path = module_dir + "/../datasets/cleaned_datasets/halflife_dataset.csv"
        self.data = pd.read_csv(csv_path)
        self.name = "halflife"

        # Features are computed before the targets are read, as in the
        # original statement order.
        fingerprinter = FingerprintsTransformer(self.data, "Drug", "ECFP")
        self.x_values = fingerprinter.to_np()
        self.y_values = self.data["Y"].to_numpy()

    def size(self):
        """Return the number of records held by the loader."""
        return len(self.x_values)

    def x(self, dataset_slice_indices: np.ndarray) -> np.ndarray:
        """Return the feature rows selected by ``dataset_slice_indices``."""
        return self.x_values[dataset_slice_indices]

    def y(self, dataset_slice_indices):
        """Return the target values selected by ``dataset_slice_indices``."""
        return self.y_values[dataset_slice_indices]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import os | ||
import numpy as np | ||
import pandas as pd | ||
import ast | ||
|
||
from data_loaders.dataset import DataLoader | ||
from functools import lru_cache | ||
|
||
class LD50(DataLoader):
    """Loader for the LD50 (Zhu) dataset with precomputed ECFP fingerprints.

    Reads ``datasets/fingerprint_datasets/LD50_Zhu_ECFP.h5`` (resolved
    relative to this module's directory), stacks the ``fingerprint_ECFP``
    column into the feature array and uses the ``Y`` column as targets.
    """

    # Fingerprint type of the stored features. NOTE(review): the file name and
    # column name in __init__ are hard-coded to ECFP and do not read this value.
    fingerprint = 'ECFP'

    def __init__(self):
        path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            "../datasets/fingerprint_datasets/LD50_Zhu_ECFP.h5"
        )
        self.data = pd.read_hdf(path)
        self.name = "ld50"
        self.x_values = np.stack(self.data['fingerprint_ECFP'])
        self.y_values = self.data["Y"].to_numpy()

    def size(self):
        """Return the number of records held by the loader."""
        return len(self.x_values)

    def x(self, dataset_slice: slice | np.ndarray | None = None) -> np.ndarray:
        """Return feature rows for ``dataset_slice``, or all rows when it is None."""
        # None must be special-cased: indexing a NumPy array with None would
        # insert a new axis instead of returning the array unchanged.
        if dataset_slice is None:
            return self.x_values
        return self.x_values[dataset_slice]

    def y(self, dataset_slice: slice | np.ndarray | None = None) -> np.ndarray:
        """Return target values for ``dataset_slice``, or all values when it is None."""
        # Same None handling as x(): avoid NumPy's new-axis indexing.
        if dataset_slice is None:
            return self.y_values
        return self.y_values[dataset_slice]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from tdc.utils import retrieve_label_name_list | ||
from tdc.single_pred import Tox | ||
import numpy as np | ||
from transformer.fingerprints import FingerprintsTransformer | ||
from data_loaders.dataset import DataLoader | ||
|
||
# https://tdcommons.ai/single_pred_tasks/tox/#tox21 | ||
class Tox21(DataLoader):
    """Loader for the TDC Tox21 dataset with ECFP fingerprint features.

    Only the first label returned by ``retrieve_label_name_list('Tox21')`` is
    loaded. Features are built from the ``Drug`` column via
    ``FingerprintsTransformer`` and targets come from the ``Y`` column.
    """

    def __init__(self):
        first_label = retrieve_label_name_list('Tox21')[0]
        frame = Tox(name='Tox21', label_name=first_label).get_data()

        fingerprinter = FingerprintsTransformer(frame, "Drug", "ECFP")

        self.name = "Tox21"
        self.x_values = fingerprinter.to_np()
        self.y_values = frame["Y"].to_numpy()

    def size(self):
        """Return the number of records held by the loader."""
        return len(self.x_values)

    def x(self, dataset_slice_indices: np.ndarray) -> np.ndarray:
        """Return the feature rows selected by ``dataset_slice_indices``."""
        return self.x_values[dataset_slice_indices]

    def y(self, dataset_slice_indices):
        """Return the target values selected by ``dataset_slice_indices``."""
        return self.y_values[dataset_slice_indices]