-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_deepchem.py
37 lines (29 loc) · 1001 Bytes
/
test_deepchem.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import numpy as np
import deepchem as dc
def test_split():
    """Check NumpyDataset construction, ECFP featurization and random splitting.

    Three things are exercised in sequence:

    1. A ``NumpyDataset`` built from random arrays exposes the data it was
       given.
    2. ``CircularFingerprint`` turns each SMILES string into one
       fixed-length row.
    3. ``RandomSplitter`` partitions a 5-sample dataset 60/20/20 into
       3/1/1 samples.
    """
    # --- 1. Dataset built from plain random arrays ------------------------
    n_samples = 50
    n_features = 10
    X = np.random.rand(n_samples, n_features)
    y = np.random.rand(n_samples)
    dataset = dc.data.NumpyDataset(X, y)
    assert dataset.X.shape == (n_samples, n_features)
    # Only the leading dimension is checked: the label array was passed in
    # as 1-D and this test does not depend on whether it is kept that way.
    assert len(dataset.y) == n_samples

    # --- 2. Featurize a handful of molecules ------------------------------
    smiles = [
        'O=Cc1ccc(O)c(OC)c1',
        'CN1CCC[C@H]1c2cccnc2',
        'C1CCCCC1',
        'c1ccccc1',
        'CC(=O)O',
    ]
    properties = [0.4, -1.5, 3.2, -0.2, 1.7]
    fingerprint_size = 1024
    featurizer = dc.feat.CircularFingerprint(size=fingerprint_size)
    ecfp = featurizer.featurize(smiles)
    # One fingerprint row per input molecule.
    assert ecfp.shape == (len(smiles), fingerprint_size)

    dataset = dc.data.NumpyDataset(X=ecfp, y=np.array(properties))
    assert len(dataset) == len(smiles)

    # --- 3. Random 60/20/20 split -----------------------------------------
    splitter = dc.splits.RandomSplitter()
    train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
        dataset=dataset,
        frac_train=0.6,
        frac_valid=0.2,
        frac_test=0.2,
    )
    # Membership is random, but the sizes the test expects are fixed.
    assert len(train_dataset) == 3
    assert len(valid_dataset) == 1
    assert len(test_dataset) == 1