Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Example] Add TADF材料分子的光电性质预测 #974

Open
wants to merge 30 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
c0b76d9
merge code of upstream
Zhaoyiou123 Aug 16, 2024
36cc5b5
Merge branch 'PaddlePaddle:develop' into dev_model
YfB1125 Aug 17, 2024
28ac09e
Delete 2024-08 directory
YfB1125 Aug 17, 2024
4e65408
Update test.py
YfB1125 Aug 17, 2024
a77606e
merge code of upstream
Zhaoyiou123 Aug 17, 2024
ab27b8e
pre-commit
Zhaoyiou123 Aug 17, 2024
7f9c80c
pre-commit
Zhaoyiou123 Aug 17, 2024
b111bd4
changes
Zhaoyiou123 Aug 17, 2024
754e841
Update f_ppsci_train.py
YfB1125 Aug 20, 2024
cba37c4
Delete 2024-08 directory
YfB1125 Aug 20, 2024
22ad460
Merge branch 'develop' into dev_model
YfB1125 Aug 20, 2024
e4dda68
Update f_paddle_train.py
YfB1125 Aug 20, 2024
62a04a5
Merge branch 'develop' into dev_model
YfB1125 Aug 23, 2024
d60a01b
'pre-commit'
Zhaoyiou123 Aug 26, 2024
5241419
'pre-commit'
Zhaoyiou123 Aug 26, 2024
2eccbb5
'pre-commit'
Zhaoyiou123 Aug 26, 2024
3bef6a1
Delete Est directory
YfB1125 Aug 26, 2024
34de075
Delete f directory
YfB1125 Aug 26, 2024
96e5cd5
Delete angle directory
YfB1125 Aug 26, 2024
1b9a43c
Add files via upload
YfB1125 Aug 26, 2024
cb3b15a
Update est_paddle_train.py
YfB1125 Oct 7, 2024
a06f763
'yaml'
Zhaoyiou123 Oct 15, 2024
70872c8
Merge branch 'develop' into dev_model
YfB1125 Oct 16, 2024
e412303
'yaml'
Zhaoyiou123 Oct 16, 2024
88168e0
'yaml'
Zhaoyiou123 Oct 16, 2024
3dce5f3
Delete TADF/Est directory
YfB1125 Oct 16, 2024
c0c609f
Delete TADF/angle directory
YfB1125 Oct 16, 2024
f61345f
Delete TADF/f directory
YfB1125 Oct 16, 2024
3907ef3
Update
Zhaoyiou123 Oct 25, 2024
d4b7ada
Merge branch 'dev_model' of https://github.com/YFB1125/PaddleScience …
Zhaoyiou123 Oct 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5,136 changes: 5,136 additions & 0 deletions TADF/TADF_Est/Est.dat

Large diffs are not rendered by default.

Binary file added TADF/TADF_Est/Test_Est.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
82 changes: 82 additions & 0 deletions TADF/TADF_Est/config/est.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
defaults: #
- ppsci_default #
- TRAIN: train_default #
- EVAL: eval_default #
# - INFER: infer_default #
- _self_ #

hydra:
run:
# dynamic output directory according to running time and override name
dir: outputs_Est/${now:%Y-%m-%d}/${now:%H-%M-%S}/${hydra.job.override_dirname} #
job:
name: ${mode} # name of logfile
chdir: false # keep current working directory unchanged
callbacks:
init_callback: #
_target_: ppsci.utils.callbacks.InitCallback #
sweep:
# output directory for multirun
dir: ${hydra.run.dir}
subdir: ./

# general settings
mode: eval # running mode: train/eval #
seed: 42 #
output_dir: ${hydra:run.dir} #
log_freq: 20 #
use_tbd: false #
VIV_DATA_PATH: "./f.dat" # NOTE(review): not read by est_ppsci.py, which opens ./est.dat and ./smis.txt directly

# model settings
MODEL: #
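# NOTE: the commented-out keys below are set in est_ppsci.py itself (input_keys from the data, num_layers=None, hidden_size=[587, 256]).
# input_keys: tuple(x.keys()) #
output_keys: ["u"] #
# num_layers: None #
# hidden_size:
# - 587
# - 256 #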
activation: "relu" #
dropout: 0.5

# training settings
TRAIN: #
epochs: 200 #
iters_per_epoch: 20 #
save_freq: 100 #
eval_during_train: False #
batch_size: 8 #
learning_rate: 0.0001
save_model_path: './est_model.pth'
weight_decay: 1e-5
pretrained_model_path: null #
checkpoint_path: null #
k: 9
i: 2

# evaluation settings
EVAL:
test_size: 0.1
load_model_path: './est_model.pth'
seed: 20
# pretrained_model_path: null #
# batch_size: 32 #

# inference settings
#INFER: #
# pretrained_model_path: "https://github.com/YfB1125/PaddleScience/blob/dev_model/TADF/f/output/checkpoints/latest.pdparams" #
# export_path: ./inference/viv #
# pdmodel_path: ${INFER.export_path}.pdmodel #
# pdiparams_path: ${INFER.export_path}.pdiparams #
# input_keys: ${MODEL.input_keys} #
# output_keys: ["u"] #
# device: cpu #
# engine: native #
# precision: fp32 #
# ir_optim: true #
# min_subgraph_size: 10 #
# gpu_mem: 4000 #
# gpu_id: 0 #
# max_batch_size: 64 #
# num_cpu_threads: 4 #
# batch_size: 16 #
Binary file added TADF/TADF_Est/est_model.pth
Binary file not shown.
177 changes: 177 additions & 0 deletions TADF/TADF_Est/est_ppsci.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import os

import hydra
import matplotlib.pyplot as plt
import numpy as np
import paddle
import rdkit.Chem as Chem
from omegaconf import DictConfig
from rdkit.Chem import AllChem
from sklearn.decomposition import PCA
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

import ppsci

# Ask Hydra to print complete stack traces instead of its shortened error output.
os.environ["HYDRA_FULL_ERROR"] = "1"
# Tolerate more than one OpenMP runtime being loaded into the same process.
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
# Global Matplotlib text settings: plain ASCII minus sign and the DejaVu Sans font.
plt.rcParams["axes.unicode_minus"] = False
plt.rcParams["font.sans-serif"] = ["DejaVu Sans"]

# Load the dataset.
# est.dat holds one numeric label per line and smis.txt one SMILES string per
# line; the two files are paired by line number.
with open("./est.dat") as label_file:
    data = [float(line.strip()) for line in label_file]
with open("./smis.txt") as smiles_file:
    smis = [line.strip() for line in smiles_file]
if len(data) != len(smis):
    raise ValueError(
        f"est.dat has {len(data)} labels but smis.txt has {len(smis)} SMILES; "
        "the two files must be line-aligned"
    )

# Featurize every molecule as a 2048-bit Morgan fingerprint (radius 2).
vectors = []
kept_labels = []
del_mol = []  # SMILES strings that could not be featurized
for smi, label in zip(smis, data):
    mol = Chem.MolFromSmiles(smi)
    try:
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
        _input = np.array(list(map(float, fp.ToBitString())))
    except Exception:
        # Best effort: a molecule that cannot be parsed or fingerprinted is
        # recorded and skipped instead of aborting the whole run.
        del_mol.append(smi)
        continue
    vectors.append(_input)
    kept_labels.append(label)
# Drop the labels of skipped molecules so that data[i] still belongs to
# vectors[i]; otherwise every sample after a skipped one would be mislabeled.
data = kept_labels

# Reduce the fingerprints with PCA (n_components=0.99) and keep the result as
# a paddle tensor shared by train() and eval().
pca = PCA(n_components=0.99)
pca.fit(vectors)
Xlist = paddle.to_tensor(pca.transform(vectors))


def _k_fold(k, i, X, Y):
    """Split X and Y into training data and the i-th of k contiguous folds.

    The fold size is ``X.shape[0] // k``; the last fold (``i == k - 1``) runs
    to the end of the data, so it also takes the leftover samples.

    Args:
        k: Number of folds.
        i: Zero-based index of the fold that is held out.
        X: Features, indexed by sample along the first axis.
        Y: Labels, in the same order as ``X``.

    Returns:
        The tuple ``(x_train, y_train, x_val, y_val)``.

    Raises:
        ValueError: If ``k`` is not positive or ``i`` is outside ``[0, k)``.
    """
    # An out-of-range fold index would otherwise yield a silently wrong split.
    if k <= 0 or not 0 <= i < k:
        raise ValueError(f"expected 0 <= i < k, but got k={k}, i={i}")
    fold_size = X.shape[0] // k
    val_start = i * fold_size
    if i == k - 1:
        # Last fold: everything from val_start onwards is held out.
        return X[:val_start], Y[:val_start], X[val_start:], Y[val_start:]
    val_end = val_start + fold_size
    x_train = np.concatenate((X[:val_start], X[val_end:]), axis=0)
    y_train = np.concatenate((Y[:val_start], Y[val_end:]), axis=0)
    return x_train, y_train, X[val_start:val_end], Y[val_start:val_end]


def train(cfg: DictConfig):
    """Train the DNN regressor and save its weights.

    The model is fitted on every fold except fold ``cfg.TRAIN.i`` of a
    ``cfg.TRAIN.k``-fold split of the module-level ``Xlist`` / ``data``, and
    its state dict is written to ``cfg.TRAIN.save_model_path``.

    Args:
        cfg: Hydra configuration. Reads ``cfg.TRAIN`` (``k``, ``i``,
            ``batch_size``, ``learning_rate``, ``weight_decay``, ``epochs``,
            ``iters_per_epoch``, ``save_model_path``), ``cfg.MODEL`` (forwarded
            to the model constructor) and ``cfg.seed``.

    Raises:
        ValueError: If ``cfg.TRAIN.i`` is not a valid fold index for
            ``cfg.TRAIN.k``.
    """
    # Split the dataset; the held-out fold is not used during training.
    x_train, y_train, _, _ = _k_fold(cfg.TRAIN.k, cfg.TRAIN.i, Xlist, data)

    # Prepare the dataset: one (N, 1) input tensor per feature column,
    # keyed "key_0", "key_1", ...
    x_train = paddle.to_tensor(x_train, dtype="float32")
    x = {
        f"key_{i}": paddle.unsqueeze(x_train[:, i], axis=1)
        for i in range(x_train.shape[1])
    }
    y_train = paddle.unsqueeze(paddle.to_tensor(y_train, dtype="float32"), axis=1)

    # Build the supervised constraint (MSE between model output "u" and label).
    bc_sup = ppsci.constraint.SupervisedConstraint(
        dataloader_cfg={
            "dataset": {
                "input": x,
                "label": {"u": y_train},
                "name": "IterableNamedArrayDataset",
            },
            "batch_size": cfg.TRAIN.batch_size,
        },
        loss=ppsci.loss.MSELoss("mean"),
        name="bc_sup",
    )

    # Instantiate the model; eval() must build it with the same layer sizes.
    model = ppsci.arch.DNN(
        input_keys=tuple(x.keys()),
        hidden_size=[587, 256],
        num_layers=None,
        **cfg.MODEL,
    )
    optimizer = ppsci.optimizer.optimizer.Adam(
        cfg.TRAIN.learning_rate,
        beta1=0.9,
        beta2=0.99,
        weight_decay=cfg.TRAIN.weight_decay,
    )(model)

    # Build the solver. No validator is passed, so evaluation during training
    # stays switched off.
    solver = ppsci.solver.Solver(
        model,
        constraint={
            "bc_sup": bc_sup,
        },
        optimizer=optimizer,
        epochs=cfg.TRAIN.epochs,
        eval_during_train=False,
        iters_per_epoch=cfg.TRAIN.iters_per_epoch,
        seed=cfg.seed,
    )
    try:
        solver.train()
    except Exception as ex:
        # NOTE(review): a training error is reported but not re-raised, so the
        # weights saved below may come from an incomplete run.
        print("error", ex)
    paddle.save(model.state_dict(), cfg.TRAIN.save_model_path)


# Run the evaluation
def eval(cfg: DictConfig):
    """Evaluate the saved model on a random test split and plot the result.

    Loads the weights from ``cfg.EVAL.load_model_path``, predicts the test
    samples, prints MAE, RMSE and R2, and shows a scatter plot of predicted
    versus reference values.

    Args:
        cfg: Hydra configuration. Reads ``cfg.EVAL`` (``test_size``, ``seed``,
            ``load_model_path``) and ``cfg.MODEL`` (forwarded to the model
            constructor).
    """
    # Re-split the dataset.
    # NOTE(review): this random split is drawn from the full dataset,
    # independently of the contiguous k-fold split used by train(), so test
    # samples can overlap the training data — confirm this is intended.
    _, x_test, _, y_test = train_test_split(
        Xlist, data, test_size=cfg.EVAL.test_size, random_state=cfg.EVAL.seed
    )
    # One (N, 1) input tensor per feature column, keyed like in train().
    x = {
        f"key_{i}": paddle.unsqueeze(
            paddle.to_tensor(x_test[:, i], dtype="float32"), axis=1
        )
        for i in range(x_test.shape[1])
    }

    # Rebuild the network with the layer sizes used for training and load the
    # saved weights.
    model = ppsci.arch.DNN(
        input_keys=tuple(x.keys()),
        hidden_size=[587, 256],
        num_layers=None,
        **cfg.MODEL,
    )
    model.set_state_dict(paddle.load(cfg.EVAL.load_model_path))
    # Switch to inference mode: the configuration passes a dropout rate to the
    # model, and a layer left in training mode keeps dropout active, which
    # makes the predictions (and the metrics below) random.
    model.eval()
    with paddle.no_grad():
        ypred = model(x)
    ytest = {"u": paddle.unsqueeze(paddle.to_tensor(y_test, dtype="float32"), axis=1)}

    # Compute the metrics
    MAE = ppsci.metric.MAE()(ypred, ytest).get("u").numpy()
    RMSE = ppsci.metric.RMSE()(ypred, ytest).get("u").numpy()
    ypred = ypred.get("u").numpy()
    ytest = ytest.get("u").numpy()
    R2 = r2_score(ytest, ypred)
    print("MAE", MAE)
    print("RMSE", RMSE)
    print("R2", R2)

    # Visualization: predictions against references plus the y = x line.
    plt.scatter(ytest, ypred, s=15, color="royalblue", marker="s", linewidth=1)
    plt.plot([ytest.min(), ytest.max()], [ytest.min(), ytest.max()], "r-", lw=1)
    plt.legend(title="R²={:.3f}\n\nMAE={:.3f}".format(R2, MAE))
    plt.xlabel("Test ΔEst(eV)")
    plt.ylabel("Predicted ΔEst(eV)")
    plt.show()


@hydra.main(version_base=None, config_path="./config", config_name="est.yaml")
def main(cfg: DictConfig):
    """Entry point: run training or evaluation depending on ``cfg.mode``.

    Args:
        cfg: Hydra configuration loaded from ``./config/est.yaml``.

    Raises:
        ValueError: If ``cfg.mode`` is neither ``"train"`` nor ``"eval"``.
    """
    mode = cfg.mode
    # Reject unknown modes first, then hand over to the selected runner.
    if mode not in ("train", "eval"):
        raise ValueError(f"cfg.mode should in ['train', 'eval'], but got '{mode}'")
    runner = train if mode == "train" else eval
    runner(cfg)


if __name__ == "__main__":
    main()
Binary file added TADF/TADF_Est/output/checkpoints/latest.pdopt
Binary file not shown.
Binary file added TADF/TADF_Est/output/checkpoints/latest.pdparams
Binary file not shown.
Binary file added TADF/TADF_Est/output/checkpoints/latest.pdstates
Binary file not shown.
47 changes: 47 additions & 0 deletions TADF/TADF_Est/outputs_Est/2024-10-15/23-53-31/.hydra/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
mode: train
output_dir: ${hydra:run.dir}
log_freq: 20
seed: 42
use_vdl: false
use_tbd: false
wandb_config: null
use_wandb: false
device: gpu
use_amp: false
amp_level: O1
to_static: false
prim: false
log_level: info
TRAIN:
epochs: 200
iters_per_epoch: 20
update_freq: 1
save_freq: 100
eval_during_train: false
start_eval_epoch: 1
eval_freq: 1
checkpoint_path: null
pretrained_model_path: null
ema: null
swa: null
batch_size: 8
learning_rate: 0.0001
save_model_path: ./est_model.pth
weight_decay: 1.0e-05
k: 9
i: 2
EVAL:
pretrained_model_path: null
eval_with_no_grad: false
compute_metric_by_batch: false
batch_size: 256
test_size: 0.1
load_model_path: ./est_model.pth
seed: 20
INFER: null
VIV_DATA_PATH: ./f.dat
MODEL:
output_keys:
- u
activation: relu
dropout: 0.5
Loading