Merge pull request #163 from hspark1212/develop
Version 2.2.0
hspark1212 authored Jun 14, 2024
2 parents 5e4856a + ba2d3af commit 87068fd
Showing 14 changed files with 118 additions and 64 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -43,6 +43,7 @@ data/
test/
tmp/
embedding
database/

# baseline_model
baseline_model/*/*.csv
7 changes: 4 additions & 3 deletions README.md
@@ -2,10 +2,10 @@

<p align="center">
<a href="https://hspark1212.github.io/MOFTransformer/">
<img alt="Docs" src="https://img.shields.io/badge/Docs-v2.1.5-brightgreen.svg?style=plastic">
<img alt="Docs" src="https://img.shields.io/badge/Docs-v2.2.0-brightgreen.svg?style=plastic">
</a>
<a href="https://pypi.org/project/moftransformer/">
<img alt="PypI" src="https://img.shields.io/badge/PyPI-v2.1.5-blue.svg?style=plastic&logo=PyPI">
<img alt="PypI" src="https://img.shields.io/badge/PyPI-v2.2.0-blue.svg?style=plastic&logo=PyPI">
</a>
<a href="https://doi.org/10.6084/m9.figshare.21155506.v2">
<img alt="Figshare" src="https://img.shields.io/badge/Figshare-v2-blue.svg?style=plastic&logo=figshare">
@@ -21,9 +21,10 @@
# [PMTransformer (MOFTransformer)](https://hspark1212.github.io/MOFTransformer/index.html)

This package provides a universal transfer learning model, `PMTransformer` (Porous Materials Transformer), which achieves state-of-the-art performance in predicting various properties of porous materials. The `PMTransformer` was pre-trained on 1.9 million hypothetical porous materials, including Metal-Organic Frameworks (MOFs), Covalent-Organic Frameworks (COFs), Porous Polymer Networks (PPNs), and zeolites. By fine-tuning the pre-trained `PMTransformer`, you can easily obtain machine learning models that accurately predict various properties of porous materials.

NOTE: From version 2.0.0, the default pre-trained model has been changed from `MOFTransformer` to `PMTransformer`, which was pre-trained on a larger dataset that contains other porous materials as well as MOFs. The `PMTransformer` outperforms the `MOFTransformer` in predicting various properties of porous materials.
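For orientation, fine-tuning the default pre-trained model comes down to a call like the following: a minimal sketch using the bundled example dataset from `moftransformer.examples`, with illustrative epoch and batch-size values.

```python
import moftransformer
from moftransformer.examples import example_path

# bundled example dataset shipped with the package
root_dataset = example_path["root_dataset"]
downstream = example_path["downstream"]

# fine-tune the default pre-trained model (load_path="pmtransformer");
# max_epochs and batch_size here are illustrative values only
moftransformer.run(root_dataset, downstream,
                   log_dir="./logs/",
                   max_epochs=10, batch_size=8)
```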


## [Install](https://hspark1212.github.io/MOFTransformer/installation.html)

### Dependencies
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -9,7 +9,7 @@
project = "MOFTransformer"
copyright = "2022, Yeonghun Kang, Hyunsoo Park"
author = "Yeonghun Kang, Hyunsoo Park"
release = "2.1.5"
release = "2.2.0"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
22 changes: 22 additions & 0 deletions docs/source/getting_started/training.md
@@ -46,6 +46,28 @@ moftransformer.run(root_dataset, downstream, log_dir=log_dir,
mean=mean, std=std)
```

### Example for multi-task learning:
```python
import moftransformer
from moftransformer.examples import example_path

# data root and downstream from example
root_dataset = example_path['root_dataset']
downstream = example_path['downstream']
log_dir = './logs/'
# load_path = "pmtransformer" (default)

# kwargs (optional)
max_epochs = 10
batch_size = 8
mean = [0, 1, 2]
std = [1, 2, 3]
n_targets = 3

moftransformer.run(root_dataset, downstream, log_dir=log_dir,
max_epochs=max_epochs, batch_size=batch_size,
mean=mean, std=std, n_targets=n_targets)
```
After training, the trained model, logs, and hyperparameters will be saved to `log_dir`.
You can then look over the results with TensorBoard.

4 changes: 2 additions & 2 deletions moftransformer/__init__.py
@@ -1,7 +1,7 @@
# MOFTransformer version 2.1.5
# MOFTransformer version 2.2.0
import os

__version__ = "2.1.5"
__version__ = "2.2.0"
__root_dir__ = os.path.dirname(__file__)

from moftransformer import visualize, utils, modules, libs, gadgets, datamodules, assets
10 changes: 6 additions & 4 deletions moftransformer/config.py
@@ -1,11 +1,12 @@
# MOFTransformer version 2.1.3
# MOFTransformer version 2.2.0
import os
from sacred import Experiment
from moftransformer import __root_dir__
from moftransformer.utils.validation import _set_load_path, _loss_names

ex = Experiment("pretrained_mof", save_git_info=False)


@ex.config
def config():
"""
@@ -23,7 +24,7 @@ def config():
loss_names = _loss_names({"regression": 1})

# graph setting
#max_supercell_atoms = None # number of maximum atoms in supercell atoms
# max_supercell_atoms = None # number of maximum atoms in supercell atoms
atom_fea_len = 64
nbr_fea_len = 64
max_graph_len = 300 # number of maximum nodes in graph
@@ -46,7 +47,8 @@ def config():

# downstream
downstream = ""
n_classes = 0
n_targets = 1 # for regression
n_classes = 0 # for classification

# Optimizer Setting
optim_type = "adamw" # adamw, adam, sgd (momentum=0.9)
@@ -75,7 +77,7 @@ def config():
devices = "auto"
num_nodes = 1

load_path = _set_load_path('pmtransformer')
load_path = _set_load_path("pmtransformer")

num_workers = 16  # the number of CPU cores
precision = 16
6 changes: 3 additions & 3 deletions moftransformer/modules/heads.py
@@ -1,4 +1,4 @@
# MOFTransformer version 2.0.0
# MOFTransformer version 2.2.0
import torch.nn as nn

from transformers.models.bert.modeling_bert import (
@@ -90,9 +90,9 @@ class RegressionHead(nn.Module):
head for Regression
"""

def __init__(self, hid_dim):
def __init__(self, hid_dim, n_targets=1):
super().__init__()
self.fc = nn.Linear(hid_dim, 1)
self.fc = nn.Linear(hid_dim, n_targets)

def forward(self, x):
x = self.fc(x)
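A quick shape check for the updated head, as a standalone sketch: the class body matches the diff above, while the hidden dimension of 768 and the batch size of 8 are assumed for illustration.

```python
import torch
import torch.nn as nn

class RegressionHead(nn.Module):
    """Head for regression; now maps the hidden state to n_targets outputs."""

    def __init__(self, hid_dim, n_targets=1):
        super().__init__()
        self.fc = nn.Linear(hid_dim, n_targets)

    def forward(self, x):
        return self.fc(x)

head = RegressionHead(hid_dim=768, n_targets=3)  # 768 is an assumed hidden size
cls_feats = torch.randn(8, 768)                  # [batch, hid_dim]
print(head(cls_feats).shape)                     # torch.Size([8, 3])
```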
53 changes: 31 additions & 22 deletions moftransformer/modules/module.py
@@ -93,7 +93,7 @@ def __init__(self, config):
print(f"load model : {config['load_path']}")

if self.hparams.config["loss_names"]["regression"] > 0:
self.regression_head = heads.RegressionHead(hid_dim)
self.regression_head = heads.RegressionHead(hid_dim, config["n_targets"])
self.regression_head.apply(objectives.init_weights)
# normalization
self.mean = config["mean"]
@@ -263,15 +263,14 @@ def forward(self, batch):

# regression
if "regression" in self.current_tasks:
normalizer = Normalizer(self.mean, self.std)
normalizer = Normalizer(self.mean, self.std, self.device)
ret.update(objectives.compute_regression(self, batch, normalizer))

# classification
if "classification" in self.current_tasks:
ret.update(objectives.compute_classification(self, batch))
return ret


def on_train_start(self):
module_utils.set_task(self)
self.write_log = True
@@ -294,16 +293,18 @@ def validation_step(self, batch, batch_idx):
def on_validation_epoch_end(self) -> None:
module_utils.epoch_wrapup(self)

def on_test_start(self,):
def on_test_start(
self,
):
module_utils.set_task(self)

def test_step(self, batch, batch_idx):
output = self(batch)
output = {
k: (v.cpu() if torch.is_tensor(v) else v) for k, v in output.items()
} # update cpu for memory

if 'regression_logits' in output.keys():
if "regression_logits" in output.keys():
self.test_logits += output["regression_logits"].tolist()
self.test_labels += output["regression_labels"].tolist()
return output
@@ -313,55 +314,63 @@ def on_test_epoch_end(self):

# calculate r2 score when regression
if len(self.test_logits) > 1:
r2 = r2_score(
np.array(self.test_labels), np.array(self.test_logits)
)
r2 = r2_score(np.array(self.test_labels), np.array(self.test_logits))
self.log(f"test/r2_score", r2, sync_dist=True)
self.test_labels.clear()
self.test_logits.clear()

def configure_optimizers(self):
return module_utils.set_schedule(self)

def on_predict_start(self):
self.write_log = False
module_utils.set_task(self)

def predict_step(self, batch, batch_idx, dataloader_idx=0):
output = self(batch)

if 'classification_logits' in output:
if self.hparams.config['n_classes'] == 2:
output['classification_logits_index'] = torch.round(output['classification_logits']).to(torch.int)

if "classification_logits" in output:
if self.hparams.config["n_classes"] == 2:
output["classification_logits_index"] = torch.round(
output["classification_logits"]
).to(torch.int)
else:
softmax = torch.nn.Softmax(dim=1)
output['classification_logits'] = softmax(output['classification_logits'])
output['classification_logits_index'] = torch.argmax(output['classification_logits'], dim=1)
output["classification_logits"] = softmax(
output["classification_logits"]
)
output["classification_logits_index"] = torch.argmax(
output["classification_logits"], dim=1
)

output = {
k: (v.cpu().tolist() if torch.is_tensor(v) else v)
for k, v in output.items()
if ('logits' in k) or ('labels' in k) or 'cif_id' == k
if ("logits" in k) or ("labels" in k) or "cif_id" == k
}

return output

def on_predict_epoch_end(self, *args):
self.test_labels.clear()
self.test_logits.clear()

def on_predict_end(self, ):
def on_predict_end(
self,
):
self.write_log = True

def lr_scheduler_step(self, scheduler, *args):
if len(args) == 2:
optimizer_idx, metric = args
elif len(args) == 1:
metric, = args
(metric,) = args
else:
raise ValueError('lr_scheduler_step must have metric and optimizer_idx(optional)')
raise ValueError(
"lr_scheduler_step must have metric and optimizer_idx(optional)"
)

if pl.__version__ >= '2.0.0':
if pl.__version__ >= "2.0.0":
scheduler.step(epoch=self.current_epoch)
else:
scheduler.step()
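The reformatted classification branch of `predict_step` can be read as the following standalone sketch (tensor values are made up for illustration): binary logits are rounded to 0/1 indices, multi-class logits are softmaxed and argmaxed.

```python
import torch

# Binary case (n_classes == 2): round the logits to class indices
binary_logits = torch.tensor([0.2, 0.8, 0.6])
print(torch.round(binary_logits).to(torch.int))   # tensor([0, 1, 1], dtype=torch.int32)

# Multi-class case: softmax over classes, then argmax to class indices
multi_logits = torch.tensor([[2.0, 0.1, -1.0], [0.0, 3.0, 0.5]])
probs = torch.nn.Softmax(dim=1)(multi_logits)
print(torch.argmax(probs, dim=1))                 # tensor([0, 1])
```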
13 changes: 8 additions & 5 deletions moftransformer/modules/module_utils.py
@@ -1,4 +1,4 @@
# MOFTransformer version 2.1.0
# MOFTransformer version 2.2.0
import torch

from torch.optim import AdamW
@@ -207,17 +207,20 @@ class Normalizer(object):
normalize for regression
"""

def __init__(self, mean, std):
def __init__(self, mean, std, device):
if mean and std:
if isinstance(mean, list):
mean = torch.tensor(mean).to(device)
if isinstance(std, list):
std = torch.tensor(std).to(device)
self.mean = mean
self.std = std
self._norm_func = lambda tensor: (tensor - mean) / std
self._denorm_func = lambda tensor: tensor * std + mean
else:
self._norm_func = lambda tensor: tensor
self._denorm_func = lambda tensor: tensor

self.mean = mean
self.std = std

def encode(self, tensor):
return self._norm_func(tensor)

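Put together, the updated normalizer behaves like this standalone sketch: list-valued `mean`/`std` (one entry per target) are converted to tensors on the model's device so multi-target labels normalize element-wise. The `decode` method and the numeric values are assumptions for illustration; the diff only shows `encode` and the underlying `_denorm_func`.

```python
import torch

class Normalizer:
    """Normalize regression targets; list mean/std become tensors on `device`."""

    def __init__(self, mean, std, device):
        if mean and std:
            if isinstance(mean, list):
                mean = torch.tensor(mean).to(device)
            if isinstance(std, list):
                std = torch.tensor(std).to(device)
            self._norm_func = lambda tensor: (tensor - mean) / std
            self._denorm_func = lambda tensor: tensor * std + mean
        else:
            self._norm_func = lambda tensor: tensor
            self._denorm_func = lambda tensor: tensor
        self.mean = mean
        self.std = std

    def encode(self, tensor):
        return self._norm_func(tensor)

    def decode(self, tensor):  # assumed counterpart of encode
        return self._denorm_func(tensor)

normalizer = Normalizer(mean=[0, 1, 2], std=[1, 2, 3], device="cpu")
labels = torch.tensor([[0.5, 2.0, 5.0]])                   # [batch, n_targets]
encoded = normalizer.encode(labels)
print(torch.allclose(normalizer.decode(encoded), labels))  # True
```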
10 changes: 6 additions & 4 deletions moftransformer/modules/objectives.py
@@ -1,4 +1,4 @@
# MOFTransformer version 2.1.0
# MOFTransformer version 2.2.0
import torch
import torch.nn as nn
import torch.nn.functional as F
@@ -19,11 +19,13 @@ def init_weights(module):
def compute_regression(pl_module, batch, normalizer):
infer = pl_module.infer(batch)

logits = pl_module.regression_head(infer["cls_feats"]).squeeze(-1) # [B]
labels = torch.FloatTensor(batch["target"]).to(logits.device) # [B]
assert len(labels.shape) == 1
logits = pl_module.regression_head(infer["cls_feats"]) # [B, n_targets]
labels = torch.FloatTensor(batch["target"]).to(
logits.device
) # [B] or [B, n_targets]

# normalize encode if config["mean"] and config["std], else pass
logits = logits.squeeze(-1)
labels = normalizer.encode(labels)
loss = F.mse_loss(logits, labels)

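The shape handling in the updated `compute_regression` boils down to the following sketch (batch size and target count are assumed): with `n_targets > 1` the `squeeze(-1)` is a no-op, while the single-target case still collapses to `[B]` as before.

```python
import torch
import torch.nn.functional as F

# Multi-target case: squeeze(-1) leaves [B, n_targets] untouched
logits = torch.randn(8, 3)   # regression head output
labels = torch.randn(8, 3)   # normalized targets
print(F.mse_loss(logits.squeeze(-1), labels).shape)  # torch.Size([]) -- scalar loss

# Single-target case: [B, 1] collapses back to [B]
print(torch.randn(8, 1).squeeze(-1).shape)           # torch.Size([8])
```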
13 changes: 7 additions & 6 deletions moftransformer/run.py
@@ -1,4 +1,4 @@
# MOFTransformer version 2.1.0
# MOFTransformer version 2.2.0
import sys
import os
import copy
@@ -261,7 +261,7 @@ def main(_config):

if _IS_INTERACTIVE:
strategy = None
elif pl.__version__ >= '2.0.0':
elif pl.__version__ >= "2.0.0":
strategy = "ddp_find_unused_parameters_true"
else:
strategy = "ddp"
@@ -287,9 +287,10 @@

if not _config["test_only"]:
trainer.fit(model, datamodule=dm, ckpt_path=_config["resume_from"])
log_dir = Path(logger.log_dir)/'checkpoints'
if best_model:= next(log_dir.glob('epoch=*.ckpt')):
shutil.copy(best_model, log_dir/'best.ckpt')

trainer.test(model, datamodule=dm, ckpt_path="best")
log_dir = Path(logger.log_dir) / "checkpoints"
if best_model := next(log_dir.glob("epoch=*.ckpt")):
shutil.copy(best_model, log_dir / "best.ckpt")

else:
trainer.test(model, datamodule=dm)
8 changes: 5 additions & 3 deletions moftransformer/utils/prepare_data.py
@@ -1,3 +1,4 @@
# Version 2.2.0
import os
import math
import logging
@@ -36,7 +37,7 @@ def get_logger(filename):
formatter = logging.Formatter(
fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)

file_handler = logging.FileHandler(filename)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
@@ -290,7 +291,9 @@ def _split_dataset(root_dataset: Path, **kwargs):
def _split_json(root_cifs: Path, root_dataset: Path, downstream: str):
with open(str(root_cifs / f"raw_{downstream}.json")) as f:
src = json.load(f)
src = {i.replace(".cif", ""):v for i, v in src.items()} # if *.cif in JSON files
src = {
i.replace(".cif", ""): v for i, v in src.items()
} # if *.cif in JSON files

for split in ["train", "test", "val"]:
cif_folder = root_dataset / split
@@ -368,7 +371,6 @@ def make_prepared_data(

# 1. get crystal graph
atoms = _make_supercell(atoms, cutoff=8) # radius = 8

if max_num_atoms and len(atoms) > max_num_atoms:
logger.error(
f"{cif_id} failed : number of atoms are larger than `max_num_atoms` ({max_num_atoms})"
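The key normalization added to `_split_json` simply strips an optional `.cif` suffix from the raw JSON keys, as in this sketch (file names and target values are hypothetical):

```python
# hypothetical contents of raw_{downstream}.json
src = {"MOF-1.cif": 0.42, "MOF-2": 1.37}
src = {i.replace(".cif", ""): v for i, v in src.items()}
print(src)  # {'MOF-1': 0.42, 'MOF-2': 1.37}
```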