Skip to content

Commit

Permalink
benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
killiansheriff committed Mar 11, 2024
1 parent dcac265 commit c677240
Show file tree
Hide file tree
Showing 4 changed files with 386 additions and 0 deletions.
113 changes: 113 additions & 0 deletions benchmark/_frameworks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import numpy as np
import pandas as pd
from simplex import Simplex

from chemicalmotifidentifier import (
BaseMonteCarloChemicalMotifIdentifier,
BaseSyntheticChemicalMotifIdentifier,
)

# Model used in https://arxiv.org/abs/2311.01545

# Google Drive folder holding the model weights, sample graphs, etc.
# The folder permission must be set to "anyone with the link".
INPUT_GDOWN_LINK = 'https://drive.google.com/drive/folders/1VxK5mPu8bveaqFSSYnxrsKfZrTW_qXOX?usp=sharing'

class SyntheticChemicalMotifIdentifier(BaseSyntheticChemicalMotifIdentifier):
    """Synthetic-dataset identifier using the parameters of the first paper.

    Reuses ``BaseSyntheticChemicalMotifIdentifier`` and only overrides the
    dataset one-hot dimension, the model configuration, and the way the
    generator space is built.
    """

    def __init__(self, **kwargs):
        """Forward every keyword argument to the base class, then use a
        one-hot dimension of 3 in the dataset configuration."""
        super().__init__(**kwargs)
        self.dataset_config["one_hot_dim"] = 3

    def import_model_config(self):
        """Store the network hyper-parameters in ``self.model_config``."""
        self.model_config = {
            "out_feature_length": 100,
            "max_radius": 2.5,
            "min_radius": 0,
            "number_of_basis": 10,
            # num_nodes / num_neighbors are only used for feature
            # normalization; they are kept constant so that a single network
            # can serve every crystal structure.
            "num_nodes": 12,
            "num_neighbors": 11,
            "layers": 1,
            "lmax": 1,
            "net_number": 0,
            "irreps_node_attr": "3x0e",
            "model_load": "data/inputs_doi-10.48550-arXiv.2311.01545/net.pt",
            "mul": 50,
        }

    def set_up_generator_space(self, yhat, concs, nelement=3):
        """Set up the generator space.

        Args:
            yhat (np.array): rounded fingerprints of the microstates that
                have generating concentrations.
            concs (np.array): concentrations associated with each ``yhat``.
                Only the first ``nelement`` columns are used.
            nelement (int, optional): number of atomic types allowed in the
                synthetic dataset. Defaults to 3.

        Returns:
            tuple: ``(generators, generator_space, inverses)`` -- the unique
            concentration rows, the embedding made of the simplex cartesian
            coordinates plus one structural-information column, and, for
            each microstate, the index of the concentration class it
            belongs to.
        """
        self.nelement = nelement
        trimmed_concs = concs[:, : self.nelement]

        unique_result = np.unique(trimmed_concs, axis=0, return_inverse=True)
        generators, inverses = unique_result

        # A composition with k components lives on a (k - 1)-dimensional simplex.
        simplex_dim = len(generators[0]) - 1
        self.simplex = Simplex(
            n_dim=simplex_dim, edge_length=1, nneigh=self.num_nodes
        )
        self.vertices = self.simplex.get_vertex_coordinates()

        # Barycentric coordinates (the concentrations) -> cartesian coordinates.
        xyz = self.simplex.barycenter_coods_2_cartesian(
            self.vertices, trimmed_concs
        )

        # The structural information becomes the last column of the space.
        structure_column = self.get_structural_information_PCA(
            yhat, inverses, rounding_number=8
        ).reshape(-1, 1)

        return generators, np.hstack((xyz, structure_column)), inverses


class MonteCarloChemicalMotifIdentifier(BaseMonteCarloChemicalMotifIdentifier):
    """Monte Carlo identifier configured for three atomic types.

    Reuses ``BaseMonteCarloChemicalMotifIdentifier`` and overrides where the
    synthetic dataframe is read from and which model configuration is used.
    """

    def __init__(self, **kwargs):
        """Forward every keyword argument to the base class, then set the
        number of elements and the one-hot dimension to 3."""
        super().__init__(**kwargs)
        self.nelement = 3
        self.dataset_config["one_hot_dim"] = 3

    def import_synthetic(self):
        """Import the chemical shell synthetic dataset as a pandas dataframe.

        The pickle is selected by ``self.crystal_structure`` and the result
        is stored in ``self.df_synthetic``.
        """
        pickle_path = (
            f"data/inputs_doi-10.48550-arXiv.2311.01545/df_{self.crystal_structure}.pkl"
        )
        self.df_synthetic = pd.read_pickle(pickle_path)

    def import_model_config(self):
        """Store the network hyper-parameters in ``self.model_config``."""
        self.model_config = {
            "out_feature_length": 100,
            "max_radius": 2.5,
            "min_radius": 0,
            "number_of_basis": 10,
            # num_nodes / num_neighbors are only used for feature
            # normalization; they are kept constant so that a single network
            # can serve every crystal structure.
            "num_nodes": 12,
            "num_neighbors": 11,
            "layers": 1,
            "lmax": 1,
            "net_number": 0,
            "irreps_node_attr": "3x0e",
            "model_load": "data/inputs_doi-10.48550-arXiv.2311.01545/net.pt",
            "mul": 50,
        }
81 changes: 81 additions & 0 deletions benchmark/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import time

import matplotlib.pyplot as plt
from eca import ECA_MD_Thermal

# Apply the "paper" matplotlib style to every figure created below.
# NOTE(review): "paper" does not look like a style bundled with matplotlib;
# presumably a user-installed style sheet -- confirm it exists where this runs.
plt.style.use("paper")


import os

import GenerateRandomSolution as grs
import numpy as np
from ase.build import bulk
from ovito.io import export_file
from ovito.io.ase import ase_to_ovito
from ovito.pipeline import Pipeline, StaticSource


def generate_random_system(crystal_structure: str, size: tuple):
    """Write a LAMMPS dump of a random three-component solid solution.

    A pure Cu supercell is built with ASE, handed to an OVITO pipeline,
    and its atom types are randomized by the ``GenerateRandomSolution``
    modifier using equal concentrations of 1/3 each. The result is saved to
    ``data/dumps/{crystal_structure}_{size}.dump``.

    Args:
        crystal_structure: crystal structure name passed to ``ase.build.bulk``.
        size: repetition of the cubic cell, passed to ``Atoms.repeat``.
    """
    os.makedirs("data/dumps/", exist_ok=True)

    # Start from a pure Cu system with a unit lattice parameter.
    unit_cell = bulk(name="Cu", crystalstructure=crystal_structure, a=1, cubic=True)
    supercell = unit_cell.repeat(size)

    pipeline = Pipeline(source=StaticSource(data=ase_to_ovito(supercell)))

    # Randomize the atom types to the requested concentrations; a fresh
    # seed is drawn on every call.
    random_solution = grs.GenerateRandomSolution(
        only_selected=False,
        concentrations=[1 / 3, 1 / 3, 1 / 3],
        seed=np.random.randint(1000000),
    )
    pipeline.modifiers.append(random_solution)

    output_path = f"data/dumps/{crystal_structure}_{size}.dump"
    exported_columns = [
        "Particle Identifier",
        "Particle Type",
        "Position.X",
        "Position.Y",
        "Position.Z",
    ]
    export_file(
        pipeline.compute(),
        output_path,
        "lammps/dump",
        columns=exported_columns,
    )


if __name__ == "__main__":
    # Benchmark: time the ECA prediction on random systems of growing size
    # and plot wall-clock time against the number of atoms.
    import shutil  # local import: only needed for the final cleanup

    structure = "fcc"
    nrange = np.arange(5, 60, 5)

    for n in nrange:
        generate_random_system(structure, size=(n, n, n))

    # Must mirror the file name built inside generate_random_system. Use
    # `structure` rather than a hard-coded "fcc" so both stay in sync.
    dump_files = [f"data/dumps/{structure}_{(n, n, n)}.dump" for n in nrange]
    times = []

    eca = ECA_MD_Thermal(crystal_structure=structure, rmsd_cutoff=0.05)
    for i, dump_file in enumerate(dump_files):
        t = time.perf_counter()
        root = f"data/eca_id/dump_{i}/"
        df = eca.predict(root=root, dump_file=dump_file)
        # The result is not used, but the call is part of the timed workload.
        eca.get_kl(df)
        df.to_pickle(root + "df_microstates.pkl")
        times.append(time.perf_counter() - t)

    fig, ax = plt.subplots()
    # NOTE(review): the factor 4 is presumably the number of atoms per cubic
    # fcc cell; it would need to change if `structure` changes -- confirm.
    ax.plot(nrange**3 * 4, times, "-o")
    ax.set_ylabel("Time (s)")
    ax.set_xlabel("Number of atoms")
    fig.savefig("number_of_atoms_vs_time.pdf")

    # Remove the intermediate prediction outputs. shutil.rmtree is portable
    # and avoids spawning a shell; ignore_errors=True keeps the "rm -rf"
    # behaviour of not failing when the directory is missing.
    shutil.rmtree("data/eca_id", ignore_errors=True)
192 changes: 192 additions & 0 deletions examples/benchmark.ipynb

Large diffs are not rendered by default.

Binary file added examples/number_of_atoms_vs_time.pdf
Binary file not shown.

0 comments on commit c677240

Please sign in to comment.