Commit
optimize order parameter evaluation, allow hamiltonians to be evaluated on single states
svandenhaute committed Jun 6, 2024
1 parent c0df3a5 commit 9681561
Showing 7 changed files with 116 additions and 34 deletions.
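In short, Hamiltonian.evaluate (and HamiltonianOrderParameter.evaluate) now accepts either a whole Dataset or a single Geometry / future of a Geometry. A minimal sketch of the two call patterns, assuming the EinsteinCrystal setup and `dataset` fixture used in the tests below (the import path is assumed as well):

    from psiflow.hamiltonians import EinsteinCrystal  # import path assumed

    hamiltonian = EinsteinCrystal(dataset[0], force_constant=0.1)

    evaluated = hamiltonian.evaluate(dataset, batch_size=100)  # Dataset in, Dataset out
    state = hamiltonian.evaluate(dataset[3])                   # future of a Geometry in, AppFuture out
    print(state.result().energy)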
9 changes: 6 additions & 3 deletions psiflow/data.py
@@ -812,7 +812,7 @@ def _batch_frames(
 @join_app
 @typeguard.typechecked
 def batch_apply(
-    func: Callable,
+    funcs: list[Callable],
     batch_size: int,
     length: int,
     inputs: list = [],
@@ -821,6 +821,9 @@ def batch_apply(
     nbatches = math.ceil(length / batch_size)
     batches = [psiflow.context().new_file("data_", ".xyz") for _ in range(nbatches)]
     future = batch_frames(batch_size, inputs=[inputs[0]], outputs=batches)
-    evaluated = [func(Dataset(None, extxyz=e)) for e in future.outputs]
-    f = join_frames(inputs=[e.extxyz for e in evaluated], outputs=[outputs[0]])
+    datasets = [Dataset(None, extxyz=e) for e in future.outputs]
+    for func in funcs:
+        datasets = [func(d) for d in datasets]
+    # evaluated = [func(Dataset(None, extxyz=e)) for e in future.outputs]
+    f = join_frames(inputs=[d.extxyz for d in datasets], outputs=[outputs[0]])
     return f
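batch_apply thus takes a list of callables rather than a single one and applies them in order to every batch before joining the results, which is what lets the order parameter below reuse the hamiltonian's batched evaluation without splitting the dataset twice. A hypothetical composition, using names from the other files in this commit; `hamiltonian` and `dataset` are assumed to exist:

    from functools import partial

    funcs = [
        hamiltonian.single_evaluate,            # evaluate each batch with the hamiltonian
        partial(insert_in_dataset, name="CV"),  # then insert the result under the order parameter's name
    ]
    future = batch_apply(
        funcs,
        5,                  # batch_size
        dataset.length(),
        inputs=[dataset.extxyz],
        outputs=[psiflow.context().new_file("data_", ".xyz")],
    )
    result = Dataset(None, future.outputs[0])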
9 changes: 5 additions & 4 deletions psiflow/hamiltonians/_plumed.py
@@ -42,13 +42,14 @@ def try_manual_plumed_linking() -> str:
 def remove_comments_printflush(plumed_input: str) -> str:
     new_input = []
     for line in list(plumed_input.split("\n")):
-        if line.strip().startswith("#"):
+        pre_comment = line.strip().split("#")[0].strip()
+        if len(pre_comment) == 0:
             continue
-        if line.strip().startswith("PRINT"):
+        if pre_comment.startswith("PRINT"):
             continue
-        if line.strip().startswith("FLUSH"):
+        if pre_comment.startswith("FLUSH"):
             continue
-        new_input.append(line)
+        new_input.append(pre_comment)
     return "\n".join(new_input)


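Comment handling is now based on the part of each line before the first '#', so trailing comments no longer hide PRINT or FLUSH keywords, and lines that are pure comments are still dropped. A small illustration, reusing a fragment of the PLUMED input from the updated test further down:

    plumed_str = "CV: VOLUME\nCV0: CV #lkasdjf\nPRINT ARG=CV STRIDE=10 FILE=COLVAR\n# a comment\n"
    print(remove_comments_printflush(plumed_str))
    # CV: VOLUME
    # CV0: CV
    # (the PRINT line and the pure-comment line are removed, and the trailing '#lkasdjf' is stripped)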
53 changes: 37 additions & 16 deletions psiflow/hamiltonians/hamiltonian.py
@@ -1,11 +1,12 @@
 from __future__ import annotations  # necessary for type-guarding class methods
 
 import logging
-from typing import Callable, Optional
+from typing import Callable, Optional, Union
 
 import typeguard
 from parsl.app.app import python_app
 from parsl.app.futures import DataFuture
+from parsl.dataflow.futures import AppFuture
 from parsl.data_provider.files import File
 
 import psiflow
@@ -22,16 +23,20 @@ def evaluate_function(
     outputs: list = [],
     parsl_resource_specification: dict = {},
     **parameters,  # dict values can be futures, so app must wait for those
-) -> None:
+) -> Optional[Geometry]:
     import numpy as np
     from ase import Atoms
 
     from psiflow.data import _read_frames, _write_frames
     from psiflow.geometry import NullState
 
     assert len(inputs) >= 1
-    assert len(outputs) == 1
-    states = _read_frames(inputs=[inputs[0]])
+    if isinstance(inputs[0], Geometry):
+        assert len(outputs) == 0
+        states = [inputs[0]]
+    else:
+        assert len(outputs) == 1
+        states = _read_frames(inputs=[inputs[0]])
     calculators, index_mapping = load_calculators(states, inputs[1], **parameters)
     for i, state in enumerate(states):
         if state == NullState:
@@ -54,25 +59,41 @@ def evaluate_function(
                 print(e)
                 stress = np.zeros((3, 3))
             state.stress = stress
-    _write_frames(*states, outputs=[outputs[0]])
+    if isinstance(inputs[0], Geometry):
+        return states[0]
+    else:
+        _write_frames(*states, outputs=[outputs[0]])
 
 
 @typeguard.typechecked
 @psiflow.serializable  # otherwise MixtureHamiltonian.hamiltonians is not serialized
 class Hamiltonian:
     external: Optional[psiflow._DataFuture]
 
-    def evaluate(self, dataset: Dataset, batch_size: Optional[int] = 100) -> Dataset:
-        future = batch_apply(
-            self.single_evaluate,
-            batch_size,
-            dataset.length(),
-            inputs=[dataset.extxyz],
-            outputs=[
-                psiflow.context().new_file("data_", ".xyz")
-            ],  # join_app needs outputs kwarg here!
-        )
-        return Dataset(None, future.outputs[0])
+    def evaluate(
+        self,
+        arg: Union[Dataset, Geometry, AppFuture[Geometry]],
+        batch_size: Optional[int] = 100,
+    ) -> Union[AppFuture, Dataset]:
+        if isinstance(arg, Dataset):
+            future = batch_apply(
+                [self.single_evaluate],
+                batch_size,
+                arg.length(),
+                inputs=[arg.extxyz],
+                outputs=[
+                    psiflow.context().new_file("data_", ".xyz")
+                ],  # join_app needs outputs kwarg here!
+            )
+            return Dataset(None, future.outputs[0])
+        else:
+            future = self.evaluate_app(
+                self.load_calculators,
+                inputs=[arg, self.external],
+                outputs=[],
+                **self.parameters,
+            )
+            return future
 
     # mostly for internal use
     def single_evaluate(self, dataset: Dataset) -> Dataset:
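Hamiltonian.evaluate therefore dispatches on its argument: a Dataset still goes through batch_apply and yields a Dataset, whereas a Geometry (or an AppFuture of one) is passed straight to evaluate_app without any intermediate .xyz files and yields an AppFuture of the evaluated Geometry. A sketch of the consistency this buys, mirroring the new assertion in tests/test_hamiltonian.py; `hamiltonian` and `dataset` are assumed as above:

    import numpy as np

    evaluated = hamiltonian.evaluate(dataset)      # batched path: Dataset in, Dataset out
    single = hamiltonian.evaluate(evaluated[3])    # single-state path: AppFuture of a Geometry
    assert np.allclose(single.result().energy, evaluated[3].result().energy)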
63 changes: 54 additions & 9 deletions psiflow/sampling/order.py
@@ -1,20 +1,22 @@
 from __future__ import annotations  # necessary for type-guarding class methods
 
-from typing import Union
+from functools import partial
+from typing import Union, Optional
 
 import typeguard
 from ase.units import kJ, mol
 from parsl.app.app import python_app
 from parsl.dataflow.futures import AppFuture
 
 import psiflow
-from psiflow.data import Dataset
+from psiflow.data import Dataset, batch_apply
 from psiflow.geometry import Geometry
 from psiflow.hamiltonians._plumed import PlumedHamiltonian
 from psiflow.hamiltonians.hamiltonian import Hamiltonian
 
 
-def _insert_in_state(
+@typeguard.typechecked
+def insert_in_state(
     state: Geometry,
     name: str,
 ) -> Geometry:
@@ -24,7 +26,32 @@ def _insert_in_state(
     return state
 
 
-insert_in_state = python_app(_insert_in_state, executors=["default_threads"])
+@typeguard.typechecked
+def _insert(
+    state_or_states: Union[Geometry, list[Geometry]],
+    name: str,
+) -> Union[list[Geometry], Geometry]:
+    if not isinstance(state_or_states, list):
+        return insert_in_state(state_or_states, name)
+    else:
+        for state in state_or_states:
+            insert_in_state(state, name)  # modify list in place
+        return state_or_states
+
+
+insert = python_app(_insert, executors=["default_threads"])
+
+
+@typeguard.typechecked
+def insert_in_dataset(
+    data: Dataset,
+    name: str,
+) -> Dataset:
+    geometries = insert(
+        data.geometries(),
+        name,
+    )
+    return Dataset(geometries)
 
 
 @typeguard.typechecked
@@ -51,11 +78,29 @@ def __init__(self, name: str, hamiltonian: Hamiltonian):
         super().__init__(name)
         self.hamiltonian = hamiltonian
 
-    def evaluate(self, state: Union[Geometry, AppFuture]) -> AppFuture:
-        return insert_in_state(
-            self.hamiltonian.evaluate(Dataset([state]))[0],
-            self.name,
-        )
+    def evaluate(
+        self,
+        arg: Union[Dataset, Geometry, AppFuture[Geometry]],
+        batch_size: Optional[int] = 100,
+    ) -> Union[Dataset, AppFuture]:
+        if isinstance(arg, Dataset):
+            # avoid batching the dataset twice:
+            # apply hamiltonian in batched sense and put insert afterwards
+            funcs = [
+                self.hamiltonian.single_evaluate,
+                partial(insert_in_dataset, name=self.name),
+            ]
+            future = batch_apply(
+                funcs,
+                batch_size,
+                arg.length(),
+                inputs=[arg.extxyz],
+                outputs=[psiflow.context().new_file("data_", ".xyz")],
+            )
+            return Dataset(None, future.outputs[0])
+        else:
+            state = self.hamiltonian.evaluate(arg)
+            return insert(state, self.name)
 
     def __eq__(self, other):
         if type(other) is not HamiltonianOrderParameter:
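HamiltonianOrderParameter.evaluate now accepts a Dataset and handles it in one batched pass that chains the hamiltonian evaluation with the insert, while a single state reuses the hamiltonian's new single-state path instead of being wrapped in a one-frame Dataset. Usage as exercised by the new test in tests/test_sampling.py; `order` is assumed to be a HamiltonianOrderParameter named "CV" and `dataset` the usual test fixture:

    data = order.evaluate(dataset[:10], batch_size=5)   # one batched evaluate-and-insert pass
    values = data.get("CV").result()                    # per-frame order-parameter values

    state = order.evaluate(dataset[3])                  # single state: AppFuture of a Geometry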
1 change: 1 addition & 0 deletions psiflow/sampling/walker.py
@@ -187,6 +187,7 @@ def quench(walkers: list[Walker], dataset: Dataset) -> None:
     coefficients = []
     for walker in walkers:
         c = all_hamiltonians.get_coefficients(1.0 * walker.hamiltonian)
+        assert c is not None
         coefficients.append(c)
     coefficients = np.array(coefficients)

8 changes: 6 additions & 2 deletions tests/test_hamiltonian.py
@@ -32,9 +32,9 @@ def test_get_filename_hills():
 RESTART
 UNITS LENGTH=A ENERGY=kj/mol TIME=fs
 CV: VOLUME
-CV0: CV
+CV0: CV #lkasdjf
 METAD ARG=CV0 SIGMA=100 HEIGHT=2 PACE=50 LABEL=metad FILE=test_hills sdld
-METADD ARG=CV SIGMA=100 HEIGHT=2 PACE=50 LABEL=metad sdld
+METADD ARG=CV SIGMA=100 HEIGHT=2 PACE=50 LABEL=metad sdld #fjalsdkfj
 PRINT ARG=CV,metad.bias STRIDE=10 FILE=COLVAR
 FLUSH STRIDE=10
 """
@@ -62,6 +62,10 @@ def test_einstein(dataset, dataset_h2):
     for i in range(1, 10):
         assert evaluated[i].result().energy > 0.0
         assert not np.allclose(evaluated[i].result().stress, 0.0)
+        assert np.allclose(
+            evaluated[i].result().energy,
+            hamiltonian.evaluate(evaluated[i]).result().energy,
+        )
 
     # test evaluation with NullState in data
     data = hamiltonian.evaluate(dataset[:5] + Dataset([NullState]) + dataset[5:10])
7 changes: 7 additions & 0 deletions tests/test_sampling.py
@@ -450,6 +450,13 @@ def test_order_parameter(dataset):
     assert state.energy is None
     assert np.allclose(CV, np.linalg.det(dataset[3].result().cell))
 
+    # test batch evaluation of order parameter
+    data = order.evaluate(dataset[:10], batch_size=5)
+    volumes = data.get("CV").result()
+    for i in range(10):
+        volume = np.linalg.det(dataset[i].result().cell)
+        assert np.allclose(volume, volumes[i])
+
 
 def test_walker_serialization(dataset, tmp_path):
     einstein = EinsteinCrystal(dataset[0], force_constant=0.1)
