diff --git a/overview/index.html b/overview/index.html index d9c70f2..dcc0d94 100644 --- a/overview/index.html +++ b/overview/index.html @@ -331,6 +331,13 @@ CP2K + + +
NaN
or ValueError
at any point throughout the propagation),
the walker will reset its internal state to the starting configuration
in order to make sure that subsequent propagations again start from a physically
@@ -748,20 +782,18 @@ float32
) of model evaluation.
In the vast majority of molecular dynamics simulations of realistic systems,
it is beneficial to modify the equilibrium Boltzmann distribution with bias potentials
or advanced sampling schemes so as to increase the sampling efficiency and reduce
redundancy within the trajectory.
-The PLUMED library provides the user with various choices of enhanced sampling
-techniques; the user specifies the input parameters in a PLUMED input file
-and passes it into a molecular dynamics engine (e.g. OpenMM, GROMACS, or LAMMPS).
-Similarly, in psiflow, the contents of the PLUMED input file can be directly
-converted into a PlumedBias
instance in order to apply PLUMED's enhanced
-sampling magic to dynamic simulations or evaluate collective variables (and
-bias energy) across a dataset of atomic configurations.
In the following example, we define the PLUMED input as a multi-line string in Python. We consider the particular case of applying a metadynamics bias to a collective variable - in this case the unit cell volume. @@ -783,8 +815,8 @@
PlumedBias
objects
on Dataset
instances using the bias.evaluate()
method.
-The returned object is a Parsl Future
that represents an ndarray
of shape (nstates, 2)
.
+The returned object is a Parsl Future
that represents an ndarray
of shape (nstates, ncolvars + 1)
.
The first ncolvars columns represent the values of the collective variables for each state,
and the final column contains the bias energy.
values = bias.evaluate(data_train, variable='CV') # compute the collective variable 'CV' and bias energy
@@ -812,11 +844,11 @@ Bias potentials and enhanced samp
RESTRAINT ARG=CV AT=150 KAPPA=1 LABEL=restraint
"""
walker = BiasedDynamicWalker(data_train[0], bias=PlumedBias(plumed_input)) # walker with harmonic bias
-state = walker.propagate(model=model)
+state = walker.propagate(model=model).state
# change bias center and width
walker.bias.adjust_restraint(variable='CV', kappa=2, center=200)
-state_ = walker.propagate(model)
+state_ = walker.propagate(model).state
# if the system had enough time to equilibrate with the bias, then the following should hold
assert state.result().get_volume() < state_.result().get_volume()
@@ -875,10 +907,14 @@ Level of theory
behavior.
Their location in the file system is kept track of using additional attributes
provided by the FlowAtoms
class:
-assert labeled.result().reference_status # True, because state is successfully evaluated
+assert labeled.result().reference_status # True, because state is successfully evaluated
print(labeled.result().reference_stdout) # e.g. ./psiflow_internal/000/task_logs/0000/cp2k_evaluate.stdout
print(labeled.result().reference_stderr) # e.g. ./psiflow_internal/000/task_logs/0000/cp2k_evaluate.stderr
+Reference instances provide a convenient interface for computing the absolute energy of an isolated atom:
+
CP2K
The CP2KReference
expects a traditional CP2K
input file
@@ -886,155 +922,132 @@
CP2K
it should only contain the FORCE_EVAL section.
Additional input files which define the basis sets, pseudopotentials, and
dispersion correction parameters have to be added to the calculator after initialization.
-from psiflow.reference import CP2KReference
-
-
-cp2k_input = with file('cp2k_input.txt', 'r') as f: f.read()
-reference = CP2KReference(cp2k_input)
-
-# register additional input files with the following mapping
-# if the corresponding keyword in the CP2K input file is X, use Y as key here:
-# X: BASIS_SET_FILE_NAME -> Y: basis_set
-# X: POTENTIAL_FILE_NAME -> Y: potential
-# X: PARAMETER_FILE_NAME -> Y: dftd3
-reference.add_file('basis_set', 'BASIS_MOLOPT_UZH')
-reference.add_file('potential', 'POTENTIAL_UZH')
-reference.add_file('dftd3', 'dftd3.dat')
-
-
-
+
+with open('cp2k_input.txt', 'r') as f: cp2k_input = f.read()
+reference = CP2KReference(cp2k_input)
+
+# register additional input files with the following mapping
+# if the corresponding keyword in the CP2K input file is X, use Y as key here:
+# X: BASIS_SET_FILE_NAME -> Y: basis_set
+# X: POTENTIAL_FILE_NAME -> Y: potential
+# X: PARAMETER_FILE_NAME -> Y: dftd3
+reference.add_file('basis_set', 'BASIS_MOLOPT_UZH')
+reference.add_file('potential', 'POTENTIAL_UZH')
+reference.add_file('dftd3', 'dftd3.dat')
+
+NWChem
+
For nonperiodic systems, psiflow provides an interface with NWChem,
+which implements a plethora of DFT and post-HF methods for both periodic and nonperiodic systems.
+The NWChemReference
class essentially wraps around the ASE calculator, and is similarly easy to use:
+
calculator_kwargs = {
+ 'basis': {e: '3-21g' for e in ['H', 'C', 'O', 'N']},
+ 'dft': {
+ 'xc': 'pw91lda',
+ 'mult': 1,
+ 'convergence': {
+ 'energy': 1e-6,
+ 'density': 1e-6,
+ 'gradient': 1e-6,
+ },
+ },
+ }
+reference = NWChemReference(**calculator_kwargs)
+
Learning algorithms
-The endgame of psiflow is to allow for seamless development and scalable
+
The endgame of psiflow is to allow for the seamless development and scalable
execution of online learning algorithms for interatomic
potentials.
-The BaseLearning
class provides an example interface based on which such
-algorithms may be implemented.
-Within the space of online learning, the most trivial approach is represented
+The BaseLearning
class provides an interface based on which such
+algorithms can be implemented, and it has the following characteristics:
+
+- an output folder: used for storing intermediate models, (labeled) datasets, walkers, and reported metrics.
+- dataset identifier: to facilitate logging and/or debugging of the active learning progress,
+each successfully labeled state is immediately given a unique identifier (an integer).
+This is necessary in order to keep track of which molecular dynamics log or DFT evaluation log
+belongs to which state, especially when data is shuffled in each iteration. The identifier is stored
+in the
info
dict of each of the FlowAtoms
instances, and is therefore also human-readable in the
+dataset XYZ files.
+- metrics: the
Metrics
helper class is used to compute and save various error metrics and
+other relevant diagnostics during online learning. Examples are per-element validation RMSEs
+or collective variables of the sampled data:
+dataset.log+------------+--------+--------+-------+----------+----------+----------+----------+-----------+
+| identifier | e_rmse | f_rmse | CV | f_rmse_H | f_rmse_C | f_rmse_N | f_rmse_I | f_rmse_Pb |
++------------+--------+--------+-------+----------+----------+----------+----------+-----------+
+| 0 | 0.23 | 32.15 | -4.54 | 23.82 | 47.04 | 37.72 | 27.97 | 46.47 |
+| 1 | 0.27 | 31.72 | -4.45 | 23.13 | 43.52 | 34.12 | 28.43 | 52.42 |
+| 2 | 0.45 | 33.60 | -4.49 | 27.02 | 44.40 | 40.34 | 27.77 | 48.51 |
+| 3 | 0.39 | 33.02 | -4.44 | 26.52 | 50.11 | 36.97 | 27.50 | 45.21 |
+| 4 | 0.36 | 31.75 | -4.47 | 25.15 | 41.36 | 37.35 | 27.10 | 47.16 |
+| 5 | 0.35 | 34.00 | -4.41 | 28.04 | 43.99 | 39.52 | 28.56 | 49.31 |
+...
+
+or the (a posteriori) error of individual walkers and other relevant information:
+walkers.log+--------------+---------+----------+--------+--------------+-------------+------------+-------+--------+-------------------------------------+
+| walker_index | counter | is_reset | f_rmse | disagreement | temperature | identifier | CV | e_rmse | stdout |
++--------------+---------+----------+--------+--------------+-------------+------------+-------+--------+-------------------------------------+
+| 0 | 1000 | False | 47.33 | None | 135.79 | 150 | -4.61 | 4.04 | task_7028_molecular_dynamics_openmm |
+| 1 | 1000 | False | 50.69 | None | 142.89 | 151 | -4.39 | 4.11 | task_7046_molecular_dynamics_openmm |
+| 2 | 1000 | False | 46.34 | None | 140.72 | 152 | -4.61 | 4.07 | task_7064_molecular_dynamics_openmm |
+| 3 | 1000 | False | 43.71 | None | 136.12 | 153 | -4.45 | 4.24 | task_7082_molecular_dynamics_openmm |
+...
+
+Although optional, it also provides a convenient
+Weights & Biases interface for easier navigation and interpretation of all of the metrics.
+learning.run()
: performs the actual active learning.
+- (optional) pretraining: pretraining is used to bootstrap active learning runs, in order to
+make the model familiar with bonds in the system and ensure that it doesn't go too crazy during
+sampling in the first few iterations. During pretraining, a minimal set of configurations is generated by applying
+random perturbations to the atomic positions and/or unit cell vectors (typically about 0.05 A in magnitude).
+These configurations are then evaluated using the provided
BaseReference
instance after which the obtained
+data is split into training and validation in order to pretrain the model.
+When learning.run()
is called, it decides whether or not to perform pretraining based on the state of
+the model as well as the amount of initial data that is provided.
+
+Sequential Learning
+Within the space of online learning, the most trivial approach is represented
using the SequentialLearning
class.
In sequential learning, the data generation (as performed by a set of walkers)
is interleaved with short model training steps so as to update
the knowledge in the model with the states that were sampled by the walkers
and evaluated with the chosen reference level of theory.
Take a look at the following example:
-
from psiflow.learning import SequentialLearning
-
-
-data_train = Dataset.load('initial_train.xyz')
-data_valid = Dataset.load('initial_valid.xyz')
-
-walkers = DynamicWalker.multiply( # initializes 30 walkers, with different initial configuration and seed
- 30,
- data_train, # Dataset which provides initial configurations
- timestep=0.5,
- steps=400,
- step=50,
- start=0,
- temperature=600,
- pressure=0, # NPT
- force_threshold=30,
- initial_temperature=600,
- )
-
-learning = SequentialLearning( # implements sequential learning
- path_output=path_output, # folder in which consecutive models and data should be saved
- niterations=10, # number of (generate, train) iterations
- train_from_scratch=True, # whether to train with reinitialized weights in each iteration
- train_valid_split=0.9, # partitioning of generated states into training and validation
- )
-
-data_train, data_valid = learning.run(
- model=model, # initial model
- reference=reference, # reference level of theory
- walkers=walkers, # list of walkers
- )
-
-model.save(path_output) # save new model separately
-data_train.save('final_train.xyz') # save final training data
-data_valid.save('final_valid.xyz') # save final validation data
+from psiflow.learning import SequentialLearning
+
+
+data_train = Dataset.load('initial_train.xyz')
+data_valid = Dataset.load('initial_valid.xyz')
+
+walkers = DynamicWalker.multiply( # initializes 30 walkers, with different initial configuration and seed
+ 30,
+ data_train, # Dataset which provides initial configurations
+ timestep=0.5,
+ steps=400,
+ step=50,
+ start=0,
+ temperature=600,
+ pressure=0, # NPT
+ force_threshold=30,
+ initial_temperature=600,
+ )
+
+learning = SequentialLearning( # implements sequential learning
+ path_output=path_output, # folder in which consecutive models and data should be saved
+ niterations=10, # number of (generate, train) iterations
+ train_from_scratch=True, # whether to train with reinitialized weights in each iteration
+ train_valid_split=0.9, # partitioning of generated states into training and validation
+ )
+
+data_train, data_valid = learning.run(
+ model=model, # initial model
+ reference=reference, # reference level of theory
+ walkers=walkers, # list of walkers
+ )
+
+model.save(path_output) # save new model separately
+data_train.save('final_train.xyz') # save final training data
+data_valid.save('final_valid.xyz') # save final validation data
The learning.run()
method implements the actual online learning algorithm.
In this case, it will repeat the following
@@ -1054,6 +1067,8 @@ Learning algorithms
as the entire state of the walkers (i.e. start and stop configuration,
and state of the bias potentials if present).
Additional features are demonstrated in the Examples.
+Incremental Learning
+Committee Learning
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index 1f5216c..afef8cf 100644
Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ