Skip to content

Commit

Permalink
Merge pull request #259 from nlesc-nano/mol_anchor
Browse files Browse the repository at this point in the history
ENH: Add support for polyatomic core anchors
  • Loading branch information
BvB93 authored Sep 15, 2022
2 parents ad4892b + e5c4ded commit 189d46b
Show file tree
Hide file tree
Showing 20 changed files with 1,015 additions and 58 deletions.
2 changes: 1 addition & 1 deletion CAT/__version__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""The **CAT** version."""

__version__ = '1.0.1.dev0'
__version__ = '1.1.0.dev0'
70 changes: 63 additions & 7 deletions CAT/attachment/core_anchoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@
import numpy as np
from scm.plams import Molecule, MoleculeError, to_rdmol

from .perp_surface import get_surface_vec
from .distribution import distribute_idx
from ..utils import AllignmentEnum, AllignmentTup, AnchorTup
from ..utils import AllignmentEnum, AllignmentTup, AnchorTup, KindEnum

if TYPE_CHECKING:
from numpy.typing import NDArray
from numpy import int64 as i8
from numpy import int64 as i8, float64 as f8

__all__ = ["set_core_anchors", "find_core_substructure"]

Expand All @@ -46,13 +47,17 @@ def set_core_anchors(
# Get the indices of all anchor atom ligand placeholders in the core
anchors = mol.properties.dummies
if anchors is None:
anchor_idx, remove_idx = find_core_substructure(mol, anchor_tup)
anchor_idx_group, remove_idx = find_core_substructure(mol, anchor_tup)
anchor_idx = anchor_idx_group[:, anchor_tup.group_idx[0]].copy()
else:
anchor_idx = np.fromiter(anchors, count=len(anchors), dtype=np.int64)
anchor_idx -= 1
remove_idx = anchor_idx.copy()
if subset_kwargs:
anchor_idx_group = anchor_idx.reshape(-1, 1).copy()
if subset_kwargs is not None:
anchor_idx_old = anchor_idx
anchor_idx = distribute_idx(mol, anchor_idx, **subset_kwargs)
anchor_idx_group = anchor_idx_group[np.isin(anchor_idx_old, anchor_idx)]
if not len(anchor_idx):
raise MoleculeError(f"No valid anchoring groups found in the core {formula!r}")

Expand All @@ -63,11 +68,20 @@ def set_core_anchors(

# Returns an error if no anchor atoms were found
if (len(mol) - len(anchor_idx)) < 4 and allignment_tup.kind == AllignmentEnum.SURFACE:
raise NotImplementedError(
raise ValueError(
'`optional.core.allignment = "surface"` is not supported for cores with less '
f'than 4 (non-anchor) atoms ({mol.get_formula()}); consider using '
'`optional.core.allignment = "sphere"`'
)
elif len(anchor_tup.mol.GetAtoms()) < 2 and allignment_tup.kind == AllignmentEnum.ANCHOR:
raise ValueError(
'`optional.core.allignment = "anchor"` is not supported for mono-atomic core anchors'
)

# Define all core vectors
mol.properties.core_vec = _get_core_vectors(
mol, anchor_idx_group, remove_idx, allignment_tup, anchor_tup
)

# Delete all core anchor atoms
if remove_idx is not None:
Expand All @@ -78,6 +92,48 @@ def set_core_anchors(
return formula, ' '.join(anchor_idx.astype(str))


def _get_core_vectors(
core: Molecule,
anchor_group_idx: "NDArray[i8]",
remove_idx: "None | NDArray[i8]",
allignment: AllignmentTup,
anchor_tup: AnchorTup,
) -> "NDArray[f8]":
"""Return a 2D array with all core (unit) vectors."""
core_ar = np.array(core)

# Put the (effective) coordinates of the anchors into an (n, 3) array
if anchor_tup.kind == KindEnum.FIRST:
anchor_atoms = core_ar[anchor_group_idx[:, anchor_tup.group_idx[0]]]
elif anchor_tup.kind == KindEnum.MEAN:
anchor_idx = anchor_group_idx[:, np.fromiter(anchor_tup.group_idx, np.int64)]
anchor_atoms = core_ar[anchor_idx].mean(axis=1)
else:
raise ValueError(f"Unknown anchor kind: {anchor_tup.kind!r}")

# Define vectors based on the various allignment options
if allignment.kind == AllignmentEnum.SPHERE:
vec = np.array(core.get_center_of_mass()) - anchor_atoms
vec /= np.linalg.norm(vec, axis=1)[..., None]
elif allignment.kind == AllignmentEnum.SURFACE:
if remove_idx is None:
vec = -get_surface_vec(core_ar, anchor_atoms)
else:
no_anchor_mask = np.ones(len(core_ar), dtype=np.bool_)
no_anchor_mask[remove_idx] = False
vec = -get_surface_vec(core_ar[no_anchor_mask], anchor_atoms)
elif allignment.kind == AllignmentEnum.ANCHOR:
anchor_mean = core_ar[anchor_group_idx].mean(axis=1)
vec = anchor_atoms - anchor_mean
vec /= np.linalg.norm(vec, axis=1)[..., None]
else:
raise ValueError(f"Unknown allignment kind: {allignment.kind!r}")

if allignment.invert:
vec *= -1
return vec


def find_core_substructure(
mol: Molecule,
anchor_tup: AnchorTup,
Expand Down Expand Up @@ -105,9 +161,9 @@ def find_core_substructure(

if remove is not None:
remove_list += [idx_tup[i] for i in remove]
anchor_list.append(anchor_idx_tup[0])
anchor_list.append(idx_tup)

anchor_array = np.fromiter(anchor_list, dtype=np.int64, count=len(anchor_list))
anchor_array = np.array(anchor_list, dtype=np.int64)
if remove is not None:
remove_array = np.fromiter(remove_list, dtype=np.int64, count=len(remove_list))
return anchor_array, remove_array
Expand Down
39 changes: 4 additions & 35 deletions CAT/attachment/ligand_attach.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"""

from typing import List, Any, Optional, NoReturn, Union, Iterable
from typing import List, Any, Optional, NoReturn, Union
from collections import abc

import numpy as np
Expand All @@ -45,13 +45,11 @@
from scm.plams import Molecule, Atom, Settings, MoleculeError
from assertionlib.ndrepr import aNDRepr

from .perp_surface import get_surface_vec
from ..mol_utils import get_index, round_coords # noqa: F401
from ..utils import AnchorTup, KindEnum
from ..workflows import WorkFlow, HDF5_INDEX, MOL, OPT
from ..settings_dataframe import SettingsDataFrame
from ..data_handling import mol_to_file, WARN_MAP
from ..utils import AllignmentTup, AllignmentEnum

__all__ = ['init_qd_construction']

Expand Down Expand Up @@ -111,7 +109,7 @@ def construct_mol_series(qd_df: SettingsDataFrame, core_df: pd.DataFrame,
def _get_mol(i, j, k, m):
ij = i, j
km = k, m
return ligand_to_qd(core_df.at[ij, MOL], ligand_df.at[km, MOL], path, allignment=allignment)
return ligand_to_qd(core_df.at[ij, MOL], ligand_df.at[km, MOL], path)

mol_list = [_get_mol(i, j, k, m) for i, j, k, m in qd_df.index]
return pd.Series(mol_list, index=qd_df.index, name=MOL, dtype=object)
Expand Down Expand Up @@ -154,13 +152,7 @@ def _get_df(core_index: pd.MultiIndex,
return SettingsDataFrame(data, index=index, columns=columns, settings=settings)


def ligand_to_qd(
core: Molecule,
ligand: Molecule,
path: str,
allignment: AllignmentTup,
idx_subset: "None | Iterable[int]" = None,
) -> Molecule:
def ligand_to_qd(core: Molecule, ligand: Molecule, path: str) -> Molecule:
"""Function that handles quantum dot (qd, *i.e.* core + all ligands) operations.
Combine the core and ligands and assign properties to the quantom dot.
Expand All @@ -173,19 +165,6 @@ def ligand_to_qd(
ligand : |plams.Molecule|_
A ligand molecule.
allignment : :class:`str`
How the core vector(s) should be defined.
Accepted values are ``"sphere"`` and ``"surface"``:
* ``"sphere"``: Vectors from the core anchor atoms to the center of the core.
* ``"surface"``: Vectors perpendicular to the surface of the core.
Note that for a perfect sphere both approaches are equivalent.
idx_subset : :class:`Iterable<collections.anc.Iterable>` [:class:`int`], optional
An iterable with the (0-based) indices defining a subset of atoms in **core**.
Only relevant in the construction of the convex hull when ``allignment=surface``.
Returns
-------
|plams.Molecule|_
Expand All @@ -199,7 +178,6 @@ def get_name() -> str:
return f'{core_name}__{anchor}_{lig_name}'

anchor_tup = ligand.properties.anchor_tup
idx_subset_ = idx_subset if idx_subset is not None else ...

# Define vectors and indices used for rotation and translation the ligands
vec1 = np.array([-1, 0, 0], dtype=float) # All ligands are already alligned along the X-axis
Expand All @@ -212,16 +190,7 @@ def get_name() -> str:
ligand.properties.dummies.properties.anchor = True

# Attach the rotated ligands to the core, returning the resulting strucutre (PLAMS Molecule).
anchor = sanitize_dim_2(core.properties.dummies)
if allignment.kind == AllignmentEnum.SPHERE:
vec2 = np.array(core.get_center_of_mass()) - anchor
vec2 /= np.linalg.norm(vec2, axis=1)[..., None]
elif allignment.kind == AllignmentEnum.SURFACE:
vec2 = -get_surface_vec(np.array(core)[idx_subset_], anchor)
else:
raise ValueError(f"Unknown allignment kind: {allignment.kind}")
if allignment.invert:
vec2 *= -1
vec2 = core.properties.core_vec

# Rotate the ligands
if anchor_tup.dihedral is None:
Expand Down
4 changes: 2 additions & 2 deletions CAT/data_handling/anchor_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ def parse_anchors(
raise TypeError("`dihedral != None` is not supported for core anchors")
elif angle_offset is not None:
raise TypeError("`angle_offset != None` is not supported for core anchors")
elif kwargs["kind"] != KindEnum.FIRST:
raise NotImplementedError('`kind != "first"` is not yet supported')
elif kwargs["kind"] == KindEnum.MEAN_TRANSLATE:
raise ValueError('`kind == "mean translate"` is not supported for core anchors')
else:
# Check that at least 3 atoms are available for `angle_offset`
# (so a plane can be defined)
Expand Down
3 changes: 3 additions & 0 deletions CAT/data_handling/validation_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,10 +291,13 @@ def _get_crsjob() -> type:
allignment_mapping = {
'sphere': AllignmentTup(AllignmentEnum.SPHERE, False),
'surface': AllignmentTup(AllignmentEnum.SURFACE, False),
'anchor': AllignmentTup(AllignmentEnum.ANCHOR, False),
'sphere_invert': AllignmentTup(AllignmentEnum.SPHERE, True),
'sphere invert': AllignmentTup(AllignmentEnum.SPHERE, True),
'surface_invert': AllignmentTup(AllignmentEnum.SURFACE, True),
'surface invert': AllignmentTup(AllignmentEnum.SURFACE, True),
'anchor_invert': AllignmentTup(AllignmentEnum.ANCHOR, True),
'anchor invert': AllignmentTup(AllignmentEnum.ANCHOR, True),
}

#: Schema for validating the ``['optional']['core']`` block.
Expand Down
4 changes: 1 addition & 3 deletions CAT/multi_ligand.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,7 @@ def _multi_lig_anchor(qd_series, ligands, path, anchor, allignment) -> np.ndarra

coords = Molecule.as_array(None, atom_subset=atoms)
qd.properties.dummies = np.array(coords, ndmin=2, dtype=float)
qd = ligand_to_qd(qd, ligand, path=path,
allignment=allignment,
idx_subset=qd.properties.indices)
qd = ligand_to_qd(qd, ligand, path=path)
ret[j, i] = qd
for at in reversed(atoms):
qd.delete_atom(qd[at.id])
Expand Down
23 changes: 22 additions & 1 deletion CAT/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,7 @@ class AllignmentEnum(enum.Enum):

SPHERE = 0
SURFACE = 1
ANCHOR = 2


class MultiAnchorEnum(enum.Enum):
Expand All @@ -582,16 +583,36 @@ class MultiAnchorEnum(enum.Enum):
class AnchorTup(NamedTuple):
"""A named tuple with anchoring operation instructions."""

#: The rdkit molecule representing the functional group.
mol: "None | Mol"

#: The string representation of the function group (usually SMILES).
group: "None | str" = None

#: The indices of the anchoring atom(s) in :attr:`AnchorTup.group`.
group_idx: Tuple[int, ...] = (0,)

#: The indices of the anchor inside a specific core or ligand.
anchor_idx: Tuple[int, ...] = ()
anchor_group_idx: Tuple[int, ...] = ()

#: The indices of the to-be removed atoms in :attr:`AnchorTup.group`.
remove: "None | Tuple[int, ...]" = None

#: How atoms are to-be attached when multiple anchor atoms are
#: specified in :attr:`AnchorTup.group_idx`.
kind: KindEnum = KindEnum.FIRST

#: Manually offset the angle of the ligand vector by a given number in radian.
angle_offset: "None | float" = None

#: Manually specify the ligands vector dihedral angle in radian,
#: rather than optimizing it w.r.t. the inter-ligand distance.
dihedral: "None | float" = None

#: The string format of :attr:`AnchorTup.group`.
group_format: FormatEnum = FormatEnum.SMILES

#: The to-be undertaken action when multiple functional groups are encountered.
multi_anchor_filter: MultiAnchorEnum = MultiAnchorEnum.ALL

def __repr__(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Change Log
All notable changes to this project will be documented in this file.
This project adheres to `Semantic Versioning <http://semver.org/>`_.

1.0.1
1.1.0
*****
* *placeholder*.

Expand Down
4 changes: 4 additions & 0 deletions docs/4_optional.rst
Original file line number Diff line number Diff line change
Expand Up @@ -302,10 +302,14 @@ Core
The surface is herein defined by a convex hull constructed from the core.
* ``"sphere"``: Define the core vectors as those drawn from the core anchor
atoms to the cores center.
* ``"anchor"``: Define the core vectors based on the optimal vector of its anchors.
Only available in when the core contains molecular anchors, *e.g.* acetates.
* ``"surface invert"``/``"surface_invert"``: The same as ``"surface"``,
except the core vectors are inverted.
* ``"sphere invert"``/``"sphere_invert"``: The same as ``"sphere"``,
except the core vectors are inverted.
* ``"anchor invert"``/``"anchor_invert"``: The same as ``"anchor"``,
except the core vectors are inverted.

Note that for a spherical core both approaches are equivalent.

Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ per-file-ignores =

[tool:pytest]
testpaths = CAT tests
addopts = --tb=short --doctest-glob='*.py' --doctest-glob='*.rst' --cov=CAT --cov-report xml --cov-report term --cov-report html --doctest-modules
addopts = --tb=short --doctest-glob='*.py' --doctest-glob='*.rst' --cov=CAT --cov-report xml --cov-report term --cov-report html --doctest-modules --pdbcls=IPython.terminal.debugger:TerminalPdb
markers = slow: A marker for slow tests.
filterwarnings =
ignore:Conversion of the second argument of issubdtype from `.` to `.` is deprecated:FutureWarning:h5py.*
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@
'pytest-cov',
'pytest-mock',
'h5py>=2.7.0',
'ipython>=5.0.0',
'sphinx>=2.4; python_version>="3.7"',
'sphinx_rtd_theme; python_version>="3.7"',
'data-CAT>=0.7.0; python_version>="3.7"',
Expand Down
2 changes: 1 addition & 1 deletion tests/test_CAT.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def test_cat_fail() -> None:
arg.optional.qd.optimize = False
del arg.input_cores[0]["Cd68Se55.xyz"]
arg.input_cores[0]["CdCl.xyz"] = {"indices": [2]}
with pytest.raises(NotImplementedError):
with pytest.raises(ValueError):
prep(arg)


Expand Down
31 changes: 29 additions & 2 deletions tests/test_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,25 @@

from pathlib import Path

import yaml
import pytest
import numpy as np

from scm.plams import Molecule
from scm.plams import Molecule, Settings, readpdb
from assertionlib import assertion
from nanoutils import delete_finally

from CAT.test_utils import assert_mol_allclose
from CAT.base import prep
from CAT.workflows import MOL as MOL_KEY
from CAT.attachment.distribution import distribute_idx

PATH = Path('tests') / 'test_files'
LIG_PATH = PATH / 'ligand'
QD_PATH = PATH / 'qd'
DB_PATH = PATH / 'database'

MOL = Molecule(PATH / 'core' / 'Cd68Se55.xyz')

IDX = np.array([i for i, at in enumerate(MOL) if at.symbol == 'Cl'])
IDX.setflags(write=False)

Expand Down Expand Up @@ -65,3 +75,20 @@ def test_distribute_idx() -> None:
assertion.assert_(distribute_idx, MOL, IDX, f=0.5, randomness='bob', exception=TypeError)
assertion.assert_(distribute_idx, MOL, IDX, f=0.5, randomness=-10, exception=ValueError)
assertion.assert_(distribute_idx, MOL, IDX, f=0.5, randomness=1.5, exception=ValueError)


@pytest.mark.slow
@delete_finally(LIG_PATH, QD_PATH, DB_PATH)
def test_cat() -> None:
"""Tests for the CAT package."""
yaml_path = PATH / 'CAT_subset.yaml'
with open(yaml_path, 'r') as f:
arg = Settings(yaml.load(f, Loader=yaml.FullLoader))

arg.path = PATH
qd_df, _, _ = prep(arg)

assertion.len_eq(qd_df, 1)
qd = qd_df[MOL_KEY].iloc[0]
qd_ref = readpdb(PATH / "qd_test_distribution.pdb")
assert_mol_allclose(qd, qd_ref)
Loading

0 comments on commit 189d46b

Please sign in to comment.