Merge pull request #259 from nlesc-nano/mol_anchor

ENH: Add support for polyatomic core anchors
nlesc-nano · Sep 15, 2022 · 189d46b · 189d46b
2 parents ad4892b + e5c4ded
commit 189d46b
Show file tree

Hide file tree

Showing 20 changed files with 1,015 additions and 58 deletions.
diff --git a/CAT/__version__.py b/CAT/__version__.py
@@ -1,3 +1,3 @@
 """The **CAT** version."""
 
-__version__ = '1.0.1.dev0'
+__version__ = '1.1.0.dev0'
diff --git a/CAT/attachment/core_anchoring.py b/CAT/attachment/core_anchoring.py
@@ -19,12 +19,13 @@
 import numpy as np
 from scm.plams import Molecule, MoleculeError, to_rdmol
 
+from .perp_surface import get_surface_vec
 from .distribution import distribute_idx
-from ..utils import AllignmentEnum, AllignmentTup, AnchorTup
+from ..utils import AllignmentEnum, AllignmentTup, AnchorTup, KindEnum
 
 if TYPE_CHECKING:
     from numpy.typing import NDArray
-    from numpy import int64 as i8
+    from numpy import int64 as i8, float64 as f8
 
 __all__ = ["set_core_anchors", "find_core_substructure"]
 
@@ -46,13 +47,17 @@ def set_core_anchors(
     # Get the indices of all anchor atom ligand placeholders in the core
     anchors = mol.properties.dummies
     if anchors is None:
-        anchor_idx, remove_idx = find_core_substructure(mol, anchor_tup)
+        anchor_idx_group, remove_idx = find_core_substructure(mol, anchor_tup)
+        anchor_idx = anchor_idx_group[:, anchor_tup.group_idx[0]].copy()
     else:
         anchor_idx = np.fromiter(anchors, count=len(anchors), dtype=np.int64)
         anchor_idx -= 1
         remove_idx = anchor_idx.copy()
-    if subset_kwargs:
+        anchor_idx_group = anchor_idx.reshape(-1, 1).copy()
+    if subset_kwargs is not None:
+        anchor_idx_old = anchor_idx
         anchor_idx = distribute_idx(mol, anchor_idx, **subset_kwargs)
+        anchor_idx_group = anchor_idx_group[np.isin(anchor_idx_old, anchor_idx)]
     if not len(anchor_idx):
         raise MoleculeError(f"No valid anchoring groups found in the core {formula!r}")
 
@@ -63,11 +68,20 @@ def set_core_anchors(
 
     # Returns an error if no anchor atoms were found
     if (len(mol) - len(anchor_idx)) < 4 and allignment_tup.kind == AllignmentEnum.SURFACE:
-        raise NotImplementedError(
+        raise ValueError(
             '`optional.core.allignment = "surface"` is not supported for cores with less '
             f'than 4 (non-anchor) atoms ({mol.get_formula()}); consider using '
             '`optional.core.allignment = "sphere"`'
         )
+    elif len(anchor_tup.mol.GetAtoms()) < 2 and allignment_tup.kind == AllignmentEnum.ANCHOR:
+        raise ValueError(
+            '`optional.core.allignment = "anchor"` is not supported for mono-atomic core anchors'
+        )
+
+    # Define all core vectors
+    mol.properties.core_vec = _get_core_vectors(
+        mol, anchor_idx_group, remove_idx, allignment_tup, anchor_tup
+    )
 
     # Delete all core anchor atoms
     if remove_idx is not None:
@@ -78,6 +92,48 @@ def set_core_anchors(
     return formula, ' '.join(anchor_idx.astype(str))
 
 
+def _get_core_vectors(
+    core: Molecule,
+    anchor_group_idx: "NDArray[i8]",
+    remove_idx: "None | NDArray[i8]",
+    allignment: AllignmentTup,
+    anchor_tup: AnchorTup,
+) -> "NDArray[f8]":
+    """Return a 2D array with all core (unit) vectors."""
+    core_ar = np.array(core)
+
+    # Put the (effective) coordinates of the anchors into an (n, 3) array
+    if anchor_tup.kind == KindEnum.FIRST:
+        anchor_atoms = core_ar[anchor_group_idx[:, anchor_tup.group_idx[0]]]
+    elif anchor_tup.kind == KindEnum.MEAN:
+        anchor_idx = anchor_group_idx[:, np.fromiter(anchor_tup.group_idx, np.int64)]
+        anchor_atoms = core_ar[anchor_idx].mean(axis=1)
+    else:
+        raise ValueError(f"Unknown anchor kind: {anchor_tup.kind!r}")
+
+    # Define vectors based on the various allignment options
+    if allignment.kind == AllignmentEnum.SPHERE:
+        vec = np.array(core.get_center_of_mass()) - anchor_atoms
+        vec /= np.linalg.norm(vec, axis=1)[..., None]
+    elif allignment.kind == AllignmentEnum.SURFACE:
+        if remove_idx is None:
+            vec = -get_surface_vec(core_ar, anchor_atoms)
+        else:
+            no_anchor_mask = np.ones(len(core_ar), dtype=np.bool_)
+            no_anchor_mask[remove_idx] = False
+            vec = -get_surface_vec(core_ar[no_anchor_mask], anchor_atoms)
+    elif allignment.kind == AllignmentEnum.ANCHOR:
+        anchor_mean = core_ar[anchor_group_idx].mean(axis=1)
+        vec = anchor_atoms - anchor_mean
+        vec /= np.linalg.norm(vec, axis=1)[..., None]
+    else:
+        raise ValueError(f"Unknown allignment kind: {allignment.kind!r}")
+
+    if allignment.invert:
+        vec *= -1
+    return vec
+
+
 def find_core_substructure(
     mol: Molecule,
     anchor_tup: AnchorTup,
@@ -105,9 +161,9 @@ def find_core_substructure(
 
         if remove is not None:
             remove_list += [idx_tup[i] for i in remove]
-        anchor_list.append(anchor_idx_tup[0])
+        anchor_list.append(idx_tup)
 
-    anchor_array = np.fromiter(anchor_list, dtype=np.int64, count=len(anchor_list))
+    anchor_array = np.array(anchor_list, dtype=np.int64)
     if remove is not None:
         remove_array = np.fromiter(remove_list, dtype=np.int64, count=len(remove_list))
         return anchor_array, remove_array

diff --git a/CAT/attachment/ligand_attach.py b/CAT/attachment/ligand_attach.py
@@ -34,7 +34,7 @@
 
 """
 
-from typing import List, Any, Optional, NoReturn, Union, Iterable
+from typing import List, Any, Optional, NoReturn, Union
 from collections import abc
 
 import numpy as np
@@ -45,13 +45,11 @@
 from scm.plams import Molecule, Atom, Settings, MoleculeError
 from assertionlib.ndrepr import aNDRepr
 
-from .perp_surface import get_surface_vec
 from ..mol_utils import get_index, round_coords  # noqa: F401
 from ..utils import AnchorTup, KindEnum
 from ..workflows import WorkFlow, HDF5_INDEX, MOL, OPT
 from ..settings_dataframe import SettingsDataFrame
 from ..data_handling import mol_to_file, WARN_MAP
-from ..utils import AllignmentTup, AllignmentEnum
 
 __all__ = ['init_qd_construction']
 
@@ -111,7 +109,7 @@ def construct_mol_series(qd_df: SettingsDataFrame, core_df: pd.DataFrame,
     def _get_mol(i, j, k, m):
         ij = i, j
         km = k, m
-        return ligand_to_qd(core_df.at[ij, MOL], ligand_df.at[km, MOL], path, allignment=allignment)
+        return ligand_to_qd(core_df.at[ij, MOL], ligand_df.at[km, MOL], path)
 
     mol_list = [_get_mol(i, j, k, m) for i, j, k, m in qd_df.index]
     return pd.Series(mol_list, index=qd_df.index, name=MOL, dtype=object)
@@ -154,13 +152,7 @@ def _get_df(core_index: pd.MultiIndex,
     return SettingsDataFrame(data, index=index, columns=columns, settings=settings)
 
 
-def ligand_to_qd(
-    core: Molecule,
-    ligand: Molecule,
-    path: str,
-    allignment: AllignmentTup,
-    idx_subset: "None | Iterable[int]" = None,
-) -> Molecule:
+def ligand_to_qd(core: Molecule, ligand: Molecule, path: str) -> Molecule:
     """Function that handles quantum dot (qd, *i.e.* core + all ligands) operations.
 
     Combine the core and ligands and assign properties to the quantom dot.
@@ -173,19 +165,6 @@ def ligand_to_qd(
     ligand : |plams.Molecule|_
         A ligand molecule.
 
-    allignment : :class:`str`
-        How the core vector(s) should be defined.
-        Accepted values are ``"sphere"`` and ``"surface"``:
-
-        * ``"sphere"``: Vectors from the core anchor atoms to the center of the core.
-        * ``"surface"``: Vectors perpendicular to the surface of the core.
-
-        Note that for a perfect sphere both approaches are equivalent.
-
-    idx_subset : :class:`Iterable<collections.anc.Iterable>` [:class:`int`], optional
-        An iterable with the (0-based) indices defining a subset of atoms in **core**.
-        Only relevant in the construction of the convex hull when ``allignment=surface``.
-
     Returns
     -------
     |plams.Molecule|_
@@ -199,7 +178,6 @@ def get_name() -> str:
         return f'{core_name}__{anchor}_{lig_name}'
 
     anchor_tup = ligand.properties.anchor_tup
-    idx_subset_ = idx_subset if idx_subset is not None else ...
 
     # Define vectors and indices used for rotation and translation the ligands
     vec1 = np.array([-1, 0, 0], dtype=float)  # All ligands are already alligned along the X-axis
@@ -212,16 +190,7 @@ def get_name() -> str:
     ligand.properties.dummies.properties.anchor = True
 
     # Attach the rotated ligands to the core, returning the resulting strucutre (PLAMS Molecule).
-    anchor = sanitize_dim_2(core.properties.dummies)
-    if allignment.kind == AllignmentEnum.SPHERE:
-        vec2 = np.array(core.get_center_of_mass()) - anchor
-        vec2 /= np.linalg.norm(vec2, axis=1)[..., None]
-    elif allignment.kind == AllignmentEnum.SURFACE:
-        vec2 = -get_surface_vec(np.array(core)[idx_subset_], anchor)
-    else:
-        raise ValueError(f"Unknown allignment kind: {allignment.kind}")
-    if allignment.invert:
-        vec2 *= -1
+    vec2 = core.properties.core_vec
 
     # Rotate the ligands
     if anchor_tup.dihedral is None:

diff --git a/CAT/data_handling/anchor_parsing.py b/CAT/data_handling/anchor_parsing.py
@@ -219,8 +219,8 @@ def parse_anchors(
                     raise TypeError("`dihedral != None` is not supported for core anchors")
                 elif angle_offset is not None:
                     raise TypeError("`angle_offset != None` is not supported for core anchors")
-                elif kwargs["kind"] != KindEnum.FIRST:
-                    raise NotImplementedError('`kind != "first"` is not yet supported')
+                elif kwargs["kind"] == KindEnum.MEAN_TRANSLATE:
+                    raise ValueError('`kind == "mean translate"` is not supported for core anchors')
             else:
                 # Check that at least 3 atoms are available for `angle_offset`
                 # (so a plane can be defined)

diff --git a/CAT/data_handling/validation_schemas.py b/CAT/data_handling/validation_schemas.py
@@ -291,10 +291,13 @@ def _get_crsjob() -> type:
 allignment_mapping = {
     'sphere': AllignmentTup(AllignmentEnum.SPHERE, False),
     'surface': AllignmentTup(AllignmentEnum.SURFACE, False),
+    'anchor': AllignmentTup(AllignmentEnum.ANCHOR, False),
     'sphere_invert': AllignmentTup(AllignmentEnum.SPHERE, True),
     'sphere invert': AllignmentTup(AllignmentEnum.SPHERE, True),
     'surface_invert': AllignmentTup(AllignmentEnum.SURFACE, True),
     'surface invert': AllignmentTup(AllignmentEnum.SURFACE, True),
+    'anchor_invert': AllignmentTup(AllignmentEnum.ANCHOR, True),
+    'anchor invert': AllignmentTup(AllignmentEnum.ANCHOR, True),
 }
 
 #: Schema for validating the ``['optional']['core']`` block.

diff --git a/CAT/multi_ligand.py b/CAT/multi_ligand.py
@@ -99,9 +99,7 @@ def _multi_lig_anchor(qd_series, ligands, path, anchor, allignment) -> np.ndarra
 
             coords = Molecule.as_array(None, atom_subset=atoms)
             qd.properties.dummies = np.array(coords, ndmin=2, dtype=float)
-            qd = ligand_to_qd(qd, ligand, path=path,
-                              allignment=allignment,
-                              idx_subset=qd.properties.indices)
+            qd = ligand_to_qd(qd, ligand, path=path)
             ret[j, i] = qd
             for at in reversed(atoms):
                 qd.delete_atom(qd[at.id])

diff --git a/CAT/utils.py b/CAT/utils.py
@@ -569,6 +569,7 @@ class AllignmentEnum(enum.Enum):
 
     SPHERE = 0
     SURFACE = 1
+    ANCHOR = 2
 
 
 class MultiAnchorEnum(enum.Enum):
@@ -582,16 +583,36 @@ class MultiAnchorEnum(enum.Enum):
 class AnchorTup(NamedTuple):
     """A named tuple with anchoring operation instructions."""
 
+    #: The rdkit molecule representing the functional group.
     mol: "None | Mol"
+
+    #: The string representation of the function group (usually SMILES).
     group: "None | str" = None
+
+    #: The indices of the anchoring atom(s) in :attr:`AnchorTup.group`.
     group_idx: Tuple[int, ...] = (0,)
+
+    #: The indices of the anchor inside a specific core or ligand.
     anchor_idx: Tuple[int, ...] = ()
-    anchor_group_idx: Tuple[int, ...] = ()
+
+    #: The indices of the to-be removed atoms in :attr:`AnchorTup.group`.
     remove: "None | Tuple[int, ...]" = None
+
+    #: How atoms are to-be attached when multiple anchor atoms are
+    #: specified in :attr:`AnchorTup.group_idx`.
     kind: KindEnum = KindEnum.FIRST
+
+    #: Manually offset the angle of the ligand vector by a given number in radian.
     angle_offset: "None | float" = None
+
+    #: Manually specify the ligands vector dihedral angle in radian,
+    #: rather than optimizing it w.r.t. the inter-ligand distance.
     dihedral: "None | float" = None
+
+    #: The string format of :attr:`AnchorTup.group`.
     group_format: FormatEnum = FormatEnum.SMILES
+
+    #: The to-be undertaken action when multiple functional groups are encountered.
     multi_anchor_filter: MultiAnchorEnum = MultiAnchorEnum.ALL
 
     def __repr__(self) -> str:

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -5,7 +5,7 @@ Change Log
 All notable changes to this project will be documented in this file.
 This project adheres to `Semantic Versioning <http://semver.org/>`_.
 
-1.0.1
+1.1.0
 *****
 * *placeholder*.
 

diff --git a/docs/4_optional.rst b/docs/4_optional.rst
@@ -302,10 +302,14 @@ Core
           The surface is herein defined by a convex hull constructed from the core.
         * ``"sphere"``: Define the core vectors as those drawn from the core anchor
           atoms to the cores center.
+        * ``"anchor"``: Define the core vectors based on the optimal vector of its anchors.
+          Only available in when the core contains molecular anchors, *e.g.* acetates.
         * ``"surface invert"``/``"surface_invert"``: The same as ``"surface"``,
           except the core vectors are inverted.
         * ``"sphere invert"``/``"sphere_invert"``: The same as ``"sphere"``,
           except the core vectors are inverted.
+        * ``"anchor invert"``/``"anchor_invert"``: The same as ``"anchor"``,
+          except the core vectors are inverted.
 
         Note that for a spherical core both approaches are equivalent.
 

diff --git a/setup.cfg b/setup.cfg
@@ -17,7 +17,7 @@ per-file-ignores =
 
 [tool:pytest]
 testpaths = CAT tests
-addopts = --tb=short --doctest-glob='*.py' --doctest-glob='*.rst' --cov=CAT --cov-report xml --cov-report term --cov-report html --doctest-modules
+addopts = --tb=short --doctest-glob='*.py' --doctest-glob='*.rst' --cov=CAT --cov-report xml --cov-report term --cov-report html --doctest-modules --pdbcls=IPython.terminal.debugger:TerminalPdb
 markers = slow: A marker for slow tests.
 filterwarnings =
     ignore:Conversion of the second argument of issubdtype from `.` to `.` is deprecated:FutureWarning:h5py.*

diff --git a/setup.py b/setup.py
@@ -113,6 +113,7 @@
             'pytest-cov',
             'pytest-mock',
             'h5py>=2.7.0',
+            'ipython>=5.0.0',
             'sphinx>=2.4; python_version>="3.7"',
             'sphinx_rtd_theme; python_version>="3.7"',
             'data-CAT>=0.7.0; python_version>="3.7"',

diff --git a/tests/test_CAT.py b/tests/test_CAT.py
@@ -37,7 +37,7 @@ def test_cat_fail() -> None:
     arg.optional.qd.optimize = False
     del arg.input_cores[0]["Cd68Se55.xyz"]
     arg.input_cores[0]["CdCl.xyz"] = {"indices": [2]}
-    with pytest.raises(NotImplementedError):
+    with pytest.raises(ValueError):
         prep(arg)
 
 

diff --git a/tests/test_distribution.py b/tests/test_distribution.py
@@ -2,15 +2,25 @@
 
 from pathlib import Path
 
+import yaml
+import pytest
 import numpy as np
-
-from scm.plams import Molecule
+from scm.plams import Molecule, Settings, readpdb
 from assertionlib import assertion
+from nanoutils import delete_finally
 
+from CAT.test_utils import assert_mol_allclose
+from CAT.base import prep
+from CAT.workflows import MOL as MOL_KEY
 from CAT.attachment.distribution import distribute_idx
 
 PATH = Path('tests') / 'test_files'
+LIG_PATH = PATH / 'ligand'
+QD_PATH = PATH / 'qd'
+DB_PATH = PATH / 'database'
+
 MOL = Molecule(PATH / 'core' / 'Cd68Se55.xyz')
+
 IDX = np.array([i for i, at in enumerate(MOL) if at.symbol == 'Cl'])
 IDX.setflags(write=False)
 
@@ -65,3 +75,20 @@ def test_distribute_idx() -> None:
     assertion.assert_(distribute_idx, MOL, IDX, f=0.5, randomness='bob', exception=TypeError)
     assertion.assert_(distribute_idx, MOL, IDX, f=0.5, randomness=-10, exception=ValueError)
     assertion.assert_(distribute_idx, MOL, IDX, f=0.5, randomness=1.5, exception=ValueError)
+
+
+@pytest.mark.slow
+@delete_finally(LIG_PATH, QD_PATH, DB_PATH)
+def test_cat() -> None:
+    """Tests for the CAT package."""
+    yaml_path = PATH / 'CAT_subset.yaml'
+    with open(yaml_path, 'r') as f:
+        arg = Settings(yaml.load(f, Loader=yaml.FullLoader))
+
+    arg.path = PATH
+    qd_df, _, _ = prep(arg)
+
+    assertion.len_eq(qd_df, 1)
+    qd = qd_df[MOL_KEY].iloc[0]
+    qd_ref = readpdb(PATH / "qd_test_distribution.pdb")
+    assert_mol_allclose(qd, qd_ref)
-Original file line number
+Diff line change
@@ Expand Up / @@ -5,7 +5,7 @@ Change Log @@
     All notable changes to this project will be documented in this file.
     This project adheres to `Semantic Versioning <http://semver.org/>`_.
-.0.1
+.1.0
     *****
     * *placeholder*.
@@ Expand Down @@