Make dummy and functional_groups aliases for anchor (#164)

Closes #163. Adds the new `optional.core.anchor` and `optional.ligand.anchor` options; the old `optional.core.dummy` and `optional.ligand.functional_groups` options are kept as their respective aliases.
nlesc-nano · Nov 2, 2020 · 0b3f7b3 · 0b3f7b3
1 parent 0b883fe
commit 0b3f7b3
Show file tree

Hide file tree

Showing 16 changed files with 151 additions and 90 deletions.
diff --git a/CAT/attachment/ligand_anchoring.py b/CAT/attachment/ligand_anchoring.py
@@ -59,7 +59,7 @@ def init_ligand_anchoring(ligand_df: SettingsDataFrame) -> SettingsDataFrame:
     # Unpack arguments
     settings = ligand_df.settings.optional
     split = settings.ligand.split
-    functional_groups = settings.ligand.functional_groups
+    functional_groups = settings.ligand.anchor
 
     # Find all functional groups; return a copy of each mol for each functional group
     mol_list = []

diff --git a/CAT/base.py b/CAT/base.py
@@ -114,7 +114,7 @@ def prep(arg: Settings, return_mol: bool = True
     ligand_df, core_df, qd_df = prep_input(arg)
 
     if qd_df is None:
-        # Adds the indices of the core dummy atoms to core.properties.core
+        # Adds the indices of the core anchor atoms to core.properties.core
         core_df = prep_core(core_df)
 
         # Optimize the ligands, find functional groups, calculate properties
@@ -194,7 +194,7 @@ def prep_input(arg: Settings) -> Tuple[SettingsDataFrame, SettingsDataFrame, Set
 
 # TODO: Move this function to its own module; this is a workflow and NOT a workflow manager
 def prep_core(core_df: SettingsDataFrame) -> SettingsDataFrame:
-    """Function that handles the identification and marking of all core dummy atoms.
+    """Function that handles the identification and marking of all core anchor atoms.
 
     Parameters
     ----------
@@ -204,21 +204,21 @@ def prep_core(core_df: SettingsDataFrame) -> SettingsDataFrame:
     Returns
     -------
     |CAT.SettingsDataFrame|_
-        A dataframe of cores with all dummy/anchor atoms removed.
+        A dataframe of cores with all anchor atoms removed.
 
     """
     # Unpack arguments
-    dummy = core_df.settings.optional.core.dummy
+    anchor = core_df.settings.optional.core.anchor
     subset = core_df.settings.optional.core.subset
 
     idx_tuples = []
     for core in core_df[MOL]:
-        # Checks the if the dummy is a string (atomic symbol) or integer (atomic number)
+        # Checks if the anchor is a string (atomic symbol) or integer (atomic number)
         formula = core.get_formula()
 
-        # Returns the indices of all dummy atom ligand placeholders in the core
+        # Returns the indices of all anchor atom ligand placeholders in the core
         if not core.properties.dummies:
-            at_idx = np.array([i for i, atom in enumerate(core) if atom.atnum == dummy])
+            at_idx = np.array([i for i, atom in enumerate(core) if atom.atnum == anchor])
         else:
             dummies = core.properties.dummies
             at_idx = np.fromiter(dummies, count=len(dummies), dtype=int)
@@ -231,13 +231,13 @@ def prep_core(core_df: SettingsDataFrame) -> SettingsDataFrame:
         at_idx.sort()
         core.properties.dummies = dummies = [core[i] for i in at_idx]
 
-        # Returns an error if no dummy atoms were found
+        # Returns an error if no anchor atoms were found
         if not dummies:
-            raise MoleculeError(f"{repr(to_symbol(dummy))} was specified as core dummy atom, yet "
+            raise MoleculeError(f"{repr(to_symbol(anchor))} was specified as core anchor atom, yet "
                                 f"no matching atoms were found in {core.properties.name} "
                                 f"(formula: {formula})")
 
-        # Delete all core dummy atoms
+        # Delete all core anchor atoms
         for at in dummies:
             core.delete_atom(at)
         idx_tuples.append(

diff --git a/CAT/data_handling/validate_input.py b/CAT/data_handling/validate_input.py
@@ -51,17 +51,22 @@
 def _validate_multi_lig(s: Settings) -> None:
-    """Check that one (and only one!) of ``'f'`` and ``'dummy'`` is specified."""
+    """Check that one (and only one!) of ``'f'`` and ``'anchor'`` is specified."""
     f = s.optional.qd.multi_ligand.f
-    dummy = s.optional.qd.multi_ligand.dummy
 
-    if f is dummy is None:
-        raise ValueError("'.multi_ligand.f' and '.multi_ligand.dummy' cannot be "
+    anchor = s.optional.qd.multi_ligand.anchor
+    if anchor is None:
+        anchor = s.optional.qd.multi_ligand.dummy
+        s.optional.qd.multi_ligand.anchor = anchor
+    del s.optional.qd.multi_ligand.dummy
+
+    if f is anchor is None:
+        raise ValueError("'.multi_ligand.f' and '.multi_ligand.anchor' cannot be "
                          "both unspecified or set to 'None'")
-    elif None not in (f, dummy):
-        raise ValueError("Only one of '.multi_ligand.f' and '.multi_ligand.dummy' "
+    elif None not in (f, anchor):
+        raise ValueError("Only one of '.multi_ligand.f' and '.multi_ligand.anchor' "
                          "should be specified")
 
-    if dummy is not None:
-        assert len(dummy) == len(s.optional.qd.multi_ligand.ligands)
+    if anchor is not None:
+        assert len(anchor) == len(s.optional.qd.multi_ligand.ligands)
     else:
         assert len(f) == len(s.optional.qd.multi_ligand.ligands) - 1
 
@@ -97,6 +102,7 @@ def validate_input(s: Settings) -> None:
     # Validate some of the more complex optionala rguments
     if s.optional.database.mongodb:
         s.optional.database.mongodb = mongodb_schema.validate(s.optional.database.mongodb)
+
     if s.optional.core.subset:
         s.optional.core.subset = subset_schema.validate(s.optional.core.subset)
         if 'p' in s.optional.core.subset:
@@ -105,6 +111,11 @@ def validate_input(s: Settings) -> None:
             logger.warn("The 'subset.p' parameter is deprecated; see 'subset.weight'")
             p = s.optional.core.subset.pop('p')
             s.optional.core.subset.weight = lambda x: -(x**p)
+    if s.optional.core.anchor is None and s.optional.core.dummy is None:
+        s.optional.core.anchor = 17  # Neither option given: default to atomic number 17 (Cl)
+    elif s.optional.core.anchor is None:  # Only the 'dummy' alias was given
+        s.optional.core.anchor = s.optional.core.dummy
+    del s.optional.core.dummy  # Downstream code reads 'anchor' only
 
     if s.optional.ligand.optimize:
         s.optional.ligand.optimize = ligand_opt_schema.validate(s.optional.ligand.optimize)
@@ -143,8 +154,14 @@ def validate_input(s: Settings) -> None:
         s.optional.database.db = False
 
     # Create RDKit molecules representing functional groups
-    func_groups, split = s.optional.ligand.functional_groups, s.optional.ligand.split
-    if not func_groups:
-        s.optional.ligand.functional_groups = get_functional_groups(None, split)
+    if s.optional.ligand.anchor is not None:
+        func_groups = s.optional.ligand.anchor
+    else:
+        func_groups = s.optional.ligand.functional_groups
+    del s.optional.ligand.functional_groups
+
+    split = s.optional.ligand.split
+    if func_groups is None:
+        s.optional.ligand.anchor = get_functional_groups(None, split)
     else:
-        s.optional.ligand.functional_groups = get_functional_groups(func_groups)
+        s.optional.ligand.anchor = get_functional_groups(func_groups)
diff --git a/CAT/data_handling/validation_schemas.py b/CAT/data_handling/validation_schemas.py
@@ -285,13 +285,23 @@ def _get_crsjob() -> type:
     'dirname':
         And(str, error='optional.core.dirname expects a string'),
 
-    Optional_('dummy', default=17):  # Return a tuple of atomic numbers
+    # Alias for `optional.core.anchor`
+    Optional_('dummy', default=None):  # Return a tuple of atomic numbers
         Or(
+            None,
             And(val_int, Use(lambda n: to_atnum(int(n)))),
             And(str, Use(to_atnum)),
             error='optional.core.dummy expects a valid atomic number (int) or symbol (string)'
         ),
 
+    Optional_('anchor', default=None):  # Return a tuple of atomic numbers
+        Or(
+            None,
+            And(val_int, Use(lambda n: to_atnum(int(n)))),
+            And(str, Use(to_atnum)),
+            error='optional.core.anchor expects a valid atomic number (int) or symbol (string)'
+        ),
+
     Optional_('subset', default=None):
         Or(None, dict, error="optional.core.subset epected 'None' or a dictionary"),
 
@@ -445,6 +455,22 @@ def _get_crsjob() -> type:
     'dirname':
         And(str, error='optional.ligand.dirname expects a string'),
 
+    Optional_('anchor', default=None):
+        Or(
+            None,
+            And(str, Use(lambda n: (n,))),
+            And(
+                abc.Collection,
+                lambda n: all(isinstance(i, str) for i in n),
+                lambda n: len(n) == len(set(n)),
+                Use(to_tuple),
+                error='optional.ligand.anchor expects a list of unique SMILES strings'
+            ),
+            error=('optional.ligand.anchor expects None (NoneType), a SMILES string, '
+                   'or a list of unique SMILES string')
+        ),
+
+    # Alias for `optional.ligand.anchor`
     Optional_('functional_groups', default=None):
         Or(
             None,
@@ -913,6 +939,16 @@ def _get_crsjob() -> type:
             Use(tuple)
         ),
 
+    Optional_('anchor', default=None):
+        Or(
+            None,
+            And(abc.Collection,
+                lambda n: not isinstance(n, str),
+                lambda n: len(set(n)) == len(n),
+                Use(lambda n: to_tuple(n, func=to_atnum)))
+        ),
+
+    # Alias for `optional.qd.multi_ligand.anchor`
     Optional_('dummy', default=None):
         Or(
             None,

diff --git a/CAT/multi_ligand.py b/CAT/multi_ligand.py
@@ -26,12 +26,12 @@ def init_multi_ligand(qd_df):
     """Initialize the multi-ligand attachment procedure."""
     workflow = WorkFlow.from_template(qd_df, name='multi_ligand')
 
-    if workflow.dummy is not None:
-        sequence = [to_symbol(i) for i in workflow.dummy]
+    if workflow.anchor is not None:
+        sequence = [to_symbol(i) for i in workflow.anchor]
     elif workflow.f is not None:
         sequence = [str(i) for i in workflow.f]
     else:
-        raise TypeError("'workflow.f' and 'workflow.dummy' cannot be both 'None'")
+        raise TypeError("'workflow.f' and 'workflow.anchor' cannot be both 'None'")
 
     columns_iter1 = ('/'.join(item for item in sequence[:i]) for i in range(1, 1+len(sequence)))
     columns_iter2 = (('multi ligand', i) for i in columns_iter1)
@@ -50,15 +50,15 @@ def init_multi_ligand(qd_df):
 
 @overload
 def multi_lig(qd_series: pd.Series, ligands: Iterable[str],
-              dummy: Sequence[Union[str, int]], f: None,
+              anchor: Sequence[Union[str, int]], f: None,
               **kwargs: Any) -> pd.DataFrame:
     ...
 @overload  # noqa: E302
 def multi_lig(qd_series: pd.Series, ligands: Iterable[str],
-              dummy: None, f: Sequence[float],
+              anchor: None, f: Sequence[float],
               **kwargs: Any) -> pd.DataFrame:
     ...
-def multi_lig(qd_series, ligands, dummy=None, f=None, **kwargs):  # noqa: E302
+def multi_lig(qd_series, ligands, anchor=None, f=None, **kwargs):  # noqa: E302
     """Attach multiple non-unique **ligands** to each qd in **qd_series**."""
     # Read and parse the SMILES strings
     ligands = smiles_to_lig(list(ligands),
@@ -75,21 +75,21 @@ def multi_lig(qd_series, ligands, dummy=None, f=None, **kwargs):  # noqa: E302
     if f is not None:
         raise NotImplementedError("'f != None' is not yet implemented")
 
-    if dummy is not None:
-        return _multi_lig_dummy(qd_series, ligands, kwargs['path'], dummy, kwargs['allignment'])
+    if anchor is not None:
+        return _multi_lig_anchor(qd_series, ligands, kwargs['path'], anchor, kwargs['allignment'])
     elif f is not None:
         return [[NotImplemented]]
     else:
-        raise TypeError("'f' and 'dummy' cannot be both 'None'")
+        raise TypeError("'f' and 'anchor' cannot be both 'None'")
 
 
-def _multi_lig_dummy(qd_series, ligands, path, dummy, allignment) -> np.ndarray:
+def _multi_lig_anchor(qd_series, ligands, path, anchor, allignment) -> np.ndarray:
     """Gogogo."""
     ret = np.empty((len(ligands), len(qd_series)), dtype=object)
     for i, qd in enumerate(qd_series):
         qd = qd.copy()
 
-        for j, (ligand, atnum) in enumerate(zip(ligands, dummy)):
+        for j, (ligand, atnum) in enumerate(zip(ligands, anchor)):
             try:
                 atoms = [at for at in qd if at.atnum == atnum]
                 assert atoms

diff --git a/CAT/workflows/workflow_yaml.yaml b/CAT/workflows/workflow_yaml.yaml
@@ -149,10 +149,10 @@ multi_ligand:
         allignment: [optional, core, allignment]
         opt: [optional, ligand, optimize]
         split: [optional, ligand, split]
-        functional_groups: [optional, ligand, functional_groups]
+        functional_groups: [optional, ligand, anchor]
 
         ligands: [optional, qd, multi_ligand, ligands]
-        dummy: [optional, qd, multi_ligand, dummy]
+        anchor: [optional, qd, multi_ligand, anchor]
         f: [optional, qd, multi_ligand, f]
         mode: [optional, qd, multi_ligand, uniform]
         start: [optional, qd, multi_ligand, start]

diff --git a/docs/13_multi_ligand.rst b/docs/13_multi_ligand.rst
@@ -19,7 +19,7 @@ Multi-ligand attachment
                         - OCCC
                         - OCCCCCCC
                         - OCCCCCCCCCCCC
-                    dummy:
+                    anchor:
                         - F
                         - Br
                         - I
@@ -39,19 +39,21 @@ Multi-ligand attachment
             This argument has no value be default and must thus be provided by the user.
 
 
-    .. attribute:: optional.qd.multi_ligand.dummy
+    .. attribute:: optional.qd.multi_ligand.anchor
 
         :Parameter:     * **Type** - :class:`list` [:class:`str` or :class:`int`]
 
-        Atomic number of symbol of the core dummy atoms.
+        Atomic number or symbol of the core anchor atoms.
 
-        The first dummy atom will be assigned to the first ligand in
-        :attr:`multi_ligand.ligands<optional.qd.multi_ligand.ligands>`, the second dummy atom
+        The first anchor atom will be assigned to the first ligand in
+        :attr:`multi_ligand.ligands<optional.qd.multi_ligand.ligands>`, the second anchor atom
         to the second ligand, *etc.*.
         The list's length should consequently be of the same length as
         :attr:`multi_ligand.ligands<optional.qd.multi_ligand.ligands>`.
 
-        Works analogous to :attr:`optional.core.dummy`.
+        Works analogous to :attr:`optional.core.anchor`.
+
+        This option can alternatively be provided as ``optional.qd.multi_ligand.dummy``.
 
         .. note::
             This argument has no value be default and must thus be provided by the user.
diff --git a/docs/1_get_started.rst b/docs/1_get_started.rst
@@ -67,14 +67,14 @@ Verbose default Settings
 
         core:
             dirname: core
-            dummy: Cl
+            anchor: Cl
             subset: null
 
         ligand:
             dirname: ligand
             optimize: True
             split: True
-            functional_groups: null
+            anchor: null
             cosmo-rs: False
 
         qd:
@@ -112,13 +112,13 @@ Maximum verbose default Settings
 
         core:
             dirname: core
-            dummy: Cl
+            anchor: Cl
             subset: null
 
         ligand:
             dirname: ligand
             split: True
-            functional_groups: null
+            anchor: null
             cosmo-rs: False
             optimize:
                 use_ff: False