Skip to content

Commit

Permalink
Make dummy and functional_groups aliases for anchor (#164)
Browse files Browse the repository at this point in the history
Closes #163.

Adds the new `optional.core.anchor` and `optional.ligand.anchor` options,
which are respectively aliases for the old `optional.core.dummy` and `optional.ligand.functional_groups` options.
  • Loading branch information
BvB93 authored Nov 2, 2020
1 parent 0b883fe commit 0b3f7b3
Show file tree
Hide file tree
Showing 16 changed files with 151 additions and 90 deletions.
2 changes: 1 addition & 1 deletion CAT/attachment/ligand_anchoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def init_ligand_anchoring(ligand_df: SettingsDataFrame) -> SettingsDataFrame:
# Unpack arguments
settings = ligand_df.settings.optional
split = settings.ligand.split
functional_groups = settings.ligand.functional_groups
functional_groups = settings.ligand.anchor

# Find all functional groups; return a copy of each mol for each functional group
mol_list = []
Expand Down
20 changes: 10 additions & 10 deletions CAT/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def prep(arg: Settings, return_mol: bool = True
ligand_df, core_df, qd_df = prep_input(arg)

if qd_df is None:
# Adds the indices of the core dummy atoms to core.properties.core
# Adds the indices of the core anchor atoms to core.properties.core
core_df = prep_core(core_df)

# Optimize the ligands, find functional groups, calculate properties
Expand Down Expand Up @@ -194,7 +194,7 @@ def prep_input(arg: Settings) -> Tuple[SettingsDataFrame, SettingsDataFrame, Set

# TODO: Move this function to its own module; this is a workflow and NOT a workflow manager
def prep_core(core_df: SettingsDataFrame) -> SettingsDataFrame:
"""Function that handles the identification and marking of all core dummy atoms.
"""Function that handles the identification and marking of all core anchor atoms.
Parameters
----------
Expand All @@ -204,21 +204,21 @@ def prep_core(core_df: SettingsDataFrame) -> SettingsDataFrame:
Returns
-------
|CAT.SettingsDataFrame|_
A dataframe of cores with all dummy/anchor atoms removed.
A dataframe of cores with all anchor atoms removed.
"""
# Unpack arguments
dummy = core_df.settings.optional.core.dummy
anchor = core_df.settings.optional.core.anchor
subset = core_df.settings.optional.core.subset

idx_tuples = []
for core in core_df[MOL]:
# Checks the if the dummy is a string (atomic symbol) or integer (atomic number)
# Checks if the anchor is a string (atomic symbol) or integer (atomic number)
formula = core.get_formula()

# Returns the indices of all dummy atom ligand placeholders in the core
# Returns the indices of all anchor atom ligand placeholders in the core
if not core.properties.dummies:
at_idx = np.array([i for i, atom in enumerate(core) if atom.atnum == dummy])
at_idx = np.array([i for i, atom in enumerate(core) if atom.atnum == anchor])
else:
dummies = core.properties.dummies
at_idx = np.fromiter(dummies, count=len(dummies), dtype=int)
Expand All @@ -231,13 +231,13 @@ def prep_core(core_df: SettingsDataFrame) -> SettingsDataFrame:
at_idx.sort()
core.properties.dummies = dummies = [core[i] for i in at_idx]

# Returns an error if no dummy atoms were found
# Returns an error if no anchor atoms were found
if not dummies:
raise MoleculeError(f"{repr(to_symbol(dummy))} was specified as core dummy atom, yet "
raise MoleculeError(f"{repr(to_symbol(anchor))} was specified as core anchor atom, yet "
f"no matching atoms were found in {core.properties.name} "
f"(formula: {formula})")

# Delete all core dummy atoms
# Delete all core anchor atoms
for at in dummies:
core.delete_atom(at)
idx_tuples.append(
Expand Down
39 changes: 28 additions & 11 deletions CAT/data_handling/validate_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,22 @@
def _validate_multi_lig(s: Settings) -> None:
"""Check that one (and only one!) of ``'f'`` and ``'dummy'`` is specified."""
f = s.optional.qd.multi_ligand.f
dummy = s.optional.qd.multi_ligand.dummy

if f is dummy is None:
raise ValueError("'.multi_ligand.f' and '.multi_ligand.dummy' cannot be "
anchor = s.optional.qd.multi_ligand.anchor
if anchor is None:
anchor = s.optional.qd.multi_ligand.dummy
s.optional.qd.multi_ligand.anchor = anchor
del s.optional.qd.multi_ligand.dummy

if f is anchor is None:
raise ValueError("'.multi_ligand.f' and '.multi_ligand.anchor' cannot be "
"both unspecified or set to 'None'")
elif None not in (f, dummy):
raise ValueError("Only one of '.multi_ligand.f' and '.multi_ligand.dummy' "
elif None not in (f, anchor):
raise ValueError("Only one of '.multi_ligand.f' and '.multi_ligand.anchor' "
"should be specified")

if dummy is not None:
assert len(dummy) == len(s.optional.qd.multi_ligand.ligands)
if anchor is not None:
assert len(anchor) == len(s.optional.qd.multi_ligand.ligands)
else:
assert len(f) == len(s.optional.qd.multi_ligand.ligands) - 1

Expand Down Expand Up @@ -97,6 +102,7 @@ def validate_input(s: Settings) -> None:
# Validate some of the more complex optional arguments
if s.optional.database.mongodb:
s.optional.database.mongodb = mongodb_schema.validate(s.optional.database.mongodb)

if s.optional.core.subset:
s.optional.core.subset = subset_schema.validate(s.optional.core.subset)
if 'p' in s.optional.core.subset:
Expand All @@ -105,6 +111,11 @@ def validate_input(s: Settings) -> None:
logger.warn("The 'subset.p' parameter is deprecated; see 'subset.weight'")
p = s.optional.core.subset.pop('p')
s.optional.core.subset.weight = lambda x: -(x**p)
if s.optional.core.anchor is not None:
s.optional.core.anchor = 17
elif s.optional.core.dummy is not None:
s.optional.core.anchor = s.optional.core.dummy
del s.optional.core.dummy

if s.optional.ligand.optimize:
s.optional.ligand.optimize = ligand_opt_schema.validate(s.optional.ligand.optimize)
Expand Down Expand Up @@ -143,8 +154,14 @@ def validate_input(s: Settings) -> None:
s.optional.database.db = False

# Create RDKit molecules representing functional groups
func_groups, split = s.optional.ligand.functional_groups, s.optional.ligand.split
if not func_groups:
s.optional.ligand.functional_groups = get_functional_groups(None, split)
if s.optional.ligand.anchor is not None:
func_groups = s.optional.ligand.anchor
else:
func_groups = s.optional.ligand.functional_groups
del s.optional.ligand.functional_groups

split = s.optional.ligand.split
if func_groups is None:
s.optional.ligand.anchor = get_functional_groups(None, split)
else:
s.optional.ligand.functional_groups = get_functional_groups(func_groups)
s.optional.ligand.anchor = get_functional_groups(func_groups)
38 changes: 37 additions & 1 deletion CAT/data_handling/validation_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,13 +285,23 @@ def _get_crsjob() -> type:
'dirname':
And(str, error='optional.core.dirname expects a string'),

Optional_('dummy', default=17): # Return a tuple of atomic numbers
# Alias for `optional.core.anchor`
Optional_('dummy', default=None): # Return a tuple of atomic numbers
Or(
None,
And(val_int, Use(lambda n: to_atnum(int(n)))),
And(str, Use(to_atnum)),
error='optional.core.dummy expects a valid atomic number (int) or symbol (string)'
),

Optional_('anchor', default=None): # Return a tuple of atomic numbers
Or(
None,
And(val_int, Use(lambda n: to_atnum(int(n)))),
And(str, Use(to_atnum)),
error='optional.core.anchor expects a valid atomic number (int) or symbol (string)'
),

Optional_('subset', default=None):
Or(None, dict, error="optional.core.subset epected 'None' or a dictionary"),

Expand Down Expand Up @@ -445,6 +455,22 @@ def _get_crsjob() -> type:
'dirname':
And(str, error='optional.ligand.dirname expects a string'),

Optional_('anchor', default=None):
Or(
None,
And(str, Use(lambda n: (n,))),
And(
abc.Collection,
lambda n: all(isinstance(i, str) for i in n),
lambda n: len(n) == len(set(n)),
Use(to_tuple),
error='optional.ligand.anchor expects a list of unique SMILES strings'
),
error=('optional.ligand.anchor expects None (NoneType), a SMILES string, '
'or a list of unique SMILES string')
),

# Alias for `optional.ligand.anchor`
Optional_('functional_groups', default=None):
Or(
None,
Expand Down Expand Up @@ -913,6 +939,16 @@ def _get_crsjob() -> type:
Use(tuple)
),

Optional_('anchor', default=None):
Or(
None,
And(abc.Collection,
lambda n: not isinstance(n, str),
lambda n: len(set(n)) == len(n),
Use(lambda n: to_tuple(n, func=to_atnum)))
),

# Alias for `optional.qd.multi_ligand.anchor`
Optional_('dummy', default=None):
Or(
None,
Expand Down
22 changes: 11 additions & 11 deletions CAT/multi_ligand.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ def init_multi_ligand(qd_df):
"""Initialize the multi-ligand attachment procedure."""
workflow = WorkFlow.from_template(qd_df, name='multi_ligand')

if workflow.dummy is not None:
sequence = [to_symbol(i) for i in workflow.dummy]
if workflow.anchor is not None:
sequence = [to_symbol(i) for i in workflow.anchor]
elif workflow.f is not None:
sequence = [str(i) for i in workflow.f]
else:
raise TypeError("'workflow.f' and 'workflow.dummy' cannot be both 'None'")
raise TypeError("'workflow.f' and 'workflow.anchor' cannot be both 'None'")

columns_iter1 = ('/'.join(item for item in sequence[:i]) for i in range(1, 1+len(sequence)))
columns_iter2 = (('multi ligand', i) for i in columns_iter1)
Expand All @@ -50,15 +50,15 @@ def init_multi_ligand(qd_df):

@overload
def multi_lig(qd_series: pd.Series, ligands: Iterable[str],
dummy: Sequence[Union[str, int]], f: None,
anchor: Sequence[Union[str, int]], f: None,
**kwargs: Any) -> pd.DataFrame:
...
@overload # noqa: E302
def multi_lig(qd_series: pd.Series, ligands: Iterable[str],
dummy: None, f: Sequence[float],
anchor: None, f: Sequence[float],
**kwargs: Any) -> pd.DataFrame:
...
def multi_lig(qd_series, ligands, dummy=None, f=None, **kwargs): # noqa: E302
def multi_lig(qd_series, ligands, anchor=None, f=None, **kwargs): # noqa: E302
"""Attach multiple non-unique **ligands** to each qd in **qd_series**."""
# Read and parse the SMILES strings
ligands = smiles_to_lig(list(ligands),
Expand All @@ -75,21 +75,21 @@ def multi_lig(qd_series, ligands, dummy=None, f=None, **kwargs): # noqa: E302
if f is not None:
raise NotImplementedError("'f != None' is not yet implemented")

if dummy is not None:
return _multi_lig_dummy(qd_series, ligands, kwargs['path'], dummy, kwargs['allignment'])
if anchor is not None:
return _multi_lig_anchor(qd_series, ligands, kwargs['path'], anchor, kwargs['allignment'])
elif f is not None:
return [[NotImplemented]]
else:
raise TypeError("'f' and 'dummy' cannot be both 'None'")
raise TypeError("'f' and 'anchor' cannot be both 'None'")


def _multi_lig_dummy(qd_series, ligands, path, dummy, allignment) -> np.ndarray:
def _multi_lig_anchor(qd_series, ligands, path, anchor, allignment) -> np.ndarray:
"""Gogogo."""
ret = np.empty((len(ligands), len(qd_series)), dtype=object)
for i, qd in enumerate(qd_series):
qd = qd.copy()

for j, (ligand, atnum) in enumerate(zip(ligands, dummy)):
for j, (ligand, atnum) in enumerate(zip(ligands, anchor)):
try:
atoms = [at for at in qd if at.atnum == atnum]
assert atoms
Expand Down
4 changes: 2 additions & 2 deletions CAT/workflows/workflow_yaml.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,10 @@ multi_ligand:
allignment: [optional, core, allignment]
opt: [optional, ligand, optimize]
split: [optional, ligand, split]
functional_groups: [optional, ligand, functional_groups]
functional_groups: [optional, ligand, anchor]

ligands: [optional, qd, multi_ligand, ligands]
dummy: [optional, qd, multi_ligand, dummy]
anchor: [optional, qd, multi_ligand, anchor]
f: [optional, qd, multi_ligand, f]
mode: [optional, qd, multi_ligand, uniform]
start: [optional, qd, multi_ligand, start]
Expand Down
14 changes: 8 additions & 6 deletions docs/13_multi_ligand.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Multi-ligand attachment
- OCCC
- OCCCCCCC
- OCCCCCCCCCCCC
dummy:
anchor:
- F
- Br
- I
Expand All @@ -39,19 +39,21 @@ Multi-ligand attachment
This argument has no value by default and must thus be provided by the user.


.. attribute:: optional.qd.multi_ligand.dummy
.. attribute:: optional.qd.multi_ligand.anchor

:Parameter: * **Type** - :class:`list` [:class:`str` or :class:`int`]

Atomic number of symbol of the core dummy atoms.
Atomic number or symbol of the core anchor atoms.

The first dummy atom will be assigned to the first ligand in
:attr:`multi_ligand.ligands<optional.qd.multi_ligand.ligands>`, the second dummy atom
The first anchor atom will be assigned to the first ligand in
:attr:`multi_ligand.ligands<optional.qd.multi_ligand.ligands>`, the second anchor atom
to the second ligand, *etc.*.
The list's length should consequently be of the same length as
:attr:`multi_ligand.ligands<optional.qd.multi_ligand.ligands>`.

Works analogous to :attr:`optional.core.dummy`.
Works analogously to :attr:`optional.core.anchor`.

This option can alternatively be provided as ``optional.qd.multi_ligand.dummy``.

.. note::
This argument has no value by default and must thus be provided by the user.
8 changes: 4 additions & 4 deletions docs/1_get_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,14 @@ Verbose default Settings
core:
dirname: core
dummy: Cl
anchor: Cl
subset: null
ligand:
dirname: ligand
optimize: True
split: True
functional_groups: null
anchor: null
cosmo-rs: False
qd:
Expand Down Expand Up @@ -112,13 +112,13 @@ Maximum verbose default Settings
core:
dirname: core
dummy: Cl
anchor: Cl
subset: null
ligand:
dirname: ligand
split: True
functional_groups: null
anchor: null
cosmo-rs: False
optimize:
use_ff: False
Expand Down
Loading

0 comments on commit 0b3f7b3

Please sign in to comment.