diff --git a/package/CHANGELOG b/package/CHANGELOG index 8feb28087e..ccf6845450 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -26,6 +26,8 @@ Fixes * Fixes bug where multiple identical connection topologies could be added to a _Connection TopologyAttr, and that deleting connections by index would only delete one of them (Issue #4762, PR #4763) + * Changes error to warning on Universe creation if guessing fails + due to missing information (Issue #4750, PR #4754) * Adds guessed attributes documentation back to each parser page and updates overall guesser docs (Issue #4696) * Fix Bohrium (Bh) atomic mass in tables.py (PR #3753) @@ -115,6 +117,10 @@ Changes numpy.testing.assert_allclose #4438) Deprecations + * MDAnalysis.topology.guessers is deprecated in favour of the new + Guessers API and will be removed in version 3.0 (PR #4752) + * MDAnalysis.topology.tables is deprecated in favour of + MDAnalysis.guesser.tables and will be removed in version 3.0 (PR #4752) * Element guessing in the ITPParser is deprecated and will be removed in version 3.0 (Issue #4698) * Unknown masses are set to 0.0 for current version, this will be depracated diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index c0ac6bcf6f..dcc8c634aa 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -410,7 +410,8 @@ def __init__(self, topology=None, *coordinates, all_coordinates=False, force_guess = list(force_guess) + ['bonds', 'angles', 'dihedrals'] self.guess_TopologyAttrs( - context, to_guess, force_guess, vdwradii=vdwradii, **kwargs) + context, to_guess, force_guess, error_if_missing=False + ) def copy(self): @@ -1498,7 +1499,9 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, return cls(mol, **kwargs) def guess_TopologyAttrs( - self, context=None, to_guess=None, force_guess=None, **kwargs): + self, context=None, to_guess=None, force_guess=None, + error_if_missing=True, **kwargs + ): """ Guess and add attributes through a specific context-aware guesser. @@ -1523,6 +1526,13 @@ def guess_TopologyAttrs( TopologyAttr does not already exist in the Universe, this has no effect. If the TopologyAttr does already exist, all values will be overwritten by guessed values. + error_if_missing: bool + If `True`, raise an error if the guesser cannot guess the attribute + due to missing TopologyAttrs used as the inputs for guessing. + If `False`, a warning will be raised instead. + Errors will always be raised if an attribute is in the + ``force_guess`` list, even if this parameter is set to False. + **kwargs: extra arguments to be passed to the guesser class Examples @@ -1537,7 +1547,11 @@ def guess_TopologyAttrs( if not context: context = self._context - guesser = get_guesser(context, self.universe, **kwargs) + # update iteratively to avoid multiple kwargs clashing + guesser_kwargs = {} + guesser_kwargs.update(self._kwargs) + guesser_kwargs.update(kwargs) + guesser = get_guesser(context, self.universe, **guesser_kwargs) self._context = guesser if to_guess is None: @@ -1577,7 +1591,14 @@ def guess_TopologyAttrs( for attr in total_guess: if guesser.is_guessable(attr): fg = attr in force_guess - values = guesser.guess_attr(attr, fg) + try: + values = guesser.guess_attr(attr, fg) + except ValueError as e: + if error_if_missing or fg: + raise e + else: + warnings.warn(str(e)) + continue if values is not None: if attr in objects: diff --git a/package/MDAnalysis/guesser/default_guesser.py b/package/MDAnalysis/guesser/default_guesser.py index f49e75b24c..87da87e12c 100644 --- a/package/MDAnalysis/guesser/default_guesser.py +++ b/package/MDAnalysis/guesser/default_guesser.py @@ -293,7 +293,7 @@ def guess_types(self, atom_types=None, indices_to_guess=None): atom_types = self._universe.atoms.names except AttributeError: raise ValueError( - "there is no reference attributes in this universe" + "there is no reference attributes in this universe " "to guess types from") if indices_to_guess is not None: diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py new file mode 100644 index 0000000000..7d81f23961 --- /dev/null +++ b/package/MDAnalysis/topology/guessers.py @@ -0,0 +1,542 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +""" +Guessing unknown Topology information --- :mod:`MDAnalysis.topology.guessers` +============================================================================= + +.. deprecated:: 2.8.0 + The :mod:`MDAnalysis.topology.guessers` module will be removed in release 3.0.0. + It is deprecated in favor of the new Guessers API. See + :mod:`MDAnalysis.guesser.default_guesser` for more details. + +In general `guess_atom_X` returns the guessed value for a single value, +while `guess_Xs` will work on an array of many atoms. + + +Example uses of guessers +------------------------ + +Guessing elements from atom names +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Currently, it is possible to guess elements from atom names using +:func:`guess_atom_element` (or the synonymous :func:`guess_atom_type`). This can +be done in the following manner:: + + import MDAnalysis as mda + from MDAnalysis.topology.guessers import guess_atom_element + from MDAnalysisTests.datafiles import PRM7 + + u = mda.Universe(PRM7) + + print(u.atoms.names[1]) # returns the atom name H1 + + element = guess_atom_element(u.atoms.names[1]) + + print(element) # returns element H + +In the above example, we take an atom named H1 and use +:func:`guess_atom_element` to guess the element hydrogen (i.e. H). It is +important to note that element guessing is not always accurate. Indeed in cases +where the atom type is not recognised, we may end up with the wrong element. +For example:: + + import MDAnalysis as mda + from MDAnalysis.topology.guessers import guess_atom_element + from MDAnalysisTests.datafiles import PRM19SBOPC + + u = mda.Universe(PRM19SBOPC) + + print(u.atoms.names[-1]) # returns the atom name EPW + + element = guess_atom_element(u.atoms.names[-1]) + + print(element) # returns element P + +Here we find that virtual site atom 'EPW' was given the element P, which +would not be an expected result. We therefore always recommend that users +carefully check the outcomes of any guessers. + +In some cases, one may want to guess elements for an entire universe and add +this guess as a topology attribute. This can be done using :func:`guess_types` +in the following manner:: + + import MDAnalysis as mda + from MDAnalysis.topology.guessers import guess_types + from MDAnalysisTests.datafiles import PRM7 + + u = mda.Universe(PRM7) + + guessed_elements = guess_types(u.atoms.names) + + u.add_TopologyAttr('elements', guessed_elements) + + print(u.atoms.elements) # returns an array of guessed elements + +More information on adding topology attributes can found in the `user guide`_. + + +.. Links + +.. _user guide: https://www.mdanalysis.org/UserGuide/examples/constructing_universe.html#Adding-topology-attributes + +""" +import numpy as np +import warnings +import re + +from ..lib import distances +from MDAnalysis.guesser import tables + + +wmsg = ( + "Deprecated in version 2.8.0\n" + "MDAnalysis.topology.guessers is deprecated in favour of " + "the new Guessers API and will be removed in MDAnalysis version 3.0.0. " + "See MDAnalysis.guesser.default_guesser for more details." +) + + +warnings.warn(wmsg, category=DeprecationWarning) + + +def guess_masses(atom_types): + """Guess the mass of many atoms based upon their type + + Parameters + ---------- + atom_types + Type of each atom + + Returns + ------- + atom_masses : np.ndarray dtype float64 + """ + validate_atom_types(atom_types) + masses = np.array([get_atom_mass(atom_t) for atom_t in atom_types], dtype=np.float64) + return masses + + +def validate_atom_types(atom_types): + """Vaildates the atom types based on whether they are available in our tables + + Parameters + ---------- + atom_types + Type of each atom + + Returns + ------- + None + + .. versionchanged:: 0.20.0 + Try uppercase atom type name as well + """ + for atom_type in np.unique(atom_types): + try: + tables.masses[atom_type] + except KeyError: + try: + tables.masses[atom_type.upper()] + except KeyError: + warnings.warn("Failed to guess the mass for the following atom types: {}".format(atom_type)) + + +def guess_types(atom_names): + """Guess the atom type of many atoms based on atom name + + Parameters + ---------- + atom_names + Name of each atom + + Returns + ------- + atom_types : np.ndarray dtype object + """ + return np.array([guess_atom_element(name) for name in atom_names], dtype=object) + + +def guess_atom_type(atomname): + """Guess atom type from the name. + + At the moment, this function simply returns the element, as + guessed by :func:`guess_atom_element`. + + + See Also + -------- + :func:`guess_atom_element` + :mod:`MDAnalysis.topology.tables` + + + """ + return guess_atom_element(atomname) + + +NUMBERS = re.compile(r'[0-9]') # match numbers +SYMBOLS = re.compile(r'[*+-]') # match *, +, - + +def guess_atom_element(atomname): + """Guess the element of the atom from the name. + + Looks in dict to see if element is found, otherwise it uses the first + character in the atomname. The table comes from CHARMM and AMBER atom + types, where the first character is not sufficient to determine the atom + type. Some GROMOS ions have also been added. + + .. Warning: The translation table is incomplete. This will probably result + in some mistakes, but it still better than nothing! + + See Also + -------- + :func:`guess_atom_type` + :mod:`MDAnalysis.topology.tables` + """ + if atomname == '': + return '' + try: + return tables.atomelements[atomname.upper()] + except KeyError: + # strip symbols + no_symbols = re.sub(SYMBOLS, '', atomname) + + # split name by numbers + no_numbers = re.split(NUMBERS, no_symbols) + no_numbers = list(filter(None, no_numbers)) #remove '' + # if no_numbers is not empty, use the first element of no_numbers + name = no_numbers[0].upper() if no_numbers else '' + + # just in case + if name in tables.atomelements: + return tables.atomelements[name] + + while name: + if name in tables.elements: + return name + if name[:-1] in tables.elements: + return name[:-1] + if name[1:] in tables.elements: + return name[1:] + if len(name) <= 2: + return name[0] + name = name[:-1] # probably element is on left not right + + # if it's numbers + return no_symbols + + +def guess_bonds(atoms, coords, box=None, **kwargs): + r"""Guess if bonds exist between two atoms based on their distance. + + Bond between two atoms is created, if the two atoms are within + + .. math:: + + d < f \cdot (R_1 + R_2) + + of each other, where :math:`R_1` and :math:`R_2` are the VdW radii + of the atoms and :math:`f` is an ad-hoc *fudge_factor*. This is + the `same algorithm that VMD uses`_. + + Parameters + ---------- + atoms : AtomGroup + atoms for which bonds should be guessed + coords : array + coordinates of the atoms (i.e., `AtomGroup.positions)`) + fudge_factor : float, optional + The factor by which atoms must overlap eachother to be considered a + bond. Larger values will increase the number of bonds found. [0.55] + vdwradii : dict, optional + To supply custom vdwradii for atoms in the algorithm. Must be a dict + of format {type:radii}. The default table of van der Waals radii is + hard-coded as :data:`MDAnalysis.topology.tables.vdwradii`. Any user + defined vdwradii passed as an argument will supercede the table + values. [``None``] + lower_bound : float, optional + The minimum bond length. All bonds found shorter than this length will + be ignored. This is useful for parsing PDB with altloc records where + atoms with altloc A and B maybe very close together and there should be + no chemical bond between them. [0.1] + box : array_like, optional + Bonds are found using a distance search, if unit cell information is + given, periodic boundary conditions will be considered in the distance + search. [``None``] + + Returns + ------- + list + List of tuples suitable for use in Universe topology building. + + Warnings + -------- + No check is done after the bonds are guessed to see if Lewis + structure is correct. This is wrong and will burn somebody. + + Raises + ------ + :exc:`ValueError` if inputs are malformed or `vdwradii` data is missing. + + + .. _`same algorithm that VMD uses`: + http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.1/ug/node26.html + + .. versionadded:: 0.7.7 + .. versionchanged:: 0.9.0 + Updated method internally to use more :mod:`numpy`, should work + faster. Should also use less memory, previously scaled as + :math:`O(n^2)`. *vdwradii* argument now augments table list + rather than replacing entirely. + """ + # why not just use atom.positions? + if len(atoms) != len(coords): + raise ValueError("'atoms' and 'coord' must be the same length") + + fudge_factor = kwargs.get('fudge_factor', 0.55) + + vdwradii = tables.vdwradii.copy() # so I don't permanently change it + user_vdwradii = kwargs.get('vdwradii', None) + if user_vdwradii: # this should make algo use their values over defaults + vdwradii.update(user_vdwradii) + + # Try using types, then elements + atomtypes = atoms.types + + # check that all types have a defined vdw + if not all(val in vdwradii for val in set(atomtypes)): + raise ValueError(("vdw radii for types: " + + ", ".join([t for t in set(atomtypes) if + not t in vdwradii]) + + ". These can be defined manually using the" + + " keyword 'vdwradii'")) + + lower_bound = kwargs.get('lower_bound', 0.1) + + if box is not None: + box = np.asarray(box) + + # to speed up checking, calculate what the largest possible bond + # atom that would warrant attention. + # then use this to quickly mask distance results later + max_vdw = max([vdwradii[t] for t in atomtypes]) + + bonds = [] + + pairs, dist = distances.self_capped_distance(coords, + max_cutoff=2.0*max_vdw, + min_cutoff=lower_bound, + box=box) + for idx, (i, j) in enumerate(pairs): + d = (vdwradii[atomtypes[i]] + vdwradii[atomtypes[j]])*fudge_factor + if (dist[idx] < d): + bonds.append((atoms[i].index, atoms[j].index)) + return tuple(bonds) + + +def guess_angles(bonds): + """Given a list of Bonds, find all angles that exist between atoms. + + Works by assuming that if atoms 1 & 2 are bonded, and 2 & 3 are bonded, + then (1,2,3) must be an angle. + + Returns + ------- + list of tuples + List of tuples defining the angles. + Suitable for use in u._topology + + + See Also + -------- + :meth:`guess_bonds` + + + .. versionadded 0.9.0 + """ + angles_found = set() + + for b in bonds: + for atom in b: + other_a = b.partner(atom) # who's my friend currently in Bond + for other_b in atom.bonds: + if other_b != b: # if not the same bond I start as + third_a = other_b.partner(atom) + desc = tuple([other_a.index, atom.index, third_a.index]) + if desc[0] > desc[-1]: # first index always less than last + desc = desc[::-1] + angles_found.add(desc) + + return tuple(angles_found) + + +def guess_dihedrals(angles): + """Given a list of Angles, find all dihedrals that exist between atoms. + + Works by assuming that if (1,2,3) is an angle, and 3 & 4 are bonded, + then (1,2,3,4) must be a dihedral. + + Returns + ------- + list of tuples + List of tuples defining the dihedrals. + Suitable for use in u._topology + + .. versionadded 0.9.0 + """ + dihedrals_found = set() + + for b in angles: + a_tup = tuple([a.index for a in b]) # angle as tuple of numbers + # if searching with b[0], want tuple of (b[2], b[1], b[0], +new) + # search the first and last atom of each angle + for atom, prefix in zip([b.atoms[0], b.atoms[-1]], + [a_tup[::-1], a_tup]): + for other_b in atom.bonds: + if not other_b.partner(atom) in b: + third_a = other_b.partner(atom) + desc = prefix + (third_a.index,) + if desc[0] > desc[-1]: + desc = desc[::-1] + dihedrals_found.add(desc) + + return tuple(dihedrals_found) + + +def guess_improper_dihedrals(angles): + """Given a list of Angles, find all improper dihedrals that exist between + atoms. + + Works by assuming that if (1,2,3) is an angle, and 2 & 4 are bonded, + then (2, 1, 3, 4) must be an improper dihedral. + ie the improper dihedral is the angle between the planes formed by + (1, 2, 3) and (1, 3, 4) + + Returns + ------- + List of tuples defining the improper dihedrals. + Suitable for use in u._topology + + .. versionadded 0.9.0 + """ + dihedrals_found = set() + + for b in angles: + atom = b[1] # select middle atom in angle + # start of improper tuple + a_tup = tuple([b[a].index for a in [1, 2, 0]]) + # if searching with b[1], want tuple of (b[1], b[2], b[0], +new) + # search the first and last atom of each angle + for other_b in atom.bonds: + other_atom = other_b.partner(atom) + # if this atom isn't in the angle I started with + if not other_atom in b: + desc = a_tup + (other_atom.index,) + if desc[0] > desc[-1]: + desc = desc[::-1] + dihedrals_found.add(desc) + + return tuple(dihedrals_found) + + +def get_atom_mass(element): + """Return the atomic mass in u for *element*. + + Masses are looked up in :data:`MDAnalysis.topology.tables.masses`. + + .. Warning:: Unknown masses are set to 0.0 + + .. versionchanged:: 0.20.0 + Try uppercase atom type name as well + """ + try: + return tables.masses[element] + except KeyError: + try: + return tables.masses[element.upper()] + except KeyError: + return 0.0 + + +def guess_atom_mass(atomname): + """Guess a mass based on the atom name. + + :func:`guess_atom_element` is used to determine the kind of atom. + + .. warning:: Anything not recognized is simply set to 0; if you rely on the + masses you might want to double check. + """ + return get_atom_mass(guess_atom_element(atomname)) + + +def guess_atom_charge(atomname): + """Guess atom charge from the name. + + .. Warning:: Not implemented; simply returns 0. + """ + # TODO: do something slightly smarter, at least use name/element + return 0.0 + + +def guess_aromaticities(atomgroup): + """Guess aromaticity of atoms using RDKit + + Parameters + ---------- + atomgroup : mda.core.groups.AtomGroup + Atoms for which the aromaticity will be guessed + + Returns + ------- + aromaticities : numpy.ndarray + Array of boolean values for the aromaticity of each atom + + + .. versionadded:: 2.0.0 + """ + mol = atomgroup.convert_to("RDKIT") + return np.array([atom.GetIsAromatic() for atom in mol.GetAtoms()]) + + +def guess_gasteiger_charges(atomgroup): + """Guess Gasteiger partial charges using RDKit + + Parameters + ---------- + atomgroup : mda.core.groups.AtomGroup + Atoms for which the charges will be guessed + + Returns + ------- + charges : numpy.ndarray + Array of float values representing the charge of each atom + + + .. versionadded:: 2.0.0 + """ + mol = atomgroup.convert_to("RDKIT") + from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges + ComputeGasteigerCharges(mol, throwOnParamFailure=True) + return np.array([atom.GetDoubleProp("_GasteigerCharge") + for atom in mol.GetAtoms()], + dtype=np.float32) diff --git a/package/MDAnalysis/topology/tables.py b/package/MDAnalysis/topology/tables.py new file mode 100644 index 0000000000..6f88368ace --- /dev/null +++ b/package/MDAnalysis/topology/tables.py @@ -0,0 +1,52 @@ +""" +MDAnalysis topology tables +========================== + +.. deprecated:: 2.8.0 + The :mod:`MDAnalysis.topology.tables` module has been moved to + :mod:`MDAnalysis.guesser.tables`. This import point will + be removed in release 3.0.0. + +The module contains static lookup tables for atom typing etc. The +tables are dictionaries that are indexed by the element. + +.. autodata:: atomelements +.. autodata:: masses +.. autodata:: vdwradii + +The original raw data are stored as multi-line strings that are +translated into dictionaries with :func:`kv2dict`. In the future, +these tables might be moved into external data files; see +:func:`kv2dict` for explanation of the file format. + +.. autofunction:: kv2dict + +The raw tables are stored in the strings + +.. autodata:: TABLE_ATOMELEMENTS +.. autodata:: TABLE_MASSES +.. autodata:: TABLE_VDWRADII +""" + +import warnings +from MDAnalysis.guesser.tables import ( + kv2dict, + TABLE_ATOMELEMENTS, + atomelements, + elements, + TABLE_MASSES, + masses, + TABLE_VDWRADII, + vdwradii, + Z2SYMB, + SYMB2Z, + SYBYL2SYMB, +) + +wmsg = ( + "Deprecated in version 2.8.0\n" + "MDAnalysis.topology.tables has been moved to " + "MDAnalysis.guesser.tables. This import point " + "will be removed in MDAnalysis version 3.0.0" +) +warnings.warn(wmsg, category=DeprecationWarning) diff --git a/package/doc/sphinx/source/documentation_pages/analysis/parallelization.rst b/package/doc/sphinx/source/documentation_pages/analysis/parallelization.rst index 91ae05fcec..3070614b5a 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis/parallelization.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis/parallelization.rst @@ -41,7 +41,8 @@ impossible with any but the ``serial`` backend. Parallelization is getting added to existing analysis classes. Initially, only :class:`MDAnalysis.analysis.rms.RMSD` supports parallel analysis, but - we aim to increase support in future releases. + we aim to increase support in future releases. Please check issues labeled + `parallelization` on the `MDAnalysis issues tracker `_. How does parallelization work @@ -106,6 +107,10 @@ If you want to write your own *parallel* analysis class, you have to implement denote if your analysis can run in parallel by following the steps under :ref:`adding-parallelization`. +.. Note:: + + Attributes that are the same for the whole trajectory, should be defined + in `__init__` method because they need to be consistent across all workers. For MDAnalysis developers ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/package/doc/sphinx/source/documentation_pages/guesser_modules.rst b/package/doc/sphinx/source/documentation_pages/guesser_modules.rst index 96cb324270..d672b748bb 100644 --- a/package/doc/sphinx/source/documentation_pages/guesser_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/guesser_modules.rst @@ -17,7 +17,8 @@ Default behavior By default, MDAnalysis will guess the "mass" and "type" (atom type) attributes for all particles in the Universe using the :class:`~MDAnalysis.guesser.default_guesser.DefaultGuesser` at the time of Universe creation, -if they are not read from the input file. +if they are not read from the input file. If the required information is not present in the input file, +a warning will be raised. Please see the :class:`~MDAnalysis.guesser.default_guesser.DefaultGuesser` for more information. diff --git a/package/doc/sphinx/source/documentation_pages/topology/guessers.rst b/package/doc/sphinx/source/documentation_pages/topology/guessers.rst new file mode 100644 index 0000000000..e6449f5ddc --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/topology/guessers.rst @@ -0,0 +1,2 @@ +.. automodule:: MDAnalysis.topology.guessers + :members: diff --git a/package/doc/sphinx/source/documentation_pages/topology/tables.rst b/package/doc/sphinx/source/documentation_pages/topology/tables.rst new file mode 100644 index 0000000000..f4d579ec9c --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/topology/tables.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.topology.tables diff --git a/package/doc/sphinx/source/documentation_pages/topology_modules.rst b/package/doc/sphinx/source/documentation_pages/topology_modules.rst index 01f3ab32e2..e1f818adab 100644 --- a/package/doc/sphinx/source/documentation_pages/topology_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/topology_modules.rst @@ -60,3 +60,5 @@ the topology readers. topology/base topology/core topology/tpr_util + topology/guessers + topology/tables diff --git a/package/doc/sphinx/source/images/AnalysisBase_parallel.png b/package/doc/sphinx/source/images/AnalysisBase_parallel.png index 960e7cc257..9a9b1ea4d7 100644 Binary files a/package/doc/sphinx/source/images/AnalysisBase_parallel.png and b/package/doc/sphinx/source/images/AnalysisBase_parallel.png differ diff --git a/testsuite/MDAnalysisTests/guesser/test_base.py b/testsuite/MDAnalysisTests/guesser/test_base.py index b429826647..4cca0de24d 100644 --- a/testsuite/MDAnalysisTests/guesser/test_base.py +++ b/testsuite/MDAnalysisTests/guesser/test_base.py @@ -30,7 +30,7 @@ from numpy.testing import assert_allclose, assert_equal -class TesttBaseGuesser(): +class TestBaseGuesser(): def test_get_guesser(self): class TestGuesser1(GuesserBase): @@ -100,3 +100,20 @@ def test_partial_guess_attr_with_unknown_no_value_label(self): top = Topology(4, 1, 1, attrs=[names, types, ]) u = mda.Universe(top, to_guess=['types']) assert_equal(u.atoms.types, ['', '', '', '']) + + +@pytest.mark.parametrize( + "universe_input", + [datafiles.DCD, datafiles.XTC, np.random.rand(3, 3), datafiles.PDB] +) +def test_universe_creation_from_coordinates(universe_input): + mda.Universe(universe_input) + + +def test_universe_creation_from_specific_array(): + a = np.array([ + [0., 0., 150.], [0., 0., 150.], [200., 0., 150.], + [0., 0., 150.], [100., 100., 150.], [200., 100., 150.], + [0., 200., 150.], [100., 200., 150.], [200., 200., 150.] + ]) + mda.Universe(a, n_atoms=9) diff --git a/testsuite/MDAnalysisTests/guesser/test_default_guesser.py b/testsuite/MDAnalysisTests/guesser/test_default_guesser.py index 2aacb28d4f..8eb55b6952 100644 --- a/testsuite/MDAnalysisTests/guesser/test_default_guesser.py +++ b/testsuite/MDAnalysisTests/guesser/test_default_guesser.py @@ -117,9 +117,11 @@ def test_partial_guess_elements(self, default_guesser): def test_guess_elements_from_no_data(self): top = Topology(5) - msg = "there is no reference attributes in this universe" - "to guess types from" - with pytest.raises(ValueError, match=(msg)): + msg = ( + "there is no reference attributes in this " + "universe to guess types from" + ) + with pytest.warns(UserWarning, match=msg): mda.Universe(top, to_guess=['types']) @pytest.mark.parametrize('name, element', ( @@ -236,6 +238,19 @@ def test_guess_bonds_water(): (3, 5))) +@pytest.mark.parametrize( + "fudge_factor, n_bonds", + [(0, 0), (0.55, 4), (200, 6)] +) +def test_guess_bonds_water_fudge_factor_passed(fudge_factor, n_bonds): + u = mda.Universe( + datafiles.two_water_gro, + fudge_factor=fudge_factor, + to_guess=("types", "bonds") + ) + assert len(u.atoms.bonds) == n_bonds + + def test_guess_bonds_adk(): u = mda.Universe(datafiles.PSF, datafiles.DCD) u.guess_TopologyAttrs(force_guess=['types']) diff --git a/testsuite/MDAnalysisTests/topology/test_guessers.py b/testsuite/MDAnalysisTests/topology/test_guessers.py new file mode 100644 index 0000000000..7ab62b56ee --- /dev/null +++ b/testsuite/MDAnalysisTests/topology/test_guessers.py @@ -0,0 +1,212 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +from importlib import reload +import pytest +from numpy.testing import assert_equal +import numpy as np + +import MDAnalysis as mda +from MDAnalysis.topology import guessers +from MDAnalysis.core.topologyattrs import Angles + +from MDAnalysisTests import make_Universe +from MDAnalysisTests.core.test_fragments import make_starshape +import MDAnalysis.tests.datafiles as datafiles + +from MDAnalysisTests.util import import_not_available + + +try: + from rdkit import Chem + from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges +except ImportError: + pass + +requires_rdkit = pytest.mark.skipif(import_not_available("rdkit"), + reason="requires RDKit") + + +def test_moved_to_guessers_warning(): + wmsg = "deprecated in favour of the new Guessers API" + with pytest.warns(DeprecationWarning, match=wmsg): + reload(guessers) + + +class TestGuessMasses(object): + def test_guess_masses(self): + out = guessers.guess_masses(['C', 'C', 'H']) + + assert isinstance(out, np.ndarray) + assert_equal(out, np.array([12.011, 12.011, 1.008])) + + def test_guess_masses_warn(self): + with pytest.warns(UserWarning): + guessers.guess_masses(['X']) + + def test_guess_masses_miss(self): + out = guessers.guess_masses(['X', 'Z']) + assert_equal(out, np.array([0.0, 0.0])) + + @pytest.mark.parametrize('element, value', (('H', 1.008), ('XYZ', 0.0), )) + def test_get_atom_mass(self, element, value): + assert guessers.get_atom_mass(element) == value + + def test_guess_atom_mass(self): + assert guessers.guess_atom_mass('1H') == 1.008 + + +class TestGuessTypes(object): + # guess_types + # guess_atom_type + # guess_atom_element + def test_guess_types(self): + out = guessers.guess_types(['MG2+', 'C12']) + + assert isinstance(out, np.ndarray) + assert_equal(out, np.array(['MG', 'C'], dtype=object)) + + def test_guess_atom_element(self): + assert guessers.guess_atom_element('MG2+') == 'MG' + + def test_guess_atom_element_empty(self): + assert guessers.guess_atom_element('') == '' + + def test_guess_atom_element_singledigit(self): + assert guessers.guess_atom_element('1') == '1' + + def test_guess_atom_element_1H(self): + assert guessers.guess_atom_element('1H') == 'H' + assert guessers.guess_atom_element('2H') == 'H' + + @pytest.mark.parametrize('name, element', ( + ('AO5*', 'O'), + ('F-', 'F'), + ('HB1', 'H'), + ('OC2', 'O'), + ('1he2', 'H'), + ('3hg2', 'H'), + ('OH-', 'O'), + ('HO', 'H'), + ('he', 'H'), + ('zn', 'ZN'), + ('Ca2+', 'CA'), + ('CA', 'C'), + ('N0A', 'N'), + ('C0U', 'C'), + ('C0S', 'C'), + ('Na+', 'NA'), + ('Cu2+', 'CU') + )) + def test_guess_element_from_name(self, name, element): + assert guessers.guess_atom_element(name) == element + + +def test_guess_charge(): + # this always returns 0.0 + assert guessers.guess_atom_charge('this') == 0.0 + + +def test_guess_bonds_Error(): + u = make_Universe(trajectory=True) + with pytest.raises(ValueError): + guessers.guess_bonds(u.atoms[:4], u.atoms.positions[:5]) + + +def test_guess_impropers(): + u = make_starshape() + + ag = u.atoms[:5] + + u.add_TopologyAttr(Angles(guessers.guess_angles(ag.bonds))) + + vals = guessers.guess_improper_dihedrals(ag.angles) + assert_equal(len(vals), 12) + + +def bond_sort(arr): + # sort from low to high, also within a tuple + # e.g. ([5, 4], [0, 1], [0, 3]) -> ([0, 1], [0, 3], [4, 5]) + out = [] + for (i, j) in arr: + if i > j: + i, j = j, i + out.append((i, j)) + return sorted(out) + +def test_guess_bonds_water(): + u = mda.Universe(datafiles.two_water_gro) + bonds = bond_sort(guessers.guess_bonds(u.atoms, u.atoms.positions, u.dimensions)) + assert_equal(bonds, ((0, 1), + (0, 2), + (3, 4), + (3, 5))) + +def test_guess_bonds_adk(): + u = mda.Universe(datafiles.PSF, datafiles.DCD) + u.atoms.types = guessers.guess_types(u.atoms.names) + bonds = bond_sort(guessers.guess_bonds(u.atoms, u.atoms.positions)) + assert_equal(np.sort(u.bonds.indices, axis=0), + np.sort(bonds, axis=0)) + +def test_guess_bonds_peptide(): + u = mda.Universe(datafiles.PSF_NAMD, datafiles.PDB_NAMD) + u.atoms.types = guessers.guess_types(u.atoms.names) + bonds = bond_sort(guessers.guess_bonds(u.atoms, u.atoms.positions)) + assert_equal(np.sort(u.bonds.indices, axis=0), + np.sort(bonds, axis=0)) + + +@pytest.mark.parametrize("smi", [ + "c1ccccc1", + "C1=CC=CC=C1", + "CCO", + "c1ccccc1Cc1ccccc1", + "CN1C=NC2=C1C(=O)N(C(=O)N2C)C", +]) +@requires_rdkit +def test_guess_aromaticities(smi): + mol = Chem.MolFromSmiles(smi) + mol = Chem.AddHs(mol) + expected = np.array([atom.GetIsAromatic() for atom in mol.GetAtoms()]) + u = mda.Universe(mol) + values = guessers.guess_aromaticities(u.atoms) + assert_equal(values, expected) + + +@pytest.mark.parametrize("smi", [ + "c1ccccc1", + "C1=CC=CC=C1", + "CCO", + "c1ccccc1Cc1ccccc1", + "CN1C=NC2=C1C(=O)N(C(=O)N2C)C", +]) +@requires_rdkit +def test_guess_gasteiger_charges(smi): + mol = Chem.MolFromSmiles(smi) + mol = Chem.AddHs(mol) + ComputeGasteigerCharges(mol, throwOnParamFailure=True) + expected = np.array([atom.GetDoubleProp("_GasteigerCharge") + for atom in mol.GetAtoms()], dtype=np.float32) + u = mda.Universe(mol) + values = guessers.guess_gasteiger_charges(u.atoms) + assert_equal(values, expected) diff --git a/testsuite/MDAnalysisTests/topology/test_tables.py b/testsuite/MDAnalysisTests/topology/test_tables.py new file mode 100644 index 0000000000..37246ad186 --- /dev/null +++ b/testsuite/MDAnalysisTests/topology/test_tables.py @@ -0,0 +1,35 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2024 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the Lesser GNU Public Licence, v2.1 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +import pytest + +from importlib import reload +import pytest + +from MDAnalysis.topology import tables + + +def test_moved_to_guessers_warning(): + wmsg = "has been moved to MDAnalysis.guesser.tables" + with pytest.warns(DeprecationWarning, match=wmsg): + reload(tables) +