From 8939b3e74dc78fc307291a4471a43fb70c8c1628 Mon Sep 17 00:00:00 2001 From: Yuxuan Zhuang Date: Thu, 26 Sep 2024 03:03:56 +0200 Subject: [PATCH 01/15] Adjust num of parts to accomodate number of frames (#4710) * Fixes #4685 * Changes made in this Pull Request: - set n_parts to the total number of frames being analyzed if n_parts is bigger and warn - add test - update CHANGELOG --- package/CHANGELOG | 2 ++ package/MDAnalysis/analysis/base.py | 7 +++++++ testsuite/MDAnalysisTests/analysis/test_base.py | 14 ++++++++++++++ 3 files changed, 23 insertions(+) diff --git a/package/CHANGELOG b/package/CHANGELOG index d799b52e4e..35e9e0e3cb 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -23,6 +23,8 @@ The rules for this file: * 2.8.0 Fixes + * set `n_parts` to the total number of frames being analyzed if + `n_parts` is bigger. (Issue #4685) * Catch higher dimensional indexing in GroupBase & ComponentBase (Issue #4647) * Do not raise an Error reading H5MD files with datasets like `observables//` (part of Issue #4598, PR #4615) diff --git a/package/MDAnalysis/analysis/base.py b/package/MDAnalysis/analysis/base.py index 47a7eccd13..930f4fa90c 100644 --- a/package/MDAnalysis/analysis/base.py +++ b/package/MDAnalysis/analysis/base.py @@ -577,6 +577,13 @@ def _setup_computation_groups( # similar to list(enumerate(frames)) enumerated_frames = np.vstack([np.arange(len(used_frames)), used_frames]).T + if len(enumerated_frames) == 0: + return [np.empty((0, 2), dtype=np.int64)] + elif len(enumerated_frames) < n_parts: + # Issue #4685 + n_parts = len(enumerated_frames) + warnings.warn(f"Set `n_parts` to {n_parts} to match the total " + "number of frames being analyzed") return np.array_split(enumerated_frames, n_parts) diff --git a/testsuite/MDAnalysisTests/analysis/test_base.py b/testsuite/MDAnalysisTests/analysis/test_base.py index 6e6b792196..623b2ca5fe 100644 --- a/testsuite/MDAnalysisTests/analysis/test_base.py +++ b/testsuite/MDAnalysisTests/analysis/test_base.py @@ -198,6 +198,20 @@ def test_start_stop_step_parallel(u, run_kwargs, frames, client_FrameAnalysis): assert_almost_equal(an.times, frames+1, decimal=4, err_msg=TIMES_ERR) +def test_reset_n_parts_to_n_frames(u): + """ + Issue #4685 + https://github.com/MDAnalysis/mdanalysis/issues/4685 + """ + a = FrameAnalysis(u.trajectory) + with pytest.warns(UserWarning, match='Set `n_parts` to'): + a.run(backend='multiprocessing', + start=0, + stop=1, + n_workers=2, + n_parts=2) + + @pytest.mark.parametrize('run_kwargs,frames', [ ({}, np.arange(98)), ({'start': 20}, np.arange(20, 98)), From b7f91a5e4d70786e1539576bd012484b5685b365 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Fri, 27 Sep 2024 01:23:52 -0700 Subject: [PATCH 02/15] Fix docs in timestep.pyx (#4719) - changed outdated reference of AtomGroup.coordinates() to AtomGroup.positions - changed outdated reference of AtomGroup.velocities() to AtomGroup.velocities - minor doc fixes --- package/MDAnalysis/coordinates/timestep.pyx | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/package/MDAnalysis/coordinates/timestep.pyx b/package/MDAnalysis/coordinates/timestep.pyx index 09304ec85e..f2649d44c6 100644 --- a/package/MDAnalysis/coordinates/timestep.pyx +++ b/package/MDAnalysis/coordinates/timestep.pyx @@ -70,11 +70,11 @@ MDAnalysis. .. Note:: Normally one does not directly access :attr:`_pos` but uses - the :meth:`~MDAnalysis.core.groups.AtomGroup.coordinates` - method of an :class:`~MDAnalysis.core.groups.AtomGroup` but + the :attr:`~MDAnalysis.core.groups.AtomGroup.positions` + attribute of a :class:`~MDAnalysis.core.groups.AtomGroup` but sometimes it can be faster to directly use the raw coordinates. Any changes to this array are immediately - reflected in atom positions. If the frame is written to a new + reflected in the atom positions. If the frame is written to a new trajectory then the coordinates are changed. If a new trajectory frame is loaded, then *all* contents of :attr:`_pos` are overwritten. @@ -83,17 +83,17 @@ MDAnalysis. :class:`numpy.ndarray` of dtype :class:`~numpy.float32`. of shape (*n_atoms*, 3), holding the raw velocities (in MDAnalysis - units, i.e. typically Å/ps). + units, i.e., Å/ps). .. Note:: Normally velocities are accessed through the - :attr:`velocities` or the - :meth:`~MDAnalysis.core.groups.AtomGroup.velocities` - method of an :class:`~MDAnalysis.core.groups.AtomGroup` + :attr:`Timestep.velocities` attribute or the + :attr:`~MDAnalysis.core.groups.AtomGroup.velocities` + attribute of an :class:`~MDAnalysis.core.groups.AtomGroup` :attr:`~Timestep._velocities` only exists if the :attr:`has_velocities` - flag is True + flag is ``True``. .. versionadded:: 0.7.5 @@ -103,7 +103,7 @@ MDAnalysis. (*n_atoms*, 3), holding the forces :attr:`~Timestep._forces` only exists if :attr:`has_forces` - is True + is ``True``. .. versionadded:: 0.11.0 Added as optional to :class:`Timestep` From 84ee67b99fc3bf165d2f58057fac3315d8bb33af Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Fri, 27 Sep 2024 02:59:17 -0700 Subject: [PATCH 03/15] added parallelization in analysis.dihedrals (#4682) * fix #4673 * Changes made in this Pull Request: - parallelized Dihedral, Ramachandran, Janin - added tests (including fixtures in analysis/conftest.py) - update CHANGELOG Co-authored-by: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> --- package/CHANGELOG | 10 +- package/MDAnalysis/analysis/dihedrals.py | 27 +++++- package/MDAnalysis/analysis/rms.py | 14 +-- .../MDAnalysisTests/analysis/conftest.py | 31 +++++- .../analysis/test_dihedrals.py | 94 +++++++++++++------ .../MDAnalysisTests/analysis/test_rms.py | 6 +- 6 files changed, 133 insertions(+), 49 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 35e9e0e3cb..62ab8a0463 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -38,14 +38,14 @@ Fixes * Fix `MDAnalysis.analysis.align.AlignTraj` not accepting writer kwargs (Issue #4564, PR #4565) * Fix #4259 via removing argument `parallelizable` of `NoJump` transformation. - * Fix doctest errors of analysis/pca.py related to rounding issues + * Fix doctest errors of analysis/pca.py related to rounding issues (Issue #3925, PR #4377) * Convert openmm Quantity to raw value for KE and PE in OpenMMSimulationReader. * Atomname methods can handle empty groups (Issue #2879, PR #4529) * Add support for TPR files produced by Gromacs 2024.1 (PR #4523) * Remove mutable data from ``progressbar_kwargs`` argument in ``AnalysisBase.run()`` (PR #4459) - * Fix ChainReader `__repr__()` method when sub-reader is MemoryReader + * Fix ChainReader `__repr__()` method when sub-reader is MemoryReader (Issue #3349, PR #4407) * Fix bug in PCA preventing use of `frames=...` syntax (PR #4423) * Fix `analysis/diffusionmap.py` iteration through trajectory to iteration @@ -61,15 +61,17 @@ Enhancements * Enables parallelization for analysis.gnm.GNMAnalysis (Issue #4672) * explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680) * enables parallelization for analysis.bat.BAT (Issue #4663) + * enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin} + (Issue #4673) * Improve error message for `AtomGroup.unwrap()` when bonds are not present.(Issue #4436, PR #4642) * Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP) * Added a tqdm progress bar for `MDAnalysis.analysis.pca.PCA.transform()` (PR #4531) * Improved performance of PDBWriter (Issue #2785, PR #4472) * Added parsing of arbitrary columns of the LAMMPS dump parser. (Issue #3504) - * Documented the r0 attribute in the `Contacts` class and added the + * Documented the r0 attribute in the `Contacts` class and added the `n_initial_contacts` attribute, with documentation. (Issue #2604, PR #4415) - * Implement average structures with iterative algorithm from + * Implement average structures with iterative algorithm from DOI 10.1021/acs.jpcb.7b11988. (Issue #2039, PR #4524) Changes diff --git a/package/MDAnalysis/analysis/dihedrals.py b/package/MDAnalysis/analysis/dihedrals.py index 29c8eee0bd..56b95fc42c 100644 --- a/package/MDAnalysis/analysis/dihedrals.py +++ b/package/MDAnalysis/analysis/dihedrals.py @@ -245,7 +245,7 @@ import warnings import MDAnalysis as mda -from MDAnalysis.analysis.base import AnalysisBase +from MDAnalysis.analysis.base import AnalysisBase, ResultsGroup from MDAnalysis.lib.distances import calc_dihedrals from MDAnalysis.analysis.data.filenames import Rama_ref, Janin_ref @@ -267,8 +267,16 @@ class Dihedral(AnalysisBase): .. versionchanged:: 2.0.0 :attr:`angles` results are now stored in a :class:`MDAnalysis.analysis.base.Results` instance. - + .. versionchanged:: 2.8.0 + introduced :meth:`get_supported_backends` allowing for parallel + execution on ``multiprocessing`` and ``dask`` backends. """ + _analysis_algorithm_is_parallelizable = True + + @classmethod + def get_supported_backends(cls): + return ('serial', 'multiprocessing', 'dask',) + def __init__(self, atomgroups, **kwargs): """Parameters @@ -298,6 +306,9 @@ def __init__(self, atomgroups, **kwargs): def _prepare(self): self.results.angles = [] + def _get_aggregator(self): + return ResultsGroup(lookup={'angles': ResultsGroup.ndarray_vstack}) + def _single_frame(self): angle = calc_dihedrals(self.ag1.positions, self.ag2.positions, self.ag3.positions, self.ag4.positions, @@ -379,8 +390,15 @@ class Ramachandran(AnalysisBase): .. versionchanged:: 2.0.0 :attr:`angles` results are now stored in a :class:`MDAnalysis.analysis.base.Results` instance. - + .. versionchanged:: 2.8.0 + introduced :meth:`get_supported_backends` allowing for parallel + execution on ``multiprocessing`` and ``dask`` backends. """ + _analysis_algorithm_is_parallelizable = True + + @classmethod + def get_supported_backends(cls): + return ('serial', 'multiprocessing', 'dask',) def __init__(self, atomgroup, c_name='C', n_name='N', ca_name='CA', check_protein=True, **kwargs): @@ -437,6 +455,9 @@ def __init__(self, atomgroup, c_name='C', n_name='N', ca_name='CA', def _prepare(self): self.results.angles = [] + def _get_aggregator(self): + return ResultsGroup(lookup={'angles': ResultsGroup.ndarray_vstack}) + def _single_frame(self): phi_angles = calc_dihedrals(self.ag1.positions, self.ag2.positions, self.ag3.positions, self.ag4.positions, diff --git a/package/MDAnalysis/analysis/rms.py b/package/MDAnalysis/analysis/rms.py index afb11ed7d2..b8dcb97065 100644 --- a/package/MDAnalysis/analysis/rms.py +++ b/package/MDAnalysis/analysis/rms.py @@ -103,16 +103,16 @@ ax.set_xlabel("time (ps)") ax.set_ylabel(r"RMSD ($\\AA$)") fig.savefig("rmsd_all_CORE_LID_NMP_ref1AKE.pdf") - + .. _figure-RMSD: .. figure:: /images/RSMD_plot.png :scale: 50 % :alt: RMSD plot - + RMSD plot for backbone and CORE, LID, NMP domain of the protein. - + Functions --------- @@ -359,8 +359,8 @@ class RMSD(AnalysisBase): :attr:`rmsd` results are now stored in a :class:`MDAnalysis.analysis.base.Results` instance. .. versionchanged:: 2.8.0 - introduced a :meth:`get_supported_backends` allowing for execution on with - ``multiprocessing`` and ``dask`` backends. + introduced :meth:`get_supported_backends` allowing for parallel + execution on ``multiprocessing`` and ``dask`` backends. """ _analysis_algorithm_is_parallelizable = True @@ -427,8 +427,8 @@ def __init__(self, atomgroup, reference=None, select='all', weights_groupselections : False or list of {"mass", ``None`` or array_like} (optional) 1. ``False`` will apply imposed weights to `groupselections` from - ``weights`` option if ``weights`` is either ``"mass"`` or ``None``. - Otherwise will assume a list of length equal to length of + ``weights`` option if ``weights`` is either ``"mass"`` or ``None``. + Otherwise will assume a list of length equal to length of `groupselections` filled with ``None`` values. 2. A list of {"mass", ``None`` or array_like} with the length of `groupselections` diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index f53cdcfaac..5c6157d3bb 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -7,10 +7,10 @@ OldAPIAnalysis, ) from MDAnalysis.analysis.rms import RMSD, RMSF +from MDAnalysis.analysis.dihedrals import Dihedral, Ramachandran, Janin from MDAnalysis.analysis.bat import BAT -from MDAnalysis.lib.util import is_installed from MDAnalysis.analysis.gnm import GNMAnalysis - +from MDAnalysis.lib.util import is_installed def params_for_cls(cls, exclude: list[str] = None): """ @@ -81,6 +81,8 @@ def client_OldAPIAnalysis(request): return request.param +# MDAnalysis.analysis.rms + @pytest.fixture(scope='module', params=params_for_cls(RMSD)) def client_RMSD(request): return request.param @@ -91,11 +93,32 @@ def client_RMSF(request): return request.param +# MDAnalysis.analysis.dihedrals + +@pytest.fixture(scope='module', params=params_for_cls(Dihedral)) +def client_Dihedral(request): + return request.param + + +@pytest.fixture(scope='module', params=params_for_cls(Ramachandran)) +def client_Ramachandran(request): + return request.param + + +@pytest.fixture(scope='module', params=params_for_cls(Janin)) +def client_Janin(request): + return request.param + + +# MDAnalysis.analysis.gnm + @pytest.fixture(scope='module', params=params_for_cls(GNMAnalysis)) def client_GNMAnalysis(request): return request.param - + +# MDAnalysis.analysis.bat + @pytest.fixture(scope='module', params=params_for_cls(BAT)) def client_BAT(request): - return request.param \ No newline at end of file + return request.param diff --git a/testsuite/MDAnalysisTests/analysis/test_dihedrals.py b/testsuite/MDAnalysisTests/analysis/test_dihedrals.py index 731d5cb039..c5d291bf96 100644 --- a/testsuite/MDAnalysisTests/analysis/test_dihedrals.py +++ b/testsuite/MDAnalysisTests/analysis/test_dihedrals.py @@ -30,6 +30,7 @@ DihedralsArray, RamaArray, GLYRamaArray, JaninArray, LYSJaninArray, PDB_rama, PDB_janin) +import MDAnalysis.analysis.dihedrals from MDAnalysis.analysis.dihedrals import Dihedral, Ramachandran, Janin @@ -42,36 +43,42 @@ def atomgroup(self): return ag - def test_dihedral(self, atomgroup): - dihedral = Dihedral([atomgroup]).run() + def test_dihedral(self, atomgroup, client_Dihedral): + # client_Dihedral is defined in testsuite/analysis/conftest.py + # among with other testing fixtures. During testing, it will + # collect all possible backends and reasonable number of workers + # for a given AnalysisBase subclass, and extend the tests + # to run with all of them. + + dihedral = Dihedral([atomgroup]).run(**client_Dihedral) test_dihedral = np.load(DihedralArray) assert_allclose(dihedral.results.angles, test_dihedral, rtol=0, atol=1.5e-5, err_msg="error: dihedral angles should " "match test values") - def test_dihedral_single_frame(self, atomgroup): - dihedral = Dihedral([atomgroup]).run(start=5, stop=6) + def test_dihedral_single_frame(self, atomgroup, client_Dihedral): + dihedral = Dihedral([atomgroup]).run(start=5, stop=6, **client_Dihedral) test_dihedral = [np.load(DihedralArray)[5]] assert_allclose(dihedral.results.angles, test_dihedral, rtol=0, atol=1.5e-5, err_msg="error: dihedral angles should " "match test vales") - def test_atomgroup_list(self, atomgroup): - dihedral = Dihedral([atomgroup, atomgroup]).run() + def test_atomgroup_list(self, atomgroup, client_Dihedral): + dihedral = Dihedral([atomgroup, atomgroup]).run(**client_Dihedral) test_dihedral = np.load(DihedralsArray) assert_allclose(dihedral.results.angles, test_dihedral, rtol=0, atol=1.5e-5, err_msg="error: dihedral angles should " "match test values") - def test_enough_atoms(self, atomgroup): + def test_enough_atoms(self, atomgroup, client_Dihedral): with pytest.raises(ValueError): - dihedral = Dihedral([atomgroup[:2]]).run() + dihedral = Dihedral([atomgroup[:2]]).run(**client_Dihedral) - def test_dihedral_attr_warning(self, atomgroup): - dihedral = Dihedral([atomgroup]).run(stop=2) + def test_dihedral_attr_warning(self, atomgroup, client_Dihedral): + dihedral = Dihedral([atomgroup]).run(stop=2, **client_Dihedral) wmsg = "The `angle` attribute was deprecated in MDAnalysis 2.0.0" with pytest.warns(DeprecationWarning, match=wmsg): @@ -88,37 +95,39 @@ def universe(self): def rama_ref_array(self): return np.load(RamaArray) - def test_ramachandran(self, universe, rama_ref_array): - rama = Ramachandran(universe.select_atoms("protein")).run() + def test_ramachandran(self, universe, rama_ref_array, client_Ramachandran): + rama = Ramachandran(universe.select_atoms("protein")).run( + **client_Ramachandran) assert_allclose(rama.results.angles, rama_ref_array, rtol=0, atol=1.5e-5, err_msg="error: dihedral angles should " "match test values") - def test_ramachandran_single_frame(self, universe, rama_ref_array): + def test_ramachandran_single_frame(self, universe, rama_ref_array, client_Ramachandran): rama = Ramachandran(universe.select_atoms("protein")).run( - start=5, stop=6) + start=5, stop=6, **client_Ramachandran) assert_allclose(rama.results.angles[0], rama_ref_array[5], rtol=0, atol=1.5e-5, err_msg="error: dihedral angles should " "match test values") - def test_ramachandran_residue_selections(self, universe): - rama = Ramachandran(universe.select_atoms("resname GLY")).run() + def test_ramachandran_residue_selections(self, universe, client_Ramachandran): + rama = Ramachandran(universe.select_atoms("resname GLY")).run( + **client_Ramachandran) test_rama = np.load(GLYRamaArray) assert_allclose(rama.results.angles, test_rama, rtol=0, atol=1.5e-5, err_msg="error: dihedral angles should " "match test values") - def test_outside_protein_length(self, universe): + def test_outside_protein_length(self, universe, client_Ramachandran): with pytest.raises(ValueError): rama = Ramachandran(universe.select_atoms("resid 220"), - check_protein=True).run() + check_protein=True).run(**client_Ramachandran) - def test_outside_protein_unchecked(self, universe): + def test_outside_protein_unchecked(self, universe, client_Ramachandran): rama = Ramachandran(universe.select_atoms("resid 220"), - check_protein=False).run() + check_protein=False).run(**client_Ramachandran) def test_protein_ends(self, universe): with pytest.warns(UserWarning) as record: @@ -158,15 +167,15 @@ def universe_tpr(self): def janin_ref_array(self): return np.load(JaninArray) - def test_janin(self, universe, janin_ref_array): - self._test_janin(universe, janin_ref_array) + def test_janin(self, universe, janin_ref_array, client_Janin): + self._test_janin(universe, janin_ref_array, client_Janin) - def test_janin_tpr(self, universe_tpr, janin_ref_array): + def test_janin_tpr(self, universe_tpr, janin_ref_array, client_Janin): """Test that CYSH are filtered (#2898)""" - self._test_janin(universe_tpr, janin_ref_array) + self._test_janin(universe_tpr, janin_ref_array, client_Janin) - def _test_janin(self, u, ref_array): - janin = Janin(u.select_atoms("protein")).run() + def _test_janin(self, u, ref_array, client_Janin): + janin = Janin(u.select_atoms("protein")).run(**client_Janin) # Test precision lowered to account for platform differences with osx assert_allclose(janin.results.angles, ref_array, rtol=0, atol=1.5e-3, @@ -180,8 +189,8 @@ def test_janin_single_frame(self, universe, janin_ref_array): err_msg="error: dihedral angles should " "match test values") - def test_janin_residue_selections(self, universe): - janin = Janin(universe.select_atoms("resname LYS")).run() + def test_janin_residue_selections(self, universe, client_Janin): + janin = Janin(universe.select_atoms("resname LYS")).run(**client_Janin) test_janin = np.load(LYSJaninArray) assert_allclose(janin.results.angles, test_janin, rtol=0, atol=1.5e-3, @@ -213,3 +222,32 @@ def test_janin_attr_warning(self, universe): wmsg = "The `angle` attribute was deprecated in MDAnalysis 2.0.0" with pytest.warns(DeprecationWarning, match=wmsg): assert_equal(janin.angles, janin.results.angles) + + +# tests for parallelization + +@pytest.mark.parametrize( + "classname,is_parallelizable", + [ + (MDAnalysis.analysis.dihedrals.Dihedral, True), + (MDAnalysis.analysis.dihedrals.Ramachandran, True), + (MDAnalysis.analysis.dihedrals.Janin, True), + ] +) +def test_class_is_parallelizable(classname, is_parallelizable): + assert classname._analysis_algorithm_is_parallelizable == is_parallelizable + + +@pytest.mark.parametrize( + "classname,backends", + [ + (MDAnalysis.analysis.dihedrals.Dihedral, + ('serial', 'multiprocessing', 'dask',)), + (MDAnalysis.analysis.dihedrals.Ramachandran, + ('serial', 'multiprocessing', 'dask',)), + (MDAnalysis.analysis.dihedrals.Janin, + ('serial', 'multiprocessing', 'dask',)), + ] +) +def test_supported_backends(classname, backends): + assert classname.get_supported_backends() == backends diff --git a/testsuite/MDAnalysisTests/analysis/test_rms.py b/testsuite/MDAnalysisTests/analysis/test_rms.py index d42993feb4..87822e0b79 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rms.py +++ b/testsuite/MDAnalysisTests/analysis/test_rms.py @@ -267,7 +267,7 @@ def test_custom_groupselection_weights_applied_1D_array(self, universe, client_R select='backbone', groupselections=['name CA and resid 1-5', 'name CA and resid 1'], weights=None, - weights_groupselections=[[1, 0, 0, 0, 0], None]).run(step=49, + weights_groupselections=[[1, 0, 0, 0, 0], None]).run(step=49, **client_RMSD ) @@ -281,7 +281,7 @@ def test_custom_groupselection_weights_applied_mass(self, universe, correct_valu groupselections=['all', 'all'], weights=None, weights_groupselections=['mass', - universe.atoms.masses]).run(step=49, + universe.atoms.masses]).run(step=49, **client_RMSD ) @@ -439,7 +439,7 @@ def test_rmsf_attr_warning(self, universe, client_RMSF): (MDAnalysis.analysis.rms.RMSF, False), ] ) -def test_not_parallelizable(classname, is_parallelizable): +def test_class_is_parallelizable(classname, is_parallelizable): assert classname._analysis_algorithm_is_parallelizable == is_parallelizable From 78edac04b9048ed0986e54fc5429a60f32215da4 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Sat, 28 Sep 2024 13:39:56 -0700 Subject: [PATCH 04/15] [doc] fix versionchanged display in LAMMPS DumpsReader (#4720) fix: versionchanged was not displayed correctly in coordinates.LAMMPS.DumpReader --- package/MDAnalysis/coordinates/LAMMPS.py | 1 + 1 file changed, 1 insertion(+) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index e772a3e392..5099c742fc 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -547,6 +547,7 @@ class DumpReader(base.ReaderBase): **kwargs Other keyword arguments used in :class:`~MDAnalysis.coordinates.base.ReaderBase` + .. versionchanged:: 2.7.0 Reading of arbitrary, additional columns is now supported. (Issue #3608) From 56766e9502fc6908b7d2a1b906068b6b74d4c2b0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 1 Oct 2024 21:28:40 +0100 Subject: [PATCH 05/15] Bump the github-actions group with 2 updates (#4723) Bumps the github-actions group with 2 updates: [actions/setup-python](https://github.com/actions/setup-python) and [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish). Updates `actions/setup-python` from 4 to 5 - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4...v5) Updates `pypa/gh-action-pypi-publish` from 1.10.0 to 1.10.2 - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.10.0...v1.10.2) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major dependency-group: github-actions - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-patch dependency-group: github-actions ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/deploy.yaml | 8 ++++---- .github/workflows/gh-ci-cron.yaml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 377575ef8c..09a79a64c2 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -142,7 +142,7 @@ jobs: mv dist/MDAnalysisTests-* testsuite/dist - name: upload_source_and_wheels - uses: pypa/gh-action-pypi-publish@v1.10.1 + uses: pypa/gh-action-pypi-publish@v1.10.2 with: skip_existing: true repository_url: https://test.pypi.org/legacy/ @@ -171,7 +171,7 @@ jobs: mv dist/MDAnalysisTests-* testsuite/dist - name: upload_tests - uses: pypa/gh-action-pypi-publish@v1.10.1 + uses: pypa/gh-action-pypi-publish@v1.10.2 with: packages_dir: testsuite/dist skip_existing: true @@ -201,7 +201,7 @@ jobs: mv dist/MDAnalysisTests-* testsuite/dist - name: upload_source_and_wheels - uses: pypa/gh-action-pypi-publish@v1.10.0 + uses: pypa/gh-action-pypi-publish@v1.10.2 upload_pypi_mdanalysistests: if: | @@ -227,7 +227,7 @@ jobs: mv dist/MDAnalysisTests-* testsuite/dist - name: upload_tests - uses: pypa/gh-action-pypi-publish@v1.10.0 + uses: pypa/gh-action-pypi-publish@v1.10.2 with: packages_dir: testsuite/dist diff --git a/.github/workflows/gh-ci-cron.yaml b/.github/workflows/gh-ci-cron.yaml index 4585fc4de7..0df5ad460f 100644 --- a/.github/workflows/gh-ci-cron.yaml +++ b/.github/workflows/gh-ci-cron.yaml @@ -40,7 +40,7 @@ jobs: with: os-type: "ubuntu" - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} From 650747d2612feace9b9704e28e08cd06bce46857 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Wed, 2 Oct 2024 10:43:06 +0200 Subject: [PATCH 06/15] Add black configuration ignoring all files (#4717) * add black configuration, ignoring all files * black * black version * adios 39 --- package/pyproject.toml | 6 ++++++ testsuite/pyproject.toml | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/package/pyproject.toml b/package/pyproject.toml index 05b0eb3ba1..3d1ea04bdd 100644 --- a/package/pyproject.toml +++ b/package/pyproject.toml @@ -121,3 +121,9 @@ find = {} MDAnalysis = [ 'analysis/data/*.npy', ] + +[tool.black] +line-length = 79 +target-version = ['py310', 'py311', 'py312'] +extend-exclude = '.' +required-version = '24' diff --git a/testsuite/pyproject.toml b/testsuite/pyproject.toml index 4af3b1d85c..a757271db9 100644 --- a/testsuite/pyproject.toml +++ b/testsuite/pyproject.toml @@ -151,3 +151,9 @@ filterwarnings = [ # NamedStream warnings "ignore:Constructed NamedStream:RuntimeWarning", ] + +[tool.black] +line-length = 79 +target-version = ['py310', 'py311', 'py312'] +extend-exclude = '.' +required-version = '24' From 474be5bbe32270bb9ddf02dc3cab74d3c1312c5e Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 7 Oct 2024 06:17:42 +0200 Subject: [PATCH 07/15] Implementation of Parallelization to analysis.hydrogenbonds.hbond_analysis (#4718) - Fixes #4664 - Parallelization of the backend support to the class HydrogenBondAnalysis in hbond_analysis.py - Moved setting up of donors and acceptors from _prepare() to __init__() (needed to make parallel processing work) - Addition of parallelization tests in test_hydrogenbonds_analysis.py and fixtures in conftest.py - Updated Changelog --------- Co-authored-by: Yuxuan Zhuang Co-authored-by: Oliver Beckstein --- package/CHANGELOG | 3 +- .../analysis/hydrogenbonds/hbond_analysis.py | 36 +++++++++----- .../MDAnalysisTests/analysis/conftest.py | 10 ++++ .../analysis/test_hydrogenbonds_analysis.py | 48 ++++++++++--------- 4 files changed, 62 insertions(+), 35 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 62ab8a0463..ec5fe28eca 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -62,7 +62,8 @@ Enhancements * explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680) * enables parallelization for analysis.bat.BAT (Issue #4663) * enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin} - (Issue #4673) + (Issue #4673) + * Enables parallelization for analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis (Issue #4664) * Improve error message for `AtomGroup.unwrap()` when bonds are not present.(Issue #4436, PR #4642) * Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP) * Added a tqdm progress bar for `MDAnalysis.analysis.pca.PCA.transform()` diff --git a/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py index e0597e2c3d..3bf9d5c27a 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py @@ -241,7 +241,7 @@ import numpy as np -from ..base import AnalysisBase, Results +from ..base import AnalysisBase, Results, ResultsGroup from MDAnalysis.lib.distances import capped_distance, calc_angles from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency from MDAnalysis.exceptions import NoDataError @@ -267,6 +267,12 @@ class HydrogenBondAnalysis(AnalysisBase): Perform an analysis of hydrogen bonds in a Universe. """ + _analysis_algorithm_is_parallelizable = True + + @classmethod + def get_supported_backends(cls): + return ('serial', 'multiprocessing', 'dask',) + def __init__(self, universe, donors_sel=None, hydrogens_sel=None, acceptors_sel=None, between=None, d_h_cutoff=1.2, @@ -335,7 +341,9 @@ def __init__(self, universe, .. versionchanged:: 2.4.0 Added use of atom types in selection strings for hydrogen atoms, bond donors, or bond acceptors - + .. versionchanged:: 2.8.0 + Introduced :meth:`get_supported_backends` allowing for parallel execution on + :mod:`multiprocessing` and :mod:`dask` backends. """ self.u = universe @@ -383,6 +391,17 @@ def __init__(self, universe, self.results = Results() self.results.hbonds = None + # Set atom selections if they have not been provided + if self.acceptors_sel is None: + self.acceptors_sel = self.guess_acceptors() + if self.hydrogens_sel is None: + self.hydrogens_sel = self.guess_hydrogens() + + # Select atom groups + self._acceptors = self.u.select_atoms(self.acceptors_sel, + updating=self.update_selections) + self._donors, self._hydrogens = self._get_dh_pairs() + def guess_hydrogens(self, select='all', max_mass=1.1, @@ -699,16 +718,6 @@ def _filter_atoms(self, donors, acceptors): def _prepare(self): self.results.hbonds = [[], [], [], [], [], []] - # Set atom selections if they have not been provided - if self.acceptors_sel is None: - self.acceptors_sel = self.guess_acceptors() - if self.hydrogens_sel is None: - self.hydrogens_sel = self.guess_hydrogens() - - # Select atom groups - self._acceptors = self.u.select_atoms(self.acceptors_sel, - updating=self.update_selections) - self._donors, self._hydrogens = self._get_dh_pairs() def _single_frame(self): @@ -788,6 +797,9 @@ def _conclude(self): self.results.hbonds = np.asarray(self.results.hbonds).T + def _get_aggregator(self): + return ResultsGroup(lookup={'hbonds': ResultsGroup.ndarray_hstack}) + @property def hbonds(self): wmsg = ("The `hbonds` attribute was deprecated in MDAnalysis 2.0.0 " diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index 5c6157d3bb..b5fe975dcd 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -10,8 +10,12 @@ from MDAnalysis.analysis.dihedrals import Dihedral, Ramachandran, Janin from MDAnalysis.analysis.bat import BAT from MDAnalysis.analysis.gnm import GNMAnalysis +from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import ( + HydrogenBondAnalysis, +) from MDAnalysis.lib.util import is_installed + def params_for_cls(cls, exclude: list[str] = None): """ This part contains fixtures for simultaneous testing @@ -122,3 +126,9 @@ def client_GNMAnalysis(request): @pytest.fixture(scope='module', params=params_for_cls(BAT)) def client_BAT(request): return request.param + +# MDAnalysis.analysis.hydrogenbonds + +@pytest.fixture(scope='module', params=params_for_cls(HydrogenBondAnalysis)) +def client_HydrogenBondAnalysis(request): + return request.param \ No newline at end of file diff --git a/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py b/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py index b8ea644fc4..503560a648 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py +++ b/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py @@ -53,9 +53,9 @@ def universe(): } @pytest.fixture(scope='class') - def h(self, universe): + def h(self, universe, client_HydrogenBondAnalysis): h = HydrogenBondAnalysis(universe, **self.kwargs) - h.run() + h.run(**client_HydrogenBondAnalysis) return h def test_hbond_analysis(self, h): @@ -181,12 +181,12 @@ def universe(): @staticmethod @pytest.fixture(scope='class') - def hydrogen_bonds(universe): + def hydrogen_bonds(universe, client_HydrogenBondAnalysis): h = HydrogenBondAnalysis( universe, **TestHydrogenBondAnalysisIdeal.kwargs ) - h.run() + h.run(**client_HydrogenBondAnalysis) return h def test_count_by_type(self, hydrogen_bonds): @@ -208,9 +208,12 @@ def test_no_bond_info_exception(self, universe): 'd_h_a_angle_cutoff': 120.0 } + u = universe.copy() + n_residues = 2 + u.add_TopologyAttr('mass', [15.999, 1.008, 1.008] * n_residues) + u.add_TopologyAttr('charge', [-1.04, 0.52, 0.52] * n_residues) with pytest.raises(NoDataError, match="no bond information"): - h = HydrogenBondAnalysis(universe, **kwargs) - h._get_dh_pairs() + h = HydrogenBondAnalysis(u, **kwargs) def test_no_bond_donor_sel(self, universe): @@ -263,10 +266,11 @@ def test_no_attr_hbonds(self, universe): with pytest.raises(NoDataError, match=".hbonds attribute is None"): hbonds.lifetime(tau_max=2, intermittency=1) - def test_logging_step_not_1(self, universe, caplog): + def test_logging_step_not_1(self, universe, caplog, + client_HydrogenBondAnalysis): hbonds = HydrogenBondAnalysis(universe, **self.kwargs) # using step 2 - hbonds.run(step=2) + hbonds.run(**client_HydrogenBondAnalysis, step=2) caplog.set_level(logging.WARNING) hbonds.lifetime(tau_max=2, intermittency=1) @@ -342,12 +346,12 @@ def universe(): @staticmethod @pytest.fixture(scope='class') - def hydrogen_bonds(universe): + def hydrogen_bonds(universe, client_HydrogenBondAnalysis): h = HydrogenBondAnalysis( universe, **TestHydrogenBondAnalysisNoRes.kwargs ) - h.run() + h.run(**client_HydrogenBondAnalysis) return h def test_no_hydrogen_bonds(self, universe): @@ -441,10 +445,10 @@ def universe(): return u - def test_between_all(self, universe): + def test_between_all(self, universe, client_HydrogenBondAnalysis): # don't specify groups between which to find hydrogen bonds hbonds = HydrogenBondAnalysis(universe, between=None, **self.kwargs) - hbonds.run() + hbonds.run(**client_HydrogenBondAnalysis) # indices of [donor, hydrogen, acceptor] for each hydrogen bond expected_hbond_indices = [ @@ -457,14 +461,14 @@ def test_between_all(self, universe): expected_hbond_indices) assert_allclose(hbonds.results.hbonds[:, 4], expected_hbond_distances) - def test_between_PW(self, universe): + def test_between_PW(self, universe, client_HydrogenBondAnalysis): # Find only protein-water hydrogen bonds hbonds = HydrogenBondAnalysis( universe, between=["resname PROT", "resname SOL"], **self.kwargs ) - hbonds.run() + hbonds.run(**client_HydrogenBondAnalysis) # indices of [donor, hydrogen, acceptor] for each hydrogen bond expected_hbond_indices = [ @@ -475,7 +479,7 @@ def test_between_PW(self, universe): expected_hbond_indices) assert_allclose(hbonds.results.hbonds[:, 4], expected_hbond_distances) - def test_between_PW_PP(self, universe): + def test_between_PW_PP(self, universe, client_HydrogenBondAnalysis): # Find protein-water and protein-protein hydrogen bonds (not # water-water) hbonds = HydrogenBondAnalysis( @@ -486,7 +490,7 @@ def test_between_PW_PP(self, universe): ], **self.kwargs ) - hbonds.run() + hbonds.run(**client_HydrogenBondAnalysis) # indices of [donor, hydrogen, acceptor] for each hydrogen bond expected_hbond_indices = [ @@ -512,7 +516,7 @@ class TestHydrogenBondAnalysisTIP3P_GuessAcceptors_GuessHydrogens_UseTopology_(T 'd_h_a_angle_cutoff': 120.0 } - def test_no_hydrogens(self, universe): + def test_no_hydrogens(self, universe, client_HydrogenBondAnalysis): # If no hydrogens are identified at a given frame, check an # empty donor atom group is created test_kwargs = TestHydrogenBondAnalysisTIP3P.kwargs.copy() @@ -520,7 +524,7 @@ def test_no_hydrogens(self, universe): test_kwargs['hydrogens_sel'] = "name H" # no atoms have name H h = HydrogenBondAnalysis(universe, **test_kwargs) - h.run() + h.run(**client_HydrogenBondAnalysis) assert h._hydrogens.n_atoms == 0 assert h._donors.n_atoms == 0 @@ -629,9 +633,9 @@ def universe(): } @pytest.fixture(scope='class') - def h(self, universe): + def h(self, universe, client_HydrogenBondAnalysis): h = HydrogenBondAnalysis(universe, **self.kwargs) - h.run(start=1, step=2) + h.run(**client_HydrogenBondAnalysis, start=1, step=2) return h def test_hbond_analysis(self, h): @@ -690,11 +694,11 @@ def test_empty_sel(self, universe, seltype): with pytest.warns(UserWarning, match=self.msg.format(seltype)): HydrogenBondAnalysis(universe, **sel_kwarg) - def test_hbond_analysis(self, universe): + def test_hbond_analysis(self, universe, client_HydrogenBondAnalysis): h = HydrogenBondAnalysis(universe, donors_sel=' ', hydrogens_sel=' ', acceptors_sel=' ') - h.run() + h.run(**client_HydrogenBondAnalysis) assert h.donors_sel == '' assert h.hydrogens_sel == '' From 740e74e8c61ea01a4b2120bd369b11a58cb9c304 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:07:07 +0200 Subject: [PATCH 08/15] Implementation of parallelization to `MDAnalysis.analysis.dssp` (#4721) - Fixes #4674 - Parallelization of the backend support to the class DSSP in dssp.py - Addition of parallelization tests in test_dssp.py and fixtures in conftest.py - Updated Changelog --------- Co-authored-by: Yuxuan Zhuang --- package/CHANGELOG | 3 +- package/MDAnalysis/analysis/dssp/dssp.py | 29 ++++++++++++++--- .../MDAnalysisTests/analysis/conftest.py | 11 ++++++- .../MDAnalysisTests/analysis/test_dssp.py | 31 ++++++++++--------- 4 files changed, 52 insertions(+), 22 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index ec5fe28eca..b284ffddee 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -62,7 +62,8 @@ Enhancements * explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680) * enables parallelization for analysis.bat.BAT (Issue #4663) * enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin} - (Issue #4673) + (Issue #4673) + * enables parallelization for analysis.dssp.dssp.DSSP (Issue #4674) * Enables parallelization for analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis (Issue #4664) * Improve error message for `AtomGroup.unwrap()` when bonds are not present.(Issue #4436, PR #4642) * Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP) diff --git a/package/MDAnalysis/analysis/dssp/dssp.py b/package/MDAnalysis/analysis/dssp/dssp.py index d2eccf0947..21dd9423e4 100644 --- a/package/MDAnalysis/analysis/dssp/dssp.py +++ b/package/MDAnalysis/analysis/dssp/dssp.py @@ -148,7 +148,7 @@ import numpy as np from MDAnalysis import Universe, AtomGroup -from ..base import AnalysisBase +from ..base import AnalysisBase, ResultsGroup from ...due import due, Doi due.cite( @@ -196,7 +196,7 @@ class DSSP(AnalysisBase): .. Warning:: For DSSP to work properly, your atoms must represent a protein. The hydrogen atom bound to the backbone nitrogen atom is matched by name - as given by the keyword argument `hydrogen_atom`. There may only be + as given by the keyword argument `hydrogen_atom`. There may only be a single backbone nitrogen hydrogen atom per residue; the one exception is proline, for which there should not exist any such hydrogens. The default value of `hydrogen_atom` should handle the common naming @@ -229,8 +229,8 @@ class DSSP(AnalysisBase): (except proline), namely the one bound to the backbone nitrogen. .. Note:: - To work with different hydrogen-naming conventions by default, the - default selection is broad but if hydrogens are incorrectly selected + To work with different hydrogen-naming conventions by default, the + default selection is broad but if hydrogens are incorrectly selected (e.g., a :exc:`ValueError` is raised) you must customize `hydrogen_name` for your specific case. @@ -263,7 +263,7 @@ class DSSP(AnalysisBase): The :attr:`results.dssp_ndarray` attribute holds a ``(n_frames, n_residues, 3)`` shape ndarray with a *one-hot encoding* of *loop* '-' (index 0), *helix* 'H' (index 1), and *sheet* 'E' - (index 2), respectively for each frame of the trajectory. It can be + (index 2), respectively for each frame of the trajectory. It can be used to compute, for instance, the **average secondary structure**: >>> from MDAnalysis.analysis.dssp import translate, DSSP @@ -276,8 +276,23 @@ class DSSP(AnalysisBase): .. versionadded:: 2.8.0 + + Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` + backends; use the new method :meth:`get_supported_backends` to see all + supported backends. """ + _analysis_algorithm_is_parallelizable = True + + @classmethod + def get_supported_backends(cls): + return ( + "serial", + "multiprocessing", + "dask", + ) + + def __init__( self, atoms: Union[Universe, AtomGroup], @@ -382,6 +397,10 @@ def _conclude(self): self.results.dssp_ndarray = np.array(self.results.dssp_ndarray) self.results.resids = self._heavy_atoms["CA"].resids + def _get_aggregator(self): + return ResultsGroup( + lookup={"dssp_ndarray": ResultsGroup.flatten_sequence}, + ) def translate(onehot: np.ndarray) -> np.ndarray: """Translate a one-hot encoding summary into char-based secondary structure diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index b5fe975dcd..fc3c8a480c 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -10,6 +10,7 @@ from MDAnalysis.analysis.dihedrals import Dihedral, Ramachandran, Janin from MDAnalysis.analysis.bat import BAT from MDAnalysis.analysis.gnm import GNMAnalysis +from MDAnalysis.analysis.dssp.dssp import DSSP from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import ( HydrogenBondAnalysis, ) @@ -127,8 +128,16 @@ def client_GNMAnalysis(request): def client_BAT(request): return request.param + +# MDAnalysis.analysis.dssp.dssp + +@pytest.fixture(scope="module", params=params_for_cls(DSSP)) +def client_DSSP(request): + return request.param + + # MDAnalysis.analysis.hydrogenbonds @pytest.fixture(scope='module', params=params_for_cls(HydrogenBondAnalysis)) def client_HydrogenBondAnalysis(request): - return request.param \ No newline at end of file + return request.param diff --git a/testsuite/MDAnalysisTests/analysis/test_dssp.py b/testsuite/MDAnalysisTests/analysis/test_dssp.py index 403743d972..f7a1e11893 100644 --- a/testsuite/MDAnalysisTests/analysis/test_dssp.py +++ b/testsuite/MDAnalysisTests/analysis/test_dssp.py @@ -10,31 +10,31 @@ # Files that match glob pattern '????.pdb.gz' and matching '????.pdb.dssp' files, # containing the secondary structure assignment string, will be tested automatically. @pytest.mark.parametrize("pdb_filename", glob.glob(f"{DSSP_FOLDER}/?????.pdb.gz")) -def test_file_guess_hydrogens(pdb_filename): +def test_file_guess_hydrogens(pdb_filename, client_DSSP): u = mda.Universe(pdb_filename) with open(f"{pdb_filename.rstrip('.gz')}.dssp", "r") as fin: correct_answ = fin.read().strip().split()[0] - run = DSSP(u, guess_hydrogens=True).run() + run = DSSP(u, guess_hydrogens=True).run(**client_DSSP) answ = "".join(run.results.dssp[0]) assert answ == correct_answ -def test_trajectory(): +def test_trajectory(client_DSSP): u = mda.Universe(TPR, XTC).select_atoms("protein").universe - run = DSSP(u).run(stop=10) + run = DSSP(u).run(**client_DSSP, stop=10) first_frame = "".join(run.results.dssp[0]) last_frame = "".join(run.results.dssp[-1]) avg_frame = "".join(translate(run.results.dssp_ndarray.mean(axis=0))) assert first_frame[:10] != last_frame[:10] == avg_frame[:10] == "-EEEEEE---" protein = mda.Universe(TPR, XTC).select_atoms("protein") - run = DSSP(protein).run(stop=10) + run = DSSP(protein).run(**client_DSSP, stop=10) -def test_atomgroup(): +def test_atomgroup(client_DSSP): protein = mda.Universe(TPR, XTC).select_atoms("protein") - run = DSSP(protein).run(stop=10) + run = DSSP(protein).run(**client_DSSP, stop=10) first_frame = "".join(run.results.dssp[0]) last_frame = "".join(run.results.dssp[-1]) avg_frame = "".join(translate(run.results.dssp_ndarray.mean(axis=0))) @@ -42,9 +42,9 @@ def test_atomgroup(): assert first_frame[:10] != last_frame[:10] == avg_frame[:10] == "-EEEEEE---" -def test_trajectory_with_hydrogens(): +def test_trajectory_with_hydrogens(client_DSSP): u = mda.Universe(TPR, XTC).select_atoms("protein").universe - run = DSSP(u, guess_hydrogens=False).run(stop=10) + run = DSSP(u, guess_hydrogens=False).run(**client_DSSP, stop=10) first_frame = "".join(run.results.dssp[0]) last_frame = "".join(run.results.dssp[-1]) avg_frame = "".join(translate(run.results.dssp_ndarray.mean(axis=0))) @@ -53,28 +53,29 @@ def test_trajectory_with_hydrogens(): @pytest.mark.parametrize("pdb_filename", glob.glob(f"{DSSP_FOLDER}/2xdgA.pdb.gz")) -def test_trajectory_without_hydrogen_fails(pdb_filename): +def test_trajectory_without_hydrogen_fails(pdb_filename, client_DSSP): u = mda.Universe(pdb_filename) with pytest.raises(ValueError): - DSSP(u, guess_hydrogens=False).run() + DSSP(u, guess_hydrogens=False).run(**client_DSSP) @pytest.mark.parametrize( "pdb_filename", glob.glob(f"{DSSP_FOLDER}/1mr1D_failing.pdb.gz") ) -def test_trajectory_with_uneven_number_of_atoms_fails(pdb_filename): +def test_trajectory_with_uneven_number_of_atoms_fails(pdb_filename, + client_DSSP): u = mda.Universe(pdb_filename) with pytest.raises(ValueError): - DSSP(u, guess_hydrogens=True).run() + DSSP(u, guess_hydrogens=True).run(**client_DSSP) @pytest.mark.parametrize( "pdb_filename", glob.glob(f"{DSSP_FOLDER}/wrong_hydrogens.pdb.gz") ) -def test_exception_raises_with_atom_index(pdb_filename): +def test_exception_raises_with_atom_index(pdb_filename, client_DSSP): u = mda.Universe(pdb_filename) with pytest.raises( ValueError, match="Residue contains*", ): - DSSP(u, guess_hydrogens=False).run() + DSSP(u, guess_hydrogens=False).run(**client_DSSP) From 599c8db9efcd4cd372792a2bcae3cc555f9e2072 Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Sun, 13 Oct 2024 23:26:00 +0100 Subject: [PATCH 09/15] Add py3.13 support w/ minimal deps (#4732) * Add Python 3.13 to minimal CI testing, and include some minor test shims to accommodate. --- .github/workflows/deploy.yaml | 4 ++-- .github/workflows/gh-ci-cron.yaml | 4 +--- .github/workflows/gh-ci.yaml | 5 +++++ azure-pipelines.yml | 2 +- package/CHANGELOG | 1 + package/pyproject.toml | 3 ++- testsuite/MDAnalysisTests/analysis/test_base.py | 8 ++++---- testsuite/MDAnalysisTests/lib/test_log.py | 4 ++-- 8 files changed, 18 insertions(+), 13 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 09a79a64c2..5db1a4f5c1 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -41,7 +41,7 @@ jobs: - [macos-12, macosx_*, x86_64] - [windows-2019, win_amd64, AMD64] - [macos-14, macosx_*, arm64] - python: ["cp310", "cp311", "cp312"] + python: ["cp310", "cp311", "cp312", "cp313"] defaults: run: working-directory: ./package @@ -243,7 +243,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] type: ["FULL", "MIN"] exclude: # Multiple deps don't like windows diff --git a/.github/workflows/gh-ci-cron.yaml b/.github/workflows/gh-ci-cron.yaml index 0df5ad460f..072d1cecb5 100644 --- a/.github/workflows/gh-ci-cron.yaml +++ b/.github/workflows/gh-ci-cron.yaml @@ -41,8 +41,6 @@ jobs: os-type: "ubuntu" - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} # minimally install nightly wheels & core deps - name: nightly_wheels @@ -197,7 +195,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index 8a45aec650..c07535e2f7 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -32,6 +32,11 @@ jobs: full-deps: [true, ] codecov: [true, ] include: + - name: python_313 + os: ubuntu-latest + python-version: "3.13" + full-deps: false + codecov: true - name: macOS_monterey_py311 os: macOS-12 python-version: "3.12" diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 250f3ba63b..27b1c0db3f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -47,7 +47,7 @@ jobs: PYTHON_VERSION: '3.12' PYTHON_ARCH: 'x64' BUILD_TYPE: 'wheel' - NUMPY_MIN: '1.26.0' + NUMPY_MIN: '2.1.0' imageName: 'ubuntu-latest' Linux-Python310-64bit-full-wheel: PYTHON_VERSION: '3.10' diff --git a/package/CHANGELOG b/package/CHANGELOG index b284ffddee..de7fddd266 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -56,6 +56,7 @@ Fixes * Fix groups.py doctests using sphinx directives (Issue #3925, PR #4374) Enhancements + * MDAnalysis now supports Python 3.13 (PR #4732) * Introduce parallelization API to `AnalysisBase` and to `analysis.rms.RMSD` class (Issue #4158, PR #4304) * Enables parallelization for analysis.gnm.GNMAnalysis (Issue #4672) diff --git a/package/pyproject.toml b/package/pyproject.toml index 3d1ea04bdd..24d726c3ab 100644 --- a/package/pyproject.toml +++ b/package/pyproject.toml @@ -61,6 +61,7 @@ classifiers = [ 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', 'Programming Language :: C', 'Topic :: Scientific/Engineering', 'Topic :: Scientific/Engineering :: Bio-Informatics', @@ -124,6 +125,6 @@ MDAnalysis = [ [tool.black] line-length = 79 -target-version = ['py310', 'py311', 'py312'] +target-version = ['py310', 'py311', 'py312', 'py313'] extend-exclude = '.' required-version = '24' diff --git a/testsuite/MDAnalysisTests/analysis/test_base.py b/testsuite/MDAnalysisTests/analysis/test_base.py index 623b2ca5fe..345e0dc671 100644 --- a/testsuite/MDAnalysisTests/analysis/test_base.py +++ b/testsuite/MDAnalysisTests/analysis/test_base.py @@ -345,17 +345,17 @@ def test_verbose_progressbar(u, capsys): def test_verbose_progressbar_run(u, capsys): FrameAnalysis(u.trajectory).run(verbose=True) _, err = capsys.readouterr() - expected = u'100%|██████████| 98/98 [00:00<00:00, 8799.49it/s]' + expected = u'100%|██████████' actual = err.strip().split('\r')[-1] - assert actual[:24] == expected[:24] + assert actual[:15] == expected def test_verbose_progressbar_run_with_kwargs(u, capsys): FrameAnalysis(u.trajectory).run( verbose=True, progressbar_kwargs={'desc': 'custom'}) _, err = capsys.readouterr() - expected = u'custom: 100%|██████████| 98/98 [00:00<00:00, 8799.49it/s]' + expected = u'custom: 100%|██████████' actual = err.strip().split('\r')[-1] - assert actual[:30] == expected[:30] + assert actual[:23] == expected def test_progressbar_multiprocessing(u): diff --git a/testsuite/MDAnalysisTests/lib/test_log.py b/testsuite/MDAnalysisTests/lib/test_log.py index c0f413ba2d..acaa876df9 100644 --- a/testsuite/MDAnalysisTests/lib/test_log.py +++ b/testsuite/MDAnalysisTests/lib/test_log.py @@ -32,9 +32,9 @@ def test_output(self, capsys): for i in ProgressBar(list(range(10))): pass out, err = capsys.readouterr() - expected = u'100%|██████████| 10/10 [00:00<00:00, 583.67it/s]' + expected = u'100%|██████████' actual = err.strip().split('\r')[-1] - assert actual[:24] == expected[:24] + assert actual[:15] == expected def test_disable(self, capsys): for i in ProgressBar(list(range(10)), disable=True): From 571431a169e5881f7d09930c4d7e633df0f2cd40 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Mon, 14 Oct 2024 21:12:14 +0200 Subject: [PATCH 10/15] liners black config ignore formatting git blame --- .git-blame-ignore-revs | 2 + .github/workflows/darkerbot.yaml | 62 ------- .github/workflows/linters.yaml | 71 ++------ maintainer/ci/darker-outcomes.py | 238 -------------------------- package/MDAnalysis/due.py | 21 ++- package/MDAnalysis/topology/tables.py | 44 +++-- package/pyproject.toml | 7 +- 7 files changed, 58 insertions(+), 387 deletions(-) create mode 100644 .git-blame-ignore-revs delete mode 100644 .github/workflows/darkerbot.yaml delete mode 100644 maintainer/ci/darker-outcomes.py diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000000..1a65ab79f5 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# black +feca12100a2d2251fa2b4e4ad9598d9d6e00ba38 diff --git a/.github/workflows/darkerbot.yaml b/.github/workflows/darkerbot.yaml deleted file mode 100644 index 88d0a08515..0000000000 --- a/.github/workflows/darkerbot.yaml +++ /dev/null @@ -1,62 +0,0 @@ -name: Darker PR Bot - -on: - workflow_run: - workflows: [linters] - types: - - completed - - -concurrency: - # Probably overly cautious group naming. - # Commits to develop will cancel each other, but PRs will only cancel - # commits within the same PR - group: "${{ github.ref }}-${{ github.head_ref }}-${{ github.workflow }}" - cancel-in-progress: true - - -jobs: - darker_bot: - if: "github.repository == 'MDAnalysis/mdanalysis'" - runs-on: ubuntu-latest - timeout-minutes: 5 - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: setup_dependencies - run: | - pip install PyGithub - - - name: 'Download artifact' - uses: actions/github-script@v7 - with: - script: | - let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: context.payload.workflow_run.id, - }); - let matchArtifact = allArtifacts.data.artifacts.filter((artifact) => { - return artifact.name == "darkerlint" - })[0]; - let download = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: matchArtifact.id, - archive_format: 'zip', - }); - let fs = require('fs'); - fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/darker_results.zip`, Buffer.from(download.data)); - - - name: 'Unzip artifact' - run: unzip darker_results.zip - - - name: writer-errors - env: - GITHUB_TOKEN: ${{ github.token }} - run: | - python maintainer/ci/darker-outcomes.py --json status.json diff --git a/.github/workflows/linters.yaml b/.github/workflows/linters.yaml index de5eb68fa8..ebc6225036 100644 --- a/.github/workflows/linters.yaml +++ b/.github/workflows/linters.yaml @@ -16,12 +16,10 @@ defaults: shell: bash -l {0} jobs: - darker_lint: + black: if: "github.repository == 'MDAnalysis/mdanalysis'" runs-on: ubuntu-latest timeout-minutes: 10 - permissions: - pull-requests: write defaults: run: shell: bash @@ -35,66 +33,17 @@ jobs: with: python-version: "3.10" - - name: darker-main-code - id: darker-main-code - uses: akaihola/darker@v2.1.1 - continue-on-error: true + - uses: psf/black@stable with: - version: "~=1.6.1" - options: "--check --diff --color" - src: "./package/MDAnalysis" - revision: "HEAD^" - lint: "flake8" - - - name: darker-test-code - id: darker-test-code - uses: akaihola/darker@v2.1.1 - continue-on-error: true + options: "--check --verbose" + src: "./package" + version: "~= 24.0" + + - uses: psf/black@stable with: - version: "~=1.6.1" - options: "--check --diff --color" - src: "./testsuite/MDAnalysisTests" - revision: "HEAD^" - lint: "flake8" - - - name: get-pr-info - uses: actions/github-script@v7 - with: - script: - const prNumber = context.payload.number; - core.exportVariable('PULL_NUMBER', prNumber); - - - name: save-status - env: - MAIN: ${{ steps.darker-main-code.outcome }} - TEST: ${{ steps.darker-test-code.outcome }} - shell: python - run: | - import os - import json - from pathlib import Path - - Path('./darker_results/').mkdir(exist_ok=True) - - d = { - 'main_stat': os.environ['MAIN'], - 'test_stat': os.environ['TEST'], - 'PR_NUM': os.environ['PULL_NUMBER'], - 'RUN_ID': os.environ['GITHUB_RUN_ID'], - } - - with open('darker_results/status.json', 'w') as f: - json.dump(d, f) - - - name: check-json - run: cat darker_results/status.json - - - name: upload-status - uses: actions/upload-artifact@v4 - with: - name: darkerlint - path: darker_results/ - retention-days: 1 + options: "--check --verbose" + src: "./testsuite" + version: "~= 24.0" pylint_check: diff --git a/maintainer/ci/darker-outcomes.py b/maintainer/ci/darker-outcomes.py deleted file mode 100644 index 4d6bb7fc5b..0000000000 --- a/maintainer/ci/darker-outcomes.py +++ /dev/null @@ -1,238 +0,0 @@ -#!/usr/bin/env python3 - -# MIT License - -# Copyright (c) 2023 Irfan Alibay - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import argparse -import os -from urllib import request -import json -from github import Github - - -parser = argparse.ArgumentParser( - description="Write PR comment for failed darker linting", -) - - -parser.add_argument( - "--json", - type=str, - help="Input JSON file with status results", -) - - -def get_pull_request(repo, pr_num): - """ - Simple method to get a PyGithub PR object from a PR number - - Parameters - ---------- - repo : PyGithub.Repository - Github Repository API object. - pr_num : int - Pull request number. - - Returns - ------- - PyGithub.PullRequest - Pull Request object corresponding to the input PR number. - """ - # Can get PR directly from PR number - return repo.get_pull(pr_num) - - -def get_action_url(repo, pr, run_id, workflow_name, job_name): - """ - Mix PyGithub & Github V3 REST API method to extract the url path to a - github actions workflow job corresponding to a given GITHUB_RUN_ID. - - Parameters - ---------- - repo : PyGithub.Repository - Github Repository API object. - run_id : str - Github actions RUN ID as defined by the github actions environment - variable `GITHUB_RUN_ID`. - workflow_name : str - Name of the workflow to extract a job url for. - job_name : str - Name of the job within the workflow to extract a url for. - - - Returns - ------- - str - URL to github actions workflow job or 'N/A' if a corresponding job name - could not be found. - """ - # Accessing get_workflow directly currently fails when passing a name - # Instead do the roundabout way by getting list of all workflows - linters = [wf for wf in repo.get_workflows() - if wf.name == workflow_name][0] - - # Extract the gh action run - run = [r for r in linters.get_runs(branch=pr.head.ref) - if r.id == int(run_id)][0] - - # The exact job url can't be recovered via the Python API - # Switch over to using the REST API instead - with request.urlopen(run.jobs_url) as url: - data = json.load(url) - - for job in data['jobs']: - if job['name'] == job_name: - return job['html_url'] - - return 'N/A' - - -def bool_outcome(outcome): - """ - Converts github action job status outcome to bool. - - Parameters - ---------- - outcome : str - Github action job step outcome message. - - Returns - ------- - bool - Whether or not the job step was successful. - """ - return True if (outcome == 'success') else False - - -def gen_message(pr, main_stat, test_stat, action_url): - """ - Generate a user facing message on the status of the darker linting - action. - - Parameters - ---------- - pr : PyGithub.PullRequest - Pull Request object representing the target for this message. - main_stat : str - Outcome of darker linting of main package code. - test_stat : str - Outcome of darker linting of testsuite code. - action_url : str - URL pointing to darker linting job log. - - - Returns - ------- - str - Message to be posted to PR author. - """ - - def _format_outcome(stat): - if bool_outcome(stat): - return "✅ Passed" - else: - return "⚠️ Possible failure" - - msg = ('### Linter Bot Results:\n\n' - f'Hi @{pr.user.login}! Thanks for making this PR. ' - 'We linted your code and found the following: \n\n') - - # If everything is ok - if bool_outcome(main_stat) and bool_outcome(test_stat): - msg += ('There are currently no issues detected! 🎉') - else: - msg += ('Some issues were found with the formatting of your code.\n' - '| Code Location | Outcome |\n' - '| --- | --- |\n' - f'| main package | {_format_outcome(main_stat)}|\n' - f'| testsuite | {_format_outcome(test_stat)}|\n' - '\nPlease have a look at the `darker-main-code` and ' - '`darker-test-code` steps here for more details: ' - f'{action_url}\n\n' - '---\n' - '_**Please note:** The `black` linter is purely ' - 'informational, you can safely ignore these outcomes if ' - 'there are no flake8 failures!_') - return msg - - -def post_comment(pr, message, match_string): - """ - Post a comment in a Pull Request. - - If a comment with text matching `match_string` is found in the - Pull Request, the comment will be edited. - - Parameters - ---------- - pr : PyGithub.PullRequest - Pull Request object representing the target for this message. - message : str - The message to post as a comment. - match_string : str - A matching string to recognise if the comment already exists. - """ - # Extract a list of matching comments from PR - comments = [comm for comm in pr.get_issue_comments() if match_string in comm.body] - - if len(comments) > 0: - # Edit the comment in-place - # By default we assume that the bot can write faster than anyone else - comments[0].edit(body=message) - else: - # If the comment doesn't exist, create one - pr.create_issue_comment(message) - - -if __name__ == "__main__": - args = parser.parse_args() - - git = Github(os.environ['GITHUB_TOKEN']) - repo = git.get_repo("MDAnalysis/mdanalysis") - - with open(args.json, 'r') as f: - status = json.load(f) - - run_id = status['RUN_ID'] - print(f"debug run_id: {run_id}") - - # Get Pull Request - pr_num = int(status['PR_NUM']) - print(f"debug pr_num: {pr_num}") - pr = get_pull_request(repo, pr_num) - - # Get the url to the github action job being pointed to - action_url = get_action_url(repo, pr, run_id, - workflow_name='linters', - job_name='darker_lint') - - # Get the message you want to post to users - with open(args.json, 'r') as f: - results_dict = json.load(f) - - message = gen_message(pr, - status['main_stat'], - status['test_stat'], - action_url) - - # Post your comment - post_comment(pr, message, match_string='Linter Bot Results:') diff --git a/package/MDAnalysis/due.py b/package/MDAnalysis/due.py index 0528bb83e1..7ab1e8b55a 100644 --- a/package/MDAnalysis/due.py +++ b/package/MDAnalysis/due.py @@ -26,30 +26,33 @@ """ -__version__ = '0.0.5' +__version__ = "0.0.5" class InactiveDueCreditCollector(object): """Just a stub at the Collector which would not do anything""" + def _donothing(self, *args, **kwargs): """Perform no good and no bad""" - pass # pylint: disable=unnecessary-pass + pass # pylint: disable=unnecessary-pass def dcite(self, *args, **kwargs): """If I could cite I would""" + def nondecorating_decorator(func): return func + return nondecorating_decorator cite = load = add = _donothing def __repr__(self): - return self.__class__.__name__ + '()' + return self.__class__.__name__ + "()" def _donothing_func(*args, **kwargs): """Perform no good and no bad""" - pass # pylint: disable=unnecessary-pass + pass # pylint: disable=unnecessary-pass try: @@ -60,17 +63,21 @@ def _donothing_func(*args, **kwargs): import duecredit from duecredit import due, BibTeX, Doi, Url - if 'due' in locals() and not hasattr(due, 'cite'): + + if "due" in locals() and not hasattr(due, "cite"): raise RuntimeError( - "Imported due lacks .cite. DueCredit is now disabled") + "Imported due lacks .cite. DueCredit is now disabled" + ) except Exception as err: if not isinstance(err, ImportError): import logging import warnings + errmsg = "Failed to import duecredit due to {}".format(str(err)) warnings.warn(errmsg) logging.getLogger("duecredit").error( - "Failed to import duecredit due to {}".format(str(err))) + "Failed to import duecredit due to {}".format(str(err)) + ) # else: # Do not issue any warnings if duecredit is not installed; # this is the user's choice (Issue #1872) diff --git a/package/MDAnalysis/topology/tables.py b/package/MDAnalysis/topology/tables.py index 1c0b51b581..ea46421cc4 100644 --- a/package/MDAnalysis/topology/tables.py +++ b/package/MDAnalysis/topology/tables.py @@ -46,8 +46,10 @@ .. autodata:: TABLE_VDWRADII """ +from typing import Any -def kv2dict(s, convertor=str): + +def kv2dict(s, convertor: Any = str): """Primitive ad-hoc parser of a key-value record list. * The string *s* should contain each key-value pair on a separate @@ -172,10 +174,12 @@ def kv2dict(s, convertor=str): #: with :func:`MDAnalysis.topology.core.guess_atom_type`. atomelements = kv2dict(TABLE_ATOMELEMENTS) +# fmt: off elements = ['H', 'LI', 'BE', 'B', 'C', 'N', 'O', 'F', 'NA', 'MG', 'AL', 'P', 'SI', 'S', 'CL', 'K'] +# fmt: on #: Plain-text table with atomic masses in u. TABLE_MASSES = """ @@ -376,28 +380,31 @@ def kv2dict(s, convertor=str): #: .. SeeAlso:: :func:`MDAnalysis.topology.core.guess_bonds` vdwradii = kv2dict(TABLE_VDWRADII, convertor=float) -Z2SYMB = {1: 'H', 2: 'He', - 3: 'Li', 4: 'Be', 5: 'B', 6: 'C', 7: 'N', 8: 'O', 9: 'F', 10: 'Ne', - 11: 'Na', 12: 'Mg', 13: 'Al', 14: 'Si', 15: 'P', 16: 'S', 17: 'Cl', 18: 'Ar', - 19: 'K', 20: 'Ca', 21: 'Sc', 22: 'Ti', 23: 'V', 24: 'Cr', 25: 'Mn', 26: 'Fe', - 27: 'Co', 28: 'Ni', 29: 'Cu', 30: 'Zn', 31: 'Ga', 32: 'Ge', 33: 'As', 34: 'Se', - 35: 'Br', 36: 'Kr', 37: 'Rb', 38: 'Sr', 39: 'Y', 40: 'Zr', 41: 'Nb', 42: 'Mo', - 43: 'Tc', 44: 'Ru', 45: 'Rh', 46: 'Pd', 47: 'Ag', 48: 'Cd', 49: 'In', 50: 'Sn', - 51: 'Sb', 52: 'Te', 53: 'I', 54: 'Xe', 55: 'Cs', 56: 'Ba', 57: 'La', 58: 'Ce', - 59: 'Pr', 60: 'Nd', 61: 'Pm', 62: 'Sm', 63: 'Eu', 64: 'Gd', 65: 'Tb', 66: 'Dy', - 67: 'Ho', 68: 'Er', 69: 'Tm', 70: 'Yb', 71: 'Lu', 72: 'Hf', 73: 'Ta', 74: 'W', - 75: 'Re', 76: 'Os', 77: 'Ir', 78: 'Pt', 79: 'Au', 80: 'Hg', 81: 'Tl', 82: 'Pb', - 83: 'Bi', 84: 'Po', 85: 'At', 86: 'Rn', 87: 'Fr', 88: 'Ra', 89: 'Ac', 90: 'Th', - 91: 'Pa', 92: 'U', 93: 'Np', 94: 'Pu', 95: 'Am', 96: 'Cm', 97: 'Bk', 98: 'Cf', - 99: 'Es', 100: 'Fm', 101: 'Md', 102: 'No', 103: 'Lr', 104: 'Rf', 105: 'Db', - 106: 'Sg', 107: 'Bh', 108: 'Hs', 109: 'Mt', 110: 'Ds', 111: 'Rg', 112: 'Cn', +# fmt: off +Z2SYMB = {1: 'H', 2: 'He', + 3: 'Li', 4: 'Be', 5: 'B', 6: 'C', 7: 'N', 8: 'O', 9: 'F', 10: 'Ne', + 11: 'Na', 12: 'Mg', 13: 'Al', 14: 'Si', 15: 'P', 16: 'S', 17: 'Cl', 18: 'Ar', + 19: 'K', 20: 'Ca', 21: 'Sc', 22: 'Ti', 23: 'V', 24: 'Cr', 25: 'Mn', 26: 'Fe', + 27: 'Co', 28: 'Ni', 29: 'Cu', 30: 'Zn', 31: 'Ga', 32: 'Ge', 33: 'As', 34: 'Se', + 35: 'Br', 36: 'Kr', 37: 'Rb', 38: 'Sr', 39: 'Y', 40: 'Zr', 41: 'Nb', 42: 'Mo', + 43: 'Tc', 44: 'Ru', 45: 'Rh', 46: 'Pd', 47: 'Ag', 48: 'Cd', 49: 'In', 50: 'Sn', + 51: 'Sb', 52: 'Te', 53: 'I', 54: 'Xe', 55: 'Cs', 56: 'Ba', 57: 'La', 58: 'Ce', + 59: 'Pr', 60: 'Nd', 61: 'Pm', 62: 'Sm', 63: 'Eu', 64: 'Gd', 65: 'Tb', 66: 'Dy', + 67: 'Ho', 68: 'Er', 69: 'Tm', 70: 'Yb', 71: 'Lu', 72: 'Hf', 73: 'Ta', 74: 'W', + 75: 'Re', 76: 'Os', 77: 'Ir', 78: 'Pt', 79: 'Au', 80: 'Hg', 81: 'Tl', 82: 'Pb', + 83: 'Bi', 84: 'Po', 85: 'At', 86: 'Rn', 87: 'Fr', 88: 'Ra', 89: 'Ac', 90: 'Th', + 91: 'Pa', 92: 'U', 93: 'Np', 94: 'Pu', 95: 'Am', 96: 'Cm', 97: 'Bk', 98: 'Cf', + 99: 'Es', 100: 'Fm', 101: 'Md', 102: 'No', 103: 'Lr', 104: 'Rf', 105: 'Db', + 106: 'Sg', 107: 'Bh', 108: 'Hs', 109: 'Mt', 110: 'Ds', 111: 'Rg', 112: 'Cn', 113: 'Nh', 114: 'Fl', 115: 'Mc', 116: 'Lv', 117: 'Ts', 118: 'Og'} +# fmt: on -SYMB2Z = {v:k for k, v in Z2SYMB.items()} +SYMB2Z = {v: k for k, v in Z2SYMB.items()} # Conversion between SYBYL atom types and corresponding elements # Tripos MOL2 file format: # https://web.archive.org/web/*/http://chemyang.ccnu.edu.cn/ccb/server/AIMMS/mol2.pdf +# fmt: off SYBYL2SYMB = { "H": "H", "H.spc": "H", "H.t3p": "H", "C.3": "C", "C.2": "C", "C.1": "C", "C.ar": "C", "C.cat": "C", @@ -428,4 +435,5 @@ def kv2dict(s, convertor=str): "Se": "Se", "Mo": "Mo", "Sn": "Sn", -} \ No newline at end of file +} +# fmt: on diff --git a/package/pyproject.toml b/package/pyproject.toml index 24d726c3ab..cd7ec3a780 100644 --- a/package/pyproject.toml +++ b/package/pyproject.toml @@ -126,5 +126,10 @@ MDAnalysis = [ [tool.black] line-length = 79 target-version = ['py310', 'py311', 'py312', 'py313'] -extend-exclude = '.' +include = ''' +( +tables\.py +| due\.py +) +''' required-version = '24' From 48e9e01f1e8a6f6555c3284789a10f2378cc18ba Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Mon, 14 Oct 2024 21:22:51 +0200 Subject: [PATCH 11/15] .git-blame-ignore-revs --- .git-blame-ignore-revs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 1a65ab79f5..d40a8a87bd 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,2 +1,2 @@ # black -feca12100a2d2251fa2b4e4ad9598d9d6e00ba38 +83e5f99051d86ca354b537be8854c40f9b6ce172 From caa04ec8583fb01d0aa967592240e01159c23617 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Fri, 18 Oct 2024 19:24:04 -0700 Subject: [PATCH 12/15] pin distopia<0.3.0 (#4740) - fix #4739 - pin distopia<0.3.0 - temporarily restrict distopia to >=0.2.0,<0.3.0 until MDAnalysis has caught up with distopia API changes - updated CHANGELOG - version check distopia - only enable HAS_DISTOPIA if the appropriate version of distopia is installed - issue RuntimeWarning if incorrect version present - added test --- .github/actions/setup-deps/action.yaml | 2 +- package/CHANGELOG | 1 + package/MDAnalysis/lib/_distopia.py | 15 ++++++- .../MDAnalysisTests/lib/test_distances.py | 42 +++++++++++++++++-- 4 files changed, 55 insertions(+), 5 deletions(-) diff --git a/.github/actions/setup-deps/action.yaml b/.github/actions/setup-deps/action.yaml index 97112b0915..cbfd91df7e 100644 --- a/.github/actions/setup-deps/action.yaml +++ b/.github/actions/setup-deps/action.yaml @@ -59,7 +59,7 @@ inputs: dask: default: 'dask' distopia: - default: 'distopia>=0.2.0' + default: 'distopia>=0.2.0,<0.3.0' h5py: default: 'h5py>=2.10' hole2: diff --git a/package/CHANGELOG b/package/CHANGELOG index de7fddd266..37c6067232 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -78,6 +78,7 @@ Enhancements DOI 10.1021/acs.jpcb.7b11988. (Issue #2039, PR #4524) Changes + * only use distopia < 0.3.0 due to API changes (Issue #4739) * The `fetch_mmtf` method has been removed as the REST API service for MMTF files has ceased to exist (Issue #4634) * MDAnalysis now builds against numpy 2.0 rather than the diff --git a/package/MDAnalysis/lib/_distopia.py b/package/MDAnalysis/lib/_distopia.py index 7170cf2a55..5344393fe1 100644 --- a/package/MDAnalysis/lib/_distopia.py +++ b/package/MDAnalysis/lib/_distopia.py @@ -27,6 +27,7 @@ This module is a stub to provide distopia distance functions to `distances.py` as a selectable backend. """ +import warnings # check for distopia try: @@ -36,10 +37,22 @@ else: HAS_DISTOPIA = True + # check for compatibility: currently needs to be >=0.2.0,<0.3.0 (issue + # #4740) No distopia.__version__ available so we have to do some probing. + needed_funcs = ['calc_bonds_no_box_float', 'calc_bonds_ortho_float'] + has_distopia_020 = all([hasattr(distopia, func) for func in needed_funcs]) + if not has_distopia_020: + warnings.warn("Install 'distopia>=0.2.0,<0.3.0' to be used with this " + "release of MDAnalysis. Your installed version of " + "distopia >=0.3.0 will NOT be used.", + category=RuntimeWarning) + del distopia + HAS_DISTOPIA = False + + from .c_distances import ( calc_bond_distance_triclinic as _calc_bond_distance_triclinic_serial, ) -import warnings import numpy as np diff --git a/testsuite/MDAnalysisTests/lib/test_distances.py b/testsuite/MDAnalysisTests/lib/test_distances.py index 3fe4b2852b..4f7cd238ba 100644 --- a/testsuite/MDAnalysisTests/lib/test_distances.py +++ b/testsuite/MDAnalysisTests/lib/test_distances.py @@ -20,6 +20,8 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # +import sys +from unittest.mock import Mock, patch import pytest import numpy as np from numpy.testing import assert_equal, assert_almost_equal, assert_allclose @@ -788,9 +790,10 @@ def test_pbc_wrong_wassenaar_distance(self, backend): # expected. assert np.linalg.norm(point_a - point_b) != dist[0, 0] -@pytest.mark.parametrize("box", + +@pytest.mark.parametrize("box", [ - None, + None, np.array([10., 15., 20., 90., 90., 90.]), # otrho np.array([10., 15., 20., 70.53571, 109.48542, 70.518196]), # TRIC ] @@ -835,6 +838,39 @@ def distopia_conditional_backend(): return ["serial", "openmp"] +def test_HAS_DISTOPIA_incompatible_distopia(): + # warn if distopia is the wrong version and set HAS_DISTOPIA to False + sys.modules.pop("distopia", None) + sys.modules.pop("MDAnalysis.lib._distopia", None) + + # fail any Attribute access for calc_bonds_ortho_float, + # calc_bonds_no_box_float but pretend to have the distopia + # 0.3.0 functions (from + # https://github.com/MDAnalysis/distopia/blob/main/distopia/__init__.py + # __all__): + mock_distopia_030 = Mock(spec=[ + 'calc_bonds_ortho', + 'calc_bonds_no_box', + 'calc_bonds_triclinic', + 'calc_angles_no_box', + 'calc_angles_ortho', + 'calc_angles_triclinic', + 'calc_dihedrals_no_box', + 'calc_dihedrals_ortho', + 'calc_dihedrals_triclinic', + 'calc_distance_array_no_box', + 'calc_distance_array_ortho', + 'calc_distance_array_triclinic', + 'calc_self_distance_array_no_box', + 'calc_self_distance_array_ortho', + 'calc_self_distance_array_triclinic', + ]) + with patch.dict("sys.modules", {"distopia": mock_distopia_030}): + with pytest.warns(RuntimeWarning, + match="Install 'distopia>=0.2.0,<0.3.0' to"): + import MDAnalysis.lib._distopia + assert not MDAnalysis.lib._distopia.HAS_DISTOPIA + class TestCythonFunctions(object): # Unit tests for calc_bonds calc_angles and calc_dihedrals in lib.distances # Tests both numerical results as well as input types as Cython will silently @@ -1597,7 +1633,7 @@ def test_empty_input_self_capped_distance(self, empty_coord, min_cut, box, assert_equal(res[1], np.empty((0,), dtype=np.float64)) else: assert_equal(res, np.empty((0, 2), dtype=np.int64)) - + @pytest.mark.parametrize('box', boxes[:2]) @pytest.mark.parametrize('backend', ['serial', 'openmp']) def test_empty_input_transform_RtoS(self, empty_coord, box, backend): From 05876f2d52c5b4ed92592eff9a9fdcd140920e45 Mon Sep 17 00:00:00 2001 From: Fiona Naughton Date: Sat, 19 Oct 2024 22:53:43 +1100 Subject: [PATCH 13/15] Deprecate encore (#4737) * Deprecate encore for removal in 3.0 --- package/CHANGELOG | 2 ++ package/MDAnalysis/analysis/encore/__init__.py | 8 ++++++++ package/MDAnalysis/analysis/encore/bootstrap.py | 5 +++++ .../analysis/encore/clustering/ClusterCollection.py | 5 +++++ .../analysis/encore/clustering/ClusteringMethod.py | 5 +++++ package/MDAnalysis/analysis/encore/clustering/cluster.py | 5 +++++ package/MDAnalysis/analysis/encore/confdistmatrix.py | 5 +++++ package/MDAnalysis/analysis/encore/covariance.py | 6 ++++++ .../DimensionalityReductionMethod.py | 5 +++++ .../dimensionality_reduction/reduce_dimensionality.py | 5 +++++ package/MDAnalysis/analysis/encore/similarity.py | 5 +++++ .../sphinx/source/documentation_pages/analysis/encore.rst | 6 ++++++ .../source/documentation_pages/analysis/encore/utils.rst | 5 +++++ testsuite/MDAnalysisTests/analysis/test_encore.py | 6 ++++++ 14 files changed, 73 insertions(+) diff --git a/package/CHANGELOG b/package/CHANGELOG index 37c6067232..1763df44ec 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -100,6 +100,8 @@ Changes numpy.testing.assert_allclose #4438) Deprecations + * The MDAnalysis.anaylsis.encore module has been deprecated in favour of the + mdaencore MDAKit and will be removed in version 3.0.0 (PR #4737) * The MMTF Reader is deprecated and will be removed in version 3.0 as the MMTF format is no longer supported (Issue #4634) * The MDAnalysis.analysis.waterdynamics module has been deprecated in favour diff --git a/package/MDAnalysis/analysis/encore/__init__.py b/package/MDAnalysis/analysis/encore/__init__.py index 2017188580..34b70dd28d 100644 --- a/package/MDAnalysis/analysis/encore/__init__.py +++ b/package/MDAnalysis/analysis/encore/__init__.py @@ -34,6 +34,14 @@ __all__ = ['covariance', 'similarity', 'confdistmatrix', 'clustering'] +import warnings + +wmsg = ("Deprecation in version 2.8.0\n" + "MDAnalysis.analysis.encore is deprecated in favour of the " + "MDAKit mdaencore (https://www.mdanalysis.org/mdaencore/) " + "and will be removed in MDAnalysis version 3.0.0.") +warnings.warn(wmsg, category=DeprecationWarning) + from ...due import due, Doi due.cite(Doi("10.1371/journal.pcbi.1004415"), diff --git a/package/MDAnalysis/analysis/encore/bootstrap.py b/package/MDAnalysis/analysis/encore/bootstrap.py index 22287d98fc..2d50d486dc 100644 --- a/package/MDAnalysis/analysis/encore/bootstrap.py +++ b/package/MDAnalysis/analysis/encore/bootstrap.py @@ -32,6 +32,11 @@ .. versionadded:: 0.16.0 +.. deprecated:: 2.8.0 + This module is deprecated in favour of the + MDAKit `mdaencore `_ and will be removed + in MDAnalysis 3.0.0. + """ import numpy as np import logging diff --git a/package/MDAnalysis/analysis/encore/clustering/ClusterCollection.py b/package/MDAnalysis/analysis/encore/clustering/ClusterCollection.py index 35b48219ab..87879ba107 100644 --- a/package/MDAnalysis/analysis/encore/clustering/ClusterCollection.py +++ b/package/MDAnalysis/analysis/encore/clustering/ClusterCollection.py @@ -31,6 +31,11 @@ .. versionadded:: 0.16.0 +.. deprecated:: 2.8.0 + This module is deprecated in favour of the + MDAKit `mdaencore `_ and will be removed + in MDAnalysis 3.0.0. + """ import numpy as np diff --git a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py index b18d6a5435..df13aaff57 100644 --- a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py +++ b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py @@ -32,6 +32,11 @@ .. versionadded:: 0.16.0 +.. deprecated:: 2.8.0 + This module is deprecated in favour of the + MDAKit `mdaencore `_ and will be removed + in MDAnalysis 3.0.0. + """ import numpy as np import warnings diff --git a/package/MDAnalysis/analysis/encore/clustering/cluster.py b/package/MDAnalysis/analysis/encore/clustering/cluster.py index 9e0ea01fc4..0ad713775d 100644 --- a/package/MDAnalysis/analysis/encore/clustering/cluster.py +++ b/package/MDAnalysis/analysis/encore/clustering/cluster.py @@ -31,6 +31,11 @@ .. versionadded:: 0.16.0 +.. deprecated:: 2.8.0 + This module is deprecated in favour of the + MDAKit `mdaencore `_ and will be removed + in MDAnalysis 3.0.0. + """ import numpy as np from ..utils import ParallelCalculation, merge_universes diff --git a/package/MDAnalysis/analysis/encore/confdistmatrix.py b/package/MDAnalysis/analysis/encore/confdistmatrix.py index 483964d559..2f3e83b94f 100644 --- a/package/MDAnalysis/analysis/encore/confdistmatrix.py +++ b/package/MDAnalysis/analysis/encore/confdistmatrix.py @@ -34,6 +34,11 @@ class to compute an RMSD matrix in such a way is also available. .. versionadded:: 0.16.0 +.. deprecated:: 2.8.0 + This module is deprecated in favour of the + MDAKit `mdaencore `_ and will be removed + in MDAnalysis 3.0.0. + """ from joblib import Parallel, delayed import numpy as np diff --git a/package/MDAnalysis/analysis/encore/covariance.py b/package/MDAnalysis/analysis/encore/covariance.py index 1fb77b1078..e6768bf698 100644 --- a/package/MDAnalysis/analysis/encore/covariance.py +++ b/package/MDAnalysis/analysis/encore/covariance.py @@ -30,6 +30,12 @@ :Author: Matteo Tiberti, Wouter Boomsma, Tone Bengtsen .. versionadded:: 0.16.0 + +.. deprecated:: 2.8.0 + This module is deprecated in favour of the + MDAKit `mdaencore `_ and will be removed + in MDAnalysis 3.0.0. + """ import numpy as np diff --git a/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py b/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py index 10cd28ce4d..cef202843d 100644 --- a/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py +++ b/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py @@ -32,6 +32,11 @@ .. versionadded:: 0.16.0 +.. deprecated:: 2.8.0 + This module is deprecated in favour of the + MDAKit `mdaencore `_ and will be removed + in MDAnalysis 3.0.0. + """ import logging import warnings diff --git a/package/MDAnalysis/analysis/encore/dimensionality_reduction/reduce_dimensionality.py b/package/MDAnalysis/analysis/encore/dimensionality_reduction/reduce_dimensionality.py index 1432c4a06d..1a35548fbf 100644 --- a/package/MDAnalysis/analysis/encore/dimensionality_reduction/reduce_dimensionality.py +++ b/package/MDAnalysis/analysis/encore/dimensionality_reduction/reduce_dimensionality.py @@ -31,6 +31,11 @@ .. versionadded:: 0.16.0 +.. deprecated:: 2.8.0 + This module is deprecated in favour of the + MDAKit `mdaencore `_ and will be removed + in MDAnalysis 3.0.0. + """ import numpy as np from ..confdistmatrix import get_distance_matrix diff --git a/package/MDAnalysis/analysis/encore/similarity.py b/package/MDAnalysis/analysis/encore/similarity.py index 9e1ee2a674..2f41d233d4 100644 --- a/package/MDAnalysis/analysis/encore/similarity.py +++ b/package/MDAnalysis/analysis/encore/similarity.py @@ -29,6 +29,11 @@ .. versionadded:: 0.16.0 +.. deprecated:: 2.8.0 + This module is deprecated in favour of the + MDAKit `mdaencore `_ and will be removed + in MDAnalysis 3.0.0. + The module contains implementations of similarity measures between protein ensembles described in :footcite:p:`LindorffLarsen2009`. The implementation and examples are described in :footcite:p:`Tiberti2015`. diff --git a/package/doc/sphinx/source/documentation_pages/analysis/encore.rst b/package/doc/sphinx/source/documentation_pages/analysis/encore.rst index 2051a2e335..d4da461260 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis/encore.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis/encore.rst @@ -9,6 +9,12 @@ .. versionadded:: 0.16.0 + +.. deprecated:: 2.8.0 + This module is deprecated in favour of the + MDAKit `mdaencore `_ and will be removed + in MDAnalysis 3.0.0. + The module contains implementations of similarity measures between protein ensembles described in :footcite:p:`LindorffLarsen2009`. The implementation and examples are described in :footcite:p:`Tiberti2015`. diff --git a/package/doc/sphinx/source/documentation_pages/analysis/encore/utils.rst b/package/doc/sphinx/source/documentation_pages/analysis/encore/utils.rst index 85a2f6cd41..e1b1e27b71 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis/encore/utils.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis/encore/utils.rst @@ -2,6 +2,11 @@ Utility functions for ENCORE ============================== +.. deprecated:: 2.8.0 + This module is deprecated in favour of the + MDAKit `mdaencore `_ and will be removed + in MDAnalysis 3.0.0. + .. automodule:: MDAnalysis.analysis.encore.utils :members: diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index bc07c21af7..424aae5427 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -30,6 +30,7 @@ import os import warnings import platform +from importlib import reload import pytest from numpy.testing import assert_equal, assert_allclose @@ -46,6 +47,11 @@ def function(x): return x**2 +def test_moved_to_mdakit_warning(): + wmsg = "MDAnalysis.analysis.encore is deprecated" + with pytest.warns(DeprecationWarning, match=wmsg): + reload(encore) + class TestEncore(object): @pytest.fixture(scope='class') def ens1_template(self): From 9b6974529de5cbcfeb1e811dbaa9a69f5aac819f Mon Sep 17 00:00:00 2001 From: Aya Alaa Date: Sat, 19 Oct 2024 17:21:00 +0300 Subject: [PATCH 14/15] Introduce Guesser classes for different contexts (#3753) * Adds Guesser classes (GuesserBase and DefaultGuesser) * Adds guess_TopologyAttrs method to Universe * Modifies all Topology parsers to remove guessing in parsing and move it to Universe creation --- benchmarks/benchmarks/topology.py | 4 +- package/CHANGELOG | 17 +- package/MDAnalysis/__init__.py | 2 +- package/MDAnalysis/analysis/bat.py | 2 +- package/MDAnalysis/converters/OpenMMParser.py | 44 +- package/MDAnalysis/converters/ParmEd.py | 2 +- package/MDAnalysis/converters/ParmEdParser.py | 2 +- package/MDAnalysis/converters/RDKit.py | 4 +- package/MDAnalysis/converters/RDKitParser.py | 16 +- package/MDAnalysis/coordinates/PDB.py | 1 - package/MDAnalysis/core/groups.py | 23 +- package/MDAnalysis/core/topologyattrs.py | 13 + package/MDAnalysis/core/universe.py | 158 ++++- package/MDAnalysis/guesser/__init__.py | 50 ++ package/MDAnalysis/guesser/base.py | 202 +++++++ package/MDAnalysis/guesser/default_guesser.py | 568 ++++++++++++++++++ .../{topology => guesser}/tables.py | 2 +- package/MDAnalysis/topology/CRDParser.py | 18 +- package/MDAnalysis/topology/DLPolyParser.py | 19 +- package/MDAnalysis/topology/DMSParser.py | 10 +- .../MDAnalysis/topology/ExtendedPDBParser.py | 2 - package/MDAnalysis/topology/FHIAIMSParser.py | 16 +- package/MDAnalysis/topology/GMSParser.py | 14 +- package/MDAnalysis/topology/GROParser.py | 16 +- package/MDAnalysis/topology/GSDParser.py | 1 - package/MDAnalysis/topology/HoomdXMLParser.py | 1 - package/MDAnalysis/topology/ITPParser.py | 54 +- package/MDAnalysis/topology/LAMMPSParser.py | 21 +- package/MDAnalysis/topology/MMTFParser.py | 11 +- package/MDAnalysis/topology/MOL2Parser.py | 17 +- package/MDAnalysis/topology/PDBParser.py | 35 +- package/MDAnalysis/topology/PDBQTParser.py | 12 +- package/MDAnalysis/topology/PQRParser.py | 21 +- package/MDAnalysis/topology/PSFParser.py | 2 +- package/MDAnalysis/topology/TOPParser.py | 9 +- package/MDAnalysis/topology/TPRParser.py | 1 - package/MDAnalysis/topology/TXYZParser.py | 21 +- package/MDAnalysis/topology/XYZParser.py | 17 +- package/MDAnalysis/topology/__init__.py | 7 +- package/MDAnalysis/topology/core.py | 11 - package/MDAnalysis/topology/guessers.py | 526 ---------------- package/MDAnalysis/topology/tpr/obj.py | 2 +- .../documentation_pages/guesser_modules.rst | 61 ++ .../guesser_modules/base.rst | 1 + .../guesser_modules/default_guesser.rst | 1 + .../guesser_modules/init.rst | 1 + .../guesser_modules/tables.rst | 1 + .../documentation_pages/topology/guessers.rst | 2 - .../documentation_pages/topology/tables.rst | 1 - .../documentation_pages/topology_modules.rst | 6 +- package/doc/sphinx/source/index.rst | 18 +- .../MDAnalysisTests/analysis/test_base.py | 2 +- .../analysis/test_dielectric.py | 2 +- .../converters/test_openmm_parser.py | 58 +- .../MDAnalysisTests/converters/test_rdkit.py | 8 +- .../converters/test_rdkit_parser.py | 39 +- testsuite/MDAnalysisTests/coordinates/base.py | 12 +- .../coordinates/test_chainreader.py | 44 +- .../MDAnalysisTests/coordinates/test_h5md.py | 2 +- .../coordinates/test_netcdf.py | 10 +- .../coordinates/test_timestep_api.py | 4 +- .../MDAnalysisTests/coordinates/test_trz.py | 2 +- .../MDAnalysisTests/core/test_atomgroup.py | 9 +- .../core/test_topologyattrs.py | 17 +- .../MDAnalysisTests/core/test_universe.py | 68 ++- .../MDAnalysisTests/guesser/test_base.py | 102 ++++ .../guesser/test_default_guesser.py | 302 ++++++++++ .../parallelism/test_multiprocessing.py | 2 +- testsuite/MDAnalysisTests/topology/base.py | 28 +- .../MDAnalysisTests/topology/test_crd.py | 13 + .../MDAnalysisTests/topology/test_dlpoly.py | 21 +- .../MDAnalysisTests/topology/test_dms.py | 6 +- .../MDAnalysisTests/topology/test_fhiaims.py | 19 +- .../MDAnalysisTests/topology/test_gms.py | 12 +- .../MDAnalysisTests/topology/test_gro.py | 15 +- .../MDAnalysisTests/topology/test_gsd.py | 9 +- .../MDAnalysisTests/topology/test_guessers.py | 205 ------- .../MDAnalysisTests/topology/test_hoomdxml.py | 2 +- .../MDAnalysisTests/topology/test_itp.py | 81 ++- .../topology/test_lammpsdata.py | 27 +- .../MDAnalysisTests/topology/test_minimal.py | 6 +- .../MDAnalysisTests/topology/test_mmtf.py | 15 +- .../MDAnalysisTests/topology/test_mol2.py | 23 +- .../MDAnalysisTests/topology/test_pdb.py | 34 +- .../MDAnalysisTests/topology/test_pdbqt.py | 9 + .../MDAnalysisTests/topology/test_pqr.py | 19 +- .../MDAnalysisTests/topology/test_psf.py | 2 +- .../topology/test_tprparser.py | 2 + .../MDAnalysisTests/topology/test_txyz.py | 18 +- .../MDAnalysisTests/topology/test_xpdb.py | 12 + .../MDAnalysisTests/topology/test_xyz.py | 17 +- .../transformations/test_positionaveraging.py | 6 +- 92 files changed, 2160 insertions(+), 1190 deletions(-) create mode 100644 package/MDAnalysis/guesser/__init__.py create mode 100644 package/MDAnalysis/guesser/base.py create mode 100644 package/MDAnalysis/guesser/default_guesser.py rename package/MDAnalysis/{topology => guesser}/tables.py (99%) delete mode 100644 package/MDAnalysis/topology/guessers.py create mode 100644 package/doc/sphinx/source/documentation_pages/guesser_modules.rst create mode 100644 package/doc/sphinx/source/documentation_pages/guesser_modules/base.rst create mode 100644 package/doc/sphinx/source/documentation_pages/guesser_modules/default_guesser.rst create mode 100644 package/doc/sphinx/source/documentation_pages/guesser_modules/init.rst create mode 100644 package/doc/sphinx/source/documentation_pages/guesser_modules/tables.rst delete mode 100644 package/doc/sphinx/source/documentation_pages/topology/guessers.rst delete mode 100644 package/doc/sphinx/source/documentation_pages/topology/tables.rst create mode 100644 testsuite/MDAnalysisTests/guesser/test_base.py create mode 100644 testsuite/MDAnalysisTests/guesser/test_default_guesser.py delete mode 100644 testsuite/MDAnalysisTests/topology/test_guessers.py diff --git a/benchmarks/benchmarks/topology.py b/benchmarks/benchmarks/topology.py index 8691515a93..45c3fcf95b 100644 --- a/benchmarks/benchmarks/topology.py +++ b/benchmarks/benchmarks/topology.py @@ -1,6 +1,6 @@ import MDAnalysis import numpy as np -from MDAnalysis.topology import guessers +from MDAnalysis.guesser import DefaultGuesser try: from MDAnalysisTests.datafiles import GRO @@ -26,7 +26,7 @@ def setup(self, num_atoms): def time_guessbonds(self, num_atoms): """Benchmark for guessing bonds""" - guessers.guess_bonds(self.ag, self.ag.positions, + DefaultGuesser(None).guess_bonds(self.ag, self.ag.positions, box=self.ag.dimensions, vdwradii=self.vdwradii) diff --git a/package/CHANGELOG b/package/CHANGELOG index 1763df44ec..64fcb63fe0 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -16,13 +16,14 @@ The rules for this file: ------------------------------------------------------------------------------- ??/??/?? IAlibay, HeetVekariya, marinegor, lilyminium, RMeli, ljwoods2, aditya292002, pstaerk, PicoCentauri, BFedder, - tyler.je.reddy, SampurnaM, leonwehrhan, kainszs, orionarcher, + tyler.je.reddy, SampurnaM, leonwehrhan, kainszs, orionarcher, yuxuanzhuang, PythonFZ, laksh-krishna-sharma, orbeckst, MattTDavies, - talagayev + talagayev, aya9aladdin * 2.8.0 Fixes + * Fix Bohrium (Bh) atomic mass in tables.py (PR #3753) * set `n_parts` to the total number of frames being analyzed if `n_parts` is bigger. (Issue #4685) * Catch higher dimensional indexing in GroupBase & ComponentBase (Issue #4647) @@ -56,6 +57,15 @@ Fixes * Fix groups.py doctests using sphinx directives (Issue #3925, PR #4374) Enhancements + * Removed type and mass guessing from all topology parsers (PR #3753) + * Added guess_TopologyAttrs() API to the Universe to handle attribute + guessing (PR #3753) + * Added the DefaultGuesser class, which is a general-purpose guesser with + the same functionalities as the existing guesser.py methods (PR #3753) + * Added is_value_missing() to `TopologyAttrs` to check for missing + values (PR #3753) + * Added guessed `Element` attribute to the ITPParser to preserve old mass + partial guessing behavior from being broken (PR #3753) * MDAnalysis now supports Python 3.13 (PR #4732) * Introduce parallelization API to `AnalysisBase` and to `analysis.rms.RMSD` class (Issue #4158, PR #4304) @@ -100,6 +110,9 @@ Changes numpy.testing.assert_allclose #4438) Deprecations + * Unknown masses are set to 0.0 for current version, this will be depracated + in version 3.0.0 and replaced by :class:`Masses`' no_value_label attribute(np.nan) + (PR #3753) * The MDAnalysis.anaylsis.encore module has been deprecated in favour of the mdaencore MDAKit and will be removed in version 3.0.0 (PR #4737) * The MMTF Reader is deprecated and will be removed in version 3.0 diff --git a/package/MDAnalysis/__init__.py b/package/MDAnalysis/__init__.py index 0e9e557460..ca11be4bdf 100644 --- a/package/MDAnalysis/__init__.py +++ b/package/MDAnalysis/__init__.py @@ -181,7 +181,7 @@ _TOPOLOGY_ATTRS: Dict = {} # {attrname: cls} _TOPOLOGY_TRANSPLANTS: Dict = {} # {name: [attrname, method, transplant class]} _TOPOLOGY_ATTRNAMES: Dict = {} # {lower case name w/o _ : name} - +_GUESSERS: Dict = {} # custom exceptions and warnings from .exceptions import ( diff --git a/package/MDAnalysis/analysis/bat.py b/package/MDAnalysis/analysis/bat.py index 5186cb6c88..c8a908f9ea 100644 --- a/package/MDAnalysis/analysis/bat.py +++ b/package/MDAnalysis/analysis/bat.py @@ -175,7 +175,7 @@ class to calculate dihedral angles for a given set of atoms or residues def _sort_atoms_by_mass(atoms, reverse=False): - r"""Sorts a list of atoms by name and then by index + r"""Sorts a list of atoms by mass and then by index The atom index is used as a tiebreaker so that the ordering is reproducible. diff --git a/package/MDAnalysis/converters/OpenMMParser.py b/package/MDAnalysis/converters/OpenMMParser.py index eef9287378..b3402c448e 100644 --- a/package/MDAnalysis/converters/OpenMMParser.py +++ b/package/MDAnalysis/converters/OpenMMParser.py @@ -25,6 +25,9 @@ =================================================================== .. versionadded:: 2.0.0 +.. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place + now through universe.guess_TopologyAttrs() API) Converts an @@ -59,8 +62,7 @@ import warnings from ..topology.base import TopologyReaderBase -from ..topology.tables import SYMB2Z -from ..topology.guessers import guess_types, guess_masses +from ..guesser.tables import SYMB2Z from ..core.topology import Topology from ..core.topologyattrs import ( Atomids, @@ -108,11 +110,6 @@ def _mda_topology_from_omm_topology(self, omm_topology): ------- top : MDAnalysis.core.topology.Topology - Note - ---- - When none of the elements are present in the openmm topolgy, their - atomtypes are guessed using their names and their masses are - then guessed using their atomtypes. When partial elements are present, values from available elements are used whereas the absent elements are assigned an empty string @@ -184,21 +181,32 @@ def _mda_topology_from_omm_topology(self, omm_topology): warnings.warn("Element information missing for some atoms. " "These have been given an empty element record ") if any(i == 'X' for i in atomtypes): - warnings.warn("For absent elements, atomtype has been " - "set to 'X' and mass has been set to 0.0. " - "If needed these can be guessed using " - "MDAnalysis.topology.guessers.") + warnings.warn( + "For absent elements, atomtype has been " + "set to 'X' and mass has been set to 0.0. " + "If needed these can be guessed using " + "universe.guess_TopologyAttrs(" + "to_guess=['masses', 'types']). " + "(for MDAnalysis version 2.x " + "this is done automatically," + " but it will be removed in 3.0).") + attrs.append(Elements(np.array(validated_elements, dtype=object))) else: - atomtypes = guess_types(atomnames) - masses = guess_masses(atomtypes) - wmsg = ("Element information is missing for all the atoms. " - "Elements attribute will not be populated. " - "Atomtype attribute will be guessed using atom " - "name and mass will be guessed using atomtype." - "See MDAnalysis.topology.guessers.") + wmsg = ( + "Element information is missing for all the atoms. " + "Elements attribute will not be populated. " + "Atomtype attribute will be guessed using atom " + "name and mass will be guessed using atomtype." + "For MDAnalysis version 2.x this is done automatically, " + "but it will be removed in MDAnalysis v3.0. " + "These can be guessed using " + "universe.guess_TopologyAttrs(" + "to_guess=['masses', 'types']) " + "See MDAnalysis.guessers.") + warnings.warn(wmsg) else: attrs.append(Elements(np.array(validated_elements, dtype=object))) diff --git a/package/MDAnalysis/converters/ParmEd.py b/package/MDAnalysis/converters/ParmEd.py index 174dc9fad3..cc2e7a4cc5 100644 --- a/package/MDAnalysis/converters/ParmEd.py +++ b/package/MDAnalysis/converters/ParmEd.py @@ -81,12 +81,12 @@ import itertools import warnings +from ..guesser.tables import SYMB2Z import numpy as np from numpy.lib import NumpyVersion from . import base from ..coordinates.base import SingleFrameReaderBase -from ..topology.tables import SYMB2Z from ..core.universe import Universe from ..exceptions import NoDataError diff --git a/package/MDAnalysis/converters/ParmEdParser.py b/package/MDAnalysis/converters/ParmEdParser.py index 55dc48b2f1..86de585fe5 100644 --- a/package/MDAnalysis/converters/ParmEdParser.py +++ b/package/MDAnalysis/converters/ParmEdParser.py @@ -90,7 +90,7 @@ import numpy as np from ..topology.base import TopologyReaderBase, change_squash -from ..topology.tables import Z2SYMB +from ..guesser.tables import Z2SYMB from ..core.topologyattrs import ( Atomids, Atomnames, diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py index 139528440a..b6d806df4c 100644 --- a/package/MDAnalysis/converters/RDKit.py +++ b/package/MDAnalysis/converters/RDKit.py @@ -255,9 +255,7 @@ class RDKitConverter(base.ConverterBase): from MDAnalysisTests.datafiles import PSF, DCD from rdkit.Chem.Descriptors3D import Asphericity - u = mda.Universe(PSF, DCD) - elements = mda.topology.guessers.guess_types(u.atoms.names) - u.add_TopologyAttr('elements', elements) + u = mda.Universe(PSF, DCD, to_guess=['elements']) ag = u.select_atoms("resid 1-10") for ts in u.trajectory: diff --git a/package/MDAnalysis/converters/RDKitParser.py b/package/MDAnalysis/converters/RDKitParser.py index 841f979eec..6bca57a43f 100644 --- a/package/MDAnalysis/converters/RDKitParser.py +++ b/package/MDAnalysis/converters/RDKitParser.py @@ -1,5 +1,5 @@ # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # # MDAnalysis --- https://www.mdanalysis.org # Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors @@ -47,7 +47,6 @@ import numpy as np from ..topology.base import TopologyReaderBase, change_squash -from ..topology import guessers from ..core.topologyattrs import ( Atomids, Atomnames, @@ -90,6 +89,7 @@ class RDKitParser(TopologyReaderBase): - Atomnames - Aromaticities - Elements + - Types - Masses - Bonds - Resids @@ -97,9 +97,6 @@ class RDKitParser(TopologyReaderBase): - RSChirality - Segids - Guesses the following: - - Atomtypes - Depending on RDKit's input, the following Attributes might be present: - Charges - Resnames @@ -156,6 +153,12 @@ class RDKitParser(TopologyReaderBase): .. versionadded:: 2.0.0 .. versionchanged:: 2.1.0 Added R/S chirality support + .. versionchanged:: 2.8.0 + Removed type guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). If atoms types is not + present in the input rdkit molecule as a _TriposAtomType property, + the type attribute get the same values as the element attribute. + """ format = 'RDKIT' @@ -303,8 +306,7 @@ def parse(self, **kwargs): if atomtypes: attrs.append(Atomtypes(np.array(atomtypes, dtype=object))) else: - atomtypes = guessers.guess_types(names) - attrs.append(Atomtypes(atomtypes, guessed=True)) + atomtypes = np.char.upper(elements) # Partial charges if charges: diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index bce51c43cd..5e9530cac8 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -155,7 +155,6 @@ from ..lib.util import store_init_arguments from . import base from .timestep import Timestep -from ..topology.core import guess_atom_element from ..exceptions import NoDataError diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 91b2c77930..7c9a3650df 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -3453,7 +3453,6 @@ def guess_bonds(self, vdwradii=None, fudge_factor=0.55, lower_bound=0.1): ---------- vdwradii : dict, optional Dict relating atom types: vdw radii - fudge_factor : float, optional The factor by which atoms must overlap each other to be considered a bond. Larger values will increase the number of bonds found. [0.55] @@ -3477,8 +3476,8 @@ def guess_bonds(self, vdwradii=None, fudge_factor=0.55, lower_bound=0.1): Corrected misleading docs, and now allows passing of `fudge_factor` and `lower_bound` arguments. """ - from ..topology.core import guess_bonds, guess_angles, guess_dihedrals from .topologyattrs import Bonds, Angles, Dihedrals + from ..guesser.default_guesser import DefaultGuesser def get_TopAttr(u, name, cls): """either get *name* or create one from *cls*""" @@ -3490,22 +3489,20 @@ def get_TopAttr(u, name, cls): return attr # indices of bonds - b = guess_bonds( - self.atoms, - self.atoms.positions, - vdwradii=vdwradii, - box=self.dimensions, - fudge_factor=fudge_factor, - lower_bound=lower_bound, - ) - bondattr = get_TopAttr(self.universe, "bonds", Bonds) + guesser = DefaultGuesser(None, fudge_factor=fudge_factor, + lower_bound=lower_bound, + box=self.dimensions, + vdwradii=vdwradii) + b = guesser.guess_bonds(self.atoms, self.atoms.positions) + + bondattr = get_TopAttr(self.universe, 'bonds', Bonds) bondattr._add_bonds(b, guessed=True) - a = guess_angles(self.bonds) + a = guesser.guess_angles(self.bonds) angleattr = get_TopAttr(self.universe, 'angles', Angles) angleattr._add_bonds(a, guessed=True) - d = guess_dihedrals(self.angles) + d = guesser.guess_dihedrals(self.angles) diheattr = get_TopAttr(self.universe, 'dihedrals', Dihedrals) diheattr._add_bonds(d) diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index 92fa25a5e5..5e2621dc63 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -518,6 +518,18 @@ def set_segments(self, sg, values): """Set segmentattributes for a given SegmentGroup""" raise NotImplementedError + @classmethod + def are_values_missing(cls, values): + """check if an attribute has a missing value + + .. versionadded:: 2.8.0 + """ + missing_value_label = getattr(cls, 'missing_value_label', None) + + if missing_value_label is np.nan: + return np.isnan(values) + else: + return values == missing_value_label # core attributes @@ -1441,6 +1453,7 @@ class Masses(AtomAttr): attrname = 'masses' singular = 'mass' per_object = 'atom' + missing_value_label = np.nan target_classes = [AtomGroup, ResidueGroup, SegmentGroup, Atom, Residue, Segment] transplants = defaultdict(list) diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 739e048339..c0ac6bcf6f 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -86,7 +86,7 @@ from .topology import Topology from .topologyattrs import AtomAttr, ResidueAttr, SegmentAttr, BFACTOR_WARNING from .topologyobjects import TopologyObject - +from ..guesser.base import get_guesser logger = logging.getLogger("MDAnalysis.core.universe") @@ -238,6 +238,25 @@ class Universe(object): vdwradii: dict, ``None``, default ``None`` For use with *guess_bonds*. Supply a dict giving a vdwradii for each atom type which are used in guessing bonds. + context: str or :mod:`Guesser`, default ``'default'`` + Type of the Guesser to be used in guessing TopologyAttrs + to_guess: list[str] (optional, default ``['types', 'masses']``) + TopologyAttrs to be guessed. These TopologyAttrs will be wholly + guessed if they don't exist in the Universe. If they already exist in + the Universe, only empty or missing values will be guessed. + + .. warning:: + In MDAnalysis 2.x, types and masses are being automatically guessed + if they are missing (``to_guess=('types, 'masses')``). + However, starting with release 3.0 **no guessing will be done + by default** and it will be up to the user to request guessing + using ``to_guess`` and ``force_guess``. + + force_guess: list[str], (optional) + TopologyAttrs in this list will be force guessed. If the TopologyAttr + does not already exist in the Universe, this has no effect. If the + TopologyAttr does already exist, all values will be overwritten + by guessed values. fudge_factor: float, default [0.55] For use with *guess_bonds*. Supply the factor by which atoms must overlap each other to be considered a bond. @@ -267,7 +286,7 @@ class Universe(object): dimensions : numpy.ndarray system dimensions (simulation unit cell, if set in the trajectory) at the *current time step* - (see :attr:`MDAnalysis.coordinates.timestep.Timestep.dimensions`). + (see :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`). The unit cell can be set for the current time step (but the change is not permanent unless written to a file). atoms : AtomGroup @@ -320,11 +339,18 @@ class Universe(object): .. versionchanged:: 2.5.0 Added fudge_factor and lower_bound parameters for use with *guess_bonds*. + + .. versionchanged:: 2.8.0 + Added :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` API + guessing masses and atom types after topology + is read from a registered parser. + """ def __init__(self, topology=None, *coordinates, all_coordinates=False, format=None, topology_format=None, transformations=None, guess_bonds=False, vdwradii=None, fudge_factor=0.55, - lower_bound=0.1, in_memory=False, + lower_bound=0.1, in_memory=False, context='default', + to_guess=('types', 'masses'), force_guess=(), in_memory_step=1, **kwargs): self._trajectory = None # managed attribute holding Reader @@ -333,7 +359,7 @@ def __init__(self, topology=None, *coordinates, all_coordinates=False, self.residues = None self.segments = None self.filename = None - + self._context = get_guesser(context) self._kwargs = { 'transformations': transformations, 'guess_bonds': guess_bonds, @@ -381,12 +407,17 @@ def __init__(self, topology=None, *coordinates, all_coordinates=False, self._trajectory.add_transformations(*transformations) if guess_bonds: - self.atoms.guess_bonds(vdwradii=vdwradii, fudge_factor=fudge_factor, - lower_bound=lower_bound) + force_guess = list(force_guess) + ['bonds', 'angles', 'dihedrals'] + + self.guess_TopologyAttrs( + context, to_guess, force_guess, vdwradii=vdwradii, **kwargs) + def copy(self): """Return an independent copy of this Universe""" - new = self.__class__(self._topology.copy()) + context = self._context.copy() + new = self.__class__(self._topology.copy(), + to_guess=(), context=context) new.trajectory = self.trajectory.copy() return new @@ -475,7 +506,7 @@ def empty(cls, n_atoms, n_residues=1, n_segments=1, n_frames=1, residue_segindex=residue_segindex, ) - u = cls(top) + u = cls(top, to_guess=()) if n_frames > 1 or trajectory: coords = np.zeros((n_frames, n_atoms, 3), dtype=np.float32) @@ -714,10 +745,10 @@ def __repr__(self): n_atoms=len(self.atoms)) @classmethod - def _unpickle_U(cls, top, traj): + def _unpickle_U(cls, top, traj, context): """Special method used by __reduce__ to deserialise a Universe""" # top is a Topology obj at this point, but Universe can handle that. - u = cls(top) + u = cls(top, to_guess=(), context=context) u.trajectory = traj return u @@ -727,7 +758,7 @@ def __reduce__(self): # transformation (that has AtomGroup inside). Use __reduce__ instead. # Universe's two "legs" of top and traj both serialise themselves. return (self._unpickle_U, (self._topology, - self._trajectory)) + self._trajectory, self._context.copy())) # Properties @property @@ -1459,13 +1490,114 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, "hydrogens with `addHs=True`") numConfs = rdkit_kwargs.pop("numConfs", numConfs) - if not (type(numConfs) is int and numConfs > 0): + if not (isinstance(numConfs, int) and numConfs > 0): raise SyntaxError("numConfs must be a non-zero positive " - "integer instead of {0}".format(numConfs)) + "integer instead of {0}".format(numConfs)) AllChem.EmbedMultipleConfs(mol, numConfs, **rdkit_kwargs) return cls(mol, **kwargs) + def guess_TopologyAttrs( + self, context=None, to_guess=None, force_guess=None, **kwargs): + """ + Guess and add attributes through a specific context-aware guesser. + + Parameters + ---------- + context: str or :mod:`Guesser` class + For calling a matching guesser class for this specific context + to_guess: Optional[list[str]] + TopologyAttrs to be guessed. These TopologyAttrs will be wholly + guessed if they don't exist in the Universe. If they already exist in + the Universe, only empty or missing values will be guessed. + + .. warning:: + In MDAnalysis 2.x, types and masses are being automatically guessed + if they are missing (``to_guess=('types, 'masses')``). + However, starting with release 3.0 **no guessing will be done + by default** and it will be up to the user to request guessing + using ``to_guess`` and ``force_guess``. + + force_guess: Optional[list[str]] + TopologyAttrs in this list will be force guessed. If the + TopologyAttr does not already exist in the Universe, this has no + effect. If the TopologyAttr does already exist, all values will + be overwritten by guessed values. + **kwargs: extra arguments to be passed to the guesser class + + Examples + -------- + To guess ``masses`` and ``types`` attributes:: + + u.guess_TopologyAttrs(context='default', to_guess=['masses', 'types']) + + .. versionadded:: 2.8.0 + + """ + if not context: + context = self._context + + guesser = get_guesser(context, self.universe, **kwargs) + self._context = guesser + + if to_guess is None: + to_guess = [] + if force_guess is None: + force_guess = [] + + total_guess = list(to_guess) + list(force_guess) + + # Removing duplicates from the guess list while keeping attributes + # order as it is more convenient to guess attributes + # in the same order that the user provided + total_guess = list(dict.fromkeys(total_guess)) + + objects = ['bonds', 'angles', 'dihedrals', 'impropers'] + + # Checking if the universe is empty to avoid errors + # from guesser methods + if self._topology.n_atoms > 0: + + topology_attrs = [att.attrname for att in + self._topology.read_attributes] + + common_attrs = set(to_guess) & set(topology_attrs) + common_attrs = ", ".join(attr for attr in common_attrs) + + if len(common_attrs) > 0: + logger.info( + f'The attribute(s) {common_attrs} have already been read ' + 'from the topology file. The guesser will ' + 'only guess empty values for this attribute, ' + 'if any exists. To overwrite it by completely ' + 'guessed values, you can pass the attribute to' + ' the force_guess parameter instead of ' + 'the to_guess one') + + for attr in total_guess: + if guesser.is_guessable(attr): + fg = attr in force_guess + values = guesser.guess_attr(attr, fg) + + if values is not None: + if attr in objects: + self._add_topology_objects( + attr, values, guessed=True) + else: + guessed_attr = _TOPOLOGY_ATTRS[attr](values, True) + self.add_TopologyAttr(guessed_attr) + logger.info( + f'attribute {attr} has been guessed' + ' successfully.') + + else: + raise ValueError(f'{context} guesser can not guess the' + f' following attribute: {attr}') + + else: + warnings.warn('Can not guess attributes ' + 'for universe with 0 atoms') + def Merge(*args): """Create a new new :class:`Universe` from one or more diff --git a/package/MDAnalysis/guesser/__init__.py b/package/MDAnalysis/guesser/__init__.py new file mode 100644 index 0000000000..b433356290 --- /dev/null +++ b/package/MDAnalysis/guesser/__init__.py @@ -0,0 +1,50 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the Lesser GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +""" +Context-specific guessers --- :mod:`MDAnalysis.guesser` +======================================================== + +You can use guesser classes either directly by initiating an instance of it and use its guessing methods or through +the :meth:`guess_TopologyAttrs `: API of the universe. + +The following table lists the currently supported Context-aware Guessers along with +the attributes they can guess. + +.. table:: Table of Supported Guessers + + ============================================== ========== ===================== =================================================== + Name Context Attributes Remarks + ============================================== ========== ===================== =================================================== + :ref:`DefaultGuesser ` default types, elements, general purpose guesser + masses, bonds, + angles, dihedrals, + improper dihedrals + ============================================== ========== ===================== =================================================== + + + + +""" +from . import base +from .default_guesser import DefaultGuesser diff --git a/package/MDAnalysis/guesser/base.py b/package/MDAnalysis/guesser/base.py new file mode 100644 index 0000000000..aab0723aaa --- /dev/null +++ b/package/MDAnalysis/guesser/base.py @@ -0,0 +1,202 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the Lesser GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +""" +Base guesser classes --- :mod:`MDAnalysis.guesser.base` +================================================================ + +Derive context-specific guesser classes from the base class in this module. + +Classes +------- + +.. autoclass:: GuesserBase + :members: + :inherited-members: + +.. autofunction:: get_guesser + +""" +from .. import _GUESSERS +import numpy as np +from .. import _TOPOLOGY_ATTRS +import logging +from typing import Dict +import copy + +logger = logging.getLogger("MDAnalysis.guesser.base") + + +class _GuesserMeta(type): + """Internal: guesser classes registration + + When classes which inherit from GuesserBase are *defined* + this metaclass makes it known to MDAnalysis. 'context' + attribute are read: + - `context` defines the context of the guesser class for example: + forcefield specific context as MartiniGuesser + and file specific context as PDBGuesser. + + Eg:: + + class FooGuesser(GuesserBase): + format = 'foo' + + .. versionadded:: 2.8.0 + """ + def __init__(cls, name, bases, classdict): + type.__init__(type, name, bases, classdict) + + _GUESSERS[classdict['context'].upper()] = cls + + +class GuesserBase(metaclass=_GuesserMeta): + """Base class for context-specific guessers to inherit from + + Parameters + ---------- + universe : Universe, optional + Supply a Universe to the Guesser. This then becomes the source of atom + attributes to be used in guessing processes. (this is relevant to how + the universe's guess_TopologyAttrs API works. + See :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs`). + **kwargs : dict, optional + To pass additional data to the guesser that can be used with + different methods. + + + .. versionadded:: 2.8.0 + + """ + context = 'base' + _guesser_methods: Dict = {} + + def __init__(self, universe=None, **kwargs): + self._universe = universe + self._kwargs = kwargs + + def update_kwargs(self, **kwargs): + self._kwargs.update(kwargs) + + def copy(self): + """Return a copy of this Guesser""" + kwargs = copy.deepcopy(self._kwargs) + new = self.__class__(universe=None, **kwargs) + return new + + def is_guessable(self, attr_to_guess): + """check if the passed atrribute can be guessed by the guesser class + + Parameters + ---------- + guess: str + Attribute to be guessed then added to the Universe + + Returns + ------- + bool + """ + if attr_to_guess.lower() in self._guesser_methods: + return True + + return False + + def guess_attr(self, attr_to_guess, force_guess=False): + """map the attribute to be guessed with the apporpiate guessing method + + Parameters + ---------- + attr_to_guess: str + an atrribute to be guessed then to be added to the universe + force_guess: bool + To indicate wether to only partialy guess the empty values of the + attribute or to overwrite all existing values by guessed one + + Returns + ------- + NDArray of guessed values + + """ + + # check if the topology already has the attribute to partially guess it + if hasattr(self._universe.atoms, attr_to_guess) and not force_guess: + attr_values = np.array( + getattr(self._universe.atoms, attr_to_guess, None)) + + top_attr = _TOPOLOGY_ATTRS[attr_to_guess] + + empty_values = top_attr.are_values_missing(attr_values) + + if True in empty_values: + # pass to the guesser_method boolean mask to only guess the + # empty values + attr_values[empty_values] = self._guesser_methods[attr_to_guess]( + indices_to_guess=empty_values) + return attr_values + + else: + logger.info( + f'There is no empty {attr_to_guess} values. Guesser did ' + f'not guess any new values for {attr_to_guess} attribute') + return None + else: + return np.array(self._guesser_methods[attr_to_guess]()) + + +def get_guesser(context, u=None, **kwargs): + """get an appropiate guesser to the Universe and pass + the Universe to the guesser + + Parameters + ---------- + u: Universe + to be passed to the guesser + context: str or Guesser + **kwargs : dict, optional + Extra arguments are passed to the guesser. + + Returns + ------- + Guesser class + + Raises + ------ + * :exc:`KeyError` upon failing to return a guesser class + + .. versionadded:: 2.8.0 + + """ + if isinstance(context, GuesserBase): + context._universe = u + context.update_kwargs(**kwargs) + return context + try: + if issubclass(context, GuesserBase): + return context(u, **kwargs) + except TypeError: + pass + + try: + guesser = _GUESSERS[context.upper()](u, **kwargs) + except KeyError: + raise KeyError("Unidentified guesser type {0}".format(context)) + return guesser diff --git a/package/MDAnalysis/guesser/default_guesser.py b/package/MDAnalysis/guesser/default_guesser.py new file mode 100644 index 0000000000..a64b023309 --- /dev/null +++ b/package/MDAnalysis/guesser/default_guesser.py @@ -0,0 +1,568 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the Lesser GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 + +r""" +Default Guesser +================ +.. _DefaultGuesser: + +DefaultGuesser is a generic guesser class that has basic guessing methods. +This class is a general purpose guesser that can be used with most topologies, +but being generic makes it the less accurate among all guessers. + + + + + +Classes +------- + +.. autoclass:: DefaultGuesser + :members: + :inherited-members: + +""" +from .base import GuesserBase +import numpy as np +import warnings +import math + +import re + +from ..lib import distances +from . import tables + + +class DefaultGuesser(GuesserBase): + """ + This guesser holds generic methods (not directed to specific contexts) for + guessing different topology attribute. It has the same methods which where + originally found in Topology.guesser.py. The attributes that can be + guessed by this class are: + - masses + - types + - elements + - angles + - dihedrals + - bonds + - improper dihedrals + - aromaticities + + You can use this guesser either directly through an instance, or through + the :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` method. + + Examples + -------- + to guess bonds for a universe:: + + import MDAnalysis as mda + from MDAnalysisTests.datafiles import two_water_gro + + u = mda.Universe(two_water_gro, context='default', to_guess=['bonds']) + + .. versionadded:: 2.8.0 + + """ + context = 'default' + + def __init__(self, universe, **kwargs): + super().__init__(universe, **kwargs) + self._guesser_methods = { + 'masses': self.guess_masses, + 'types': self.guess_types, + 'elements': self.guess_types, + 'bonds': self.guess_bonds, + 'angles': self.guess_angles, + 'dihedrals': self.guess_dihedrals, + 'impropers': self.guess_improper_dihedrals, + 'aromaticities': self.guess_aromaticities, + } + + def guess_masses(self, atom_types=None, indices_to_guess=None): + """Guess the mass of many atoms based upon their type. + For guessing masses through :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs`: + + First try to guess masses from atom elements, if not available, + try to guess masses from types and if not available, try to guess + types. + + Parameters + ---------- + atom_types : Optional[np.ndarray] + Atom types/elements to guess masses from + indices_to_guess : Optional[np.ndarray] + Mask array for partially guess masses for certain atoms + + Returns + ------- + atom_masses : np.ndarray dtype float64 + + Raises + ------ + :exc:`ValueError` + If there are no atom types or elements to guess mass from. + + """ + if atom_types is None: + try: + atom_types = self._universe.atoms.elements + except AttributeError: + try: + atom_types = self._universe.atoms.types + except AttributeError: + try: + atom_types = self.guess_types( + atom_types=self._universe.atoms.names) + except ValueError: + raise ValueError( + "there is no reference attributes" + " (elements, types, or names)" + " in this universe to guess mass from") + + if indices_to_guess is not None: + atom_types = atom_types[indices_to_guess] + + masses = np.array([self.get_atom_mass(atom) + for atom in atom_types], dtype=np.float64) + return masses + + def get_atom_mass(self, element): + """Return the atomic mass in u for *element*. + Masses are looked up in :data:`MDAnalysis.guesser.tables.masses`. + + .. Warning:: Until version 3.0.0 unknown masses are set to 0.0 + + """ + try: + return tables.masses[element] + except KeyError: + try: + return tables.masses[element.upper()] + except KeyError: + warnings.warn( + "Unknown masses are set to 0.0 for current version, " + "this will be deprecated in version 3.0.0 and replaced by" + " Masse's no_value_label (np.nan)", + PendingDeprecationWarning) + return 0.0 + + def guess_atom_mass(self, atomname): + """Guess a mass based on the atom name. + + :func:`guess_atom_element` is used to determine the kind of atom. + + .. warning:: Until version 3.0.0 anything not recognized is simply + set to 0.0; if you rely on the masses you might want to double-check. + """ + return self.get_atom_mass(self.guess_atom_element(atomname)) + + def guess_types(self, atom_types=None, indices_to_guess=None): + """Guess the atom type of many atoms based on atom name + + Parameters + ---------- + atom_types (optional) + atoms names if types guessing is desired to be from names + indices_to_guess (optional) + Mask array for partially guess types for certain atoms + + Returns + ------- + atom_types : np.ndarray dtype object + + Raises + ------ + :exc:`ValueError` + If there is no names to guess types from. + + """ + if atom_types is None: + try: + atom_types = self._universe.atoms.names + except AttributeError: + raise ValueError( + "there is no reference attributes in this universe" + "to guess types from") + + if indices_to_guess is not None: + atom_types = atom_types[indices_to_guess] + + return np.array([self.guess_atom_element(atom) + for atom in atom_types], dtype=object) + + def guess_atom_element(self, atomname): + """Guess the element of the atom from the name. + + Looks in dict to see if element is found, otherwise it uses the first + character in the atomname. The table comes from CHARMM and AMBER atom + types, where the first character is not sufficient to determine the + atom type. Some GROMOS ions have also been added. + + .. Warning: The translation table is incomplete. + This will probably result in some mistakes, + but it still better than nothing! + + See Also + -------- + :func:`guess_atom_type` + :mod:`MDAnalysis.guesser.tables` + """ + NUMBERS = re.compile(r'[0-9]') # match numbers + SYMBOLS = re.compile(r'[*+-]') # match *, +, - + if atomname == '': + return '' + try: + return tables.atomelements[atomname.upper()] + except KeyError: + # strip symbols and numbers + no_symbols = re.sub(SYMBOLS, '', atomname) + name = re.sub(NUMBERS, '', no_symbols).upper() + + # just in case + if name in tables.atomelements: + return tables.atomelements[name] + + while name: + if name in tables.elements: + return name + if name[:-1] in tables.elements: + return name[:-1] + if name[1:] in tables.elements: + return name[1:] + if len(name) <= 2: + return name[0] + name = name[:-1] # probably element is on left not right + + # if it's numbers + return no_symbols + + def guess_bonds(self, atoms=None, coords=None): + r"""Guess if bonds exist between two atoms based on their distance. + + Bond between two atoms is created, if the two atoms are within + + .. math:: + + d < f \cdot (R_1 + R_2) + + of each other, where :math:`R_1` and :math:`R_2` are the VdW radii + of the atoms and :math:`f` is an ad-hoc *fudge_factor*. This is + the `same algorithm that VMD uses`_. + + Parameters + ---------- + atoms : AtomGroup + atoms for which bonds should be guessed + fudge_factor : float, optional + The factor by which atoms must overlap eachother to be considered a + bond. Larger values will increase the number of bonds found. [0.55] + vdwradii : dict, optional + To supply custom vdwradii for atoms in the algorithm. Must be a + dict of format {type:radii}. The default table of van der Waals + radii is hard-coded as :data:`MDAnalysis.guesser.tables.vdwradii`. + Any user defined vdwradii passed as an argument will supercede the + table values. [``None``] + lower_bound : float, optional + The minimum bond length. All bonds found shorter than this length + will be ignored. This is useful for parsing PDB with altloc records + where atoms with altloc A and B maybe very close together and + there should be no chemical bond between them. [0.1] + box : array_like, optional + Bonds are found using a distance search, if unit cell information + is given, periodic boundary conditions will be considered in the + distance search. [``None``] + + Returns + ------- + list + List of tuples suitable for use in Universe topology building. + + Warnings + -------- + No check is done after the bonds are guessed to see if Lewis + structure is correct. This is wrong and will burn somebody. + + Raises + ------ + :exc:`ValueError` + If inputs are malformed or `vdwradii` data is missing. + + + .. _`same algorithm that VMD uses`: + http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.1/ug/node26.html + + """ + if atoms is None: + atoms = self._universe.atoms + + if coords is None: + coords = self._universe.atoms.positions + + if len(atoms) != len(coords): + raise ValueError("'atoms' and 'coord' must be the same length") + + fudge_factor = self._kwargs.get('fudge_factor', 0.55) + + # so I don't permanently change it + vdwradii = tables.vdwradii.copy() + user_vdwradii = self._kwargs.get('vdwradii', None) + # this should make algo use their values over defaults + if user_vdwradii: + vdwradii.update(user_vdwradii) + + # Try using types, then elements + if hasattr(atoms, 'types'): + atomtypes = atoms.types + else: + atomtypes = self.guess_types(atom_types=atoms.names) + + # check that all types have a defined vdw + if not all(val in vdwradii for val in set(atomtypes)): + raise ValueError(("vdw radii for types: " + + ", ".join([t for t in set(atomtypes) if + t not in vdwradii]) + + ". These can be defined manually using the" + + f" keyword 'vdwradii'")) + + lower_bound = self._kwargs.get('lower_bound', 0.1) + + box = self._kwargs.get('box', None) + + if box is not None: + box = np.asarray(box) + + # to speed up checking, calculate what the largest possible bond + # atom that would warrant attention. + # then use this to quickly mask distance results later + max_vdw = max([vdwradii[t] for t in atomtypes]) + + bonds = [] + + pairs, dist = distances.self_capped_distance(coords, + max_cutoff=2.0 * max_vdw, + min_cutoff=lower_bound, + box=box) + for idx, (i, j) in enumerate(pairs): + d = (vdwradii[atomtypes[i]] + + vdwradii[atomtypes[j]]) * fudge_factor + if (dist[idx] < d): + bonds.append((atoms[i].index, atoms[j].index)) + return tuple(bonds) + + def guess_angles(self, bonds=None): + """Given a list of Bonds, find all angles that exist between atoms. + + Works by assuming that if atoms 1 & 2 are bonded, and 2 & 3 are bonded, + then (1,2,3) must be an angle. + + Parameters + ---------- + bonds : Bonds + from which angles should be guessed + + Returns + ------- + list of tuples + List of tuples defining the angles. + Suitable for use in u._topology + + + See Also + -------- + :meth:`guess_bonds` + + """ + from ..core.universe import Universe + + angles_found = set() + + if bonds is None: + if hasattr(self._universe.atoms, 'bonds'): + bonds = self._universe.atoms.bonds + else: + temp_u = Universe.empty(n_atoms=len(self._universe.atoms)) + temp_u.add_bonds(self.guess_bonds( + self._universe.atoms, self._universe.atoms.positions)) + bonds = temp_u.atoms.bonds + + for b in bonds: + for atom in b: + other_a = b.partner(atom) # who's my friend currently in Bond + for other_b in atom.bonds: + if other_b != b: # if not the same bond I start as + third_a = other_b.partner(atom) + desc = tuple( + [other_a.index, atom.index, third_a.index]) + # first index always less than last + if desc[0] > desc[-1]: + desc = desc[::-1] + angles_found.add(desc) + + return tuple(angles_found) + + def guess_dihedrals(self, angles=None): + """Given a list of Angles, find all dihedrals that exist between atoms. + + Works by assuming that if (1,2,3) is an angle, and 3 & 4 are bonded, + then (1,2,3,4) must be a dihedral. + + Parameters + ---------- + angles : Angles + from which dihedrals should be guessed + + Returns + ------- + list of tuples + List of tuples defining the dihedrals. + Suitable for use in u._topology + + """ + from ..core.universe import Universe + + if angles is None: + if hasattr(self._universe.atoms, 'angles'): + angles = self._universe.atoms.angles + + else: + temp_u = Universe.empty(n_atoms=len(self._universe.atoms)) + + temp_u.add_bonds(self.guess_bonds( + self._universe.atoms, self._universe.atoms.positions)) + + temp_u.add_angles(self.guess_angles(temp_u.atoms.bonds)) + + angles = temp_u.atoms.angles + + dihedrals_found = set() + + for b in angles: + a_tup = tuple([a.index for a in b]) # angle as tuple of numbers + # if searching with b[0], want tuple of (b[2], b[1], b[0], +new) + # search the first and last atom of each angle + for atom, prefix in zip([b.atoms[0], b.atoms[-1]], + [a_tup[::-1], a_tup]): + for other_b in atom.bonds: + if not other_b.partner(atom) in b: + third_a = other_b.partner(atom) + desc = prefix + (third_a.index,) + if desc[0] > desc[-1]: + desc = desc[::-1] + dihedrals_found.add(desc) + + return tuple(dihedrals_found) + + def guess_improper_dihedrals(self, angles=None): + """Given a list of Angles, find all improper dihedrals + that exist between atoms. + + Works by assuming that if (1,2,3) is an angle, and 2 & 4 are bonded, + then (2, 1, 3, 4) must be an improper dihedral. + ie the improper dihedral is the angle between the planes formed by + (1, 2, 3) and (1, 3, 4) + + Returns + ------- + List of tuples defining the improper dihedrals. + Suitable for use in u._topology + + """ + + from ..core.universe import Universe + + if angles is None: + if hasattr(self._universe.atoms, 'angles'): + angles = self._universe.atoms.angles + + else: + temp_u = Universe.empty(n_atoms=len(self._universe.atoms)) + + temp_u.add_bonds(self.guess_bonds( + self._universe.atoms, self._universe.atoms.positions)) + + temp_u.add_angles(self.guess_angles(temp_u.atoms.bonds)) + + angles = temp_u.atoms.angles + + dihedrals_found = set() + + for b in angles: + atom = b[1] # select middle atom in angle + # start of improper tuple + a_tup = tuple([b[a].index for a in [1, 2, 0]]) + # if searching with b[1], want tuple of (b[1], b[2], b[0], +new) + # search the first and last atom of each angle + for other_b in atom.bonds: + other_atom = other_b.partner(atom) + # if this atom isn't in the angle I started with + if other_atom not in b: + desc = a_tup + (other_atom.index,) + if desc[0] > desc[-1]: + desc = desc[::-1] + dihedrals_found.add(desc) + + return tuple(dihedrals_found) + + def guess_atom_charge(self, atoms): + """Guess atom charge from the name. + + .. Warning:: Not implemented; simply returns 0. + """ + # TODO: do something slightly smarter, at least use name/element + return 0.0 + + def guess_aromaticities(self, atomgroup=None): + """Guess aromaticity of atoms using RDKit + + Returns + ------- + aromaticities : numpy.ndarray + Array of boolean values for the aromaticity of each atom + + """ + if atomgroup is None: + atomgroup = self._universe.atoms + + mol = atomgroup.convert_to("RDKIT") + return np.array([atom.GetIsAromatic() for atom in mol.GetAtoms()]) + + def guess_gasteiger_charges(self, atomgroup): + """Guess Gasteiger partial charges using RDKit + + Parameters + ---------- + atomgroup : mda.core.groups.AtomGroup + Atoms for which the charges will be guessed + + Returns + ------- + charges : numpy.ndarray + Array of float values representing the charge of each atom + + """ + + mol = atomgroup.convert_to("RDKIT") + from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges + ComputeGasteigerCharges(mol, throwOnParamFailure=True) + return np.array([atom.GetDoubleProp("_GasteigerCharge") + for atom in mol.GetAtoms()], + dtype=np.float32) diff --git a/package/MDAnalysis/topology/tables.py b/package/MDAnalysis/guesser/tables.py similarity index 99% rename from package/MDAnalysis/topology/tables.py rename to package/MDAnalysis/guesser/tables.py index ea46421cc4..5e373616b7 100644 --- a/package/MDAnalysis/topology/tables.py +++ b/package/MDAnalysis/guesser/tables.py @@ -200,7 +200,7 @@ def kv2dict(s, convertor: Any = str): Bk 247 Be 9.012182 Bi 208.98037 -Bh 262 +Bh 264 B 10.811 BR 79.90400 Cd 112.411 diff --git a/package/MDAnalysis/topology/CRDParser.py b/package/MDAnalysis/topology/CRDParser.py index 05dbe9d429..5e4406732f 100644 --- a/package/MDAnalysis/topology/CRDParser.py +++ b/package/MDAnalysis/topology/CRDParser.py @@ -28,8 +28,7 @@ Read a list of atoms from a CHARMM CARD coordinate file (CRD_) to build a basic topology. Reads atom ids (ATOMNO), atom names (TYPES), resids (RESID), residue numbers (RESNO), residue names (RESNames), segment ids -(SEGID) and tempfactor (Weighting). Atom element and mass are guessed based on -the name of the atom. +(SEGID) and tempfactor (Weighting). Residues are detected through a change is either resid or resname while segments are detected according to changes in segid. @@ -49,13 +48,10 @@ from ..lib.util import openany, FORTRANReader from .base import TopologyReaderBase, change_squash -from . import guessers from ..core.topology import Topology from ..core.topologyattrs import ( Atomids, Atomnames, - Atomtypes, - Masses, Resids, Resnames, Resnums, @@ -76,9 +72,9 @@ class CRDParser(TopologyReaderBase): - Resnums - Segids - Guesses the following attributes: - - Atomtypes - - Masses + .. versionchanged:: 2.8.0 + Type and mass are not longer guessed here. Until 3.0 these will still be + set by default through through universe.guess_TopologyAttrs() API. """ format = 'CRD' @@ -141,10 +137,6 @@ def parse(self, **kwargs): resnums = np.array(resnums, dtype=np.int32) segids = np.array(segids, dtype=object) - # Guess some attributes - atomtypes = guessers.guess_types(atomnames) - masses = guessers.guess_masses(atomtypes) - atom_residx, (res_resids, res_resnames, res_resnums, res_segids) = change_squash( (resids, resnames), (resids, resnames, resnums, segids)) res_segidx, (seg_segids,) = change_squash( @@ -154,8 +146,6 @@ def parse(self, **kwargs): attrs=[ Atomids(atomids), Atomnames(atomnames), - Atomtypes(atomtypes, guessed=True), - Masses(masses, guessed=True), Tempfactors(tempfactors), Resids(res_resids), Resnames(res_resnames), diff --git a/package/MDAnalysis/topology/DLPolyParser.py b/package/MDAnalysis/topology/DLPolyParser.py index 489e6675bd..b85a0d188c 100644 --- a/package/MDAnalysis/topology/DLPolyParser.py +++ b/package/MDAnalysis/topology/DLPolyParser.py @@ -29,9 +29,6 @@ DLPoly files have the following Attributes: - Atomnames - Atomids -Guesses the following attributes: - - Atomtypes - - Masses .. _Poly: http://www.stfc.ac.uk/SCD/research/app/ccg/software/DL_POLY/44516.aspx @@ -46,14 +43,11 @@ """ import numpy as np -from . import guessers from .base import TopologyReaderBase from ..core.topology import Topology from ..core.topologyattrs import ( Atomids, Atomnames, - Atomtypes, - Masses, Resids, Resnums, Segids, @@ -65,6 +59,9 @@ class ConfigParser(TopologyReaderBase): """DL_Poly CONFIG file parser .. versionadded:: 0.10.1 + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). """ format = 'CONFIG' @@ -109,14 +106,9 @@ def parse(self, **kwargs): else: ids = np.arange(n_atoms) - atomtypes = guessers.guess_types(names) - masses = guessers.guess_masses(atomtypes) - attrs = [ Atomnames(names), Atomids(ids), - Atomtypes(atomtypes, guessed=True), - Masses(masses, guessed=True), Resids(np.array([1])), Resnums(np.array([1])), Segids(np.array(['SYSTEM'], dtype=object)), @@ -176,14 +168,9 @@ def parse(self, **kwargs): else: ids = np.arange(n_atoms) - atomtypes = guessers.guess_types(names) - masses = guessers.guess_masses(atomtypes) - attrs = [ Atomnames(names), Atomids(ids), - Atomtypes(atomtypes, guessed=True), - Masses(masses, guessed=True), Resids(np.array([1])), Resnums(np.array([1])), Segids(np.array(['SYSTEM'], dtype=object)), diff --git a/package/MDAnalysis/topology/DMSParser.py b/package/MDAnalysis/topology/DMSParser.py index 1240c9b757..f165272fc2 100644 --- a/package/MDAnalysis/topology/DMSParser.py +++ b/package/MDAnalysis/topology/DMSParser.py @@ -44,7 +44,6 @@ import sqlite3 import os -from . import guessers from .base import TopologyReaderBase, change_squash from ..core.topology import Topology from ..core.topologyattrs import ( @@ -53,7 +52,6 @@ Bonds, Charges, ChainIDs, - Atomtypes, Masses, Resids, Resnums, @@ -87,12 +85,14 @@ class DMSParser(TopologyReaderBase): - Resids Segment: - Segids - Guesses the following attributes - - Atomtypes .. _DESRES: http://www.deshawresearch.com .. _Desmond: http://www.deshawresearch.com/resources_desmond.html .. _DMS: http://www.deshawresearch.com/Desmond_Users_Guide-0.7.pdf + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'DMS' @@ -161,7 +161,6 @@ def dict_factory(cursor, row): attrs['bond'] = bondlist attrs['bondorder'] = bondorder - atomtypes = guessers.guess_types(attrs['name']) topattrs = [] # Bundle in Atom level objects for attr, cls in [ @@ -173,7 +172,6 @@ def dict_factory(cursor, row): ('chain', ChainIDs), ]: topattrs.append(cls(attrs[attr])) - topattrs.append(Atomtypes(atomtypes, guessed=True)) # Residues atom_residx, (res_resids, diff --git a/package/MDAnalysis/topology/ExtendedPDBParser.py b/package/MDAnalysis/topology/ExtendedPDBParser.py index 2138d38692..ec6e1e527d 100644 --- a/package/MDAnalysis/topology/ExtendedPDBParser.py +++ b/package/MDAnalysis/topology/ExtendedPDBParser.py @@ -78,8 +78,6 @@ class ExtendedPDBParser(PDBParser.PDBParser): - bonds - formalcharges - Guesses the following Attributes: - - masses See Also -------- diff --git a/package/MDAnalysis/topology/FHIAIMSParser.py b/package/MDAnalysis/topology/FHIAIMSParser.py index 688a3e1626..fcf95691f3 100644 --- a/package/MDAnalysis/topology/FHIAIMSParser.py +++ b/package/MDAnalysis/topology/FHIAIMSParser.py @@ -47,15 +47,12 @@ """ import numpy as np -from . import guessers from ..lib.util import openany from .base import TopologyReaderBase from ..core.topology import Topology from ..core.topologyattrs import ( Atomnames, Atomids, - Atomtypes, - Masses, Resids, Resnums, Segids, @@ -69,10 +66,9 @@ class FHIAIMSParser(TopologyReaderBase): Creates the following attributes: - Atomnames - Guesses the following attributes: - - Atomtypes - - Masses - + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). """ format = ['IN', 'FHIAIMS'] @@ -100,14 +96,8 @@ def parse(self, **kwargs): names = np.asarray(names) natoms = len(names) - # Guessing time - atomtypes = guessers.guess_types(names) - masses = guessers.guess_masses(names) - attrs = [Atomnames(names), Atomids(np.arange(natoms) + 1), - Atomtypes(atomtypes, guessed=True), - Masses(masses, guessed=True), Resids(np.array([1])), Resnums(np.array([1])), Segids(np.array(['SYSTEM'], dtype=object)), diff --git a/package/MDAnalysis/topology/GMSParser.py b/package/MDAnalysis/topology/GMSParser.py index 022fb99070..812207ed67 100644 --- a/package/MDAnalysis/topology/GMSParser.py +++ b/package/MDAnalysis/topology/GMSParser.py @@ -48,15 +48,12 @@ import re import numpy as np -from . import guessers from ..lib.util import openany from .base import TopologyReaderBase from ..core.topology import Topology from ..core.topologyattrs import ( Atomids, Atomnames, - Atomtypes, - Masses, Resids, Resnums, Segids, @@ -75,11 +72,12 @@ class GMSParser(TopologyReaderBase): Creates the following Attributes: - names - atomic charges - Guesses: - - types - - masses .. versionadded:: 0.9.1 + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'GMS' @@ -112,15 +110,11 @@ def parse(self, **kwargs): at_charges.append(at_charge) #TODO: may be use coordinates info from _m.group(3-5) ?? - atomtypes = guessers.guess_types(names) - masses = guessers.guess_masses(atomtypes) n_atoms = len(names) attrs = [ Atomids(np.arange(n_atoms) + 1), Atomnames(np.array(names, dtype=object)), AtomicCharges(np.array(at_charges, dtype=np.int32)), - Atomtypes(atomtypes, guessed=True), - Masses(masses, guessed=True), Resids(np.array([1])), Resnums(np.array([1])), Segids(np.array(['SYSTEM'], dtype=object)), diff --git a/package/MDAnalysis/topology/GROParser.py b/package/MDAnalysis/topology/GROParser.py index 23fb0af565..6bcaec24cb 100644 --- a/package/MDAnalysis/topology/GROParser.py +++ b/package/MDAnalysis/topology/GROParser.py @@ -28,7 +28,6 @@ Read a list of atoms from a GROMOS/Gromacs GRO coordinate file to build a basic topology. -Atom types and masses are guessed. See Also -------- @@ -49,9 +48,7 @@ from ..lib.util import openany from ..core.topologyattrs import ( Atomnames, - Atomtypes, Atomids, - Masses, Resids, Resnames, Resnums, @@ -59,7 +56,6 @@ ) from ..core.topology import Topology from .base import TopologyReaderBase, change_squash -from . import guessers class GROParser(TopologyReaderBase): @@ -71,9 +67,10 @@ class GROParser(TopologyReaderBase): - atomids - atomnames - Guesses the following attributes - - atomtypes - - masses + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'GRO' @@ -129,9 +126,6 @@ def parse(self, **kwargs): for s in starts: resids[s:] += 100000 - # Guess types and masses - atomtypes = guessers.guess_types(names) - masses = guessers.guess_masses(atomtypes) residx, (new_resids, new_resnames) = change_squash( (resids, resnames), (resids, resnames)) @@ -141,11 +135,9 @@ def parse(self, **kwargs): attrs = [ Atomnames(names), Atomids(indices), - Atomtypes(atomtypes, guessed=True), Resids(new_resids), Resnums(new_resids.copy()), Resnames(new_resnames), - Masses(masses, guessed=True), Segids(np.array(['SYSTEM'], dtype=object)) ] diff --git a/package/MDAnalysis/topology/GSDParser.py b/package/MDAnalysis/topology/GSDParser.py index f1ae72d287..bd62d0f5f9 100644 --- a/package/MDAnalysis/topology/GSDParser.py +++ b/package/MDAnalysis/topology/GSDParser.py @@ -54,7 +54,6 @@ import os import numpy as np -from . import guessers from .base import TopologyReaderBase from ..core.topology import Topology from ..core.topologyattrs import ( diff --git a/package/MDAnalysis/topology/HoomdXMLParser.py b/package/MDAnalysis/topology/HoomdXMLParser.py index b8e7baa061..f2d1cea952 100644 --- a/package/MDAnalysis/topology/HoomdXMLParser.py +++ b/package/MDAnalysis/topology/HoomdXMLParser.py @@ -49,7 +49,6 @@ import xml.etree.ElementTree as ET import numpy as np -from . import guessers from ..lib.util import openany from .base import TopologyReaderBase from ..core.topology import Topology diff --git a/package/MDAnalysis/topology/ITPParser.py b/package/MDAnalysis/topology/ITPParser.py index 932430c0a4..d855216027 100644 --- a/package/MDAnalysis/topology/ITPParser.py +++ b/package/MDAnalysis/topology/ITPParser.py @@ -1,5 +1,5 @@ # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # # MDAnalysis --- https://www.mdanalysis.org # Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors @@ -27,7 +27,8 @@ Reads a GROMACS ITP_ or TOP_ file to build the system. The topology will contain atom IDs, segids, residue IDs, residue names, atom names, atom types, -charges, chargegroups, masses (guessed if not found), moltypes, and molnums. +charges, chargegroups, masses, moltypes, and molnums. +Any masses that are in the file will be read; any missing values will be guessed. Bonds, angles, dihedrals and impropers are also read from the file. If an ITP file is passed without a ``[ molecules ]`` directive, passing @@ -106,7 +107,7 @@ u = mda.Universe(ITP_tip5p, EXTRA_ATOMS=True, HW1_CHARGE=3, HW2_CHARGE=3) -These keyword variables are **case-sensitive**. Note that if you set keywords to +These keyword variables are **case-sensitive**. Note that if you set keywords to ``False`` or ``None``, they will be treated as if they are not defined in #ifdef conditions. For example, the universe below will only have 5 atoms. :: @@ -132,8 +133,10 @@ import logging import numpy as np +import warnings from ..lib.util import openany -from . import guessers +from ..guesser.tables import SYMB2Z +from ..guesser.default_guesser import DefaultGuesser from .base import TopologyReaderBase, change_squash, reduce_singular from ..core.topologyattrs import ( Atomids, @@ -152,6 +155,7 @@ Dihedrals, Impropers, AtomAttr, + Elements, ) from ..core.topology import Topology @@ -216,7 +220,7 @@ def define(self, line): except ValueError: _, variable = line.split() value = True - + # kwargs overrides files if variable not in self._original_defines: self.defines[variable] = value @@ -252,7 +256,7 @@ def skip_until_else(self, infile): break else: raise IOError('Missing #endif in {}'.format(self.current_file)) - + def skip_until_endif(self, infile): """Skip lines until #endif""" for line in self.clean_file_lines(infile): @@ -333,9 +337,9 @@ def parse_atoms(self, line): lst.append('') def parse_bonds(self, line): - self.add_param(line, self.bonds, n_funct=2, + self.add_param(line, self.bonds, n_funct=2, funct_values=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) - + def parse_angles(self, line): self.add_param(line, self.angles, n_funct=3, funct_values=(1, 2, 3, 4, 5, 6, 8, 10)) @@ -355,7 +359,7 @@ def parse_settles(self, line): # water molecules. # In ITP files this is defined with only the # oxygen atom index. The next two atoms are - # assumed to be hydrogens. Unlike TPRParser, + # assumed to be hydrogens. Unlike TPRParser, # the manual only lists this format (as of 2019). # These are treated as 2 bonds. # No angle component is included to avoid discrepancies @@ -471,6 +475,12 @@ class ITPParser(TopologyReaderBase): .. versionchanged:: 2.2.0 no longer adds angles for water molecules with SETTLE constraint + .. versionchanged:: 2.8.0 + Removed mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + Added guessed elements if all elements are valid to preserve partial + mass guessing behavior + """ format = 'ITP' @@ -503,7 +513,7 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/', self._molecules = [] # for order self.current_mol = None self.parser = self._pass - self.system_molecules = [] + self.system_molecules = [] # Open and check itp validity with openany(self.filename) as itpfile: @@ -523,10 +533,10 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/', elif self.current_mol: self.parser = self.current_mol.parsers.get(section, self._pass) - + else: self.parser = self._pass - + else: self.parser(line) @@ -575,13 +585,21 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/', if not all(self.masses): empty = self.masses == '' - self.masses[empty] = guessers.guess_masses( - guessers.guess_types(self.types)[empty]) - attrs.append(Masses(np.array(self.masses, dtype=np.float64), - guessed=True)) + self.masses[empty] = Masses.missing_value_label + + attrs.append(Masses(np.array(self.masses, dtype=np.float64), + guessed=False)) + + self.elements = DefaultGuesser(None).guess_types(self.types) + if all(e.capitalize() in SYMB2Z for e in self.elements): + attrs.append(Elements(np.array(self.elements, + dtype=object), guessed=True)) + else: - attrs.append(Masses(np.array(self.masses, dtype=np.float64), - guessed=False)) + warnings.warn("Element information is missing, elements attribute " + "will not be populated. If needed these can be " + "guessed using universe.guess_TopologyAttrs(" + "to_guess=['elements']).") # residue stuff resids = np.array(self.resids, dtype=np.int32) diff --git a/package/MDAnalysis/topology/LAMMPSParser.py b/package/MDAnalysis/topology/LAMMPSParser.py index 85bf2dec8f..62664b568b 100644 --- a/package/MDAnalysis/topology/LAMMPSParser.py +++ b/package/MDAnalysis/topology/LAMMPSParser.py @@ -81,7 +81,6 @@ import functools import warnings -from . import guessers from ..lib.util import openany, conv_float from ..lib.mdamath import triclinic_box from .base import TopologyReaderBase, squash_by @@ -182,6 +181,10 @@ class as the topology and coordinate reader share many common see :ref:`atom_style_kwarg`. .. versionadded:: 0.9.0 + .. versionchanged:: 2.8.0 + Removed mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'DATA' @@ -252,9 +255,9 @@ def _interpret_atom_style(atom_style): if missing_attrs: raise ValueError("atom_style string missing required field(s): {}" "".format(', '.join(missing_attrs))) - + return style_dict - + def parse(self, **kwargs): """Parses a LAMMPS_ DATA file. @@ -300,7 +303,7 @@ def parse(self, **kwargs): type, sect = self._parse_bond_section(sects[L], nentries, mapping) except KeyError: type, sect = [], [] - + top.add_TopologyAttr(attr(sect, type)) return top @@ -323,7 +326,7 @@ def read_DATA_timestep(self, n_atoms, TS_class, TS_kwargs, self.style_dict = None else: self.style_dict = self._interpret_atom_style(atom_style) - + header, sects = self.grab_datafile() unitcell = self._parse_box(header) @@ -361,7 +364,7 @@ def _parse_pos(self, datalines): style_dict = {'id': 0, 'x': 3, 'y': 4, 'z': 5} else: style_dict = self.style_dict - + for i, line in enumerate(datalines): line = line.split() @@ -520,10 +523,6 @@ def _parse_atoms(self, datalines, massdict=None): for i, at in enumerate(types): masses[i] = massdict[at] attrs.append(Masses(masses)) - else: - # Guess them - masses = guessers.guess_masses(types) - attrs.append(Masses(masses, guessed=True)) residx, resids = squash_by(resids)[:2] n_residues = len(resids) @@ -610,7 +609,7 @@ def parse(self, **kwargs): indices = np.zeros(natoms, dtype=int) types = np.zeros(natoms, dtype=object) - + atomline = fin.readline() # ITEM ATOMS attrs = atomline.split()[2:] # attributes on coordinate line col_ids = {attr: i for i, attr in enumerate(attrs)} # column ids diff --git a/package/MDAnalysis/topology/MMTFParser.py b/package/MDAnalysis/topology/MMTFParser.py index 51bc16c8ac..e9332e9d68 100644 --- a/package/MDAnalysis/topology/MMTFParser.py +++ b/package/MDAnalysis/topology/MMTFParser.py @@ -1,5 +1,5 @@ # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # # MDAnalysis --- https://www.mdanalysis.org # Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors @@ -38,6 +38,9 @@ .. versionchanged:: 2.0.0 Aliased ``bfactors`` topologyattribute to ``tempfactors``. ``tempfactors`` is deprecated and will be removed in 3.0 (Issue #1901) +.. versionchanged:: 2.8.0 + Removed mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). Reads the following topology attributes: @@ -47,7 +50,6 @@ - tempfactor - bonds - charge - - masses (guessed) - name - occupancy - type @@ -76,7 +78,6 @@ from . import base -from . import guessers from ..core.topology import Topology from ..core.topologyattrs import ( AltLocs, @@ -87,7 +88,6 @@ Bonds, Charges, ICodes, - Masses, Occupancies, Resids, Resnames, @@ -188,8 +188,7 @@ def iter_atoms(field): charges = Charges(list(iter_atoms('formalChargeList'))) names = Atomnames(list(iter_atoms('atomNameList'))) types = Atomtypes(list(iter_atoms('elementList'))) - masses = Masses(guessers.guess_masses(types.values), guessed=True) - attrs.extend([charges, names, types, masses]) + attrs.extend([charges, names, types]) #optional are empty list if empty, sometimes arrays if len(mtop.atom_id_list): diff --git a/package/MDAnalysis/topology/MOL2Parser.py b/package/MDAnalysis/topology/MOL2Parser.py index 96d9dbdbd4..4345ca0efe 100644 --- a/package/MDAnalysis/topology/MOL2Parser.py +++ b/package/MDAnalysis/topology/MOL2Parser.py @@ -1,5 +1,5 @@ # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # # MDAnalysis --- https://www.mdanalysis.org # Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors @@ -44,7 +44,6 @@ import os import numpy as np -from . import guessers from ..lib.util import openany from .base import TopologyReaderBase, squash_by from ..core.topologyattrs import ( @@ -54,14 +53,13 @@ Bonds, Charges, Elements, - Masses, Resids, Resnums, Resnames, Segids, ) from ..core.topology import Topology -from .tables import SYBYL2SYMB +from ..guesser.tables import SYBYL2SYMB import warnings @@ -79,8 +77,6 @@ class MOL2Parser(TopologyReaderBase): - Bonds - Elements - Guesses the following: - - masses Notes ----- @@ -95,7 +91,7 @@ class MOL2Parser(TopologyReaderBase): 2. If no atoms have ``resname`` field, resnames attribute will not be set; If some atoms have ``resname`` while some do not, :exc:`ValueError` will occur. - + 3. If "NO_CHARGES" shows up in "@MOLECULE" section and no atoms have the ``charge`` field, charges attribute will not be set; If "NO_CHARGES" shows up while ``charge`` field appears, @@ -129,6 +125,10 @@ class MOL2Parser(TopologyReaderBase): Parse elements from atom types. .. versionchanged:: 2.2.0 Read MOL2 files with optional columns omitted. + .. versionchanged:: 2.8.0 + Removed mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'MOL2' @@ -235,15 +235,12 @@ def parse(self, **kwargs): f"atoms: {invalid_elements}. " "These have been given an empty element record.") - masses = guessers.guess_masses(validated_elements) - attrs = [] attrs.append(Atomids(np.array(ids, dtype=np.int32))) attrs.append(Atomnames(np.array(names, dtype=object))) attrs.append(Atomtypes(np.array(types, dtype=object))) if has_charges: attrs.append(Charges(np.array(charges, dtype=np.float32))) - attrs.append(Masses(masses, guessed=True)) if not np.all(validated_elements == ''): attrs.append(Elements(validated_elements)) diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index e1e08dd04c..bad6d2bc6d 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -39,8 +39,8 @@ .. Note:: - The parser processes atoms and their names. Masses are guessed and set to 0 - if unknown. Partial charges are not set. Elements are parsed if they are + The parser processes atoms and their names. + Partial charges are not set. Elements are parsed if they are valid. If partially missing or incorrect, empty records are assigned. See Also @@ -61,8 +61,7 @@ import numpy as np import warnings -from .guessers import guess_masses, guess_types -from .tables import SYMB2Z +from ..guesser.tables import SYMB2Z from ..lib import util from .base import TopologyReaderBase, change_squash from ..core.topology import Topology @@ -75,7 +74,6 @@ Atomtypes, Elements, ICodes, - Masses, Occupancies, RecordTypes, Resids, @@ -169,9 +167,6 @@ class PDBParser(TopologyReaderBase): - bonds - formalcharges - Guesses the following Attributes: - - masses - See Also -------- :class:`MDAnalysis.coordinates.PDB.PDBReader` @@ -197,6 +192,9 @@ class PDBParser(TopologyReaderBase): .. versionchanged:: 2.5.0 Formal charges will not be populated if an unknown entry is encountered, instead a UserWarning is emitted. + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). """ format = ['PDB', 'ENT'] @@ -322,16 +320,8 @@ def _parseatoms(self): (occupancies, Occupancies, np.float32), ): attrs.append(Attr(np.array(vals, dtype=dtype))) - # Guessed attributes - # masses from types if they exist # OPT: We do this check twice, maybe could refactor to avoid this - if not any(elements): - atomtypes = guess_types(names) - attrs.append(Atomtypes(atomtypes, guessed=True)) - warnings.warn("Element information is missing, elements attribute " - "will not be populated. If needed these can be " - "guessed using MDAnalysis.topology.guessers.") - else: + if any(elements): # Feed atomtypes as raw element column, but validate elements atomtypes = elements attrs.append(Atomtypes(np.array(elements, dtype=object))) @@ -344,10 +334,16 @@ def _parseatoms(self): wmsg = (f"Unknown element {elem} found for some atoms. " f"These have been given an empty element record. " f"If needed they can be guessed using " - f"MDAnalysis.topology.guessers.") + f"universe.guess_TopologyAttrs(context='default'," + " to_guess=['elements']).") warnings.warn(wmsg) validated_elements.append('') attrs.append(Elements(np.array(validated_elements, dtype=object))) + else: + warnings.warn("Element information is missing, elements attribute " + "will not be populated. If needed these can be" + " guessed using universe.guess_TopologyAttrs(" + "context='default', to_guess=['elements']).") if any(formalcharges): try: @@ -374,9 +370,6 @@ def _parseatoms(self): else: attrs.append(FormalCharges(np.array(formalcharges, dtype=int))) - masses = guess_masses(atomtypes) - attrs.append(Masses(masses, guessed=True)) - # Residue level stuff from here resids = np.array(resids, dtype=np.int32) resnames = np.array(resnames, dtype=object) diff --git a/package/MDAnalysis/topology/PDBQTParser.py b/package/MDAnalysis/topology/PDBQTParser.py index a05ca35267..9764082021 100644 --- a/package/MDAnalysis/topology/PDBQTParser.py +++ b/package/MDAnalysis/topology/PDBQTParser.py @@ -35,7 +35,7 @@ Notes ----- Only reads atoms and their names; connectivity is not -deduced. Masses are guessed and set to 0 if unknown. +deduced. See Also @@ -57,7 +57,6 @@ """ import numpy as np -from . import guessers from ..lib import util from .base import TopologyReaderBase, change_squash from ..core.topology import Topology @@ -68,7 +67,6 @@ Atomtypes, Charges, ICodes, - Masses, Occupancies, RecordTypes, Resids, @@ -97,14 +95,15 @@ class PDBQTParser(TopologyReaderBase): - tempfactors - charges - Guesses the following: - - masses .. versionchanged:: 0.18.0 Added parsing of Record types .. versionchanged:: 2.7.0 Columns 67 - 70 in ATOM records, corresponding to the field *footnote*, are now ignored. See Autodock's `reference`_. + .. versionchanged:: 2.8.0 + Removed mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). .. _reference: https://autodock.scripps.edu/wp-content/uploads/sites/56/2021/10/AutoDock4.2.6_UserGuide.pdf @@ -151,8 +150,6 @@ def parse(self, **kwargs): n_atoms = len(serials) - masses = guessers.guess_masses(atomtypes) - attrs = [] for attrlist, Attr, dtype in ( (record_types, RecordTypes, object), @@ -165,7 +162,6 @@ def parse(self, **kwargs): (atomtypes, Atomtypes, object), ): attrs.append(Attr(np.array(attrlist, dtype=dtype))) - attrs.append(Masses(masses, guessed=True)) resids = np.array(resids, dtype=np.int32) icodes = np.array(icodes, dtype=object) diff --git a/package/MDAnalysis/topology/PQRParser.py b/package/MDAnalysis/topology/PQRParser.py index e5f7b6415b..1adcd7fba2 100644 --- a/package/MDAnalysis/topology/PQRParser.py +++ b/package/MDAnalysis/topology/PQRParser.py @@ -49,7 +49,6 @@ """ import numpy as np -from . import guessers from ..lib.util import openany from ..core.topologyattrs import ( Atomids, @@ -57,7 +56,6 @@ Atomtypes, Charges, ICodes, - Masses, Radii, RecordTypes, Resids, @@ -82,9 +80,6 @@ class PQRParser(TopologyReaderBase): - Resnames - Segids - Guesses the following: - - atomtypes (if not present, Gromacs generated PQR files have these) - - masses .. versionchanged:: 0.9.0 Read chainID from a PQR file and use it as segid (before we always used @@ -95,6 +90,10 @@ class PQRParser(TopologyReaderBase): Added parsing of Record types Can now read PQR files from Gromacs, these provide atom type as last column but don't have segids + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'PQR' @@ -191,20 +190,14 @@ def parse(self, **kwargs): n_atoms = len(serials) - if not elements: - atomtypes = guessers.guess_types(names) - guessed_types = True - else: + attrs = [] + if elements: atomtypes = elements - guessed_types = False - masses = guessers.guess_masses(atomtypes) + attrs.append(Atomtypes(atomtypes, False)) - attrs = [] attrs.append(Atomids(np.array(serials, dtype=np.int32))) attrs.append(Atomnames(np.array(names, dtype=object))) attrs.append(Charges(np.array(charges, dtype=np.float32))) - attrs.append(Atomtypes(atomtypes, guessed=guessed_types)) - attrs.append(Masses(masses, guessed=True)) attrs.append(RecordTypes(np.array(record_types, dtype=object))) attrs.append(Radii(np.array(radii, dtype=np.float32))) diff --git a/package/MDAnalysis/topology/PSFParser.py b/package/MDAnalysis/topology/PSFParser.py index 1961d21c7b..70cd38d51f 100644 --- a/package/MDAnalysis/topology/PSFParser.py +++ b/package/MDAnalysis/topology/PSFParser.py @@ -49,7 +49,7 @@ import numpy as np from ..lib.util import openany, atoi -from . import guessers + from .base import TopologyReaderBase, squash_by, change_squash from ..core.topologyattrs import ( Atomids, diff --git a/package/MDAnalysis/topology/TOPParser.py b/package/MDAnalysis/topology/TOPParser.py index 3153357b21..9113750cf9 100644 --- a/package/MDAnalysis/topology/TOPParser.py +++ b/package/MDAnalysis/topology/TOPParser.py @@ -75,7 +75,7 @@ As of version 2.0.0, elements are no longer guessed if ATOMIC_NUMBER records are missing. In those scenarios, if elements are necessary, users will have to invoke the element guessers after parsing the topology file. Please see - :mod:`MDAnalysis.topology.guessers` for more details. + :mod:`MDAnalysis.guessers` for more details. .. _`PARM parameter/topology file specification`: https://ambermd.org/FileFormats.php#topo.cntrl @@ -91,7 +91,7 @@ import numpy as np import itertools -from .tables import Z2SYMB +from ..guesser.tables import Z2SYMB from ..lib.util import openany, FORTRANReader from .base import TopologyReaderBase, change_squash from ..core.topology import Topology @@ -294,14 +294,15 @@ def next_getter(): if 'elements' not in attrs: msg = ("ATOMIC_NUMBER record not found, elements attribute will " "not be populated. If needed these can be guessed using " - "MDAnalysis.topology.guessers.") + "universe.guess_TopologyAttrs(to_guess=['elements']).") logger.warning(msg) warnings.warn(msg) elif np.any(attrs['elements'].values == ""): # only send out one warning that some elements are unknown msg = ("Unknown ATOMIC_NUMBER value found for some atoms, these " "have been given an empty element record. If needed these " - "can be guessed using MDAnalysis.topology.guessers.") + "can be guessed using " + "universe.guess_TopologyAttrs(to_guess=['elements']).") logger.warning(msg) warnings.warn(msg) diff --git a/package/MDAnalysis/topology/TPRParser.py b/package/MDAnalysis/topology/TPRParser.py index d6f3717bf1..396211d071 100644 --- a/package/MDAnalysis/topology/TPRParser.py +++ b/package/MDAnalysis/topology/TPRParser.py @@ -166,7 +166,6 @@ __copyright__ = "GNU Public Licence, v2" -from . import guessers from ..lib.util import openany from .tpr import utils as tpr_utils from .tpr import setting as S diff --git a/package/MDAnalysis/topology/TXYZParser.py b/package/MDAnalysis/topology/TXYZParser.py index 56e6cc59b7..0781488c9d 100644 --- a/package/MDAnalysis/topology/TXYZParser.py +++ b/package/MDAnalysis/topology/TXYZParser.py @@ -47,17 +47,16 @@ import numpy as np import warnings -from . import guessers -from .tables import SYMB2Z +from ..guesser.tables import SYMB2Z from ..lib.util import openany from .base import TopologyReaderBase from ..core.topology import Topology +from ..guesser.tables import SYMB2Z from ..core.topologyattrs import ( Atomnames, Atomids, Atomtypes, Bonds, - Masses, Resids, Resnums, Segids, @@ -77,6 +76,10 @@ class TXYZParser(TopologyReaderBase): .. versionadded:: 0.17.0 .. versionchanged:: 2.4.0 Adding the `Element` attribute if all names are valid element symbols. + .. versionchanged:: 2.8.0 + Removed mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = ['TXYZ', 'ARC'] @@ -95,6 +98,7 @@ def parse(self, **kwargs): names = np.zeros(natoms, dtype=object) types = np.zeros(natoms, dtype=object) bonds = [] + # Find first atom line, maybe there's box information fline = inf.readline() try: @@ -120,26 +124,23 @@ def parse(self, **kwargs): if i < other_atom: bonds.append((i, other_atom)) - # Guessing time - masses = guessers.guess_masses(names) - attrs = [Atomnames(names), Atomids(atomids), Atomtypes(types), Bonds(tuple(bonds)), - Masses(masses, guessed=True), Resids(np.array([1])), Resnums(np.array([1])), Segids(np.array(['SYSTEM'], dtype=object)), ] if all(n.capitalize() in SYMB2Z for n in names): attrs.append(Elements(np.array(names, dtype=object))) - + else: warnings.warn("Element information is missing, elements attribute " "will not be populated. If needed these can be " - "guessed using MDAnalysis.topology.guessers.") - + "guessed using universe.guess_TopologyAttrs(" + "to_guess=['elements']).") + top = Topology(natoms, 1, 1, attrs=attrs) diff --git a/package/MDAnalysis/topology/XYZParser.py b/package/MDAnalysis/topology/XYZParser.py index 4162e34351..cb0df129e0 100644 --- a/package/MDAnalysis/topology/XYZParser.py +++ b/package/MDAnalysis/topology/XYZParser.py @@ -1,5 +1,5 @@ # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # # MDAnalysis --- https://www.mdanalysis.org # Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors @@ -40,15 +40,12 @@ """ import numpy as np -from . import guessers from ..lib.util import openany from .base import TopologyReaderBase from ..core.topology import Topology from ..core.topologyattrs import ( Atomnames, Atomids, - Atomtypes, - Masses, Resids, Resnums, Segids, @@ -62,14 +59,15 @@ class XYZParser(TopologyReaderBase): Creates the following attributes: - Atomnames - Guesses the following attributes: - - Atomtypes - - Masses .. versionadded:: 0.9.1 .. versionchanged: 1.0.0 Store elements attribute, based on XYZ atom names + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'XYZ' @@ -91,14 +89,9 @@ def parse(self, **kwargs): name = inf.readline().split()[0] names[i] = name - # Guessing time - atomtypes = guessers.guess_types(names) - masses = guessers.guess_masses(names) attrs = [Atomnames(names), Atomids(np.arange(natoms) + 1), - Atomtypes(atomtypes, guessed=True), - Masses(masses, guessed=True), Resids(np.array([1])), Resnums(np.array([1])), Segids(np.array(['SYSTEM'], dtype=object)), diff --git a/package/MDAnalysis/topology/__init__.py b/package/MDAnalysis/topology/__init__.py index 32df510f47..b1b756b538 100644 --- a/package/MDAnalysis/topology/__init__.py +++ b/package/MDAnalysis/topology/__init__.py @@ -33,9 +33,8 @@ As a minimum, all topology parsers will provide atom ids, atom types, masses, resids, resnums and segids as well as assigning all atoms to residues and all residues to segments. For systems without residues and segments, this results -in there being a single residue and segment to which all atoms belong. Often -when data is not provided by a file, it will be guessed based on other data in -the file. In the event that this happens, a UserWarning will always be issued. +in there being a single residue and segment to which all atoms belong. +In the event that this happens, a UserWarning will always be issued. The following table lists the currently supported topology formats along with the attributes they provide. @@ -134,7 +133,7 @@ :mod:`MDAnalysis.topology.XYZParser` TXYZ [#a]_ txyz, names, atomids, Tinker_ XYZ File Parser. Reads atom labels, numbers - arc masses, types, and connectivity; masses are guessed from atoms names. + arc masses, types, and connectivity. bonds :mod:`MDAnalysis.topology.TXYZParser` GAMESS [#a]_ gms, names, GAMESS_ output parser. Read only atoms of assembly diff --git a/package/MDAnalysis/topology/core.py b/package/MDAnalysis/topology/core.py index 91596a0fef..3ed1c7a346 100644 --- a/package/MDAnalysis/topology/core.py +++ b/package/MDAnalysis/topology/core.py @@ -26,11 +26,6 @@ The various topology parsers make use of functions and classes in this module. They are mostly of use to developers. -See Also --------- -:mod:`MDAnalysis.topology.tables` - for some hard-coded atom information that is used by functions such as - :func:`guess_atom_type` and :func:`guess_atom_mass`. """ @@ -40,12 +35,6 @@ from collections import defaultdict # Local imports -from . import tables -from .guessers import ( - guess_atom_element, guess_atom_type, - get_atom_mass, guess_atom_mass, guess_atom_charge, - guess_bonds, guess_angles, guess_dihedrals, guess_improper_dihedrals, -) from ..core._get_readers import get_parser_for from ..lib.util import cached diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py deleted file mode 100644 index a0847036de..0000000000 --- a/package/MDAnalysis/topology/guessers.py +++ /dev/null @@ -1,526 +0,0 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 -# -# MDAnalysis --- https://www.mdanalysis.org -# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors -# (see the file AUTHORS for the full list of names) -# -# Released under the GNU Public Licence, v2 or any higher version -# -# Please cite your use of MDAnalysis in published work: -# -# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, -# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. -# MDAnalysis: A Python package for the rapid analysis of molecular dynamics -# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th -# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. -# doi: 10.25080/majora-629e541a-00e -# -# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. -# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. -# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 -# -""" -Guessing unknown Topology information --- :mod:`MDAnalysis.topology.guessers` -============================================================================= - -In general `guess_atom_X` returns the guessed value for a single value, -while `guess_Xs` will work on an array of many atoms. - - -Example uses of guessers ------------------------- - -Guessing elements from atom names -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Currently, it is possible to guess elements from atom names using -:func:`guess_atom_element` (or the synonymous :func:`guess_atom_type`). This can -be done in the following manner:: - - import MDAnalysis as mda - from MDAnalysis.topology.guessers import guess_atom_element - from MDAnalysisTests.datafiles import PRM7 - - u = mda.Universe(PRM7) - - print(u.atoms.names[1]) # returns the atom name H1 - - element = guess_atom_element(u.atoms.names[1]) - - print(element) # returns element H - -In the above example, we take an atom named H1 and use -:func:`guess_atom_element` to guess the element hydrogen (i.e. H). It is -important to note that element guessing is not always accurate. Indeed in cases -where the atom type is not recognised, we may end up with the wrong element. -For example:: - - import MDAnalysis as mda - from MDAnalysis.topology.guessers import guess_atom_element - from MDAnalysisTests.datafiles import PRM19SBOPC - - u = mda.Universe(PRM19SBOPC) - - print(u.atoms.names[-1]) # returns the atom name EPW - - element = guess_atom_element(u.atoms.names[-1]) - - print(element) # returns element P - -Here we find that virtual site atom 'EPW' was given the element P, which -would not be an expected result. We therefore always recommend that users -carefully check the outcomes of any guessers. - -In some cases, one may want to guess elements for an entire universe and add -this guess as a topology attribute. This can be done using :func:`guess_types` -in the following manner:: - - import MDAnalysis as mda - from MDAnalysis.topology.guessers import guess_types - from MDAnalysisTests.datafiles import PRM7 - - u = mda.Universe(PRM7) - - guessed_elements = guess_types(u.atoms.names) - - u.add_TopologyAttr('elements', guessed_elements) - - print(u.atoms.elements) # returns an array of guessed elements - -More information on adding topology attributes can found in the `user guide`_. - - -.. Links - -.. _user guide: https://www.mdanalysis.org/UserGuide/examples/constructing_universe.html#Adding-topology-attributes - -""" -import numpy as np -import warnings -import re - -from ..lib import distances -from . import tables - - -def guess_masses(atom_types): - """Guess the mass of many atoms based upon their type - - Parameters - ---------- - atom_types - Type of each atom - - Returns - ------- - atom_masses : np.ndarray dtype float64 - """ - validate_atom_types(atom_types) - masses = np.array([get_atom_mass(atom_t) for atom_t in atom_types], dtype=np.float64) - return masses - - -def validate_atom_types(atom_types): - """Vaildates the atom types based on whether they are available in our tables - - Parameters - ---------- - atom_types - Type of each atom - - Returns - ------- - None - - .. versionchanged:: 0.20.0 - Try uppercase atom type name as well - """ - for atom_type in np.unique(atom_types): - try: - tables.masses[atom_type] - except KeyError: - try: - tables.masses[atom_type.upper()] - except KeyError: - warnings.warn("Failed to guess the mass for the following atom types: {}".format(atom_type)) - - -def guess_types(atom_names): - """Guess the atom type of many atoms based on atom name - - Parameters - ---------- - atom_names - Name of each atom - - Returns - ------- - atom_types : np.ndarray dtype object - """ - return np.array([guess_atom_element(name) for name in atom_names], dtype=object) - - -def guess_atom_type(atomname): - """Guess atom type from the name. - - At the moment, this function simply returns the element, as - guessed by :func:`guess_atom_element`. - - - See Also - -------- - :func:`guess_atom_element` - :mod:`MDAnalysis.topology.tables` - - - """ - return guess_atom_element(atomname) - - -NUMBERS = re.compile(r'[0-9]') # match numbers -SYMBOLS = re.compile(r'[*+-]') # match *, +, - - -def guess_atom_element(atomname): - """Guess the element of the atom from the name. - - Looks in dict to see if element is found, otherwise it uses the first - character in the atomname. The table comes from CHARMM and AMBER atom - types, where the first character is not sufficient to determine the atom - type. Some GROMOS ions have also been added. - - .. Warning: The translation table is incomplete. This will probably result - in some mistakes, but it still better than nothing! - - See Also - -------- - :func:`guess_atom_type` - :mod:`MDAnalysis.topology.tables` - """ - if atomname == '': - return '' - try: - return tables.atomelements[atomname.upper()] - except KeyError: - # strip symbols - no_symbols = re.sub(SYMBOLS, '', atomname) - - # split name by numbers - no_numbers = re.split(NUMBERS, no_symbols) - no_numbers = list(filter(None, no_numbers)) #remove '' - # if no_numbers is not empty, use the first element of no_numbers - name = no_numbers[0].upper() if no_numbers else '' - - # just in case - if name in tables.atomelements: - return tables.atomelements[name] - - while name: - if name in tables.elements: - return name - if name[:-1] in tables.elements: - return name[:-1] - if name[1:] in tables.elements: - return name[1:] - if len(name) <= 2: - return name[0] - name = name[:-1] # probably element is on left not right - - # if it's numbers - return no_symbols - - -def guess_bonds(atoms, coords, box=None, **kwargs): - r"""Guess if bonds exist between two atoms based on their distance. - - Bond between two atoms is created, if the two atoms are within - - .. math:: - - d < f \cdot (R_1 + R_2) - - of each other, where :math:`R_1` and :math:`R_2` are the VdW radii - of the atoms and :math:`f` is an ad-hoc *fudge_factor*. This is - the `same algorithm that VMD uses`_. - - Parameters - ---------- - atoms : AtomGroup - atoms for which bonds should be guessed - coords : array - coordinates of the atoms (i.e., `AtomGroup.positions)`) - fudge_factor : float, optional - The factor by which atoms must overlap eachother to be considered a - bond. Larger values will increase the number of bonds found. [0.55] - vdwradii : dict, optional - To supply custom vdwradii for atoms in the algorithm. Must be a dict - of format {type:radii}. The default table of van der Waals radii is - hard-coded as :data:`MDAnalysis.topology.tables.vdwradii`. Any user - defined vdwradii passed as an argument will supercede the table - values. [``None``] - lower_bound : float, optional - The minimum bond length. All bonds found shorter than this length will - be ignored. This is useful for parsing PDB with altloc records where - atoms with altloc A and B maybe very close together and there should be - no chemical bond between them. [0.1] - box : array_like, optional - Bonds are found using a distance search, if unit cell information is - given, periodic boundary conditions will be considered in the distance - search. [``None``] - - Returns - ------- - list - List of tuples suitable for use in Universe topology building. - - Warnings - -------- - No check is done after the bonds are guessed to see if Lewis - structure is correct. This is wrong and will burn somebody. - - Raises - ------ - :exc:`ValueError` if inputs are malformed or `vdwradii` data is missing. - - - .. _`same algorithm that VMD uses`: - http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.1/ug/node26.html - - .. versionadded:: 0.7.7 - .. versionchanged:: 0.9.0 - Updated method internally to use more :mod:`numpy`, should work - faster. Should also use less memory, previously scaled as - :math:`O(n^2)`. *vdwradii* argument now augments table list - rather than replacing entirely. - """ - # why not just use atom.positions? - if len(atoms) != len(coords): - raise ValueError("'atoms' and 'coord' must be the same length") - - fudge_factor = kwargs.get('fudge_factor', 0.55) - - vdwradii = tables.vdwradii.copy() # so I don't permanently change it - user_vdwradii = kwargs.get('vdwradii', None) - if user_vdwradii: # this should make algo use their values over defaults - vdwradii.update(user_vdwradii) - - # Try using types, then elements - atomtypes = atoms.types - - # check that all types have a defined vdw - if not all(val in vdwradii for val in set(atomtypes)): - raise ValueError(("vdw radii for types: " + - ", ".join([t for t in set(atomtypes) if - not t in vdwradii]) + - ". These can be defined manually using the" + - " keyword 'vdwradii'")) - - lower_bound = kwargs.get('lower_bound', 0.1) - - if box is not None: - box = np.asarray(box) - - # to speed up checking, calculate what the largest possible bond - # atom that would warrant attention. - # then use this to quickly mask distance results later - max_vdw = max([vdwradii[t] for t in atomtypes]) - - bonds = [] - - pairs, dist = distances.self_capped_distance(coords, - max_cutoff=2.0*max_vdw, - min_cutoff=lower_bound, - box=box) - for idx, (i, j) in enumerate(pairs): - d = (vdwradii[atomtypes[i]] + vdwradii[atomtypes[j]])*fudge_factor - if (dist[idx] < d): - bonds.append((atoms[i].index, atoms[j].index)) - return tuple(bonds) - - -def guess_angles(bonds): - """Given a list of Bonds, find all angles that exist between atoms. - - Works by assuming that if atoms 1 & 2 are bonded, and 2 & 3 are bonded, - then (1,2,3) must be an angle. - - Returns - ------- - list of tuples - List of tuples defining the angles. - Suitable for use in u._topology - - - See Also - -------- - :meth:`guess_bonds` - - - .. versionadded 0.9.0 - """ - angles_found = set() - - for b in bonds: - for atom in b: - other_a = b.partner(atom) # who's my friend currently in Bond - for other_b in atom.bonds: - if other_b != b: # if not the same bond I start as - third_a = other_b.partner(atom) - desc = tuple([other_a.index, atom.index, third_a.index]) - if desc[0] > desc[-1]: # first index always less than last - desc = desc[::-1] - angles_found.add(desc) - - return tuple(angles_found) - - -def guess_dihedrals(angles): - """Given a list of Angles, find all dihedrals that exist between atoms. - - Works by assuming that if (1,2,3) is an angle, and 3 & 4 are bonded, - then (1,2,3,4) must be a dihedral. - - Returns - ------- - list of tuples - List of tuples defining the dihedrals. - Suitable for use in u._topology - - .. versionadded 0.9.0 - """ - dihedrals_found = set() - - for b in angles: - a_tup = tuple([a.index for a in b]) # angle as tuple of numbers - # if searching with b[0], want tuple of (b[2], b[1], b[0], +new) - # search the first and last atom of each angle - for atom, prefix in zip([b.atoms[0], b.atoms[-1]], - [a_tup[::-1], a_tup]): - for other_b in atom.bonds: - if not other_b.partner(atom) in b: - third_a = other_b.partner(atom) - desc = prefix + (third_a.index,) - if desc[0] > desc[-1]: - desc = desc[::-1] - dihedrals_found.add(desc) - - return tuple(dihedrals_found) - - -def guess_improper_dihedrals(angles): - """Given a list of Angles, find all improper dihedrals that exist between - atoms. - - Works by assuming that if (1,2,3) is an angle, and 2 & 4 are bonded, - then (2, 1, 3, 4) must be an improper dihedral. - ie the improper dihedral is the angle between the planes formed by - (1, 2, 3) and (1, 3, 4) - - Returns - ------- - List of tuples defining the improper dihedrals. - Suitable for use in u._topology - - .. versionadded 0.9.0 - """ - dihedrals_found = set() - - for b in angles: - atom = b[1] # select middle atom in angle - # start of improper tuple - a_tup = tuple([b[a].index for a in [1, 2, 0]]) - # if searching with b[1], want tuple of (b[1], b[2], b[0], +new) - # search the first and last atom of each angle - for other_b in atom.bonds: - other_atom = other_b.partner(atom) - # if this atom isn't in the angle I started with - if not other_atom in b: - desc = a_tup + (other_atom.index,) - if desc[0] > desc[-1]: - desc = desc[::-1] - dihedrals_found.add(desc) - - return tuple(dihedrals_found) - - -def get_atom_mass(element): - """Return the atomic mass in u for *element*. - - Masses are looked up in :data:`MDAnalysis.topology.tables.masses`. - - .. Warning:: Unknown masses are set to 0.0 - - .. versionchanged:: 0.20.0 - Try uppercase atom type name as well - """ - try: - return tables.masses[element] - except KeyError: - try: - return tables.masses[element.upper()] - except KeyError: - return 0.0 - - -def guess_atom_mass(atomname): - """Guess a mass based on the atom name. - - :func:`guess_atom_element` is used to determine the kind of atom. - - .. warning:: Anything not recognized is simply set to 0; if you rely on the - masses you might want to double check. - """ - return get_atom_mass(guess_atom_element(atomname)) - - -def guess_atom_charge(atomname): - """Guess atom charge from the name. - - .. Warning:: Not implemented; simply returns 0. - """ - # TODO: do something slightly smarter, at least use name/element - return 0.0 - - -def guess_aromaticities(atomgroup): - """Guess aromaticity of atoms using RDKit - - Parameters - ---------- - atomgroup : mda.core.groups.AtomGroup - Atoms for which the aromaticity will be guessed - - Returns - ------- - aromaticities : numpy.ndarray - Array of boolean values for the aromaticity of each atom - - - .. versionadded:: 2.0.0 - """ - mol = atomgroup.convert_to("RDKIT") - return np.array([atom.GetIsAromatic() for atom in mol.GetAtoms()]) - - -def guess_gasteiger_charges(atomgroup): - """Guess Gasteiger partial charges using RDKit - - Parameters - ---------- - atomgroup : mda.core.groups.AtomGroup - Atoms for which the charges will be guessed - - Returns - ------- - charges : numpy.ndarray - Array of float values representing the charge of each atom - - - .. versionadded:: 2.0.0 - """ - mol = atomgroup.convert_to("RDKIT") - from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges - ComputeGasteigerCharges(mol, throwOnParamFailure=True) - return np.array([atom.GetDoubleProp("_GasteigerCharge") - for atom in mol.GetAtoms()], - dtype=np.float32) diff --git a/package/MDAnalysis/topology/tpr/obj.py b/package/MDAnalysis/topology/tpr/obj.py index 0524d77c6e..5f5040c7db 100644 --- a/package/MDAnalysis/topology/tpr/obj.py +++ b/package/MDAnalysis/topology/tpr/obj.py @@ -32,7 +32,7 @@ """ from collections import namedtuple -from ..tables import Z2SYMB +from ...guesser.tables import Z2SYMB TpxHeader = namedtuple( "TpxHeader", [ diff --git a/package/doc/sphinx/source/documentation_pages/guesser_modules.rst b/package/doc/sphinx/source/documentation_pages/guesser_modules.rst new file mode 100644 index 0000000000..7747fdc380 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/guesser_modules.rst @@ -0,0 +1,61 @@ +.. Contains the formatted docstrings from the guesser modules located in 'mdanalysis/package/MDAnalysis/guesser' + +************************** +Guesser modules +************************** +This module contains the context-aware guessers, which are used by the :meth:`~MDAnalysis.core.Universe.Universe.guess_TopologyAttrs` API. Context-aware guessers' main purpose +is to be tailored guesser classes that target specific file format or force field (eg. PDB file format, or Martini forcefield). +Having such guessers makes attribute guessing more accurate and reliable than having generic guessing methods that doesn't fit all topologies. + +Example uses of guessers +------------------------ + +Guessing using :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` API +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +Guessing can be done through the Universe's :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` as following:: + + import MDAnalysis as mda + from MDAnalysisTests.datafiles import PDB + + u = mda.Universe(PDB) + print(hasattr(u.atoms, 'elements')) # returns False + u.guess_TopologyAttrs(to_guess=['elements']) + print(u.atoms.elements) # print ['N' 'H' 'H' ... 'NA' 'NA' 'NA'] + +In the above example, we passed ``elements`` as the attribute we want to guess, and +:meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` guess then add it as a topology +attribute to the ``AtomGroup`` of the universe. + +If the attribute already exist in the universe, passing the attribute of interest to the ``to_guess`` parameter will only fill the empty values of the attribute if any exists. +To override all the attribute values, you can pass the attribute to the ``force_guess`` parameter instead of the to_guess one as following:: + + import MDAnalysis as mda + from MDAnalysisTests.datafiles import PRM12 +  + u = mda.Universe(PRM12, context='default', to_guess=()) # types ['HW', 'OW', ..] + + u.guess_TopologyAttrs(force_guess=['types']) # types ['H', 'O', ..] + +N.B.: If you didn't pass any ``context`` to the API, it will use the :class:`~MDAnalysis.guesser.default_guesser.DefaultGuesser` + +.. rubric:: available guessers +.. toctree:: + :maxdepth: 1 + + guesser_modules/init + guesser_modules/default_guesser + + +.. rubric:: guesser core modules + +The remaining pages are primarily of interest to developers as they +contain functions and classes that are used in the implementation of +the context-specific guessers. + +.. toctree:: + :maxdepth: 1 + + guesser_modules/base + guesser_modules/tables diff --git a/package/doc/sphinx/source/documentation_pages/guesser_modules/base.rst b/package/doc/sphinx/source/documentation_pages/guesser_modules/base.rst new file mode 100644 index 0000000000..cfba20f17b --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/guesser_modules/base.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.guesser.base diff --git a/package/doc/sphinx/source/documentation_pages/guesser_modules/default_guesser.rst b/package/doc/sphinx/source/documentation_pages/guesser_modules/default_guesser.rst new file mode 100644 index 0000000000..a3f3f89715 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/guesser_modules/default_guesser.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.guesser.default_guesser diff --git a/package/doc/sphinx/source/documentation_pages/guesser_modules/init.rst b/package/doc/sphinx/source/documentation_pages/guesser_modules/init.rst new file mode 100644 index 0000000000..6fa6449c5c --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/guesser_modules/init.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.guesser.__init__ diff --git a/package/doc/sphinx/source/documentation_pages/guesser_modules/tables.rst b/package/doc/sphinx/source/documentation_pages/guesser_modules/tables.rst new file mode 100644 index 0000000000..6116739fe8 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/guesser_modules/tables.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.guesser.tables diff --git a/package/doc/sphinx/source/documentation_pages/topology/guessers.rst b/package/doc/sphinx/source/documentation_pages/topology/guessers.rst deleted file mode 100644 index e6449f5ddc..0000000000 --- a/package/doc/sphinx/source/documentation_pages/topology/guessers.rst +++ /dev/null @@ -1,2 +0,0 @@ -.. automodule:: MDAnalysis.topology.guessers - :members: diff --git a/package/doc/sphinx/source/documentation_pages/topology/tables.rst b/package/doc/sphinx/source/documentation_pages/topology/tables.rst deleted file mode 100644 index f4d579ec9c..0000000000 --- a/package/doc/sphinx/source/documentation_pages/topology/tables.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: MDAnalysis.topology.tables diff --git a/package/doc/sphinx/source/documentation_pages/topology_modules.rst b/package/doc/sphinx/source/documentation_pages/topology_modules.rst index ed8caba8ce..01f3ab32e2 100644 --- a/package/doc/sphinx/source/documentation_pages/topology_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/topology_modules.rst @@ -28,10 +28,10 @@ topology file format in the *topology_format* keyword argument to topology/CRDParser topology/DLPolyParser topology/DMSParser - topology/FHIAIMSParser + topology/FHIAIMSParser topology/GMSParser topology/GROParser - topology/GSDParser + topology/GSDParser topology/HoomdXMLParser topology/ITPParser topology/LAMMPSParser @@ -59,6 +59,4 @@ the topology readers. topology/base topology/core - topology/guessers - topology/tables topology/tpr_util diff --git a/package/doc/sphinx/source/index.rst b/package/doc/sphinx/source/index.rst index e6171630c2..29e800d6a5 100644 --- a/package/doc/sphinx/source/index.rst +++ b/package/doc/sphinx/source/index.rst @@ -82,9 +82,9 @@ can be installed either with conda_ or pip_. conda ----- -First installation with conda_: +First installation with conda_: -.. code-block:: bash +.. code-block:: bash conda config --add channels conda-forge conda install mdanalysis @@ -93,7 +93,7 @@ which will automatically install a *full set of dependencies*. To upgrade later: -.. code-block:: bash +.. code-block:: bash conda update mdanalysis @@ -102,14 +102,14 @@ pip Installation with `pip`_ and a *minimal set of dependencies*: -.. code-block:: bash +.. code-block:: bash pip install --upgrade MDAnalysis To install with a *full set of dependencies* (which includes everything needed for :mod:`MDAnalysis.analysis`), add the ``[analysis]`` tag: -.. code-block:: bash +.. code-block:: bash pip install --upgrade MDAnalysis[analysis] @@ -121,7 +121,7 @@ If you want to `run the tests`_ or use example files to follow some of the examples in the documentation or the tutorials_, also install the ``MDAnalysisTests`` package: -.. code-block:: bash +.. code-block:: bash conda install mdanalysistests # with conda pip install --upgrade MDAnalysisTests # with pip @@ -187,12 +187,13 @@ Thank you! :caption: Documentation :numbered: :hidden: - + ./documentation_pages/overview ./documentation_pages/topology ./documentation_pages/selections ./documentation_pages/analysis_modules ./documentation_pages/topology_modules + ./documentation_pages/guesser_modules ./documentation_pages/coordinates_modules ./documentation_pages/converters ./documentation_pages/trajectory_transformations @@ -205,7 +206,7 @@ Thank you! ./documentation_pages/units ./documentation_pages/exceptions ./documentation_pages/references - + Indices and tables ================== @@ -213,4 +214,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/testsuite/MDAnalysisTests/analysis/test_base.py b/testsuite/MDAnalysisTests/analysis/test_base.py index 345e0dc671..68b86fc943 100644 --- a/testsuite/MDAnalysisTests/analysis/test_base.py +++ b/testsuite/MDAnalysisTests/analysis/test_base.py @@ -276,7 +276,7 @@ def test_parallelizable_transformations(): # pick any transformation that would allow # for parallelizable attribute from MDAnalysis.transformations import NoJump - u = mda.Universe(XTC) + u = mda.Universe(XTC, to_guess=()) u.trajectory.add_transformations(NoJump()) # test that serial works diff --git a/testsuite/MDAnalysisTests/analysis/test_dielectric.py b/testsuite/MDAnalysisTests/analysis/test_dielectric.py index ac3de34b65..21992cf5d5 100644 --- a/testsuite/MDAnalysisTests/analysis/test_dielectric.py +++ b/testsuite/MDAnalysisTests/analysis/test_dielectric.py @@ -57,7 +57,7 @@ def test_temperature(self, ag): assert_allclose(eps.results['eps_mean'], 9.621, rtol=1e-03) def test_non_charges(self): - u = mda.Universe(DCD_TRICLINIC) + u = mda.Universe(DCD_TRICLINIC, to_guess=()) with pytest.raises(NoDataError, match="No charges defined given atomgroup."): DielectricConstant(u.atoms).run() diff --git a/testsuite/MDAnalysisTests/converters/test_openmm_parser.py b/testsuite/MDAnalysisTests/converters/test_openmm_parser.py index 5b67ca5924..d3d923294f 100644 --- a/testsuite/MDAnalysisTests/converters/test_openmm_parser.py +++ b/testsuite/MDAnalysisTests/converters/test_openmm_parser.py @@ -53,9 +53,15 @@ class OpenMMTopologyBase(ParserBase): "bonds", "chainIDs", "elements", + "types" ] expected_n_bonds = 0 + @pytest.fixture() + def top(self, filename): + with self.parser(filename) as p: + yield p.parse() + def test_creates_universe(self, filename): """Check that Universe works with this Parser""" u = mda.Universe(filename, topology_format="OPENMMTOPOLOGY") @@ -130,6 +136,14 @@ def test_masses(self, top): else: assert top.masses.values == [] + def test_guessed_attributes(self, filename): + u = mda.Universe(filename, topology_format="OPENMMTOPOLOGY") + u_guessed_attrs = [attr.attrname for attr + in u._topology.guessed_attributes] + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + assert attr in u_guessed_attrs + class OpenMMAppTopologyBase(OpenMMTopologyBase): parser = mda.converters.OpenMMParser.OpenMMAppTopologyParser @@ -142,14 +156,25 @@ class OpenMMAppTopologyBase(OpenMMTopologyBase): "bonds", "chainIDs", "elements", + "types" ] expected_n_bonds = 0 + @pytest.fixture() + def top(self, filename): + with self.parser(filename) as p: + yield p.parse() + def test_creates_universe(self, filename): """Check that Universe works with this Parser""" u = mda.Universe(filename, topology_format="OPENMMAPP") assert isinstance(u, mda.Universe) + def test_guessed_attributes(self, filename): + u = mda.Universe(filename, topology_format="OPENMMAPP") + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + class TestOpenMMTopologyParser(OpenMMTopologyBase): ref_filename = app.PDBFile(CONECT).topology @@ -171,10 +196,13 @@ def test_with_partial_elements(self): wmsg1 = ("Element information missing for some atoms. " "These have been given an empty element record ") - wmsg2 = ("For absent elements, atomtype has been " - "set to 'X' and mass has been set to 0.0. " - "If needed these can be guessed using " - "MDAnalysis.topology.guessers.") + wmsg2 = ( + "For absent elements, atomtype has been " + "set to 'X' and mass has been set to 0.0. " + "If needed these can be guessed using " + "universe.guess_TopologyAttrs(to_guess=['masses', 'types']). " + "(for MDAnalysis version 2.x this is done automatically," + " but it will be removed in 3.0).") with pytest.warns(UserWarning) as warnings: mda_top = self.parser(self.ref_filename).parse() @@ -182,6 +210,8 @@ def test_with_partial_elements(self): assert mda_top.types.values[3388] == 'X' assert mda_top.elements.values[3344] == '' assert mda_top.elements.values[3388] == '' + assert mda_top.masses.values[3344] == 0.0 + assert mda_top.masses.values[3388] == 0.0 assert len(warnings) == 2 assert str(warnings[0].message) == wmsg1 @@ -194,14 +224,20 @@ def test_no_elements_warn(): for a in omm_top.atoms(): a.element = None - wmsg = ("Element information is missing for all the atoms. " - "Elements attribute will not be populated. " - "Atomtype attribute will be guessed using atom " - "name and mass will be guessed using atomtype." - "See MDAnalysis.topology.guessers.") - - with pytest.warns(UserWarning, match=wmsg): + wmsg = ( + "Element information is missing for all the atoms. " + "Elements attribute will not be populated. " + "Atomtype attribute will be guessed using atom " + "name and mass will be guessed using atomtype." + "For MDAnalysis version 2.x this is done automatically, " + "but it will be removed in MDAnalysis v3.0. " + "These can be guessed using " + "universe.guess_TopologyAttrs(to_guess=['masses', 'types']) " + "See MDAnalysis.guessers.") + + with pytest.warns(UserWarning) as warnings: mda_top = parser(omm_top).parse() + assert str(warnings[0].message) == wmsg def test_invalid_element_symbols(): diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py index 85860162f7..59c15af4c1 100644 --- a/testsuite/MDAnalysisTests/converters/test_rdkit.py +++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py @@ -26,9 +26,10 @@ from io import StringIO import MDAnalysis as mda +from MDAnalysis.guesser.default_guesser import DefaultGuesser + import numpy as np import pytest -from MDAnalysis.topology.guessers import guess_atom_element from MDAnalysisTests.datafiles import GRO, PDB_full, PDB_helix, mol2_molecule from MDAnalysisTests.util import import_not_available from numpy.testing import assert_allclose, assert_equal @@ -146,7 +147,8 @@ def pdb(self): def mol2(self): u = mda.Universe(mol2_molecule) # add elements - elements = np.array([guess_atom_element(x) for x in u.atoms.types], + guesser = DefaultGuesser(None) + elements = np.array([guesser.guess_atom_element(x) for x in u.atoms.types], dtype=object) u.add_TopologyAttr('elements', elements) return u @@ -154,7 +156,7 @@ def mol2(self): @pytest.fixture def peptide(self): u = mda.Universe(GRO) - elements = mda.topology.guessers.guess_types(u.atoms.names) + elements = mda.guesser.DefaultGuesser(None).guess_types(u.atoms.names) u.add_TopologyAttr('elements', elements) return u.select_atoms("resid 2-12") diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit_parser.py b/testsuite/MDAnalysisTests/converters/test_rdkit_parser.py index e54672902d..e376ff09e3 100644 --- a/testsuite/MDAnalysisTests/converters/test_rdkit_parser.py +++ b/testsuite/MDAnalysisTests/converters/test_rdkit_parser.py @@ -39,16 +39,19 @@ class RDKitParserBase(ParserBase): parser = mda.converters.RDKitParser.RDKitParser expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', - 'resids', 'resnums', 'chiralities', - 'segids', - 'bonds', - ] - + 'resids', 'resnums', 'chiralities', 'segids', 'bonds', + ] + expected_n_atoms = 0 expected_n_residues = 1 expected_n_segments = 1 expected_n_bonds = 0 + @pytest.fixture() + def top(self, filename): + with self.parser(filename) as p: + yield p.parse() + def test_creates_universe(self, filename): u = mda.Universe(filename, format='RDKIT') assert isinstance(u, mda.Universe) @@ -56,11 +59,18 @@ def test_creates_universe(self, filename): def test_bonds_total_counts(self, top): assert len(top.bonds.values) == self.expected_n_bonds + def test_guessed_attributes(self, filename): + u = mda.Universe(filename, format='RDKIT') + u_guessed_attrs = [a.attrname for a in u._topology.guessed_attributes] + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + assert attr in u_guessed_attrs + class TestRDKitParserMOL2(RDKitParserBase): ref_filename = mol2_molecule - expected_attrs = RDKitParserBase.expected_attrs + ['charges'] + expected_attrs = RDKitParserBase.expected_attrs + ['charges', 'types'] expected_n_atoms = 49 expected_n_residues = 1 @@ -138,6 +148,10 @@ def test_aromaticity(self, top, filename): atom.GetIsAromatic() for atom in filename.GetAtoms()]) assert_equal(expected, top.aromaticities.values) + def test_guessed_types(self, filename): + u = mda.Universe(filename, format='RDKIT') + assert_equal(u.atoms.types[:7], ['N.am', 'S.o2', + 'N.am', 'N.am', 'O.2', 'O.2', 'C.3']) class TestRDKitParserPDB(RDKitParserBase): ref_filename = PDB_helix @@ -145,7 +159,6 @@ class TestRDKitParserPDB(RDKitParserBase): expected_attrs = RDKitParserBase.expected_attrs + [ 'resnames', 'altLocs', 'chainIDs', 'occupancies', 'icodes', 'tempfactors'] - guessed_attrs = ['types'] expected_n_atoms = 137 expected_n_residues = 13 @@ -165,12 +178,14 @@ def test_partial_residueinfo_raise_error(self, filename): mh = Chem.AddHs(mol, addResidueInfo=True) mda.Universe(mh) + def test_guessed_types(self, filename): + u = mda.Universe(filename, format='RDKIT') + assert_equal(u.atoms.types[:7], ['N', 'H', 'C', 'H', 'C', 'H', 'H']) + class TestRDKitParserSMILES(RDKitParserBase): ref_filename = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" - guessed_attrs = ['types'] - expected_n_atoms = 24 expected_n_residues = 1 expected_n_segments = 1 @@ -186,8 +201,6 @@ def filename(self): class TestRDKitParserSDF(RDKitParserBase): ref_filename = SDF_molecule - guessed_attrs = ['types'] - expected_n_atoms = 49 expected_n_residues = 1 expected_n_segments = 1 @@ -200,3 +213,7 @@ def filename(self): def test_bond_orders(self, top, filename): expected = [bond.GetBondTypeAsDouble() for bond in filename.GetBonds()] assert top.bonds.order == expected + + def test_guessed_types(self, filename): + u = mda.Universe(filename, format='RDKIT') + assert_equal(u.atoms.types[:7], ['CA', 'C', 'C', 'C', 'C', 'C', 'O']) diff --git a/testsuite/MDAnalysisTests/coordinates/base.py b/testsuite/MDAnalysisTests/coordinates/base.py index 3de8cfb9ff..39770e1460 100644 --- a/testsuite/MDAnalysisTests/coordinates/base.py +++ b/testsuite/MDAnalysisTests/coordinates/base.py @@ -504,7 +504,7 @@ def test_timeseries_values(self, reader, slice): @pytest.mark.parametrize('asel', ("index 1", "index 2", "index 1 to 3")) def test_timeseries_asel_shape(self, reader, asel): - atoms = mda.Universe(reader.filename).select_atoms(asel) + atoms = mda.Universe(reader.filename, to_guess=()).select_atoms(asel) timeseries = reader.timeseries(atoms, order='fac') assert(timeseries.shape[0] == len(reader)) assert(timeseries.shape[1] == len(atoms)) @@ -513,28 +513,28 @@ def test_timeseries_asel_shape(self, reader, asel): def test_timeseries_empty_asel(self, reader): with pytest.warns(UserWarning, match="Empty string to select atoms, empty group returned."): - atoms = mda.Universe(reader.filename).select_atoms(None) + atoms = mda.Universe(reader.filename, to_guess=()).select_atoms(None) with pytest.raises(ValueError, match="Timeseries requires at least"): reader.timeseries(asel=atoms) def test_timeseries_empty_atomgroup(self, reader): with pytest.warns(UserWarning, match="Empty string to select atoms, empty group returned."): - atoms = mda.Universe(reader.filename).select_atoms(None) + atoms = mda.Universe(reader.filename, to_guess=()).select_atoms(None) with pytest.raises(ValueError, match="Timeseries requires at least"): reader.timeseries(atomgroup=atoms) def test_timeseries_asel_warns_deprecation(self, reader): - atoms = mda.Universe(reader.filename).select_atoms("index 1") + atoms = mda.Universe(reader.filename, to_guess=()).select_atoms("index 1") with pytest.warns(DeprecationWarning, match="asel argument to"): timeseries = reader.timeseries(asel=atoms, order='fac') def test_timeseries_atomgroup(self, reader): - atoms = mda.Universe(reader.filename).select_atoms("index 1") + atoms = mda.Universe(reader.filename, to_guess=()).select_atoms("index 1") timeseries = reader.timeseries(atomgroup=atoms, order='fac') def test_timeseries_atomgroup_asel_mutex(self, reader): - atoms = mda.Universe(reader.filename).select_atoms("index 1") + atoms = mda.Universe(reader.filename, to_guess=()).select_atoms("index 1") with pytest.raises(ValueError, match="Cannot provide both"): timeseries = reader.timeseries(atomgroup=atoms, asel=atoms, order='fac') diff --git a/testsuite/MDAnalysisTests/coordinates/test_chainreader.py b/testsuite/MDAnalysisTests/coordinates/test_chainreader.py index 2049b68727..9f81b8c325 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_chainreader.py +++ b/testsuite/MDAnalysisTests/coordinates/test_chainreader.py @@ -52,12 +52,12 @@ def transformed(ref): return mda.Universe(PSF, [DCD, CRD, DCD, CRD, DCD, CRD, CRD], transformations=[translate([10,10,10])]) - + def test_regular_repr(self): u = mda.Universe(PSF, [DCD, CRD, DCD]) assert_equal("", u.trajectory.__repr__()) - - + + def test_truncated_repr(self, universe): assert_equal("", universe.trajectory.__repr__()) @@ -135,8 +135,8 @@ def test_write_dcd(self, universe, tmpdir): ts_new._pos, self.prec, err_msg="Coordinates disagree at frame {0:d}".format( - ts_orig.frame)) - + ts_orig.frame)) + def test_transform_iteration(self, universe, transformed): vector = np.float32([10,10,10]) # # Are the transformations applied and @@ -151,7 +151,7 @@ def test_transform_iteration(self, universe, transformed): frame = ts.frame ref = universe.trajectory[frame].positions + vector assert_almost_equal(ts.positions, ref, decimal = 6) - + def test_transform_slice(self, universe, transformed): vector = np.float32([10,10,10]) # what happens when we slice the trajectory? @@ -159,7 +159,7 @@ def test_transform_slice(self, universe, transformed): frame = ts.frame ref = universe.trajectory[frame].positions + vector assert_almost_equal(ts.positions, ref, decimal = 6) - + def test_transform_switch(self, universe, transformed): vector = np.float32([10,10,10]) # grab a frame: @@ -170,7 +170,7 @@ def test_transform_switch(self, universe, transformed): assert_almost_equal(transformed.trajectory[10].positions, newref, decimal = 6) # what happens when we comeback to the previous frame? assert_almost_equal(transformed.trajectory[2].positions, ref, decimal = 6) - + def test_transfrom_rewind(self, universe, transformed): vector = np.float32([10,10,10]) ref = universe.trajectory[0].positions + vector @@ -221,13 +221,13 @@ def test_set_all_format_lammps(self): assert_equal(time_values, np.arange(11)) def test_set_format_tuples_and_format(self): - universe = mda.Universe(GRO, [(PDB, 'pdb'), GRO, GRO, (XTC, 'xtc'), + universe = mda.Universe(GRO, [(PDB, 'pdb'), GRO, GRO, (XTC, 'xtc'), (TRR, 'trr')], format='gro') assert universe.trajectory.n_frames == 23 assert_equal(universe.trajectory.filenames, [PDB, GRO, GRO, XTC, TRR]) - + with pytest.raises(TypeError) as errinfo: - mda.Universe(GRO, [(PDB, 'pdb'), GRO, GRO, (XTC, 'xtc'), + mda.Universe(GRO, [(PDB, 'pdb'), GRO, GRO, (XTC, 'xtc'), (TRR, 'trr')], format='pdb') assert 'Unable to read' in str(errinfo.value) @@ -268,7 +268,7 @@ def build_trajectories(folder, sequences, fmt='xtc'): fnames = [] for index, subseq in enumerate(sequences): coords = np.zeros((len(subseq), 1, 3), dtype=np.float32) + index - u = mda.Universe(utop._topology, coords) + u = mda.Universe(utop._topology, coords, to_guess=()) out_traj = mda.Writer(template.format(index), n_atoms=len(u.atoms)) fnames.append(out_traj.filename) with out_traj: @@ -320,7 +320,7 @@ def __init__(self, seq, n_frames, order): def test_order(self, seq_info, tmpdir, fmt): folder = str(tmpdir) utop, fnames = build_trajectories(folder, sequences=seq_info.seq, fmt=fmt) - u = mda.Universe(utop._topology, fnames, continuous=True) + u = mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) assert u.trajectory.n_frames == seq_info.n_frames for i, ts in enumerate(u.trajectory): assert_almost_equal(i, ts.time, decimal=4) @@ -331,14 +331,14 @@ def test_start_frames(self, tmpdir): folder = str(tmpdir) sequences = ([0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7]) utop, fnames = build_trajectories(folder, sequences=sequences,) - u = mda.Universe(utop._topology, fnames, continuous=True) + u = mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) assert_equal(u.trajectory._start_frames, [0, 2, 4]) def test_missing(self, tmpdir): folder = str(tmpdir) sequences = ([0, 1, 2, 3], [5, 6, 7, 8, 9]) utop, fnames = build_trajectories(folder, sequences=sequences,) - u = mda.Universe(utop._topology, fnames, continuous=True) + u = mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) assert u.trajectory.n_frames == 9 def test_warning(self, tmpdir): @@ -347,7 +347,7 @@ def test_warning(self, tmpdir): sequences = ([0, 1, 2, 3], [5, 6, 7]) utop, fnames = build_trajectories(folder, sequences=sequences,) with pytest.warns(UserWarning): - mda.Universe(utop._topology, fnames, continuous=True) + mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) def test_interleaving_error(self, tmpdir): folder = str(tmpdir) @@ -355,7 +355,7 @@ def test_interleaving_error(self, tmpdir): sequences = ([0, 2, 4, 6], [1, 3, 5, 7]) utop, fnames = build_trajectories(folder, sequences=sequences,) with pytest.raises(RuntimeError): - mda.Universe(utop._topology, fnames, continuous=True) + mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) def test_easy_trigger_warning(self, tmpdir): folder = str(tmpdir) @@ -372,14 +372,14 @@ def test_easy_trigger_warning(self, tmpdir): warnings.filterwarnings( action='ignore', category=ImportWarning) - mda.Universe(utop._topology, fnames, continuous=True) + mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) def test_single_frames(self, tmpdir): folder = str(tmpdir) sequences = ([0, 1, 2, 3], [5, ]) utop, fnames = build_trajectories(folder, sequences=sequences,) with pytest.raises(RuntimeError): - mda.Universe(utop._topology, fnames, continuous=True) + mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) def test_mixed_filetypes(self): with pytest.raises(ValueError): @@ -388,9 +388,9 @@ def test_mixed_filetypes(self): def test_unsupported_filetypes(self): with pytest.raises(NotImplementedError): mda.Universe(PSF, [DCD, DCD], continuous=True) - # see issue 2353. The PDB reader has multiple format endings. To ensure - # the not implemented error is thrown we do a check here. A more - # careful test in the future would be a dummy reader with multiple + # see issue 2353. The PDB reader has multiple format endings. To ensure + # the not implemented error is thrown we do a check here. A more + # careful test in the future would be a dummy reader with multiple # formats, just in case PDB will allow continuous reading in the future. with pytest.raises(ValueError): mda.Universe(PDB, [PDB, XTC], continuous=True) diff --git a/testsuite/MDAnalysisTests/coordinates/test_h5md.py b/testsuite/MDAnalysisTests/coordinates/test_h5md.py index d3fec51f81..76e80a2a46 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_h5md.py +++ b/testsuite/MDAnalysisTests/coordinates/test_h5md.py @@ -449,7 +449,7 @@ def test_parse_n_atoms(self, h5md_file, outfile, group1, group2): except KeyError: continue - u = mda.Universe(outfile) + u = mda.Universe(outfile, to_guess=()) assert_equal(u.atoms.n_atoms, n_atoms_in_dset) def test_parse_n_atoms_error(self, h5md_file, outfile): diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index c009682421..7e365dad51 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -492,7 +492,7 @@ def test_scale_factor_coordinates(self, tmpdir): expected = np.asarray(range(3), dtype=np.float32) * 2.0 with tmpdir.as_cwd(): self.create_ncdf(params) - u = mda.Universe(params['filename']) + u = mda.Universe(params['filename'], to_guess=()) for ts in u.trajectory: assert_almost_equal(ts.positions[0], expected, self.prec) @@ -502,7 +502,7 @@ def test_scale_factor_velocities(self, tmpdir): expected = np.asarray(range(3), dtype=np.float32) * 3.0 with tmpdir.as_cwd(): self.create_ncdf(params) - u = mda.Universe(params['filename']) + u = mda.Universe(params['filename'], to_guess=()) for ts in u.trajectory: assert_almost_equal(ts.velocities[0], expected, self.prec) @@ -512,7 +512,7 @@ def test_scale_factor_forces(self, tmpdir): expected = np.asarray(range(3), dtype=np.float32) * 10.0 * 4.184 with tmpdir.as_cwd(): self.create_ncdf(params) - u = mda.Universe(params['filename']) + u = mda.Universe(params['filename'], to_guess=()) for ts in u.trajectory: assert_almost_equal(ts.forces[0], expected, self.prec) @@ -526,7 +526,7 @@ def test_scale_factor_box(self, tmpdir, mutation, expected): params = self.gen_params(keypair=mutation, restart=False) with tmpdir.as_cwd(): self.create_ncdf(params) - u = mda.Universe(params['filename']) + u = mda.Universe(params['filename'], to_guess=()) for ts in u.trajectory: assert_almost_equal(ts.dimensions, expected, self.prec) @@ -599,7 +599,7 @@ def test_ioerror(self, tmpdir): with tmpdir.as_cwd(): self.create_ncdf(params) with pytest.raises(IOError): - u = mda.Universe(params['filename']) + u = mda.Universe(params['filename'], to_guess=()) u.trajectory.close() u.trajectory[-1] diff --git a/testsuite/MDAnalysisTests/coordinates/test_timestep_api.py b/testsuite/MDAnalysisTests/coordinates/test_timestep_api.py index 2d0228fcd5..423a255cc4 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_timestep_api.py +++ b/testsuite/MDAnalysisTests/coordinates/test_timestep_api.py @@ -119,7 +119,7 @@ def test_iter(self, ts): def test_repr(self, ts): assert_equal(type(repr(ts)), str) - + def test_repr_with_box(self, ts): assert("with unit cell dimensions" in repr(ts)) @@ -698,7 +698,7 @@ def test_dt(self, universe): def test_atomgroup_dims_access(uni): uni_args, uni_kwargs = uni # check that AtomGroup.dimensions always returns a copy - u = mda.Universe(*uni_args, **uni_kwargs) + u = mda.Universe(*uni_args, **uni_kwargs, to_guess=()) ag = u.atoms[:10] diff --git a/testsuite/MDAnalysisTests/coordinates/test_trz.py b/testsuite/MDAnalysisTests/coordinates/test_trz.py index e0803aa797..e0f20f961a 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_trz.py +++ b/testsuite/MDAnalysisTests/coordinates/test_trz.py @@ -162,7 +162,7 @@ def test_read_zero_box(self, tmpdir): with mda.Writer(outfile, n_atoms=10) as w: w.write(u) - u2 = mda.Universe(outfile, n_atoms=10) + u2 = mda.Universe(outfile, n_atoms=10, to_guess=()) assert u2.dimensions is None diff --git a/testsuite/MDAnalysisTests/core/test_atomgroup.py b/testsuite/MDAnalysisTests/core/test_atomgroup.py index 4456362c49..fe51cf2407 100644 --- a/testsuite/MDAnalysisTests/core/test_atomgroup.py +++ b/testsuite/MDAnalysisTests/core/test_atomgroup.py @@ -129,7 +129,7 @@ def test_write_frames(self, u, tmpdir, frames): ref_positions = np.stack([ts.positions.copy() for ts in selection]) u.atoms.write(destination, frames=frames) - u_new = mda.Universe(destination) + u_new = mda.Universe(destination, to_guess=()) new_positions = np.stack([ts.positions.copy() for ts in u_new.trajectory]) assert_array_almost_equal(new_positions, ref_positions) @@ -145,7 +145,7 @@ def test_write_frame_iterator(self, u, tmpdir, frames): ref_positions = np.stack([ts.positions.copy() for ts in selection]) u.atoms.write(destination, frames=selection) - u_new = mda.Universe(destination) + u_new = mda.Universe(destination, to_guess=()) new_positions = np.stack([ts.positions.copy() for ts in u_new.trajectory]) assert_array_almost_equal(new_positions, ref_positions) @@ -155,7 +155,7 @@ def test_write_frame_iterator(self, u, tmpdir, frames): def test_write_frame_none(self, u, tmpdir, extension, compression): destination = str(tmpdir / 'test.' + extension + compression) u.atoms.write(destination, frames=None) - u_new = mda.Universe(destination) + u_new = mda.Universe(destination, to_guess=()) new_positions = np.stack([ts.positions for ts in u_new.trajectory]) # Most format only save 3 decimals; XTC even has only 2. assert_array_almost_equal(u.atoms.positions[None, ...], @@ -165,7 +165,8 @@ def test_write_frame_none(self, u, tmpdir, extension, compression): def test_write_frames_all(self, u, tmpdir, compression): destination = str(tmpdir / 'test.dcd' + compression) u.atoms.write(destination, frames='all') - u_new = mda.Universe(destination) + + u_new = mda.Universe(destination, to_guess=()) ref_positions = np.stack([ts.positions.copy() for ts in u.trajectory]) new_positions = np.stack([ts.positions.copy() for ts in u_new.trajectory]) assert_array_almost_equal(new_positions, ref_positions) diff --git a/testsuite/MDAnalysisTests/core/test_topologyattrs.py b/testsuite/MDAnalysisTests/core/test_topologyattrs.py index a0c1acdf4b..5489c381af 100644 --- a/testsuite/MDAnalysisTests/core/test_topologyattrs.py +++ b/testsuite/MDAnalysisTests/core/test_topologyattrs.py @@ -82,7 +82,7 @@ def attr(self, top): @pytest.fixture() def universe(self, top): - return mda.Universe(top) + return mda.Universe(top, to_guess=()) def test_len(self, attr): assert len(attr) == len(attr.values) @@ -197,6 +197,17 @@ def test_next_emptyresidue(self, u): assert groupsize == 0 assert groupsize == len(u.residues[[]].atoms) + def test_missing_values(self, attr): + assert_equal(attr.are_values_missing(self.values), np.array( + [False, False, False, False, False, False, + False, False, False, False])) + + def test_missing_value_label(self): + self.attrclass.missing_value_label = 'FOO' + values = np.array(['NA', 'C', 'N', 'FOO']) + assert_equal(self.attrclass.are_values_missing(values), + np.array([False, False, False, True])) + class AggregationMixin(TestAtomAttr): def test_get_residues(self, attr): @@ -216,6 +227,10 @@ def test_get_segment(self, attr): class TestMasses(AggregationMixin): attrclass = tpattrs.Masses + def test_missing_masses(self): + values = [1., 2., np.nan, 3.] + assert_equal(self.attrclass.are_values_missing(values), + np.array([False, False, True, False])) class TestCharges(AggregationMixin): values = np.array([+2, -1, 0, -1, +1, +2, 0, 0, 0, -1]) diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index 80259df5e7..3e41a38a96 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -47,7 +47,7 @@ GRO, TRR, two_water_gro, two_water_gro_nonames, TRZ, TRZ_psf, - PDB, MMTF, + PDB, MMTF, CONECT, ) import MDAnalysis as mda @@ -239,7 +239,7 @@ def test_from_bad_smiles(self): def test_no_Hs(self): smi = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" - u = mda.Universe.from_smiles(smi, addHs=False, + u = mda.Universe.from_smiles(smi, addHs=False, generate_coordinates=False, format='RDKIT') assert u.atoms.n_atoms == 14 assert len(u.bonds.indices) == 15 @@ -261,7 +261,7 @@ def test_generate_coordinates_numConfs(self): def test_rdkit_kwargs(self): # test for bad kwarg: # Unfortunately, exceptions from Boost cannot be passed to python, - # we cannot `from Boost.Python import ArgumentError` and use it with + # we cannot `from Boost.Python import ArgumentError` and use it with # pytest.raises(ArgumentError), so "this is the way" try: u = mda.Universe.from_smiles("CCO", rdkit_kwargs=dict(abc=42)) @@ -274,7 +274,7 @@ def test_rdkit_kwargs(self): u1 = mda.Universe.from_smiles("C", rdkit_kwargs=dict(randomSeed=42)) u2 = mda.Universe.from_smiles("C", rdkit_kwargs=dict(randomSeed=51)) with pytest.raises(AssertionError) as e: - assert_equal(u1.trajectory.coordinate_array, + assert_equal(u1.trajectory.coordinate_array, u2.trajectory.coordinate_array) assert "Mismatched elements: 15 / 15 (100%)" in str(e.value) @@ -385,6 +385,41 @@ def test_list(self): ref = translate([10,10,10])(uref.trajectory.ts) assert_almost_equal(u.trajectory.ts.positions, ref, decimal=6) + +class TestGuessTopologyAttrs(object): + def test_automatic_type_and_mass_guessing(self): + u = mda.Universe(PDB_small) + assert_equal(len(u.atoms.masses), 3341) + assert_equal(len(u.atoms.types), 3341) + + def test_no_type_and_mass_guessing(self): + u = mda.Universe(PDB_small, to_guess=()) + assert not hasattr(u.atoms, 'masses') + assert not hasattr(u.atoms, 'types') + + def test_invalid_context(self): + u = mda.Universe(PDB_small) + with pytest.raises(KeyError): + u.guess_TopologyAttrs(context='trash', to_guess=['masses']) + + def test_invalid_attributes(self): + u = mda.Universe(PDB_small) + with pytest.raises(ValueError): + u.guess_TopologyAttrs(to_guess=['trash']) + + def test_guess_masses_before_types(self): + u = mda.Universe(PDB_small, to_guess=('masses', 'types')) + assert_equal(len(u.atoms.masses), 3341) + assert_equal(len(u.atoms.types), 3341) + + def test_guessing_read_attributes(self): + u = mda.Universe(PSF) + old_types = u.atoms.types + u.guess_TopologyAttrs(force_guess=['types']) + with pytest.raises(AssertionError): + assert_equal(old_types, u.atoms.types) + + class TestGuessMasses(object): """Tests the Mass Guesser in topology.guessers """ @@ -433,7 +468,7 @@ def test_universe_guess_bonds_no_vdwradii(self): def test_universe_guess_bonds_with_vdwradii(self, vdw): """Unknown atom types, but with vdw radii here to save the day""" u = mda.Universe(two_water_gro_nonames, guess_bonds=True, - vdwradii=vdw) + vdwradii=vdw) self._check_universe(u) assert u.kwargs['guess_bonds'] assert_equal(vdw, u.kwargs['vdwradii']) @@ -450,7 +485,7 @@ def test_universe_guess_bonds_arguments(self): are being passed correctly. """ u = mda.Universe(two_water_gro, guess_bonds=True) - + self._check_universe(u) assert u.kwargs["guess_bonds"] assert u.kwargs["fudge_factor"] @@ -516,6 +551,17 @@ def test_guess_bonds_periodicity(self): self._check_atomgroup(ag, u) + def guess_bonds_with_to_guess(self): + u = mda.Universe(two_water_gro) + has_bonds = hasattr(u.atoms, 'bonds') + u.guess_TopologyAttrs(to_guess=['bonds']) + assert not has_bonds + assert u.atoms.bonds + + def test_guess_read_bonds(self): + u = mda.Universe(CONECT) + assert len(u.bonds) == 72 + class TestInMemoryUniverse(object): def test_reader_w_timeseries(self): @@ -747,10 +793,14 @@ def test_add_connection(self, universe, attr, values): ('impropers', [(1, 2, 3)]), ) ) - def add_connection_error(self, universe, attr, values): + def test_add_connection_error(self, universe, attr, values): with pytest.raises(ValueError): universe.add_TopologyAttr(attr, values) + def test_add_attr_length_error(self, universe): + with pytest.raises(ValueError): + universe.add_TopologyAttr('masses', np.array([1, 2, 3], dtype=np.float64)) + class TestDelTopologyAttr(object): @pytest.fixture() @@ -827,7 +877,7 @@ def potatoes(self): return "potoooooooo" transplants["Universe"].append(("potatoes", potatoes)) - + universe.add_TopologyAttr("tubers") assert universe.potatoes() == "potoooooooo" universe.del_TopologyAttr("tubers") @@ -1375,6 +1425,6 @@ def test_only_top(self): with pytest.warns(UserWarning, match="No coordinate reader found for"): - u = mda.Universe(t) + u = mda.Universe(t, to_guess=()) assert len(u.atoms) == 10 diff --git a/testsuite/MDAnalysisTests/guesser/test_base.py b/testsuite/MDAnalysisTests/guesser/test_base.py new file mode 100644 index 0000000000..b429826647 --- /dev/null +++ b/testsuite/MDAnalysisTests/guesser/test_base.py @@ -0,0 +1,102 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the Lesser GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +import pytest +import numpy as np +import MDAnalysis as mda +from MDAnalysis.guesser.base import GuesserBase, get_guesser +from MDAnalysis.core.topology import Topology +from MDAnalysis.core.topologyattrs import Masses, Atomnames, Atomtypes +import MDAnalysis.tests.datafiles as datafiles +from numpy.testing import assert_allclose, assert_equal + + +class TesttBaseGuesser(): + + def test_get_guesser(self): + class TestGuesser1(GuesserBase): + context = 'test1' + + class TestGuesser2(GuesserBase): + context = 'test2' + + assert get_guesser(TestGuesser1).context == 'test1' + assert get_guesser('test1').context == 'test1' + assert get_guesser(TestGuesser2()).context == 'test2' + + def test_get_guesser_with_universe(self): + class TestGuesser1(GuesserBase): + context = 'test1' + + u = mda.Universe.empty(n_atoms=5) + guesser = get_guesser(TestGuesser1(), u, foo=1) + + assert len(guesser._universe.atoms) == 5 + assert 'foo' in guesser._kwargs + + def test_guess_invalid_attribute(self): + with pytest.raises(ValueError, + match='default guesser can not guess ' + 'the following attribute: foo'): + mda.Universe(datafiles.PDB, to_guess=['foo']) + + def test_guess_attribute_with_missing_parent_attr(self): + names = Atomnames(np.array(['C', 'HB', 'HA', 'O'], dtype=object)) + masses = Masses( + np.array([np.nan, np.nan, np.nan, np.nan], dtype=np.float64)) + top = Topology(4, 1, 1, attrs=[names, masses, ]) + u = mda.Universe(top, to_guess=['masses']) + assert_allclose(u.atoms.masses, np.array( + [12.01100, 1.00800, 1.00800, 15.99900]), atol=0) + + def test_force_guessing(self): + names = Atomnames(np.array(['C', 'H', 'H', 'O'], dtype=object)) + types = Atomtypes(np.array(['1', '2', '3', '4'], dtype=object)) + top = Topology(4, 1, 1, attrs=[names, types, ]) + u = mda.Universe(top, force_guess=['types']) + assert_equal(u.atoms.types, ['C', 'H', 'H', 'O']) + + def test_partial_guessing(self): + types = Atomtypes(np.array(['C', 'H', 'H', 'O'], dtype=object)) + masses = Masses(np.array([0, np.nan, np.nan, 0], dtype=np.float64)) + top = Topology(4, 1, 1, attrs=[types, masses, ]) + u = mda.Universe(top, to_guess=['masses']) + assert_allclose(u.atoms.masses, np.array( + [0, 1.00800, 1.00800, 0]), atol=0) + + def test_force_guess_priority(self): + "check that passing the attribute to force_guess have higher power" + types = Atomtypes(np.array(['C', 'H', 'H', 'O'], dtype=object)) + masses = Masses(np.array([0, np.nan, np.nan, 0], dtype=np.float64)) + top = Topology(4, 1, 1, attrs=[types, masses, ]) + u = mda.Universe(top, to_guess=['masses'], force_guess=['masses']) + assert_allclose(u.atoms.masses, np.array( + [12.01100, 1.00800, 1.00800, 15.99900]), atol=0) + + def test_partial_guess_attr_with_unknown_no_value_label(self): + "trying to partially guess attribute tha doesn't have declared" + "no_value_label should gives no effect" + names = Atomnames(np.array(['C', 'H', 'H', 'O'], dtype=object)) + types = Atomtypes(np.array(['', '', '', ''], dtype=object)) + top = Topology(4, 1, 1, attrs=[names, types, ]) + u = mda.Universe(top, to_guess=['types']) + assert_equal(u.atoms.types, ['', '', '', '']) diff --git a/testsuite/MDAnalysisTests/guesser/test_default_guesser.py b/testsuite/MDAnalysisTests/guesser/test_default_guesser.py new file mode 100644 index 0000000000..2aacb28d4f --- /dev/null +++ b/testsuite/MDAnalysisTests/guesser/test_default_guesser.py @@ -0,0 +1,302 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the Lesser GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +import pytest +from pytest import approx +import MDAnalysis as mda + +from numpy.testing import assert_equal, assert_allclose +import numpy as np +from MDAnalysis.core.topologyattrs import Angles, Atomtypes, Atomnames, Masses +from MDAnalysis.guesser.default_guesser import DefaultGuesser +from MDAnalysis.core.topology import Topology +from MDAnalysisTests import make_Universe +from MDAnalysisTests.core.test_fragments import make_starshape +import MDAnalysis.tests.datafiles as datafiles +from MDAnalysisTests.util import import_not_available + +try: + from rdkit import Chem + from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges +except ImportError: + pass + +requires_rdkit = pytest.mark.skipif(import_not_available("rdkit"), + reason="requires RDKit") + + +@pytest.fixture +def default_guesser(): + return DefaultGuesser(None) + + +class TestGuessMasses(object): + def test_guess_masses_from_universe(self): + topology = Topology(3, attrs=[Atomtypes(['C', 'C', 'H'])]) + u = mda.Universe(topology) + + assert isinstance(u.atoms.masses, np.ndarray) + assert_allclose(u.atoms.masses, np.array( + [12.011, 12.011, 1.008]), atol=0) + + def test_guess_masses_from_guesser_object(self, default_guesser): + elements = ['H', 'Ca', 'Am'] + values = np.array([1.008, 40.08000, 243.0]) + assert_allclose(default_guesser.guess_masses( + elements), values, atol=0) + + def test_guess_masses_warn(self): + topology = Topology(2, attrs=[Atomtypes(['X', 'Z'])]) + msg = "Unknown masses are set to 0.0 for current version, " + "this will be depracated in version 3.0.0 and replaced by" + " Masse's no_value_label (np.nan)" + with pytest.warns(PendingDeprecationWarning, match=msg): + u = mda.Universe(topology, to_guess=['masses']) + assert_allclose(u.atoms.masses, np.array([0.0, 0.0]), atol=0) + + @pytest.mark.parametrize('element, value', (('H', 1.008), ('XYZ', 0.0),)) + def test_get_atom_mass(self, element, value, default_guesser): + default_guesser.get_atom_mass(element) == approx(value) + + def test_guess_atom_mass(self, default_guesser): + assert default_guesser.guess_atom_mass('1H') == approx(1.008) + + def test_guess_masses_with_no_reference_elements(self): + u = mda.Universe.empty(3) + with pytest.raises(ValueError, + match=('there is no reference attributes ')): + u.guess_TopologyAttrs('default', ['masses']) + + +class TestGuessTypes(object): + + def test_guess_types(self): + topology = Topology(2, attrs=[Atomnames(['MG2+', 'C12'])]) + u = mda.Universe(topology, to_guess=['types']) + assert isinstance(u.atoms.types, np.ndarray) + assert_equal(u.atoms.types, np.array(['MG', 'C'], dtype=object)) + + def test_guess_atom_element(self, default_guesser): + assert default_guesser.guess_atom_element('MG2+') == 'MG' + + def test_guess_atom_element_empty(self, default_guesser): + assert default_guesser.guess_atom_element('') == '' + + def test_guess_atom_element_singledigit(self, default_guesser): + assert default_guesser.guess_atom_element('1') == '1' + + def test_guess_atom_element_1H(self, default_guesser): + assert default_guesser.guess_atom_element('1H') == 'H' + assert default_guesser.guess_atom_element('2H') == 'H' + + def test_partial_guess_elements(self, default_guesser): + names = np.array(['BR123', 'Hk', 'C12'], dtype=object) + elements = np.array(['BR', 'C'], dtype=object) + guessed_elements = default_guesser.guess_types( + atom_types=names, indices_to_guess=[True, False, True]) + assert_equal(elements, guessed_elements) + + def test_guess_elements_from_no_data(self): + top = Topology(5) + msg = "there is no reference attributes in this universe" + "to guess types from" + with pytest.raises(ValueError, match=(msg)): + mda.Universe(top, to_guess=['types']) + + @pytest.mark.parametrize('name, element', ( + ('AO5*', 'O'), + ('F-', 'F'), + ('HB1', 'H'), + ('OC2', 'O'), + ('1he2', 'H'), + ('3hg2', 'H'), + ('OH-', 'O'), + ('HO', 'H'), + ('he', 'H'), + ('zn', 'ZN'), + ('Ca2+', 'CA'), + ('CA', 'C'), + )) + def test_guess_element_from_name(self, name, element, default_guesser): + assert default_guesser.guess_atom_element(name) == element + + +def test_guess_charge(default_guesser): + # this always returns 0.0 + assert default_guesser.guess_atom_charge('this') == approx(0.0) + + +def test_guess_bonds_Error(): + u = make_Universe(trajectory=True) + msg = "This Universe does not contain name information" + with pytest.raises(ValueError, match=msg): + u.guess_TopologyAttrs(to_guess=['bonds']) + + +def test_guess_bond_vdw_error(): + u = mda.Universe(datafiles.PDB) + with pytest.raises(ValueError, match="vdw radii for types: DUMMY"): + DefaultGuesser(u).guess_bonds(u.atoms) + + +def test_guess_bond_coord_error(default_guesser): + msg = "atoms' and 'coord' must be the same length" + with pytest.raises(ValueError, match=msg): + default_guesser.guess_bonds(['N', 'O', 'C'], [[1, 2, 3]]) + + +def test_guess_angles_with_no_bonds(): + "Test guessing angles for atoms with no bonds" + " information without adding bonds to universe " + u = mda.Universe(datafiles.two_water_gro) + u.guess_TopologyAttrs(to_guess=['angles']) + assert hasattr(u, 'angles') + assert not hasattr(u, 'bonds') + + +def test_guess_impropers(default_guesser): + u = make_starshape() + + ag = u.atoms[:5] + guessed_angles = default_guesser.guess_angles(ag.bonds) + u.add_TopologyAttr(Angles(guessed_angles)) + + vals = default_guesser.guess_improper_dihedrals(ag.angles) + assert_equal(len(vals), 12) + + +def test_guess_dihedrals_with_no_angles(): + "Test guessing dihedrals for atoms with no angles " + "information without adding bonds or angles to universe" + u = mda.Universe(datafiles.two_water_gro) + u.guess_TopologyAttrs(to_guess=['dihedrals']) + assert hasattr(u, 'dihedrals') + assert not hasattr(u, 'angles') + assert not hasattr(u, 'bonds') + + +def test_guess_impropers_with_angles(): + "Test guessing impropers for atoms with angles " + "and bonds information " + u = mda.Universe(datafiles.two_water_gro, + to_guess=['bonds', 'angles', 'impropers']) + u.guess_TopologyAttrs(to_guess=['impropers']) + assert hasattr(u, 'impropers') + assert hasattr(u, 'angles') + assert hasattr(u, 'bonds') + + +def test_guess_impropers_with_no_angles(): + "Test guessing impropers for atoms with no angles " + "information without adding bonds or angles to universe" + u = mda.Universe(datafiles.two_water_gro) + u.guess_TopologyAttrs(to_guess=['impropers']) + assert hasattr(u, 'impropers') + assert not hasattr(u, 'angles') + assert not hasattr(u, 'bonds') + + +def bond_sort(arr): + # sort from low to high, also within a tuple + # e.g. ([5, 4], [0, 1], [0, 3]) -> ([0, 1], [0, 3], [4, 5]) + out = [] + for (i, j) in arr: + if i > j: + i, j = j, i + out.append((i, j)) + return sorted(out) + + +def test_guess_bonds_water(): + u = mda.Universe(datafiles.two_water_gro) + bonds = bond_sort(DefaultGuesser( + None, box=u.dimensions).guess_bonds(u.atoms, u.atoms.positions)) + assert_equal(bonds, ((0, 1), + (0, 2), + (3, 4), + (3, 5))) + + +def test_guess_bonds_adk(): + u = mda.Universe(datafiles.PSF, datafiles.DCD) + u.guess_TopologyAttrs(force_guess=['types']) + guesser = DefaultGuesser(None) + bonds = bond_sort(guesser.guess_bonds(u.atoms, u.atoms.positions)) + assert_equal(np.sort(u.bonds.indices, axis=0), + np.sort(bonds, axis=0)) + + +def test_guess_bonds_peptide(): + u = mda.Universe(datafiles.PSF_NAMD, datafiles.PDB_NAMD) + u.guess_TopologyAttrs(force_guess=['types']) + guesser = DefaultGuesser(None) + bonds = bond_sort(guesser.guess_bonds(u.atoms, u.atoms.positions)) + assert_equal(np.sort(u.bonds.indices, axis=0), + np.sort(bonds, axis=0)) + + +@pytest.mark.parametrize("smi", [ + "c1ccccc1", + "C1=CC=CC=C1", + "CCO", + "c1ccccc1Cc1ccccc1", + "CN1C=NC2=C1C(=O)N(C(=O)N2C)C", +]) +@requires_rdkit +def test_guess_aromaticities(smi): + mol = Chem.MolFromSmiles(smi) + mol = Chem.AddHs(mol) + expected = np.array([atom.GetIsAromatic() for atom in mol.GetAtoms()]) + u = mda.Universe(mol) + guesser = DefaultGuesser(None) + values = guesser.guess_aromaticities(u.atoms) + u.guess_TopologyAttrs(to_guess=['aromaticities']) + assert_equal(values, expected) + assert_equal(u.atoms.aromaticities, expected) + + +@pytest.mark.parametrize("smi", [ + "c1ccccc1", + "C1=CC=CC=C1", + "CCO", + "c1ccccc1Cc1ccccc1", + "CN1C=NC2=C1C(=O)N(C(=O)N2C)C", +]) +@requires_rdkit +def test_guess_gasteiger_charges(smi): + mol = Chem.MolFromSmiles(smi) + mol = Chem.AddHs(mol) + ComputeGasteigerCharges(mol, throwOnParamFailure=True) + expected = np.array([atom.GetDoubleProp("_GasteigerCharge") + for atom in mol.GetAtoms()], dtype=np.float32) + u = mda.Universe(mol) + guesser = DefaultGuesser(None) + values = guesser.guess_gasteiger_charges(u.atoms) + assert_equal(values, expected) + + +@requires_rdkit +def test_aromaticity(): + u = mda.Universe(datafiles.PDB_small, + to_guess=['elements', 'aromaticities']) + c_aromatic = u.select_atoms('resname PHE and name CD1') + assert_equal(c_aromatic.aromaticities[0], True) diff --git a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py index 6b29aec6a5..e92ff80bf1 100644 --- a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py +++ b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py @@ -90,7 +90,7 @@ def u(request): if len(request.param) == 1: f = request.param[0] - return mda.Universe(f) + return mda.Universe(f, to_guess=()) else: top, trj = request.param return mda.Universe(top, trj) diff --git a/testsuite/MDAnalysisTests/topology/base.py b/testsuite/MDAnalysisTests/topology/base.py index 7833f0a51e..142e7954ab 100644 --- a/testsuite/MDAnalysisTests/topology/base.py +++ b/testsuite/MDAnalysisTests/topology/base.py @@ -25,8 +25,7 @@ import MDAnalysis as mda from MDAnalysis.core.topology import Topology -mandatory_attrs = ['ids', 'masses', 'types', - 'resids', 'resnums', 'segids'] +mandatory_attrs = ['ids', 'resids', 'resnums', 'segids'] class ParserBase(object): @@ -62,25 +61,16 @@ def test_mandatory_attributes(self, top): def test_expected_attributes(self, top): # Extra attributes as declared in specific implementations - for attr in self.expected_attrs+self.guessed_attrs: + for attr in self.expected_attrs: assert hasattr(top, attr), 'Missing expected attribute: {}'.format(attr) - + def test_no_unexpected_attributes(self, top): attrs = set(self.expected_attrs - + self.guessed_attrs + mandatory_attrs - + ['indices', 'resindices', 'segindices']) + + ['indices', 'resindices', 'segindices'] + self.guessed_attrs) for attr in top.attrs: assert attr.attrname in attrs, 'Unexpected attribute: {}'.format(attr.attrname) - def test_guessed_attributes(self, top): - # guessed attributes must be declared as guessed - for attr in top.attrs: - val = attr.is_guessed - if not val in (True, False): # only for simple yes/no cases - continue - assert val == (attr.attrname in self.guessed_attrs), 'Attr "{}" guessed= {}'.format(attr, val) - def test_size(self, top): """Check that the Topology is correctly sized""" assert top.n_atoms == self.expected_n_atoms, '{} atoms read, {} expected in {}'.format( @@ -100,3 +90,13 @@ def test_creates_universe(self, filename): """Check that Universe works with this Parser""" u = mda.Universe(filename) assert isinstance(u, mda.Universe) + + def test_guessed_attributes(self, filename): + """check that the universe created with certain parser have the same + guessed attributes as when it was guessed inside the parser""" + u = mda.Universe(filename) + u_guessed_attrs = [attr.attrname for attr + in u._topology.guessed_attributes] + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + assert attr in u_guessed_attrs diff --git a/testsuite/MDAnalysisTests/topology/test_crd.py b/testsuite/MDAnalysisTests/topology/test_crd.py index 846bff496f..3062ba12f3 100644 --- a/testsuite/MDAnalysisTests/topology/test_crd.py +++ b/testsuite/MDAnalysisTests/topology/test_crd.py @@ -27,6 +27,8 @@ CRD, ) +from numpy.testing import assert_allclose + class TestCRDParser(ParserBase): parser = mda.topology.CRDParser.CRDParser @@ -35,6 +37,17 @@ class TestCRDParser(ParserBase): 'resids', 'resnames', 'resnums', 'segids'] guessed_attrs = ['masses', 'types'] + expected_n_atoms = 3341 expected_n_residues = 214 expected_n_segments = 1 + + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + expected = [14.007, 1.008, 1.008, 1.008, 12.011, 1.008, 12.011] + assert_allclose(u.atoms.masses[:7], expected) + + def test_guessed_types(self, filename): + u = mda.Universe(filename) + expected = ['N', 'H', 'H', 'H', 'C', 'H', 'C'] + assert (u.atoms.types[:7] == expected).all() diff --git a/testsuite/MDAnalysisTests/topology/test_dlpoly.py b/testsuite/MDAnalysisTests/topology/test_dlpoly.py index 01ea6632b1..a21f7134ca 100644 --- a/testsuite/MDAnalysisTests/topology/test_dlpoly.py +++ b/testsuite/MDAnalysisTests/topology/test_dlpoly.py @@ -20,7 +20,7 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose import pytest import MDAnalysis as mda @@ -43,14 +43,30 @@ def test_creates_universe(self, filename): u = mda.Universe(filename, topology_format=self.format) assert isinstance(u, mda.Universe) + def test_guessed_attributes(self, filename): + u = mda.Universe(filename, topology_format=self.format) + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + class DLPBase2(DLPUniverse): expected_attrs = ['ids', 'names'] - guessed_attrs = ['types', 'masses'] + guessed_attrs = ['masses', 'types'] + expected_n_atoms = 216 expected_n_residues = 1 expected_n_segments = 1 + def test_guesssed_masses(self, filename): + u = mda.Universe(filename, topology_format=self.format) + assert_allclose(u.atoms.masses[0], 39.102) + assert_allclose(u.atoms.masses[4], 35.45) + + def test_guessed_types(self, filename): + u = mda.Universe(filename, topology_format=self.format) + assert u.atoms.types[0] == 'K' + assert u.atoms.types[4] == 'CL' + def test_names(self, top): assert top.names.values[0] == 'K+' assert top.names.values[4] == 'Cl-' @@ -70,7 +86,6 @@ class TestDLPConfigParser(DLPBase2): class DLPBase(DLPUniverse): expected_attrs = ['ids', 'names'] - guessed_attrs = ['types', 'masses'] expected_n_atoms = 3 expected_n_residues = 1 expected_n_segments = 1 diff --git a/testsuite/MDAnalysisTests/topology/test_dms.py b/testsuite/MDAnalysisTests/topology/test_dms.py index 2740c392d2..d9f7944aaa 100644 --- a/testsuite/MDAnalysisTests/topology/test_dms.py +++ b/testsuite/MDAnalysisTests/topology/test_dms.py @@ -21,7 +21,6 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # import MDAnalysis as mda - from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import DMS_DOMAINS, DMS_NO_SEGID @@ -62,6 +61,11 @@ def test_atomsels(self, filename): s5 = u.select_atoms("resname ALA") assert len(s5) == 190 + def test_guessed_types(self, filename): + u = mda.Universe(filename) + expected = ['N', 'H', 'H', 'H', 'C', 'H', 'C'] + assert (u.atoms.types[:7] == expected).all() + class TestDMSParserNoSegid(TestDMSParser): ref_filename = DMS_NO_SEGID diff --git a/testsuite/MDAnalysisTests/topology/test_fhiaims.py b/testsuite/MDAnalysisTests/topology/test_fhiaims.py index 4596ccfa2b..3909747387 100644 --- a/testsuite/MDAnalysisTests/topology/test_fhiaims.py +++ b/testsuite/MDAnalysisTests/topology/test_fhiaims.py @@ -20,13 +20,14 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -from numpy.testing import assert_equal, assert_almost_equal +from numpy.testing import assert_equal, assert_allclose import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import FHIAIMS + class TestFHIAIMS(ParserBase): parser = mda.topology.FHIAIMSParser.FHIAIMSParser expected_attrs = ['names', 'elements'] @@ -40,15 +41,17 @@ def test_names(self, top): assert_equal(top.names.values, ['O', 'H', 'H', 'O', 'H', 'H']) - def test_types(self, top): - assert_equal(top.types.values, + def test_guessed_types(self, filename): + u = mda.Universe(filename) + assert_equal(u.atoms.types, ['O', 'H', 'H', 'O', 'H', 'H']) + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + assert_allclose(u.atoms.masses, + [15.999, 1.008, 1.008, 15.999, + 1.008, 1.008]) + def test_elements(self, top): assert_equal(top.elements.values, ['O', 'H', 'H', 'O', 'H', 'H']) - - def test_masses(self, top): - assert_almost_equal(top.masses.values, - [15.999, 1.008, 1.008, 15.999, - 1.008, 1.008]) diff --git a/testsuite/MDAnalysisTests/topology/test_gms.py b/testsuite/MDAnalysisTests/topology/test_gms.py index 87b1795b35..cb187adad3 100644 --- a/testsuite/MDAnalysisTests/topology/test_gms.py +++ b/testsuite/MDAnalysisTests/topology/test_gms.py @@ -20,7 +20,7 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose import MDAnalysis as mda @@ -52,6 +52,16 @@ def test_types(self, top): assert_equal(top.atomiccharges.values, [8, 1, 1, 8, 1, 1]) + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + expected = [15.999, 1.008, 1.008, 15.999, 1.008, 1.008] + assert_allclose(u.atoms.masses, expected) + + def test_guessed_types(self, filename): + u = mda.Universe(filename) + expected = ['O', 'H', 'H', 'O', 'H', 'H'] + assert (u.atoms.types == expected).all() + class TestGMSSYMOPT(GMSBase): expected_n_atoms = 4 diff --git a/testsuite/MDAnalysisTests/topology/test_gro.py b/testsuite/MDAnalysisTests/topology/test_gro.py index 6a457cecb8..f95deea52b 100644 --- a/testsuite/MDAnalysisTests/topology/test_gro.py +++ b/testsuite/MDAnalysisTests/topology/test_gro.py @@ -34,13 +34,13 @@ GRO_residwrap_0base, GRO_sameresid_diffresname, ) -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose class TestGROParser(ParserBase): parser = mda.topology.GROParser.GROParser ref_filename = GRO - expected_attrs = ['ids', 'names', 'resids', 'resnames', 'masses'] + expected_attrs = ['ids', 'names', 'resids', 'resnames'] guessed_attrs = ['masses', 'types'] expected_n_atoms = 47681 expected_n_residues = 11302 @@ -52,6 +52,16 @@ def test_attr_size(self, top): assert len(top.resids) == top.n_residues assert len(top.resnames) == top.n_residues + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + expected = [14.007, 1.008, 1.008, 1.008, 12.011, 1.008, 12.011] + assert_allclose(u.atoms.masses[:7], expected) + + def test_guessed_types(self, filename): + u = mda.Universe(filename) + expected = ['N', 'H', 'H', 'H', 'C', 'H', 'C'] + assert_equal(u.atoms.types[:7], expected) + class TestGROWideBox(object): """Tests for Issue #548""" @@ -75,6 +85,7 @@ def test_parse_missing_atomname_IOerror(): with pytest.raises(IOError): p.parse() + class TestGroResidWrapping(object): # resid is 5 digit field, so is limited to 100k # check that parser recognises when resids have wrapped diff --git a/testsuite/MDAnalysisTests/topology/test_gsd.py b/testsuite/MDAnalysisTests/topology/test_gsd.py index 41c3cb8b81..b80df4ede1 100644 --- a/testsuite/MDAnalysisTests/topology/test_gsd.py +++ b/testsuite/MDAnalysisTests/topology/test_gsd.py @@ -28,7 +28,6 @@ from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import GSD from MDAnalysisTests.datafiles import GSD_bonds -from numpy.testing import assert_equal import os @@ -36,19 +35,19 @@ class GSDBase(ParserBase): parser = mda.topology.GSDParser.GSDParser expected_attrs = ['ids', 'names', 'resids', 'resnames', 'masses', - 'charges', 'radii', + 'charges', 'radii', 'types', 'bonds', 'angles', 'dihedrals', 'impropers'] expected_n_bonds = 0 expected_n_angles = 0 expected_n_dihedrals = 0 expected_n_impropers = 0 - + def test_attr_size(self, top): assert len(top.ids) == top.n_atoms assert len(top.names) == top.n_atoms assert len(top.resids) == top.n_residues assert len(top.resnames) == top.n_residues - + def test_atoms(self, top): assert top.n_atoms == self.expected_n_atoms @@ -72,7 +71,7 @@ def test_dihedrals(self, top): assert isinstance(top.angles.values[0], tuple) else: assert top.dihedrals.values == [] - + def test_impropers(self, top): assert len(top.impropers.values) == self.expected_n_impropers if self.expected_n_impropers: diff --git a/testsuite/MDAnalysisTests/topology/test_guessers.py b/testsuite/MDAnalysisTests/topology/test_guessers.py deleted file mode 100644 index 1d946f22c8..0000000000 --- a/testsuite/MDAnalysisTests/topology/test_guessers.py +++ /dev/null @@ -1,205 +0,0 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 -# -# MDAnalysis --- https://www.mdanalysis.org -# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors -# (see the file AUTHORS for the full list of names) -# -# Released under the GNU Public Licence, v2 or any higher version -# -# Please cite your use of MDAnalysis in published work: -# -# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, -# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. -# MDAnalysis: A Python package for the rapid analysis of molecular dynamics -# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th -# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. -# doi: 10.25080/majora-629e541a-00e -# -# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. -# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. -# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 -# -import pytest -from numpy.testing import assert_equal -import numpy as np - -import MDAnalysis as mda -from MDAnalysis.topology import guessers -from MDAnalysis.core.topologyattrs import Angles - -from MDAnalysisTests import make_Universe -from MDAnalysisTests.core.test_fragments import make_starshape -import MDAnalysis.tests.datafiles as datafiles - -from MDAnalysisTests.util import import_not_available - - -try: - from rdkit import Chem - from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges -except ImportError: - pass - -requires_rdkit = pytest.mark.skipif(import_not_available("rdkit"), - reason="requires RDKit") - - -class TestGuessMasses(object): - def test_guess_masses(self): - out = guessers.guess_masses(['C', 'C', 'H']) - - assert isinstance(out, np.ndarray) - assert_equal(out, np.array([12.011, 12.011, 1.008])) - - def test_guess_masses_warn(self): - with pytest.warns(UserWarning): - guessers.guess_masses(['X']) - - def test_guess_masses_miss(self): - out = guessers.guess_masses(['X', 'Z']) - assert_equal(out, np.array([0.0, 0.0])) - - @pytest.mark.parametrize('element, value', (('H', 1.008), ('XYZ', 0.0), )) - def test_get_atom_mass(self, element, value): - assert guessers.get_atom_mass(element) == value - - def test_guess_atom_mass(self): - assert guessers.guess_atom_mass('1H') == 1.008 - - -class TestGuessTypes(object): - # guess_types - # guess_atom_type - # guess_atom_element - def test_guess_types(self): - out = guessers.guess_types(['MG2+', 'C12']) - - assert isinstance(out, np.ndarray) - assert_equal(out, np.array(['MG', 'C'], dtype=object)) - - def test_guess_atom_element(self): - assert guessers.guess_atom_element('MG2+') == 'MG' - - def test_guess_atom_element_empty(self): - assert guessers.guess_atom_element('') == '' - - def test_guess_atom_element_singledigit(self): - assert guessers.guess_atom_element('1') == '1' - - def test_guess_atom_element_1H(self): - assert guessers.guess_atom_element('1H') == 'H' - assert guessers.guess_atom_element('2H') == 'H' - - @pytest.mark.parametrize('name, element', ( - ('AO5*', 'O'), - ('F-', 'F'), - ('HB1', 'H'), - ('OC2', 'O'), - ('1he2', 'H'), - ('3hg2', 'H'), - ('OH-', 'O'), - ('HO', 'H'), - ('he', 'H'), - ('zn', 'ZN'), - ('Ca2+', 'CA'), - ('CA', 'C'), - ('N0A', 'N'), - ('C0U', 'C'), - ('C0S', 'C'), - ('Na+', 'NA'), - ('Cu2+', 'CU') - )) - def test_guess_element_from_name(self, name, element): - assert guessers.guess_atom_element(name) == element - - -def test_guess_charge(): - # this always returns 0.0 - assert guessers.guess_atom_charge('this') == 0.0 - - -def test_guess_bonds_Error(): - u = make_Universe(trajectory=True) - with pytest.raises(ValueError): - guessers.guess_bonds(u.atoms[:4], u.atoms.positions[:5]) - - -def test_guess_impropers(): - u = make_starshape() - - ag = u.atoms[:5] - - u.add_TopologyAttr(Angles(guessers.guess_angles(ag.bonds))) - - vals = guessers.guess_improper_dihedrals(ag.angles) - assert_equal(len(vals), 12) - - -def bond_sort(arr): - # sort from low to high, also within a tuple - # e.g. ([5, 4], [0, 1], [0, 3]) -> ([0, 1], [0, 3], [4, 5]) - out = [] - for (i, j) in arr: - if i > j: - i, j = j, i - out.append((i, j)) - return sorted(out) - -def test_guess_bonds_water(): - u = mda.Universe(datafiles.two_water_gro) - bonds = bond_sort(guessers.guess_bonds(u.atoms, u.atoms.positions, u.dimensions)) - assert_equal(bonds, ((0, 1), - (0, 2), - (3, 4), - (3, 5))) - -def test_guess_bonds_adk(): - u = mda.Universe(datafiles.PSF, datafiles.DCD) - u.atoms.types = guessers.guess_types(u.atoms.names) - bonds = bond_sort(guessers.guess_bonds(u.atoms, u.atoms.positions)) - assert_equal(np.sort(u.bonds.indices, axis=0), - np.sort(bonds, axis=0)) - -def test_guess_bonds_peptide(): - u = mda.Universe(datafiles.PSF_NAMD, datafiles.PDB_NAMD) - u.atoms.types = guessers.guess_types(u.atoms.names) - bonds = bond_sort(guessers.guess_bonds(u.atoms, u.atoms.positions)) - assert_equal(np.sort(u.bonds.indices, axis=0), - np.sort(bonds, axis=0)) - - -@pytest.mark.parametrize("smi", [ - "c1ccccc1", - "C1=CC=CC=C1", - "CCO", - "c1ccccc1Cc1ccccc1", - "CN1C=NC2=C1C(=O)N(C(=O)N2C)C", -]) -@requires_rdkit -def test_guess_aromaticities(smi): - mol = Chem.MolFromSmiles(smi) - mol = Chem.AddHs(mol) - expected = np.array([atom.GetIsAromatic() for atom in mol.GetAtoms()]) - u = mda.Universe(mol) - values = guessers.guess_aromaticities(u.atoms) - assert_equal(values, expected) - - -@pytest.mark.parametrize("smi", [ - "c1ccccc1", - "C1=CC=CC=C1", - "CCO", - "c1ccccc1Cc1ccccc1", - "CN1C=NC2=C1C(=O)N(C(=O)N2C)C", -]) -@requires_rdkit -def test_guess_gasteiger_charges(smi): - mol = Chem.MolFromSmiles(smi) - mol = Chem.AddHs(mol) - ComputeGasteigerCharges(mol, throwOnParamFailure=True) - expected = np.array([atom.GetDoubleProp("_GasteigerCharge") - for atom in mol.GetAtoms()], dtype=np.float32) - u = mda.Universe(mol) - values = guessers.guess_gasteiger_charges(u.atoms) - assert_equal(values, expected) diff --git a/testsuite/MDAnalysisTests/topology/test_hoomdxml.py b/testsuite/MDAnalysisTests/topology/test_hoomdxml.py index 018470647f..d85a25c846 100644 --- a/testsuite/MDAnalysisTests/topology/test_hoomdxml.py +++ b/testsuite/MDAnalysisTests/topology/test_hoomdxml.py @@ -21,7 +21,6 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # from numpy.testing import assert_almost_equal - import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase @@ -34,6 +33,7 @@ class TestHoomdXMLParser(ParserBase): expected_attrs = [ 'types', 'masses', 'charges', 'radii', 'bonds', 'angles', 'dihedrals', 'impropers' ] + expected_n_atoms = 769 expected_n_residues = 1 expected_n_segments = 1 diff --git a/testsuite/MDAnalysisTests/topology/test_itp.py b/testsuite/MDAnalysisTests/topology/test_itp.py index 035c05bb98..e5cea0e215 100644 --- a/testsuite/MDAnalysisTests/topology/test_itp.py +++ b/testsuite/MDAnalysisTests/topology/test_itp.py @@ -49,6 +49,9 @@ class BaseITP(ParserBase): 'resids', 'resnames', 'segids', 'moltypes', 'molnums', 'bonds', 'angles', 'dihedrals', 'impropers'] + + guessed_attrs = ['elements', ] + expected_n_atoms = 63 expected_n_residues = 10 expected_n_segments = 1 @@ -64,13 +67,13 @@ def universe(self, filename): def test_bonds_total_counts(self, top): assert len(top.bonds.values) == self.expected_n_bonds - + def test_angles_total_counts(self, top): assert len(top.angles.values) == self.expected_n_angles def test_dihedrals_total_counts(self, top): assert len(top.dihedrals.values) == self.expected_n_dihedrals - + def test_impropers_total_counts(self, top): assert len(top.impropers.values) == self.expected_n_impropers @@ -86,7 +89,7 @@ class TestITP(BaseITP): expected_n_angles = 91 expected_n_dihedrals = 30 expected_n_impropers = 29 - + def test_bonds_atom_counts(self, universe): assert len(universe.atoms[[0]].bonds) == 3 assert len(universe.atoms[[42]].bonds) == 1 @@ -95,7 +98,7 @@ def test_bonds_values(self, top): vals = top.bonds.values for b in ((0, 1), (0, 2), (0, 3), (3, 4)): assert b in vals - + def test_bonds_type(self, universe): assert universe.bonds[0].type == 2 @@ -107,7 +110,7 @@ def test_angles_values(self, top): vals = top.angles.values for b in ((1, 0, 2), (1, 0, 3), (2, 0, 3)): assert (b in vals) or (b[::-1] in vals) - + def test_angles_type(self, universe): assert universe.angles[0].type == 2 @@ -123,7 +126,7 @@ def test_dihedrals_values(self, top): vals = top.dihedrals.values for b in ((1, 0, 3, 5), (0, 3, 5, 7)): assert (b in vals) or (b[::-1] in vals) - + def test_dihedrals_type(self, universe): assert universe.dihedrals[0].type == (1, 1) @@ -134,7 +137,7 @@ def test_impropers_values(self, top): vals = top.impropers.values for b in ((3, 0, 5, 4), (5, 3, 7, 6)): assert (b in vals) or (b[::-1] in vals) - + def test_impropers_type(self, universe): assert universe.impropers[0].type == 2 @@ -142,12 +145,13 @@ def test_impropers_type(self, universe): class TestITPNoMass(ParserBase): parser = mda.topology.ITPParser.ITPParser ref_filename = ITP_nomass - expected_attrs = ['ids', 'names', 'types', 'masses', + expected_attrs = ['ids', 'names', 'types', 'charges', 'chargegroups', 'resids', 'resnames', 'segids', 'moltypes', 'molnums', - 'bonds', 'angles', 'dihedrals', 'impropers'] - guessed_attrs = ['masses'] + 'bonds', 'angles', 'dihedrals', 'impropers', 'masses', ] + guessed_attrs = ['elements', ] + expected_n_atoms = 60 expected_n_residues = 1 expected_n_segments = 1 @@ -157,18 +161,18 @@ def universe(self, filename): return mda.Universe(filename) def test_mass_guess(self, universe): - assert universe.atoms[0].mass not in ('', None) + assert not np.isnan(universe.atoms[0].mass) class TestITPAtomtypes(ParserBase): parser = mda.topology.ITPParser.ITPParser ref_filename = ITP_atomtypes - expected_attrs = ['ids', 'names', 'types', 'masses', + expected_attrs = ['ids', 'names', 'types', 'charges', 'chargegroups', - 'resids', 'resnames', + 'resids', 'resnames', 'masses', 'segids', 'moltypes', 'molnums', 'bonds', 'angles', 'dihedrals', 'impropers'] - guessed_attrs = ['masses'] + expected_n_atoms = 4 expected_n_residues = 1 expected_n_segments = 1 @@ -202,7 +206,8 @@ class TestITPCharges(ParserBase): 'resids', 'resnames', 'segids', 'moltypes', 'molnums', 'bonds', 'angles', 'dihedrals', 'impropers'] - guessed_attrs = [] + guessed_attrs = ['elements', ] + expected_n_atoms = 9 expected_n_residues = 3 expected_n_segments = 1 @@ -220,6 +225,7 @@ def test_charge_parse(self, universe): def test_masses_are_read(self, universe): assert_allclose(universe.atoms.masses, [100] * 9) + class TestDifferentDirectivesITP(BaseITP): ref_filename = ITP_edited @@ -245,6 +251,13 @@ def test_dihedrals_identity(self, universe): class TestITPNoKeywords(BaseITP): + expected_attrs = ['ids', 'names', 'types', + 'charges', 'chargegroups', + 'resids', 'resnames', + 'segids', 'moltypes', 'molnums', + 'bonds', 'angles', 'dihedrals', 'impropers', 'masses', ] + guessed_attrs = ['elements', 'masses', ] + """ Test reading ITP files *without* defined keywords. @@ -253,7 +266,7 @@ class TestITPNoKeywords(BaseITP): #ifndef HW1_CHARGE #define HW1_CHARGE 0.241 #endif - + [ atoms ] 1 opls_118 1 SOL OW 1 0 2 opls_119 1 SOL HW1 1 HW1_CHARGE @@ -263,8 +276,6 @@ class TestITPNoKeywords(BaseITP): expected_n_residues = 1 expected_n_segments = 1 - guessed_attrs = ['masses'] - expected_n_bonds = 2 # FLEXIBLE not set -> SETTLE constraint -> water has no angle expected_n_angles = 0 @@ -284,7 +295,12 @@ def test_defines(self, top): assert_allclose(top.charges.values[1], 0.241) assert_allclose(top.charges.values[2], 0.241) - + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + assert_allclose(u.atoms.masses, + [15.999, 15.999, 15.999, 15.999, 15.999]) + + class TestITPKeywords(TestITPNoKeywords): """ Test reading ITP files *with* defined keywords. @@ -296,13 +312,13 @@ class TestITPKeywords(TestITPNoKeywords): @pytest.fixture def universe(self, filename): - return mda.Universe(filename, FLEXIBLE=True, EXTRA_ATOMS=True, + return mda.Universe(filename, FLEXIBLE=True, EXTRA_ATOMS=True, HW1_CHARGE=1, HW2_CHARGE=3) @pytest.fixture() def top(self, filename): with self.parser(filename) as p: - yield p.parse(FLEXIBLE=True, EXTRA_ATOMS=True, + yield p.parse(FLEXIBLE=True, EXTRA_ATOMS=True, HW1_CHARGE=1, HW2_CHARGE=3) def test_whether_settles_types(self, universe): @@ -341,7 +357,7 @@ def universe(self, filename): def top(self, filename): with self.parser(filename) as p: yield p.parse(HEAVY_H=True, EXTRA_ATOMS=True, HEAVY_SIX=True) - + def test_heavy_atom(self, universe): assert universe.atoms[5].mass > 40 @@ -380,6 +396,13 @@ def test_creates_universe(self, filename): """Check that Universe works with this Parser""" u = mda.Universe(filename, topology_format='ITP', include_dir=GMX_DIR) + def test_guessed_attributes(self, filename): + """check that the universe created with certain parser have the same + guessed attributes as when it was guessed inside the parser""" + u = mda.Universe(filename, topology_format='ITP', include_dir=GMX_DIR) + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + def test_sequential(self, universe): resids = np.array(list(range(2, 12)) + list(range(13, 23))) assert_equal(universe.residues.resids[:20], resids) @@ -453,3 +476,17 @@ def test_relative_path(self, tmpdir): with subsubdir.as_cwd(): u = mda.Universe("../test.itp") assert len(u.atoms) == 1 + + +def test_missing_elements_no_attribute(): + """Check that: + + 1) a warning is raised if elements are missing + 2) the elements attribute is not set + """ + wmsg = ("Element information is missing, elements attribute " + "will not be populated. If needed these can be ") + with pytest.warns(UserWarning, match=wmsg): + u = mda.Universe(ITP_atomtypes) + with pytest.raises(AttributeError): + _ = u.atoms.elements diff --git a/testsuite/MDAnalysisTests/topology/test_lammpsdata.py b/testsuite/MDAnalysisTests/topology/test_lammpsdata.py index 44a8af7236..2f65eda3cb 100644 --- a/testsuite/MDAnalysisTests/topology/test_lammpsdata.py +++ b/testsuite/MDAnalysisTests/topology/test_lammpsdata.py @@ -21,7 +21,7 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # import pytest -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose import numpy as np from io import StringIO @@ -92,6 +92,11 @@ def test_improper_member(self, top): def test_creates_universe(self, filename): u = mda.Universe(filename, format='DATA') + def test_guessed_attributes(self, filename): + u = mda.Universe(filename, format='DATA') + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + class TestLammpsData(LammpsBase): """Tests the reading of lammps .data topology files. @@ -263,8 +268,7 @@ def test_interpret_atom_style_missing(): class TestDumpParser(ParserBase): - expected_attrs = ['types'] - guessed_attrs = ['masses'] + expected_attrs = ['types', 'masses'] expected_n_atoms = 24 expected_n_residues = 1 expected_n_segments = 1 @@ -284,13 +288,28 @@ def test_masses_warning(self): with self.parser(self.ref_filename) as p: with pytest.warns(UserWarning, match='Guessed all Masses to 1.0'): p.parse() - + + def test_guessed_attributes(self, filename): + u = mda.Universe(filename, format='LAMMPSDUMP') + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + def test_id_ordering(self): # ids are nonsequential in file, but should get rearranged u = mda.Universe(self.ref_filename, format='LAMMPSDUMP') # the 4th in file has id==13, but should have been sorted assert u.atoms[3].id == 4 + def test_guessed_masses(self, filename): + u = mda.Universe(filename, format='LAMMPSDUMP') + expected = [1., 1., 1., 1., 1., 1., 1.] + assert_allclose(u.atoms.masses[:7], expected) + + def test_guessed_types(self, filename): + u = mda.Universe(filename, format='LAMMPSDUMP') + expected = ['2', '1', '1', '2', '1', '1', '2'] + assert (u.atoms.types[:7] == expected).all() + # this tests that topology can still be constructed if non-standard or uneven # column present. class TestDumpParserLong(TestDumpParser): diff --git a/testsuite/MDAnalysisTests/topology/test_minimal.py b/testsuite/MDAnalysisTests/topology/test_minimal.py index 6c275f0217..60f009b44b 100644 --- a/testsuite/MDAnalysisTests/topology/test_minimal.py +++ b/testsuite/MDAnalysisTests/topology/test_minimal.py @@ -60,7 +60,7 @@ def test_minimal_parser(filename, expected_n_atoms): @working_readers def test_universe_with_minimal(filename, expected_n_atoms): - u = mda.Universe(filename) + u = mda.Universe(filename, to_guess=()) assert len(u.atoms) == expected_n_atoms @@ -81,7 +81,7 @@ def test_minimal_parser_fail(filename,n_atoms): @nonworking_readers def test_minimal_n_atoms_kwarg(filename, n_atoms): # test that these can load when we supply the number of atoms - u = mda.Universe(filename, n_atoms=n_atoms) + u = mda.Universe(filename, n_atoms=n_atoms, to_guess=()) assert len(u.atoms) == n_atoms @@ -107,6 +107,6 @@ def test_memory_minimal_parser(array, order): @memory_reader def test_memory_universe(array, order): - u = mda.Universe(array, order=order) + u = mda.Universe(array, order=order, to_guess=()) assert len(u.atoms) == 10 diff --git a/testsuite/MDAnalysisTests/topology/test_mmtf.py b/testsuite/MDAnalysisTests/topology/test_mmtf.py index b05bd1767a..9e2a85f778 100644 --- a/testsuite/MDAnalysisTests/topology/test_mmtf.py +++ b/testsuite/MDAnalysisTests/topology/test_mmtf.py @@ -1,5 +1,5 @@ import pytest -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose import mmtf from unittest import mock @@ -9,6 +9,7 @@ from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import MMTF, MMTF_gz, MMTF_skinny, MMTF_skinny2 + class MMTFBase(ParserBase): expected_attrs = [ 'ids', 'names', 'types', 'altLocs', 'tempfactors', 'occupancies', @@ -44,7 +45,7 @@ class TestMMTFSkinny(MMTFBase): expected_n_residues = 134 expected_n_segments = 2 - + class TestMMTFSkinny2(MMTFBase): parser = mda.topology.MMTFParser.MMTFParser ref_filename = MMTF_skinny2 @@ -96,6 +97,10 @@ def test_icodes(self, u): def test_altlocs(self, u): assert all(u.atoms.altLocs[:3] == '') + def test_guessed_masses(self, u): + expected = [15.999, 12.011, 12.011, 15.999, 12.011, 15.999, 12.011] + assert_allclose(u.atoms.masses[:7], expected) + class TestMMTFUniverseFromDecoder(TestMMTFUniverse): @pytest.fixture() @@ -119,6 +124,10 @@ def test_universe_models(self, u): assert isinstance(m, AtomGroup) assert len(m) == 570 + def test_guessed_masses(self, u): + expected = [15.999, 12.011, 12.011, 15.999, 12.011, 15.999, 12.011] + assert_allclose(u.atoms.masses[:7], expected) + class TestMMTFgzUniverseFromDecoder(TestMMTFgzUniverse): @pytest.fixture() @@ -128,7 +137,7 @@ def u(self): class TestSelectModels(object): - # tests for 'model' keyword in select_atoms + # tests for 'model' keyword in select_atoms @pytest.fixture() def u(self): return mda.Universe(MMTF_gz) diff --git a/testsuite/MDAnalysisTests/topology/test_mol2.py b/testsuite/MDAnalysisTests/topology/test_mol2.py index 7378cdb01f..b6084b861e 100644 --- a/testsuite/MDAnalysisTests/topology/test_mol2.py +++ b/testsuite/MDAnalysisTests/topology/test_mol2.py @@ -23,11 +23,12 @@ from io import StringIO import numpy as np -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose import pytest import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase + from MDAnalysisTests.datafiles import ( mol2_molecule, mol2_molecules, @@ -178,6 +179,7 @@ class TestMOL2Base(ParserBase): 'ids', 'names', 'types', 'charges', 'resids', 'resnames', 'bonds', 'elements', ] + guessed_attrs = ['masses'] expected_n_atoms = 49 expected_n_residues = 1 @@ -235,11 +237,16 @@ def test_wrong_elements_warnings(): with pytest.warns(UserWarning, match='Unknown elements found') as record: u = mda.Universe(StringIO(mol2_wrong_element), format='MOL2') - # One warning from invalid elements, one from invalid masses - assert len(record) == 2 + # One warning from invalid elements, one from masses PendingDeprecationWarning + assert len(record) == 3 - expected = np.array(['N', '', ''], dtype=object) - assert_equal(u.atoms.elements, expected) + expected_elements = np.array(['N', '', ''], dtype=object) + guseed_masses = np.array([14.007, 0.0, 0.0], dtype=float) + gussed_types = np.array(['N.am', 'X.o2', 'XX.am']) + + assert_equal(u.atoms.elements, expected_elements) + assert_equal(u.atoms.types, gussed_types) + assert_allclose(u.atoms.masses, guseed_masses) def test_all_wrong_elements_warnings(): @@ -301,3 +308,9 @@ def test_unformat(): with pytest.raises(ValueError, match='Some atoms in the mol2 file'): u = mda.Universe(StringIO(mol2_resname_unformat), format='MOL2') + + +def test_guessed_masses(): + u = mda.Universe(mol2_molecules) + assert_allclose(u.atoms.masses[:7], [14.007, 32.06, + 14.007, 14.007, 15.999, 15.999, 12.011]) diff --git a/testsuite/MDAnalysisTests/topology/test_pdb.py b/testsuite/MDAnalysisTests/topology/test_pdb.py index 01fbcd8a5a..c176d50be1 100644 --- a/testsuite/MDAnalysisTests/topology/test_pdb.py +++ b/testsuite/MDAnalysisTests/topology/test_pdb.py @@ -24,7 +24,7 @@ import pytest import numpy as np -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase @@ -43,6 +43,7 @@ ) from MDAnalysis.topology.PDBParser import PDBParser from MDAnalysis import NoDataError +from MDAnalysis.guesser import tables _PDBPARSER = mda.topology.PDBParser.PDBParser @@ -252,8 +253,6 @@ def test_PDB_hex(): @pytest.mark.filterwarnings("error:Failed to guess the mass") def test_PDB_metals(): - from MDAnalysis.topology import tables - u = mda.Universe(StringIO(PDB_metals), format='PDB') assert len(u.atoms) == 4 @@ -310,11 +309,32 @@ def test_wrong_elements_warnings(): column which have been parsed and returns an appropriate warning. """ with pytest.warns(UserWarning, match='Unknown element XX found'): - u = mda.Universe(StringIO(PDB_wrong_ele), format='PDB') + u = mda.Universe(StringIO(PDB_wrong_ele,), format='PDB') + + expected_elements = np.array(['N', '', 'C', 'O', '', 'Cu', 'Fe', 'Mg'], + dtype=object) + gussed_types = np.array(['N', '', 'C', 'O', 'XX', 'CU', 'Fe', 'MG']) + guseed_masses = np.array([14.007, 0.0, 12.011, 15.999, 0.0, + 63.546, 55.847, 24.305], dtype=float) + + assert_equal(u.atoms.elements, expected_elements) + assert_equal(u.atoms.types, gussed_types) + assert_allclose(u.atoms.masses, guseed_masses) - expected = np.array(['N', '', 'C', 'O', '', 'Cu', 'Fe', 'Mg'], - dtype=object) - assert_equal(u.atoms.elements, expected) + +def test_guessed_masses_and_types_values(): + """Test that guessed masses and types have the expected values for universe + constructed from PDB file. + """ + u = mda.Universe(PDB, format='PDB') + gussed_types = np.array(['N', 'H', 'H', 'H', 'C', 'H', 'C', 'H', 'H', 'C']) + guseed_masses = [14.007, 1.008, 1.008, 1.008, + 12.011, 1.008, 12.011, 1.008, 1.008, 12.011] + failed_type_guesses = u.atoms.types == "" + + assert_allclose(u.atoms.masses[:10], guseed_masses) + assert_equal(u.atoms.types[:10], gussed_types) + assert not failed_type_guesses.any() def test_nobonds_error(): diff --git a/testsuite/MDAnalysisTests/topology/test_pdbqt.py b/testsuite/MDAnalysisTests/topology/test_pdbqt.py index c81f60cdc8..b2511a889e 100644 --- a/testsuite/MDAnalysisTests/topology/test_pdbqt.py +++ b/testsuite/MDAnalysisTests/topology/test_pdbqt.py @@ -23,11 +23,14 @@ import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase + from MDAnalysisTests.datafiles import ( PDBQT_input, # pdbqt_inputpdbqt.pdbqt PDBQT_tyrosol, # tyrosol.pdbqt.bz2 ) +from numpy.testing import assert_allclose + class TestPDBQT(ParserBase): parser = mda.topology.PDBQTParser.PDBQTParser @@ -47,11 +50,17 @@ class TestPDBQT(ParserBase): "occupancies", "tempfactors", ] + guessed_attrs = ['masses'] expected_n_atoms = 1805 expected_n_residues = 199 # resids go 2-102 then 2-99 expected_n_segments = 2 # res2-102 are A, 2-99 are B + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + assert_allclose(u.atoms.masses[:7], [14.007, 0., + 0., 12.011, 12.011, 0., 12.011]) + def test_footnote(): """just test that the Universe is built even in the presence of a diff --git a/testsuite/MDAnalysisTests/topology/test_pqr.py b/testsuite/MDAnalysisTests/topology/test_pqr.py index 569b964e3b..aa03d789ac 100644 --- a/testsuite/MDAnalysisTests/topology/test_pqr.py +++ b/testsuite/MDAnalysisTests/topology/test_pqr.py @@ -21,8 +21,7 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # from io import StringIO -from numpy.testing import assert_equal, assert_almost_equal -import pytest +from numpy.testing import assert_equal, assert_almost_equal, assert_allclose import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase @@ -52,12 +51,26 @@ def test_attr_size(self, top): assert len(top.resnames) == top.n_residues assert len(top.segids) == top.n_segments + expected_masses = [14.007, 1.008, 1.008, 1.008, 12.011, 1.008, 12.011] + expected_types = ['N', 'H', 'H', 'H', 'C', 'H', 'C'] + + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + assert_allclose(u.atoms.masses[:7], self.expected_masses) + + def test_guessed_types(self, filename): + u = mda.Universe(filename) + assert (u.atoms.types[:7] == self.expected_types).all() + class TestPQRParser2(TestPQRParser): ref_filename = PQR_icodes expected_n_atoms = 5313 expected_n_residues = 474 + expected_masses = [14.007, 12.011, 12.011, 15.999, 12.011, 12.011, 12.011] + expected_types = ['N', 'C', 'C', 'O', 'C', 'C', 'C'] + def test_record_types(): u = mda.Universe(PQR_icodes) @@ -81,6 +94,7 @@ def test_record_types(): ENDMDL ''' + def test_gromacs_flavour(): u = mda.Universe(StringIO(GROMACS_PQR), format='PQR') @@ -88,7 +102,6 @@ def test_gromacs_flavour(): # topology things assert u.atoms[0].type == 'O' assert u.atoms[0].segid == 'SYSTEM' - assert not u._topology.types.is_guessed assert_almost_equal(u.atoms[0].radius, 1.48, decimal=5) assert_almost_equal(u.atoms[0].charge, -0.67, decimal=5) # coordinatey things diff --git a/testsuite/MDAnalysisTests/topology/test_psf.py b/testsuite/MDAnalysisTests/topology/test_psf.py index 72a4622c1b..895f418514 100644 --- a/testsuite/MDAnalysisTests/topology/test_psf.py +++ b/testsuite/MDAnalysisTests/topology/test_psf.py @@ -148,7 +148,7 @@ def test_angles_total_counts(self, top): def test_dihedrals_total_counts(self, top): assert len(top.dihedrals.values) == 0 - + def test_impropers_total_counts(self, top): assert len(top.impropers.values) == 0 diff --git a/testsuite/MDAnalysisTests/topology/test_tprparser.py b/testsuite/MDAnalysisTests/topology/test_tprparser.py index 023a52dee4..bd1444a566 100644 --- a/testsuite/MDAnalysisTests/topology/test_tprparser.py +++ b/testsuite/MDAnalysisTests/topology/test_tprparser.py @@ -73,6 +73,8 @@ class TPRAttrs(ParserBase): parser = MDAnalysis.topology.TPRParser.TPRParser expected_attrs = [ "ids", + "types", + "masses", "names", "elements", "resids", diff --git a/testsuite/MDAnalysisTests/topology/test_txyz.py b/testsuite/MDAnalysisTests/topology/test_txyz.py index 17ca447e9b..72ab11d652 100644 --- a/testsuite/MDAnalysisTests/topology/test_txyz.py +++ b/testsuite/MDAnalysisTests/topology/test_txyz.py @@ -26,12 +26,14 @@ from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import TXYZ, ARC, ARC_PBC -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose + class TestTXYZParser(ParserBase): parser = mda.topology.TXYZParser.TXYZParser guessed_attrs = ['masses'] expected_attrs = ['ids', 'names', 'bonds', 'types', 'elements'] + expected_n_residues = 1 expected_n_atoms = 9 expected_n_segments = 1 @@ -60,18 +62,24 @@ def test_TXYZ_elements(): u = mda.Universe(TXYZ, format='TXYZ') element_list = np.array(['C', 'H', 'H', 'O', 'H', 'C', 'H', 'H', 'H'], dtype=object) assert_equal(u.atoms.elements, element_list) - - + + def test_missing_elements_noattribute(): """Check that: 1) a warning is raised if elements are missing 2) the elements attribute is not set """ - wmsg = ("Element information is missing, elements attribute will not be " - "populated") + wmsg = ("Element information is missing, elements attribute " + "will not be populated. If needed these can be ") with pytest.warns(UserWarning, match=wmsg): u = mda.Universe(ARC_PBC) with pytest.raises(AttributeError): _ = u.atoms.elements + +def test_guessed_masses(): + u = mda.Universe(TXYZ) + expected = [12.011, 1.008, 1.008, 15.999, 1.008, 12.011, + 1.008, 1.008, 1.008] + assert_allclose(u.atoms.masses, expected) diff --git a/testsuite/MDAnalysisTests/topology/test_xpdb.py b/testsuite/MDAnalysisTests/topology/test_xpdb.py index 9bce73cd44..617d5caf7b 100644 --- a/testsuite/MDAnalysisTests/topology/test_xpdb.py +++ b/testsuite/MDAnalysisTests/topology/test_xpdb.py @@ -27,6 +27,8 @@ XPDB_small, ) +from numpy.testing import assert_equal, assert_allclose + class TestXPDBParser(ParserBase): parser = mda.topology.ExtendedPDBParser.ExtendedPDBParser @@ -38,3 +40,13 @@ class TestXPDBParser(ParserBase): expected_n_atoms = 5 expected_n_residues = 5 expected_n_segments = 1 + + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + expected = [15.999, 15.999, 15.999, 15.999, 15.999] + assert_allclose(u.atoms.masses, expected) + + def test_guessed_types(self, filename): + u = mda.Universe(filename) + expected = ['O', 'O', 'O', 'O', 'O'] + assert_equal(u.atoms.types, expected) diff --git a/testsuite/MDAnalysisTests/topology/test_xyz.py b/testsuite/MDAnalysisTests/topology/test_xyz.py index e45591a238..8ce6ce45c7 100644 --- a/testsuite/MDAnalysisTests/topology/test_xyz.py +++ b/testsuite/MDAnalysisTests/topology/test_xyz.py @@ -30,13 +30,16 @@ XYZ_mini, ) +from numpy.testing import assert_equal, assert_allclose + class XYZBase(ParserBase): parser = mda.topology.XYZParser.XYZParser expected_n_residues = 1 expected_n_segments = 1 - expected_attrs = ['names', "elements"] - guessed_attrs = ['types', 'masses'] + expected_attrs = ['names', 'elements'] + guessed_attrs = ['masses', 'types'] + class TestXYZMini(XYZBase): ref_filename = XYZ_mini @@ -49,3 +52,13 @@ class TestXYZParser(XYZBase): @pytest.fixture(params=[XYZ, XYZ_bz2]) def filename(self, request): return request.param + + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + expected = [1.008, 1.008, 1.008, 1.008, 1.008, 1.008, 1.008] + assert_allclose(u.atoms.masses[:7], expected) + + def test_guessed_types(self, filename): + u = mda.Universe(filename) + expected = ['H', 'H', 'H', 'H', 'H', 'H', 'H'] + assert_equal(u.atoms.types[:7], expected) diff --git a/testsuite/MDAnalysisTests/transformations/test_positionaveraging.py b/testsuite/MDAnalysisTests/transformations/test_positionaveraging.py index 3eec056a5b..ba2c348bd0 100644 --- a/testsuite/MDAnalysisTests/transformations/test_positionaveraging.py +++ b/testsuite/MDAnalysisTests/transformations/test_positionaveraging.py @@ -13,7 +13,7 @@ def posaveraging_universes(): ''' Create the universe objects for the tests. ''' - u = md.Universe(datafiles.XTC_multi_frame) + u = md.Universe(datafiles.XTC_multi_frame, to_guess=()) transformation = PositionAverager(3) u.trajectory.add_transformations(transformation) return u @@ -24,7 +24,7 @@ def posaveraging_universes_noreset(): Create the universe objects for the tests. Position averaging reset is set to False. ''' - u = md.Universe(datafiles.XTC_multi_frame) + u = md.Universe(datafiles.XTC_multi_frame, to_guess=()) transformation = PositionAverager(3, check_reset=False) u.trajectory.add_transformations(transformation) return u @@ -106,6 +106,6 @@ def test_posavging_specific_noreset(posaveraging_universes_noreset): specr_avgd[...,idx] = ts.positions.copy() idx += 1 assert_array_almost_equal(ref_matrix_specr, specr_avgd[1,:,-1], decimal=5) - + From 101008bc98505d9fc63c73b3dd9a60b45c82c3b5 Mon Sep 17 00:00:00 2001 From: Lily Wang <31115101+lilyminium@users.noreply.github.com> Date: Sun, 20 Oct 2024 12:04:34 +1100 Subject: [PATCH 15/15] Update guesser docs (#4743) * update docs * i've forgotten how sphinx works * replace ref with class * update changelog * fix whitespace * Update default_guesser.py Co-authored-by: Irfan Alibay --------- Co-authored-by: Irfan Alibay --- package/CHANGELOG | 2 + package/MDAnalysis/guesser/default_guesser.py | 139 ++++++++++++++---- package/MDAnalysis/topology/CRDParser.py | 13 ++ package/MDAnalysis/topology/DLPolyParser.py | 6 + package/MDAnalysis/topology/DMSParser.py | 8 +- .../MDAnalysis/topology/ExtendedPDBParser.py | 6 + package/MDAnalysis/topology/FHIAIMSParser.py | 7 + package/MDAnalysis/topology/GMSParser.py | 7 + package/MDAnalysis/topology/GROParser.py | 6 + package/MDAnalysis/topology/ITPParser.py | 7 + package/MDAnalysis/topology/LAMMPSParser.py | 6 + package/MDAnalysis/topology/MMTFParser.py | 6 + package/MDAnalysis/topology/MOL2Parser.py | 7 + package/MDAnalysis/topology/PDBParser.py | 7 + package/MDAnalysis/topology/PDBQTParser.py | 6 + package/MDAnalysis/topology/PQRParser.py | 8 + package/MDAnalysis/topology/TXYZParser.py | 6 + package/MDAnalysis/topology/XYZParser.py | 6 + .../documentation_pages/guesser_modules.rst | 34 +++-- 19 files changed, 252 insertions(+), 35 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 64fcb63fe0..85a7208627 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -23,6 +23,8 @@ The rules for this file: * 2.8.0 Fixes + * Adds guessed attributes documentation back to each parser page + and updates overall guesser docs (Issue #4696) * Fix Bohrium (Bh) atomic mass in tables.py (PR #3753) * set `n_parts` to the total number of frames being analyzed if `n_parts` is bigger. (Issue #4685) diff --git a/package/MDAnalysis/guesser/default_guesser.py b/package/MDAnalysis/guesser/default_guesser.py index a64b023309..f49e75b24c 100644 --- a/package/MDAnalysis/guesser/default_guesser.py +++ b/package/MDAnalysis/guesser/default_guesser.py @@ -27,9 +27,70 @@ DefaultGuesser is a generic guesser class that has basic guessing methods. This class is a general purpose guesser that can be used with most topologies, -but being generic makes it the less accurate among all guessers. +but being generic makes it the least accurate among all guessers. +Guessing behavior +----------------- + +This section describes how each attribute is guessed by the DefaultGuesser. + +Masses +~~~~~~ + +We first attempt to look up the mass of an atom based on its element if the +element TopologyAttr is available. If not, we attempt to lookup the mass based +on the atom type (``type``) TopologyAttr. If neither of these is available, we +attempt to guess the atom type based on the atom name (``name``) and then +lookup the mass based on the guessed atom type. + + +Types +~~~~~ + +We attempt to guess the atom type based on the atom name (``name``). +The name is first stripped of any numbers and symbols, and then looked up in +the :data:`MDAnalysis.guesser.tables.atomelements` table. If the name is not +found, we continue checking variations of the name following the logic in +:meth:`DefaultGuesser.guess_atom_element`. Ultimately, if no match is found, +the first character of the stripped name is returned. + +Elements +~~~~~~~~ + +This follows the same method as guessing atom types. + + +Bonds +~~~~~ + +Bonds are guessed based on the distance between atoms. +See :meth:`DefaultGuesser.guess_bonds` for more details. + +Angles +~~~~~~ + +Angles are guessed based on the bonds between atoms. +See :meth:`DefaultGuesser.guess_angles` for more details. + +Dihedrals +~~~~~~~~~ + +Dihedrals are guessed based on the angles between atoms. +See :meth:`DefaultGuesser.guess_dihedrals` for more details. + +Improper Dihedrals +~~~~~~~~~~~~~~~~~~ + +Improper dihedrals are guessed based on the angles between atoms. +See :meth:`DefaultGuesser.guess_improper_dihedrals` for more details. + +Aromaticities +~~~~~~~~~~~~~ + +Aromaticity is guessed using RDKit's GetIsAromatic method. +See :meth:`DefaultGuesser.guess_aromaticities` for more details. + @@ -70,6 +131,23 @@ class DefaultGuesser(GuesserBase): You can use this guesser either directly through an instance, or through the :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` method. + Parameters + ---------- + universe : Universe + The Universe to apply the guesser on + box : np.ndarray, optional + The box of the Universe. This is used for bond guessing. + vdwradii : dict, optional + Dict relating atom types: vdw radii. This is used for bond guessing + fudge_factor : float, optional + The factor by which atoms must overlap each other to be considered + a bond. Larger values will increase the number of bonds found. [0.55] + lower_bound : float, optional + The minimum bond length. All bonds found shorter than this length + will be ignored. This is useful for parsing PDB with altloc records + where atoms with altloc A and B may be very close together and + there should be no chemical bond between them. [0.1] + Examples -------- to guess bonds for a universe:: @@ -84,8 +162,23 @@ class DefaultGuesser(GuesserBase): """ context = 'default' - def __init__(self, universe, **kwargs): - super().__init__(universe, **kwargs) + def __init__( + self, + universe, + box=None, + vdwradii=None, + fudge_factor=0.55, + lower_bound=0.1, + **kwargs + ): + super().__init__( + universe, + box=box, + vdwradii=vdwradii, + fudge_factor=fudge_factor, + lower_bound=lower_bound, + **kwargs + ) self._guesser_methods = { 'masses': self.guess_masses, 'types': self.guess_types, @@ -212,8 +305,19 @@ def guess_types(self, atom_types=None, indices_to_guess=None): def guess_atom_element(self, atomname): """Guess the element of the atom from the name. - Looks in dict to see if element is found, otherwise it uses the first - character in the atomname. The table comes from CHARMM and AMBER atom + First all numbers and symbols are stripped from the name. + Then the name is looked up in the + :data:`MDAnalysis.guesser.tables.atomelements` table. + If the name is not found, we remove the last character or + first character from the name and check the table for both, + with a preference for removing the last character. If the name is + still not found, we iteratively continue to remove the last character + or first character until we find a match. If ultimately no match + is found, the first character of the stripped name is returned. + + If the input name is an empty string, an empty string is returned. + + The table comes from CHARMM and AMBER atom types, where the first character is not sufficient to determine the atom type. Some GROMOS ions have also been added. @@ -270,26 +374,11 @@ def guess_bonds(self, atoms=None, coords=None): Parameters ---------- - atoms : AtomGroup - atoms for which bonds should be guessed - fudge_factor : float, optional - The factor by which atoms must overlap eachother to be considered a - bond. Larger values will increase the number of bonds found. [0.55] - vdwradii : dict, optional - To supply custom vdwradii for atoms in the algorithm. Must be a - dict of format {type:radii}. The default table of van der Waals - radii is hard-coded as :data:`MDAnalysis.guesser.tables.vdwradii`. - Any user defined vdwradii passed as an argument will supercede the - table values. [``None``] - lower_bound : float, optional - The minimum bond length. All bonds found shorter than this length - will be ignored. This is useful for parsing PDB with altloc records - where atoms with altloc A and B maybe very close together and - there should be no chemical bond between them. [0.1] - box : array_like, optional - Bonds are found using a distance search, if unit cell information - is given, periodic boundary conditions will be considered in the - distance search. [``None``] + atoms: AtomGroup + atoms for which bonds should be guessed + coords: np.ndarray, optional + coordinates of the atoms. If not provided, the coordinates + of the ``atoms`` in the universe are used. Returns ------- diff --git a/package/MDAnalysis/topology/CRDParser.py b/package/MDAnalysis/topology/CRDParser.py index 5e4406732f..9a1fc72ec0 100644 --- a/package/MDAnalysis/topology/CRDParser.py +++ b/package/MDAnalysis/topology/CRDParser.py @@ -33,6 +33,12 @@ Residues are detected through a change is either resid or resname while segments are detected according to changes in segid. +.. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. _CRD: https://www.charmmtutorial.org/index.php/CHARMM:The_Basics @@ -72,6 +78,13 @@ class CRDParser(TopologyReaderBase): - Resnums - Segids + + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionchanged:: 2.8.0 Type and mass are not longer guessed here. Until 3.0 these will still be set by default through through universe.guess_TopologyAttrs() API. diff --git a/package/MDAnalysis/topology/DLPolyParser.py b/package/MDAnalysis/topology/DLPolyParser.py index b85a0d188c..4148a38c06 100644 --- a/package/MDAnalysis/topology/DLPolyParser.py +++ b/package/MDAnalysis/topology/DLPolyParser.py @@ -30,6 +30,12 @@ - Atomnames - Atomids +.. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. _Poly: http://www.stfc.ac.uk/SCD/research/app/ccg/software/DL_POLY/44516.aspx Classes diff --git a/package/MDAnalysis/topology/DMSParser.py b/package/MDAnalysis/topology/DMSParser.py index f165272fc2..f37a854c72 100644 --- a/package/MDAnalysis/topology/DMSParser.py +++ b/package/MDAnalysis/topology/DMSParser.py @@ -86,11 +86,17 @@ class DMSParser(TopologyReaderBase): Segment: - Segids + .. note:: + + By default, atomtypes will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. _DESRES: http://www.deshawresearch.com .. _Desmond: http://www.deshawresearch.com/resources_desmond.html .. _DMS: http://www.deshawresearch.com/Desmond_Users_Guide-0.7.pdf .. versionchanged:: 2.8.0 - Removed type and mass guessing (attributes guessing takes place now + Removed type guessing (attributes guessing takes place now through universe.guess_TopologyAttrs() API). """ diff --git a/package/MDAnalysis/topology/ExtendedPDBParser.py b/package/MDAnalysis/topology/ExtendedPDBParser.py index ec6e1e527d..b41463403e 100644 --- a/package/MDAnalysis/topology/ExtendedPDBParser.py +++ b/package/MDAnalysis/topology/ExtendedPDBParser.py @@ -78,6 +78,12 @@ class ExtendedPDBParser(PDBParser.PDBParser): - bonds - formalcharges + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + See Also -------- diff --git a/package/MDAnalysis/topology/FHIAIMSParser.py b/package/MDAnalysis/topology/FHIAIMSParser.py index fcf95691f3..8738d5e3ce 100644 --- a/package/MDAnalysis/topology/FHIAIMSParser.py +++ b/package/MDAnalysis/topology/FHIAIMSParser.py @@ -66,6 +66,13 @@ class FHIAIMSParser(TopologyReaderBase): Creates the following attributes: - Atomnames + + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionchanged:: 2.8.0 Removed type and mass guessing (attributes guessing takes place now through universe.guess_TopologyAttrs() API). diff --git a/package/MDAnalysis/topology/GMSParser.py b/package/MDAnalysis/topology/GMSParser.py index 812207ed67..2223cc4275 100644 --- a/package/MDAnalysis/topology/GMSParser.py +++ b/package/MDAnalysis/topology/GMSParser.py @@ -73,6 +73,13 @@ class GMSParser(TopologyReaderBase): - names - atomic charges + + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionadded:: 0.9.1 .. versionchanged:: 2.8.0 Removed type and mass guessing (attributes guessing takes place now diff --git a/package/MDAnalysis/topology/GROParser.py b/package/MDAnalysis/topology/GROParser.py index 6bcaec24cb..ebb51e7cd0 100644 --- a/package/MDAnalysis/topology/GROParser.py +++ b/package/MDAnalysis/topology/GROParser.py @@ -67,6 +67,12 @@ class GROParser(TopologyReaderBase): - atomids - atomnames + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionchanged:: 2.8.0 Removed type and mass guessing (attributes guessing takes place now through universe.guess_TopologyAttrs() API). diff --git a/package/MDAnalysis/topology/ITPParser.py b/package/MDAnalysis/topology/ITPParser.py index d855216027..9c9dd37976 100644 --- a/package/MDAnalysis/topology/ITPParser.py +++ b/package/MDAnalysis/topology/ITPParser.py @@ -473,6 +473,13 @@ class ITPParser(TopologyReaderBase): .. _ITP: http://manual.gromacs.org/current/reference-manual/topologies/topology-file-formats.html#molecule-itp-file .. _TOP: http://manual.gromacs.org/current/reference-manual/file-formats.html#top + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation + if they are not read from the input file. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionchanged:: 2.2.0 no longer adds angles for water molecules with SETTLE constraint .. versionchanged:: 2.8.0 diff --git a/package/MDAnalysis/topology/LAMMPSParser.py b/package/MDAnalysis/topology/LAMMPSParser.py index 62664b568b..52a58f7729 100644 --- a/package/MDAnalysis/topology/LAMMPSParser.py +++ b/package/MDAnalysis/topology/LAMMPSParser.py @@ -27,6 +27,12 @@ Parses data_ or dump_ files produced by LAMMPS_. +.. note:: + + By default, masses will be guessed on Universe creation if they are not + read from the input file. This may change in release 3.0. + See :ref:`Guessers` for more information. + .. _LAMMPS: http://lammps.sandia.gov/ .. _data: DATA file format: :http://lammps.sandia.gov/doc/2001/data_format.html .. _dump: http://lammps.sandia.gov/doc/dump.html diff --git a/package/MDAnalysis/topology/MMTFParser.py b/package/MDAnalysis/topology/MMTFParser.py index e9332e9d68..5a58f1b245 100644 --- a/package/MDAnalysis/topology/MMTFParser.py +++ b/package/MDAnalysis/topology/MMTFParser.py @@ -64,6 +64,12 @@ - segid - model + .. note:: + + By default, masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + Classes ------- diff --git a/package/MDAnalysis/topology/MOL2Parser.py b/package/MDAnalysis/topology/MOL2Parser.py index 4345ca0efe..f554985875 100644 --- a/package/MDAnalysis/topology/MOL2Parser.py +++ b/package/MDAnalysis/topology/MOL2Parser.py @@ -78,6 +78,13 @@ class MOL2Parser(TopologyReaderBase): - Elements + .. note:: + + By default, masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + + Notes ----- Elements are obtained directly from the SYBYL atom types. If some atoms have diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index bad6d2bc6d..8349be9133 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -35,6 +35,13 @@ :mod:`~MDAnalysis.topology.ExtendedPDBParser`) that can handle residue numbers up to 99,999. +.. note:: + + Atomtypes will be created from elements if they are present and valid. + Otherwise, they will be guessed on Universe creation. + By default, masses will also be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. .. Note:: diff --git a/package/MDAnalysis/topology/PDBQTParser.py b/package/MDAnalysis/topology/PDBQTParser.py index 9764082021..435ec5678c 100644 --- a/package/MDAnalysis/topology/PDBQTParser.py +++ b/package/MDAnalysis/topology/PDBQTParser.py @@ -32,6 +32,12 @@ * Reads a PDBQT file line by line and does not require sequential atom numbering. * Multi-model PDBQT files are not supported. +.. note:: + + By default, masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + Notes ----- Only reads atoms and their names; connectivity is not diff --git a/package/MDAnalysis/topology/PQRParser.py b/package/MDAnalysis/topology/PQRParser.py index 1adcd7fba2..9ef6d3e6f9 100644 --- a/package/MDAnalysis/topology/PQRParser.py +++ b/package/MDAnalysis/topology/PQRParser.py @@ -80,6 +80,14 @@ class PQRParser(TopologyReaderBase): - Resnames - Segids + .. note:: + + Atomtypes will be read from the input file if they are present + (e.g. GROMACS PQR files). Otherwise, they will be guessed on Universe + creation. By default, masses will also be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionchanged:: 0.9.0 Read chainID from a PQR file and use it as segid (before we always used diff --git a/package/MDAnalysis/topology/TXYZParser.py b/package/MDAnalysis/topology/TXYZParser.py index 0781488c9d..206f381e9e 100644 --- a/package/MDAnalysis/topology/TXYZParser.py +++ b/package/MDAnalysis/topology/TXYZParser.py @@ -73,6 +73,12 @@ class TXYZParser(TopologyReaderBase): - Atomtypes - Elements (if all atom names are element symbols) + .. note:: + + By default, masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionadded:: 0.17.0 .. versionchanged:: 2.4.0 Adding the `Element` attribute if all names are valid element symbols. diff --git a/package/MDAnalysis/topology/XYZParser.py b/package/MDAnalysis/topology/XYZParser.py index cb0df129e0..5fe736fec6 100644 --- a/package/MDAnalysis/topology/XYZParser.py +++ b/package/MDAnalysis/topology/XYZParser.py @@ -59,6 +59,12 @@ class XYZParser(TopologyReaderBase): Creates the following attributes: - Atomnames + .. note:: + + By default, atomtypes and masses will be guessed on Universe creation. + This may change in release 3.0. + See :ref:`Guessers` for more information. + .. versionadded:: 0.9.1 diff --git a/package/doc/sphinx/source/documentation_pages/guesser_modules.rst b/package/doc/sphinx/source/documentation_pages/guesser_modules.rst index 7747fdc380..96cb324270 100644 --- a/package/doc/sphinx/source/documentation_pages/guesser_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/guesser_modules.rst @@ -1,19 +1,30 @@ .. Contains the formatted docstrings from the guesser modules located in 'mdanalysis/package/MDAnalysis/guesser' +.. _Guessers: + ************************** Guesser modules ************************** -This module contains the context-aware guessers, which are used by the :meth:`~MDAnalysis.core.Universe.Universe.guess_TopologyAttrs` API. Context-aware guessers' main purpose +This module contains the context-aware guessers, which are used by the :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` API. Context-aware guessers' main purpose is to be tailored guesser classes that target specific file format or force field (eg. PDB file format, or Martini forcefield). -Having such guessers makes attribute guessing more accurate and reliable than having generic guessing methods that doesn't fit all topologies. +Having such guessers makes attribute guessing more accurate and reliable than having generic guessing methods that don't fit all scenarios. Example uses of guessers ------------------------ +Default behavior +~~~~~~~~~~~~~~~~ + +By default, MDAnalysis will guess the "mass" and "type" (atom type) attributes for all particles in the Universe +using the :class:`~MDAnalysis.guesser.default_guesser.DefaultGuesser` at the time of Universe creation, +if they are not read from the input file. +Please see the :class:`~MDAnalysis.guesser.default_guesser.DefaultGuesser` for more information. + + + Guessing using :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Guessing can be done through the Universe's :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` as following:: import MDAnalysis as mda @@ -24,12 +35,12 @@ Guessing can be done through the Universe's :meth:`~MDAnalysis.core.universe.Uni u.guess_TopologyAttrs(to_guess=['elements']) print(u.atoms.elements) # print ['N' 'H' 'H' ... 'NA' 'NA' 'NA'] -In the above example, we passed ``elements`` as the attribute we want to guess, and +In the above example, we passed ``elements`` as the attribute we want to guess :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` guess then add it as a topology attribute to the ``AtomGroup`` of the universe. -If the attribute already exist in the universe, passing the attribute of interest to the ``to_guess`` parameter will only fill the empty values of the attribute if any exists. -To override all the attribute values, you can pass the attribute to the ``force_guess`` parameter instead of the to_guess one as following:: +If the attribute already exists in the universe, passing the attribute of interest to the ``to_guess`` parameter will only fill the empty values of the attribute if any exists. +To override all the attribute values, you can pass the attribute to the ``force_guess`` parameter instead of ``to_guess`` as following:: import MDAnalysis as mda from MDAnalysisTests.datafiles import PRM12 @@ -38,9 +49,14 @@ To override all the attribute values, you can pass the attribute to the ``force_ u.guess_TopologyAttrs(force_guess=['types']) # types ['H', 'O', ..] -N.B.: If you didn't pass any ``context`` to the API, it will use the :class:`~MDAnalysis.guesser.default_guesser.DefaultGuesser` -.. rubric:: available guessers +.. note:: + The default ``context`` will use the :class:`~MDAnalysis.guesser.default_guesser.DefaultGuesser` + + + + +.. rubric:: Available guessers .. toctree:: :maxdepth: 1 @@ -48,7 +64,7 @@ N.B.: If you didn't pass any ``context`` to the API, it will use the :class:`~MD guesser_modules/default_guesser -.. rubric:: guesser core modules +.. rubric:: Guesser core modules The remaining pages are primarily of interest to developers as they contain functions and classes that are used in the implementation of