Merge pull request #12 from lunamorrow/parser_and_tests

Merge OpenBabelParser and Unit Tests into main
MDAnalysis · Jul 31, 2024 · c15500b · c15500b
2 parents 74f52d9 + a24a6ab
commit c15500b
Show file tree

Hide file tree

Showing 9 changed files with 316 additions and 23 deletions.
diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml
@@ -76,7 +76,7 @@ jobs:
  - name: Install package
  run: |
  python --version
- python -m pip install . --no-deps
+ python -m pip install . --no-deps 
 
  - name: Python information
  run: |
@@ -103,7 +103,7 @@ jobs:
 
 
  pylint_check:
- if: "github.repository == 'MDAnalysis/mda_openbabel_converter'"
+ if: "github.repository == 'lunamorrow/mda_openbabel_converter'"
  runs-on: ubuntu-latest
 
  steps:
@@ -141,7 +141,7 @@ jobs:
 
  - name: Install dependencies
  run: |
- pip install pipx twine
+ pip install pipx twine "openbabel<3.0.0"  # quoted: a bare '<' is a shell input redirect
 
  - name: Build package
  run: |

diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml
@@ -1,4 +1,4 @@
-name: mda_openbabel_converter-test
+name: mda_openbabel_converter
 channels:
  - conda-forge
  - defaults
@@ -9,6 +9,7 @@ dependencies:
 
  # MDAnalysis
  - MDAnalysis
+ - MDAnalysisTests
 
  # OpenBabel
  - openbabel

diff --git a/mda_openbabel_converter/OpenBabel.py b/mda_openbabel_converter/OpenBabel.py
@@ -8,7 +8,7 @@
 from MDAnalysis.core.groups import AtomGroup
 
 try:
- import openbabel as OB
+ from openbabel import openbabel as OB
  from openbabel import OBMol
 except ImportError:
  print("Cannot find openbabel, install with 'pip install openbabel==2.4.0'")

diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py
@@ -3,27 +3,207 @@
 """
 
 import MDAnalysis as mda
-from MDAnalysis.topology.base import TopologyReaderBase
+from MDAnalysis.topology.base import TopologyReaderBase, change_squash
+from MDAnalysis.core.topology import Topology
+from MDAnalysis.topology import guessers
+from MDAnalysis.converters.base import ConverterBase
+from MDAnalysis.core.topologyattrs import (
+ Atomids,
+ Atomnames,
+ Atomtypes,
+ Elements,
+ Masses,
+ Charges,
+ Aromaticities,
+ Bonds,
+ Resids,
+ Resnums,
+ Resnames,
+ RSChirality,
+ Segids,
+ AltLocs,
+ ChainIDs,
+ ICodes,
+ Occupancies,
+ Tempfactors,
+)
+import warnings
+import numpy as np
 
-class OpenBabelParser():
+HAS_OBABEL = False  # flipped to True below only if the openbabel import succeeds
+NEUTRON_MASS = 1.008  # AMU; exact-vs-atomic mass gap that triggers the isotope warning
+
+try:
+ import openbabel
+ from openbabel import openbabel as ob
+ from openbabel.openbabel import OBMol, OBResidue, GetSymbol
+ from openbabel.openbabel import *  # NOTE(review): wildcard looks redundant with the explicit imports above - confirm
+ HAS_OBABEL = True
+except ImportError:
+ warnings.warn("Cannot find openbabel, install with `mamba install -c "
+ "conda-forge openbabel`")
+
+
+class OpenBabelParser(TopologyReaderBase):
     """
-    Inherits from TopologyReaderBase and converts an OpenBabel OBMol to a 
-    MDAnalysis Topology or adds it to a pre-existing Topology. This parser will 
+    Inherits from TopologyReaderBase and converts an OpenBabel OBMol to an
+    MDAnalysis Topology or adds it to a pre-existing Topology. This parser
     does not work in the reverse direction.
     """
+    format = 'OPENBABEL'
+
+    @staticmethod
     def _format_hint(thing):
         """
-        Base function to check if the parser can actually parse this “thing” 
-        (i.e., is it a valid OpenBabel OBMol with no missing information, that 
-        can be converted to a MDAnalysis Topology?)
+        Base function to check if the parser can actually parse this “thing”
+        (i.e., is it a valid OpenBabel OBMol that can be converted to an
+        MDAnalysis Topology?)
         """
-        pass
+        if HAS_OBABEL is False:
+            return False
+        else:
+            return isinstance(thing, ob.OBMol)
 
     def parse(self, **kwargs):
         """
         Accepts an OpenBabel OBMol and returns a MDAnalysis Topology. Will need
         to extract the number of atoms, number of residues, number of segments,
-        atom_residue index, residue_segment index and other attributes from the
-        OBMol to initialise a new Topology. 
+        atom_residue index, residue_segment index and all of the atoms'
+        relevant attributes from the OBMol to initialise a new Topology.
         """
-        pass
+        mol = self.filename
+
+        # Atoms
+        names = []
+        resnums = []
+        resnames = []
+        elements = []
+        masses = []
+        charges = []
+        aromatics = []
+        ids = []
+        atomtypes = []
+        segids = []
+        chainids = []
+        icodes = []
+
+        if mol.Empty():
+            return Topology(n_atoms=0,
+                            n_res=0,
+                            n_seg=0,
+                            attrs=None,
+                            atom_resindex=None,
+                            residue_segindex=None)
+
+        for atom in ob.OBMolAtomIter(mol):
+            # Name set with element and id, as name not stored by OpenBabel
+            a_id = atom.GetIdx()
+            name = "%s%d" % (GetSymbol(atom.GetAtomicNum()), a_id)
+            names.append(name)
+            atomtypes.append(atom.GetType())
+            ids.append(a_id)
+            masses.append(atom.GetExactMass())
+            if abs(atom.GetExactMass()-atom.GetAtomicMass()) >= NEUTRON_MASS:
+                warnings.warn(
+                    f"Exact mass and atomic mass of atom ID: {a_id} are more"
+                    " than 1.008 AMU different. Be aware of isotopes,"
+                    " which are NOT flagged by MDAnalysis.")
+            charges.append(atom.GetPartialCharge())
+
+            # convert atomic number to element
+            elements.append(GetSymbol(atom.GetAtomicNum()))
+
+            # only for PDB and MOL2
+            if atom.HasResidue():
+                resid = atom.GetResidue()
+                resnums.append(resid.GetNum())
+                resnames.append(resid.GetName())
+                chainids.append(resid.GetChain())
+                icodes.append(resid.GetInsertionCode())
+
+            aromatics.append(atom.IsAromatic())
+
+        # make Topology attributes
+        attrs = []
+        n_atoms = len(ids)
+
+        if resnums and (len(resnums) != len(ids)):
+            raise ValueError(
+                "ResidueInfo is only partially available in the molecule."
+            )
+
+        # * Attributes always present *
+
+        # Atom attributes
+        for vals, Attr, dtype in (
+            (ids, Atomids, np.int32),
+            (elements, Elements, object),
+            (masses, Masses, np.float32),
+            (aromatics, Aromaticities, bool),
+        ):
+            attrs.append(Attr(np.array(vals, dtype=dtype)))
+
+        # Bonds (OpenBabel atom indices are 1-based; MDAnalysis needs 0-based)
+        bonds = []
+        bond_orders = []
+        for bond_idx in range(0, mol.NumBonds()):
+            bond = mol.GetBond(bond_idx)
+            bonds.append((bond.GetBeginAtomIdx()-1, bond.GetEndAtomIdx()-1))
+            bond_orders.append(float(bond.GetBondOrder()))
+        attrs.append(Bonds(bonds, order=bond_orders))
+
+        # * Optional attributes *
+        attrs.append(Atomnames(np.array(names, dtype=object)))
+
+        # Atom type
+        if atomtypes:
+            attrs.append(Atomtypes(np.array(atomtypes, dtype=object)))
+        else:
+            atomtypes = guessers.guess_types(names)
+            attrs.append(Atomtypes(atomtypes, guessed=True))
+
+        # Partial charges
+        if charges:
+            attrs.append(Charges(np.array(charges, dtype=np.float32)))
+        else:
+            pass  # no guesser yet
+
+        # Residue
+        if resnums:
+            resnums = np.array(resnums, dtype=np.int32)
+            resnames = np.array(resnames, dtype=object)
+            icodes = np.array(icodes, dtype=object)
+            residx, (resnums, resnames, icodes) = change_squash(
+                (resnums, resnames, icodes),
+                (resnums, resnames, icodes))
+            n_residues = len(resnums)
+            for vals, Attr, dtype in (
+                (resnums, Resids, np.int32),
+                (resnums.copy(), Resnums, np.int32),
+                (resnames, Resnames, object),
+                (icodes, ICodes, object),
+            ):
+                attrs.append(Attr(np.array(vals, dtype=dtype)))
+        else:
+            attrs.append(Resids(np.array([1])))
+            attrs.append(Resnums(np.array([1])))
+            residx = None
+            n_residues = 1
+
+        # Segment
+        if len(segids) and not any(val is None for val in segids):
+            segidx, (segids,) = change_squash((segids,), (segids,))
+            n_segments = len(segids)
+            attrs.append(Segids(segids))
+        else:
+            n_segments = 1
+            attrs.append(Segids(np.array(['SYSTEM'], dtype=object)))
+            segidx = None
+
+        # create topology
+        top = Topology(n_atoms, n_residues, n_segments,
+                       attrs=attrs,
+                       atom_resindex=residx,
+                       residue_segindex=segidx)
+
+        return top
diff --git a/mda_openbabel_converter/__init__.py b/mda_openbabel_converter/__init__.py
@@ -3,10 +3,6 @@
 A package to convert between MDAnalysis and OpenBabel Objects
 """
 
-# Add imports here
 from importlib.metadata import version
-from .OpenBabel import OpenBabelReader
-from .OpenBabel import OpenBabelConverter
-# from .OpenBabelParser import OpenBabelTopologyParser
 
 __version__ = version("mda_openbabel_converter")
diff --git a/mda_openbabel_converter/data/files.py b/mda_openbabel_converter/data/files.py
@@ -12,5 +12,5 @@
  "MDANALYSIS_LOGO", # example file of MDAnalysis logo
 ]
 
-import importlib.resources
-MDANALYSIS_LOGO = importlib.resources.files(__name__) / "mda.txt"
+from importlib.resources import files
+MDANALYSIS_LOGO = files("mda_openbabel_converter") / "data" / "mda.txt"
diff --git a/mda_openbabel_converter/tests/conftest.py b/mda_openbabel_converter/tests/conftest.py
@@ -8,7 +8,6 @@
 # https://docs.pytest.org/en/stable/how-to/fixtures.html#scope-sharing-fixtures-across-classes-modules-packages-or-session
 
 import pytest
-
 from mda_openbabel_converter.data.files import MDANALYSIS_LOGO