diff --git a/AIMSim-demo.ipynb b/AIMSim-demo.ipynb index bb162ad..0a08e04 100644 --- a/AIMSim-demo.ipynb +++ b/AIMSim-demo.ipynb @@ -112,13 +112,13 @@ "|1|morgan|\n", "|2|topological|\n", "|3|daylight|\n", - "|4|minhash|\n", "\n", "Each of these fingerprints should be generally applicable for chemical problems, though they are all provided to serve as an easy way to compare the results according to fingerprinting approach.\n", "\n", "Additional descriptors are included with _AIMSim_ which are not mathematically compatible with some of the similarity measures. When such a descriptor is selected, the corresponding similarity measure will be removed from the dropdown.\n", "\n", "The `Exp. Descriptors` checkbox adds additional molecular descriptors into the `Molecular Descriptor` dropdown. These are marked as _experimental_ because they are generated using third-party libraries over which we have very little or no control. The descriptors generated by these libraries should be used only when the user has a very specific need for a descriptor as implemented in one of the packages below:\n", + " - [minhash](https://github.com/reymond-group/mhfp): Much faster fingerprint for similarity searches, supports non-binary similarity metrics.\n", " - [ccbmlib](https://doi.org/10.12688/f1000research.22292.2): All molecular fingerprints included in the `ccbmlib` library have been reproduced in _AIMSim_. Read about these fingerprints [in the `ccbmlib` repository](https://github.com/vogt-m/ccbmlib).\n", " - [mordred](https://doi.org/10.1186/s13321-018-0258-y): All 1000+ descriptors included in `mordred` are available in _AIMSim_, though as of January 2022 it seems that `mordred` is no longer being maintained and has a significant amount of bugs. Use at your own risk. 
(command line only)\n", " - [PaDELPy](https://doi.org/10.1002/jcc.21707): [This package](https://github.com/ecrl/padelpy) provides access to all of the molecular descriptors included as part of the PaDEL-Descriptor standalone Java program. (command line only)\n", @@ -3227,7 +3227,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.15" } }, "nbformat": 4, diff --git a/README.md b/README.md index 246bf97..b718bc9 100644 --- a/README.md +++ b/README.md @@ -70,10 +70,10 @@ Start `AIMSim` with a prepared configuration YAML file (`config.yaml`): 1. Morgan Fingerprint (Equivalent to the ECFP fingerprints) 2. RDKit Topological Fingerprint 3. RDKit Daylight Fingerprint -4. MinHash Fingerprint (see [MHFP](https://github.com/reymond-group/mhfp)) _The following are available via command line use (config.yaml) only:_ +4. MinHash Fingerprint (see [MHFP](https://github.com/reymond-group/mhfp)) 5. All fingerprints available from the [ccbmlib](https://github.com/vogt-m/ccbmlib) package (_specify 'ccbmlib:descriptorname' for command line input_). 6. All descriptors and fingerprints available from [PaDELPy](https://github.com/ecrl/padelpy), an interface to PaDEL-Descriptor. (_specify 'padelpy:descriptorname' for command line input._). 7. All descriptors available through the [Mordred](https://github.com/mordred-descriptor/mordred) library (_specify 'mordred:descriptorname' for command line input._). To enable this option, you must install with `pip install 'aimsim[mordred]'` (see disclaimer in the Installation section above). 
diff --git a/aimsim/ops/descriptor.py b/aimsim/ops/descriptor.py index 72aed31..87cb0f5 100644 --- a/aimsim/ops/descriptor.py +++ b/aimsim/ops/descriptor.py @@ -265,7 +265,7 @@ def _set_ccbmlib_fingerprint(self, molecule_graph, descriptor, **kwargs): self.label_ = descriptor self.params_ = {} - def _set_minhash_fprint(self, molecule_graph, **kwargs): + def _set_minhash_fingerprint(self, molecule_graph, **kwargs): """Set the descriptor to the minhash fingerprint. Args: @@ -273,7 +273,7 @@ def _set_minhash_fprint(self, molecule_graph, **kwargs): """ mhfp_encoder = MHFPEncoder( - n_permutations=kwargs["n_premutations"], + n_permutations=kwargs["n_permutations"], seed=kwargs["seed"], ) fp = mhfp_encoder.encode_mol( @@ -307,7 +307,7 @@ def make_fingerprint( morgan_params = {"radius": 3, "n_bits": 1024} morgan_params.update(fingerprint_params) self._set_morgan_fingerprint(molecule_graph=molecule_graph, **morgan_params) - if fingerprint_type == "minhash_fingerprint": + elif fingerprint_type == "minhash_fingerprint": minhash_params = { "n_permutations": 2048, "seed": 42, @@ -499,7 +499,6 @@ def get_supported_fprints(): "morgan_fingerprint", "topological_fingerprint", "daylight_fingerprint", - "minhash_fingerprint", ] @staticmethod @@ -515,6 +514,7 @@ def get_all_supported_descriptors(): "morgan_fingerprint", "topological_fingerprint", "daylight_fingerprint", + "minhash_fingerprint", "maccs_keys", "atom-pair_fingerprint", "torsion_fingerprint", diff --git a/tests/test_Descriptor.py b/tests/test_Descriptor.py index ebdf11d..662440f 100644 --- a/tests/test_Descriptor.py +++ b/tests/test_Descriptor.py @@ -273,6 +273,31 @@ def test_padelpy_descriptors(self): with self.assertRaises(ValueError): descriptor.to_rdkit() + def test_minhash_fingerprint(self): + """Test creation of minhash fingerprint""" + mol_graph = MolFromSmiles("CCOCC") + descriptor = Descriptor() + descriptor.make_fingerprint( + molecule_graph=mol_graph, fingerprint_type="minhash_fingerprint" + ) + 
self.assertTrue( + descriptor.check_init(), + "Expected Descriptor object to be initialized", + ) + self.assertEqual( + descriptor.label_, + "minhash_fingerprint", + "Expected label of descriptor initialized with " + "{} to match the fingerprint".format("minhash_fingerprint"), + ) + self.assertIsInstance( + descriptor.to_numpy(), + np.ndarray, + "Expected numpy.ndarray from to_numpy()", + ) + with self.assertRaises(ValueError): + descriptor.to_rdkit() + def test_ccbmlib_descriptors(self): """Test ability to passthrough descriptors to ccbmlib.""" mol_graph = MolFromSmiles("CCOCC") @@ -310,6 +335,7 @@ def test_exptl_descriptors(self): "maccs_keys", "atom-pair_fingerprint", "torsion_fingerprint", + "minhash_fingerprint", ] for desc in fprint_list: descriptor = Descriptor()