From 62d81c6f52bf4ab3362d7e27f45ec18ffd70523b Mon Sep 17 00:00:00 2001
From: Roman Joeres
Date: Fri, 19 Apr 2024 12:34:37 +0200
Subject: [PATCH] Fixed bug in vector metric computation

---
Reviewer notes (text between the "---" separator and the first "diff --git"
line is not part of the commit):

* vectors.py, SIM_OPTIONS branch: "mcconnaughey" is dropped from the list of
  methods whose integer embeddings are converted with iterable2bitvect, so it
  now takes the `else` branch of that check.
* vectors.py, DIST_OPTIONS branch: the element dtype is now chosen once, before
  the conversion, instead of converting to float64 and then re-converting to
  bool for some methods. "jaccard" joins the methods that get np.bool_.
  The condition has to read `method in [...]`: a bare non-empty list literal is
  always truthy, which would select np.bool_ for every distance metric and
  never reach np.float64.
* setup.py: classifier and packaging-flag changes only.
* The `index` hashes below are those of the originally committed blobs and
  predate the `method in` correction.

 datasail/cluster/vectors.py | 7 +++----
 setup.py                    | 7 ++++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/datasail/cluster/vectors.py b/datasail/cluster/vectors.py
index 6171107..2049cdd 100644
--- a/datasail/cluster/vectors.py
+++ b/datasail/cluster/vectors.py
@@ -123,7 +123,7 @@ def run_vector(dataset: DataSet, method: SIM_OPTIONS = "tanimoto") -> None:
     if method in get_args(SIM_OPTIONS):
         if isinstance(embed, (list, tuple, np.ndarray)):
             if isinstance(embed[0], int) or np.issubdtype(embed[0].dtype, int):
-                if method in ["allbit", "asymmetric", "braunblanquet", "cosine", "kulczynski", "mcconnaughey", "onbit",
+                if method in ["allbit", "asymmetric", "braunblanquet", "cosine", "kulczynski", "onbit",
                               "rogotgoldberg", "russel", "sokal"]:
                     dataset.data = {k: iterable2bitvect(v) for k, v in dataset.data.items()}
                 else:
@@ -138,15 +138,14 @@ def run_vector(dataset: DataSet, method: SIM_OPTIONS = "tanimoto") -> None:
                 f"Unsupported embedding type {type(embed)}. Please use either RDKit datastructures, lists, "
                 f"tuples or one-dimensional numpy arrays.")
     elif method in get_args(DIST_OPTIONS):
+        dtype = np.bool_ if method in ["jaccard", "rogerstanimoto", "sokalmichener", "yule"] else np.float64
         if isinstance(embed, (
                 list, tuple, DataStructs.ExplicitBitVect, DataStructs.LongSparseIntVect, DataStructs.IntSparseIntVect)):
-            dataset.data = {k: np.array(list(v), dtype=np.float64) for k, v in dataset.data.items()}
+            dataset.data = {k: np.array(list(v), dtype=dtype) for k, v in dataset.data.items()}
         if not isinstance(dataset.data[dataset.names[0]], np.ndarray):
             raise ValueError(
                 f"Unsupported embedding type {type(embed)}. Please use either RDKit datastructures, lists, "
                 f"tuples or one-dimensional numpy arrays.")
-        if method in ["rogerstanimoto", "sokalmichener", "yule"]:
-            dataset.data = {k: np.array(list(v), dtype=np.bool_) for k, v in dataset.data.items()}
     else:
         raise ValueError(f"Unknown method {method}")
     fps = [dataset.data[name] for name in dataset.names]
diff --git a/setup.py b/setup.py
index 4de5754..3673023 100644
--- a/setup.py
+++ b/setup.py
@@ -14,17 +14,18 @@
     author="Roman Joeres",
     maintainer="Roman Joeres",
     classifiers=[
-        "Development Status :: 4 - Beta",
+        "Development Status :: 5 - Production/Stable",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
         "Intended Audience :: Science/Research",
         "Natural Language :: English",
         "Topic :: Scientific/Engineering :: Bio-Informatics",
     ],
     packages=find_packages(),
-    setup_requires=['setuptools_scm'],
-    include_package_data=True,
+    include_package_data=False,
     install_requires=[],
     package_data={},
     python_requires=">=3.8, <4.0.0",