From 65c1d635df93d55d23fd8a4a01cd4391f4bc6e48 Mon Sep 17 00:00:00 2001 From: Roman Joeres Date: Sat, 21 Sep 2024 02:25:07 +0200 Subject: [PATCH] Improvements in documentation --- datasail/parsers.py | 7 +++ datasail/routine.py | 22 +++++++- datasail/settings.py | 1 + docs/conf.py | 4 +- docs/index.rst | 39 ++++++------- docs/install.html | 128 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 174 insertions(+), 27 deletions(-) create mode 100644 docs/install.html diff --git a/datasail/parsers.py b/datasail/parsers.py index e133ef6..3a665a7 100644 --- a/datasail/parsers.py +++ b/datasail/parsers.py @@ -25,6 +25,13 @@ def parse_datasail_args(args) -> Dict[str, object]: "clusters within the dataset.", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + parser.add_argument( + "--cc", + default=False, + action='store_true', + dest=KW_CC, + help="List available clustering algorithms." + ) parser.add_argument( "-o", "--output", diff --git a/datasail/routine.py b/datasail/routine.py index 632ab28..edfa6d3 100644 --- a/datasail/routine.py +++ b/datasail/routine.py @@ -1,5 +1,5 @@ import time -from typing import Dict, Tuple +from typing import Dict, Tuple, Optional from datasail.argparse_patch import remove_patch from datasail.cluster.clustering import cluster @@ -8,11 +8,23 @@ from datasail.report import report from datasail.settings import LOGGER, KW_TECHNIQUES, KW_EPSILON, KW_RUNS, KW_SPLITS, KW_NAMES, \ KW_MAX_SEC, KW_MAX_SOL, KW_SOLVER, KW_LOGDIR, NOT_ASSIGNED, KW_OUTDIR, MODE_E, MODE_F, DIM_2, SRC_CL, KW_DELTA, \ - KW_E_CLUSTERS, KW_F_CLUSTERS + KW_E_CLUSTERS, KW_F_CLUSTERS, KW_CC, CDHIT, INSTALLED, FOLDSEEK, TMALIGN, CDHIT_EST, DIAMOND, MMSEQS, MASH from datasail.solver.solve import run_solver -def datasail_main(**kwargs) -> Tuple[Dict, Dict, Dict]: +def list_cluster_algos(): + """ + List all available clustering algorithms. 
+ """ + + print("Available clustering algorithms:", "\tECFP", sep="\n") + for algo, name in [(CDHIT, "CD-HIT"), (CDHIT_EST, "CD-HIT-EST"), (DIAMOND, "DIAMOND"), (MMSEQS, "MMseqs, MMseqs2"), + (MASH, "MASH"), (FOLDSEEK, "FoldSeek"), (TMALIGN, "TMalign")]: + if INSTALLED[algo]: + print("\t", name, sep="") + + +def datasail_main(**kwargs) -> Optional[Tuple[Dict, Dict, Dict]]: """ Main routine of DataSAIL. Here the parsed input is aggregated into structures and then split and saved. @@ -20,6 +32,10 @@ def datasail_main(**kwargs) -> Tuple[Dict, Dict, Dict]: **kwargs: Parsed commandline arguments to DataSAIL. """ kwargs = remove_patch(**kwargs) + if kwargs[KW_CC]: + list_cluster_algos() + return None + start = time.time() LOGGER.info("Read data") diff --git a/datasail/settings.py b/datasail/settings.py index ea7f3bb..65d9a37 100644 --- a/datasail/settings.py +++ b/datasail/settings.py @@ -148,6 +148,7 @@ def format2ending(fmt: str) -> str: KW_CACHE = "cache" KW_CACHE_DIR = "cache_dir" +KW_CC = "cc" KW_CLI = "cli" KW_DELTA = "delta" KW_EPSILON = "epsilon" diff --git a/docs/conf.py b/docs/conf.py index 310d2ca..cb22fa9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,6 +6,7 @@ import sphinx_rtd_theme sys.path.insert(0, os.path.abspath("./..")) +sys.path.insert(0, os.path.abspath("./.")) import datasail from datasail.version import __version__ @@ -21,7 +22,7 @@ "sphinx.ext.githubpages", "nbsphinx", "nbsphinx_link", - "IPython.sphinxext.ipython_console_highlighting" + "IPython.sphinxext.ipython_console_highlighting", ] autosummary_generate = True @@ -45,7 +46,6 @@ intersphinx_mapping = { "python": ("https://docs.python.org/", None), "numpy": ("https://numpy.org/doc/stable/", None), - # "numpy": ("http://docs.scipy.org/doc/numpy", None), } html_theme_options = { diff --git a/docs/index.rst b/docs/index.rst index 9eee5f8..bd2d362 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,35 +8,30 @@ datasets. 
However, its versatility extends beyond biology, making it applicable utilized through its command line interface or integrated as a Python package, DataSAIL stands out for its user-friendly design and adaptability. Licensed under the MIT license, it is open source and conveniently accessible on `GitHub `_. Installation is made simple through -`conda `_, utilizing -`mamba `_. +`conda `_. -Quick Start -########### - -DataSAIL is available for all modern versions of Pytion (v3.8 or newer). Other than described on the conda-website, -the command to install DataSAIL within your just created environment is - -.. code-block:: shell +Install +####### - mamba install -c kalininalab -c conda-forge -c bioconda datasail - pip install grakel +DataSAIL is available for all modern versions of Python (v3.8 or newer). -The second command is necessary to run WLK clustering as the grakel library is not available on conda for python 3.10 -or newer. Alternatively, one can install :code:`DataSAIL-lite` from conda as +.. note:: + It is recommended to use `mamba `_ + for the installation because conda might not be able to resolve the dependencies of DataSAIL successfully. -.. code-block:: shell +.. raw:: html + :file: install.html - mamba install -c kalininalab -c conda-forge -c bioconda datasail-lite - pip install grakel +DataSAIL vs. DataSAIL-lite +-------------------------- -.. note:: - It is important to use mamba for the installation because conda might not be able to resolve the dependencies of - DataSAIL successfully. +The difference between :code:`DataSAIL` and :code:`DataSAIL-lite` is that the latter does not include most of the +clustering algorithms as they are not provided on conda for all OSs. Therefore, the user is required to +install them manually as needed. DataSAIL will work even if not all clustering algorithms are installed. For the installation, it +is necessary to be able to call them. You can test which are available by running :code:`datasail --cc`. 
-The difference between :code:`DataSAIL` and :code:`DataSAIL-lite` is that the latter does not include the clustering -algorithms and requires the user to install them manually as needed. The reason for this is that the clustering -algorithms are not available for all OS and we want to make DataSAIL available for all OS. +Quick Start +########### Regardless of which installation command was used, DataSAIL can be executed by running diff --git a/docs/install.html b/docs/install.html new file mode 100644 index 0000000..0ef2587 --- /dev/null +++ b/docs/install.html @@ -0,0 +1,128 @@ + + +
+
+
OS
+
Package
+
Run:
+
+
+
+
Linux
+
OSX
+
OSX-ARM
+
Windows
+
+
+
Conda
+
Pip
+
+
+
Command
+
+
+
+ + \ No newline at end of file