From 7341946388fbb7ab3facadab497a97fb0d25816e Mon Sep 17 00:00:00 2001 From: thanhleviet Date: Tue, 2 Jul 2024 13:04:45 +0000 Subject: [PATCH] ruff & black --- Makefile | 2 +- q2_usearch/__init__.py | 3 +- q2_usearch/_chimera.py | 149 +++++++++++++--------- q2_usearch/_cluster.py | 21 ++-- q2_usearch/_fastqx.py | 81 ++++++------ q2_usearch/_format.py | 14 ++- q2_usearch/_otu.py | 17 +-- q2_usearch/_pipelines.py | 11 +- q2_usearch/_unoise.py | 21 +--- q2_usearch/_utils.py | 19 ++- q2_usearch/_version.py | 156 +++++++++++++++--------- q2_usearch/plugin_setup.py | 4 +- q2_usearch/tests/test_fastx_truncate.py | 3 - q2_usearch/tests/test_unoise.py | 1 - 14 files changed, 271 insertions(+), 231 deletions(-) diff --git a/Makefile b/Makefile index bba09c6..de09df7 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ lint: flake8 test: all - py.test + py.test -vvv install: all pip install . diff --git a/q2_usearch/__init__.py b/q2_usearch/__init__.py index b7239ea..7639174 100644 --- a/q2_usearch/__init__.py +++ b/q2_usearch/__init__.py @@ -13,4 +13,5 @@ del get_versions from . import _version -__version__ = _version.get_versions()['version'] + +__version__ = _version.get_versions()["version"] diff --git a/q2_usearch/_chimera.py b/q2_usearch/_chimera.py index ff52041..98b46af 100644 --- a/q2_usearch/_chimera.py +++ b/q2_usearch/_chimera.py @@ -12,71 +12,87 @@ from q2_types.feature_data import DNAFASTAFormat from ._utils import run_command, _fasta_with_sizes + # from ._cluster_features import _fasta_with_sizes, run_command # from ._format import UchimeStatsFmt -_uchime_defaults = {'dn': 1.4, - 'mindiffs': 3, - 'mindiv': 0.8, - 'minh': 0.28, - 'xn': 8.0} - - -def uchime_ref(sequences: DNAFASTAFormat, - table: biom.Table, - reference_sequences: DNAFASTAFormat, - dn: float = _uchime_defaults['dn'], - mindiffs: int = _uchime_defaults['mindiffs'], - mindiv: float = _uchime_defaults['mindiv'], - minh: float = _uchime_defaults['minh'], - xn: float = _uchime_defaults['xn'], - threads: int = 1) \ - -> (DNAFASTAFormat, DNAFASTAFormat, UchimeStatsFmt): - cmd, chimeras, nonchimeras, uchime_stats = \ - _uchime_ref(sequences, table, reference_sequences, dn, mindiffs, - mindiv, minh, xn, threads) +_uchime_defaults = {"dn": 1.4, "mindiffs": 3, "mindiv": 0.8, "minh": 0.28, "xn": 8.0} + + +def uchime_ref( + sequences: DNAFASTAFormat, + table: biom.Table, + reference_sequences: DNAFASTAFormat, + dn: float = _uchime_defaults["dn"], + mindiffs: int = _uchime_defaults["mindiffs"], + mindiv: float = _uchime_defaults["mindiv"], + minh: float = _uchime_defaults["minh"], + xn: float = _uchime_defaults["xn"], + threads: int = 1, +) -> (DNAFASTAFormat, DNAFASTAFormat, UchimeStatsFmt): + cmd, chimeras, nonchimeras, uchime_stats = _uchime_ref( + sequences, table, reference_sequences, dn, mindiffs, mindiv, minh, xn, threads + ) return chimeras, nonchimeras, uchime_stats -def _uchime_ref(sequences, table, reference_sequences, dn, mindiffs, - mindiv, minh, xn, threads): +def _uchime_ref( + sequences, table, reference_sequences, dn, mindiffs, mindiv, minh, xn, threads +): # this function only exists to simplify testing chimeras = DNAFASTAFormat() nonchimeras = DNAFASTAFormat() uchime_stats = UchimeStatsFmt() with tempfile.NamedTemporaryFile() as fasta_with_sizes: _fasta_with_sizes(str(sequences), fasta_with_sizes.name, table) - cmd = ['vsearch', - '--uchime_ref', fasta_with_sizes.name, - '--uchimeout', str(uchime_stats), - '--nonchimeras', str(nonchimeras), - '--chimeras', str(chimeras), - '--dn', str(dn), - '--mindiffs', str(mindiffs), - '--mindiv', str(mindiv), - '--minh', str(minh), - '--xn', str(xn), - '--db', str(reference_sequences), - '--qmask', 'none', # ensures no lowercase DNA chars - '--xsize', - '--threads', str(threads), - '--fasta_width', '0'] + cmd = [ + "vsearch", + "--uchime_ref", + fasta_with_sizes.name, + "--uchimeout", + str(uchime_stats), + "--nonchimeras", + str(nonchimeras), + "--chimeras", + str(chimeras), + "--dn", + str(dn), + "--mindiffs", + str(mindiffs), + "--mindiv", + str(mindiv), + "--minh", + str(minh), + "--xn", + str(xn), + "--db", + str(reference_sequences), + "--qmask", + "none", # ensures no lowercase DNA chars + "--xsize", + "--threads", + str(threads), + "--fasta_width", + "0", + ] run_command(cmd) return cmd, chimeras, nonchimeras, uchime_stats -def uchime3_denovo(sequences: DNAFASTAFormat, - table: biom.Table, - dn: float = _uchime_defaults['dn'], - mindiffs: int = _uchime_defaults['mindiffs'], - mindiv: float = _uchime_defaults['mindiv'], - minh: float = _uchime_defaults['minh'], - xn: float = _uchime_defaults['xn']) \ - -> (DNAFASTAFormat, DNAFASTAFormat, UchimeStatsFmt): - cmd, chimeras, nonchimeras, uchime_stats = \ - _uchime_denovo(sequences, table, dn, mindiffs, mindiv, minh, xn) +def uchime3_denovo( + sequences: DNAFASTAFormat, + table: biom.Table, + dn: float = _uchime_defaults["dn"], + mindiffs: int = _uchime_defaults["mindiffs"], + mindiv: float = _uchime_defaults["mindiv"], + minh: float = _uchime_defaults["minh"], + xn: float = _uchime_defaults["xn"], +) -> (DNAFASTAFormat, DNAFASTAFormat, UchimeStatsFmt): + cmd, chimeras, nonchimeras, uchime_stats = _uchime_denovo( + sequences, table, dn, mindiffs, mindiv, minh, xn + ) return chimeras, nonchimeras, uchime_stats @@ -87,19 +103,32 @@ def _uchime3_denovo(sequences, table, dn, mindiffs, mindiv, minh, xn): uchime_stats = UchimeStatsFmt() with tempfile.NamedTemporaryFile() as fasta_with_sizes: _fasta_with_sizes(str(sequences), fasta_with_sizes.name, table) - cmd = ['usearch', - '-uchime3_denovo', fasta_with_sizes.name, - '-uchimeout', str(uchime_stats), - '--nonchimeras', str(nonchimeras), - '--chimeras', str(chimeras), - '--dn', str(dn), - '--mindiffs', str(mindiffs), - '--mindiv', str(mindiv), - '--minh', str(minh), - '--xn', str(xn), - '--qmask', 'none', # ensures no lowercase DNA chars - '--xsize', - '--fasta_width', '0'] + cmd = [ + "usearch", + "-uchime3_denovo", + fasta_with_sizes.name, + "-uchimeout", + str(uchime_stats), + "--nonchimeras", + str(nonchimeras), + "--chimeras", + str(chimeras), + "--dn", + str(dn), + "--mindiffs", + str(mindiffs), + "--mindiv", + str(mindiv), + "--minh", + str(minh), + "--xn", + str(xn), + "--qmask", + "none", # ensures no lowercase DNA chars + "--xsize", + "--fasta_width", + "0", + ] run_command(cmd) return cmd, chimeras, nonchimeras, uchime_stats diff --git a/q2_usearch/_cluster.py b/q2_usearch/_cluster.py index a066f91..f7f9b7f 100644 --- a/q2_usearch/_cluster.py +++ b/q2_usearch/_cluster.py @@ -7,20 +7,14 @@ # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -import os -import tempfile -import sqlite3 - -import biom -import skbio -import pandas as pd -from qiime2 import Metadata + from q2_types.feature_data import DNAFASTAFormat -from ._utils import run_command, validate_params, USearchError +from ._utils import run_command, validate_params import shlex from ._format import USEARCHTextFile + def cluster_otus( sequences: DNAFASTAFormat, minsize: int = 2, @@ -29,9 +23,9 @@ def cluster_otus( ) -> (DNAFASTAFormat, USEARCHTextFile): # type: ignore otus_seqs = DNAFASTAFormat() uparse_out = USEARCHTextFile() - + validate_params([minsize, threads]) - + _relabel = f"-relabel {relabel}" _minsize = f"-minsize {minsize}" _cmd = f"usearch -cluster_otus {sequences} -otus {otus_seqs} -uparseout {uparse_out} {_relabel} {_minsize} -threads {threads}".strip() @@ -39,11 +33,14 @@ def cluster_otus( run_command(cmd) return otus_seqs, uparse_out + def cluster_fast(): pass + def cluster_smallmem(): pass + def cluster_mt(): - pass \ No newline at end of file + pass diff --git a/q2_usearch/_fastqx.py b/q2_usearch/_fastqx.py index f2ddc13..a56194c 100644 --- a/q2_usearch/_fastqx.py +++ b/q2_usearch/_fastqx.py @@ -11,52 +11,53 @@ import gzip import shutil import tempfile -import sqlite3 -import biom -import skbio import pandas as pd import shlex import yaml from typing import List -from qiime2 import Metadata from q2_types.feature_data import DNAFASTAFormat from q2_types.per_sample_sequences import ( QIIME1DemuxDirFmt, SingleLanePerSampleSingleEndFastqDirFmt, SingleLanePerSamplePairedEndFastqDirFmt, - FastqManifestFormat, YamlFormat) + FastqManifestFormat, + YamlFormat, +) from ._utils import run_command, validate_params import logging as logger -logger.basicConfig(level=logger.DEBUG, format='%(asctime)s - %(message)s') +logger.basicConfig(level=logger.DEBUG, format="%(asctime)s - %(message)s") _mp_defaults = { - 'maxdiffs': 5, - 'pctid': 90, - 'nostagger': False, - 'minmergelen': 50, - 'maxmergelen': 270, - 'minqual': 0, - 'minovlen': 16, - 'trunctail': 2, - 'minlen': 64, - 'relabel': '@', - "threads": 1 + "maxdiffs": 5, + "pctid": 90, + "nostagger": False, + "minmergelen": 50, + "maxmergelen": 270, + "minqual": 0, + "minovlen": 16, + "trunctail": 2, + "minlen": 64, + "relabel": "@", + "threads": 1, } + def fastq_filter(): pass + def fastq_join(): pass + def fastq_mergepairs( demultiplexed_seqs: SingleLanePerSamplePairedEndFastqDirFmt, maxdiffs: int = _mp_defaults["maxdiffs"], - pctid: int = _mp_defaults['pctid'], + pctid: int = _mp_defaults["pctid"], nostagger: bool = _mp_defaults["nostagger"], minmergelen: int = _mp_defaults["minmergelen"], maxmergelen: int = _mp_defaults["maxmergelen"], @@ -66,21 +67,30 @@ def fastq_mergepairs( threads: int = _mp_defaults["threads"], ) -> (SingleLanePerSampleSingleEndFastqDirFmt, SingleLanePerSamplePairedEndFastqDirFmt): # type: ignore _, merged, unmerged = _merge_pairs_w_command_output( - demultiplexed_seqs, maxdiffs, pctid, nostagger, minmergelen, maxmergelen, minqual, minovlen, threads + demultiplexed_seqs, + maxdiffs, + pctid, + nostagger, + minmergelen, + maxmergelen, + minqual, + minovlen, + threads, ) return merged, unmerged -def fastx_uniques(sequences: QIIME1DemuxDirFmt, - sizeout: bool = True, - relabel: bool = True - ) -> (DNAFASTAFormat): # type: ignore +def fastx_uniques( + sequences: QIIME1DemuxDirFmt, sizeout: bool = True, relabel: bool = True +) -> DNAFASTAFormat: # type: ignore # TODO: the software crashes when uc/tableout is defined. # The magic converting from fastq to fasta happens here https://github.com/qiime2/q2-types/blob/70b511c9657e3b464d1b6c0ed18673a3f0990a48/q2_types/per_sample_sequences/_transformer.py#L188 unique_sequences = DNAFASTAFormat() seqs_fp = f"{sequences}/seqs.fna" logger.debug(f"Seqs file: {seqs_fp}") - _relabel = "-relabel @" if relabel else "" # relabels the sequences with the sample name using the special symbol @ + _relabel = ( + "-relabel @" if relabel else "" + ) # relabels the sequences with the sample name using the special symbol @ _sizeout = "-sizeout" if sizeout else "" _cmd = f"usearch -fastx_uniques {seqs_fp} -fastaout {unique_sequences} {_sizeout} {_relabel}".strip() cmd = shlex.split(_cmd) @@ -88,6 +98,7 @@ def fastx_uniques(sequences: QIIME1DemuxDirFmt, run_command(cmd) return unique_sequences + def fastx_truncate( unique_seqs: SingleLanePerSampleSingleEndFastqDirFmt, trunclen: int = 200, @@ -95,7 +106,7 @@ def fastx_truncate( stripright: int = 0, padlen: int = 200, relabel: bool = False, -) -> (SingleLanePerSampleSingleEndFastqDirFmt): # type: ignore +) -> SingleLanePerSampleSingleEndFastqDirFmt: # type: ignore validate_params([trunclen, stripleft, stripright, padlen]) truncated_seqs = SingleLanePerSampleSingleEndFastqDirFmt() @@ -335,7 +346,7 @@ def _merge_pairs_w_command_output( List[str], SingleLanePerSampleSingleEndFastqDirFmt, SingleLanePerSamplePairedEndFastqDirFmt, -): # type: ignore +): # type: ignore # create formats """ Merges paired-end reads from demultiplexed sequences using USEARCH. @@ -365,7 +376,9 @@ def _merge_pairs_w_command_output( _write_manifest_header(merged_manifest_fh, add_warning=True) _write_manifest_header(unmerged_manifest_fh) - logger.debug(f"Manifests demultiplexed_seqs: {demultiplexed_seqs.manifest.pathspec}") + logger.debug( + f"Manifests demultiplexed_seqs: {demultiplexed_seqs.manifest.pathspec}" + ) # generate input reads iterable manifest = pd.read_csv( os.path.join(str(demultiplexed_seqs), demultiplexed_seqs.manifest.pathspec), @@ -405,7 +418,7 @@ def _merge_pairs_w_command_output( # Input _cmd = f"usearch -fastq_mergepairs {fwd_fp} -reverse {rev_fp} -fastqout {fq_merged_path}" # Output - _cmd += f" -fastqout_notmerged_fwd {fq_unmerged_fwd_path} -fastqout_notmerged_rev {fq_unmerged_rev_path}" + _cmd += f" -fastqout_notmerged_fwd {fq_unmerged_fwd_path} -fastqout_notmerged_rev {fq_unmerged_rev_path}" # Options _relabel = f"-relabel {relabel}" if relabel else "" _cmd += f" -fastq_maxdiffs {maxdiffs} \ @@ -445,10 +458,10 @@ def _merge_pairs_w_command_output( def _get_output_paths( - format_: SingleLanePerSampleSingleEndFastqDirFmt, - sample_id: str, - barcode_id: int, - direction: int + format_: SingleLanePerSampleSingleEndFastqDirFmt, + sample_id: str, + barcode_id: int, + direction: int, ) -> tuple[str, str]: """ Generate output paths for the given format, sample ID, barcode ID, and direction. @@ -469,8 +482,8 @@ def _get_output_paths( return path, str(path).strip(".gz") -def _write_manifest_header(manifest_fh: str, add_warning: bool=False) -> None: +def _write_manifest_header(manifest_fh: str, add_warning: bool = False) -> None: manifest_fh.write("sample-id,filename,direction\n") if add_warning: manifest_fh.write("") - manifest_fh.write("# direction is not meaningful for joined reads\n") \ No newline at end of file + manifest_fh.write("# direction is not meaningful for joined reads\n") diff --git a/q2_usearch/_format.py b/q2_usearch/_format.py index 8eb6cf4..4254ad0 100644 --- a/q2_usearch/_format.py +++ b/q2_usearch/_format.py @@ -23,24 +23,26 @@ class UchimeStatsFmt(model.TextFileFormat): def _check_n_records(self, n): with open(str(self)) as fh: - csv_reader = csv.reader(fh, delimiter='\t') + csv_reader = csv.reader(fh, delimiter="\t") for i, row in enumerate(csv_reader): if i == n: break else: if len(row) != 18: raise ValidationError( - 'Incorrect number of fields detected on line %d.' - ' Should be exactly 18.' % (i + 1)) + "Incorrect number of fields detected on line %d." + " Should be exactly 18." % (i + 1) + ) def _validate_(self, level): - record_count_map = {'min': 5, 'max': float('inf')} + record_count_map = {"min": 5, "max": float("inf")} self._check_n_records(record_count_map[level]) UchimeStatsDirFmt = model.SingleFileDirectoryFormat( - 'UchimeStatsDirFmt', 'stats.tsv', UchimeStatsFmt) + "UchimeStatsDirFmt", "stats.tsv", UchimeStatsFmt +) USEARCHDirFmt = model.SingleFileDirectoryFormat( "USEARCHDirFmt", "data.txt", USEARCHTextFile -) \ No newline at end of file +) diff --git a/q2_usearch/_otu.py b/q2_usearch/_otu.py index 4e9e39d..b9a39e8 100644 --- a/q2_usearch/_otu.py +++ b/q2_usearch/_otu.py @@ -5,33 +5,17 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -import os -import tempfile -import sqlite3 import biom -import skbio -import pandas as pd -from qiime2 import Metadata from q2_types.feature_data import DNAFASTAFormat from q2_types.feature_table import BIOMV210Format from q2_types.per_sample_sequences import ( - QIIME1DemuxDirFmt, SingleLanePerSampleSingleEndFastqDirFmt, - SingleLanePerSamplePairedEndFastqDirFmt, - FastqManifestFormat, - YamlFormat, ) from ._utils import ( run_command, - _fasta_with_sizes, - _error_on_nonoverlapping_ids, - USearchError, - _uc_to_sqlite, - _collapse_f_from_sqlite, - _fasta_from_sqlite, ) import shlex @@ -63,6 +47,7 @@ def otutab( tabbed_biomv210 = BIOMV210Format(f.name) return mapout, tabbed_biomv210, tabbed_out, unmapped + # Not available in v12 # def otu_norm(otutable: USEARCHTextFile, sample_size: int = 1000) -> USEARCHTextFile: # tabbed_out = USEARCHTextFile() diff --git a/q2_usearch/_pipelines.py b/q2_usearch/_pipelines.py index d4e0692..bfb42fa 100644 --- a/q2_usearch/_pipelines.py +++ b/q2_usearch/_pipelines.py @@ -1,10 +1,3 @@ -import qiime2.plugin -from q2_types.feature_data import FeatureData, Sequence -from q2_types.feature_table import FeatureTable, Frequency -from q2_types.sample_data import SampleData -from ._format import USEARCHDirFmt - - # Define the pipeline function def denoise_pipeline( ctx, @@ -50,5 +43,5 @@ def denoise_pipeline( otutab_output.feature_table, otutab_output.feature_sequences, otutab_output.otu_table, - otutab_output.denoised_sequences if out_denoised else None - ) \ No newline at end of file + otutab_output.denoised_sequences if out_denoised else None, + ) diff --git a/q2_usearch/_unoise.py b/q2_usearch/_unoise.py index 82f4ae3..6ff6340 100644 --- a/q2_usearch/_unoise.py +++ b/q2_usearch/_unoise.py @@ -5,16 +5,9 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -import os -import tempfile -import sqlite3 - -import biom -import skbio -import pandas as pd -from qiime2 import Metadata + from q2_types.feature_data import DNAFASTAFormat -from ._utils import run_command, validate_params, USearchError +from ._utils import run_command, validate_params import shlex @@ -22,13 +15,11 @@ def unoise3( - sequences: DNAFASTAFormat, - minsize: int = 8, - unoise_alpha: float = 2.0 + sequences: DNAFASTAFormat, minsize: int = 8, unoise_alpha: float = 2.0 ) -> (DNAFASTAFormat, USEARCHTextFile): # type: ignore - + validate_params([minsize, unoise_alpha]) - + zotus_seqs = DNAFASTAFormat() tabbed_out = USEARCHTextFile() @@ -37,4 +28,4 @@ def unoise3( _cmd = f"usearch -unoise3 {sequences} -zotus {zotus_seqs} -tabbedout {tabbed_out} {_minsize} {_unoise_alpha}".strip() cmd = shlex.split(_cmd) run_command(cmd) - return zotus_seqs, tabbed_out \ No newline at end of file + return zotus_seqs, tabbed_out diff --git a/q2_usearch/_utils.py b/q2_usearch/_utils.py index d507219..39cc69d 100644 --- a/q2_usearch/_utils.py +++ b/q2_usearch/_utils.py @@ -5,16 +5,10 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -import os -import tempfile import subprocess import sqlite3 -import biom import skbio -import pandas as pd -from qiime2 import Metadata -from q2_types.feature_data import DNAFASTAFormat class USearchError(Exception): @@ -33,6 +27,7 @@ def validate_params(params: list): if any(param < 0 for param in params): raise ValueError("The parameter must be greater than or equal to 0.0.") + def run_command(cmd, verbose=True): """ Execute a command line command from within Python. @@ -76,6 +71,7 @@ def run_command(cmd, verbose=True): print(f"Command execution failed with error: {e}") raise + def _fasta_with_sizes(input_fasta_fp, output_fasta_fp, table): """ Add size annotations to sequences in a fasta file based on a given table. @@ -127,6 +123,7 @@ def _fasta_with_sizes(input_fasta_fp, output_fasta_fp, table): check_extra_sequence_ids=False, ) + def _error_on_nonoverlapping_ids( table_ids, sequence_ids, check_extra_table_ids=True, check_extra_sequence_ids=True ): @@ -161,7 +158,7 @@ def _error_on_nonoverlapping_ids( "in sequences. The set of features in sequences " "must be identical to the set of features in " "table. Feature ids present in table but not " - f"sequences are: {', '.join(extra_table_ids)}" + f"sequences are: {', '.join(extra_table_ids)}" ) if check_extra_sequence_ids: @@ -283,12 +280,14 @@ def _fasta_from_sqlite(conn, input_fasta_fp, output_fasta_fp): # -----------|------------------ # r1 | ACGTACGTACGTACGT # r2 | AAAAAAAAAAAAAAAA - c.execute("""SELECT fcm.cluster_id, rs.sequence_string, MAX(fcm.count) + c.execute( + """SELECT fcm.cluster_id, rs.sequence_string, MAX(fcm.count) FROM sorted_feature_cluster_map fcm INNER JOIN rep_seqs rs ON rs.feature_id = fcm.feature_id GROUP BY fcm.cluster_id ORDER BY fcm.cluster_id ASC; - """) + """ + ) with open(output_fasta_fp, "w") as output_seqs: while True: partial_results = c.fetchmany(size=100) @@ -297,4 +296,4 @@ def _fasta_from_sqlite(conn, input_fasta_fp, output_fasta_fp): [f">{i}\n{s}\n" for (i, s, _) in partial_results] ) else: - break \ No newline at end of file + break diff --git a/q2_usearch/_version.py b/q2_usearch/_version.py index 36ea012..f951cfa 100644 --- a/q2_usearch/_version.py +++ b/q2_usearch/_version.py @@ -68,12 +68,14 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f + return decorate @@ -100,10 +102,14 @@ def run_command( try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) + process = subprocess.Popen( + [command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + **popen_kwargs, + ) break except OSError as e: if e.errno == errno.ENOENT: @@ -141,15 +147,21 @@ def versions_from_parentdir( for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -212,7 +224,7 @@ def git_versions_from_keywords( # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -221,7 +233,7 @@ def git_versions_from_keywords( # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -229,32 +241,36 @@ def git_versions_from_keywords( for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') - if not re.match(r'\d', r): + if not re.match(r"\d", r): continue if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs( - tag_prefix: str, - root: str, - verbose: bool, - runner: Callable = run_command + tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command ) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. @@ -273,8 +289,7 @@ def git_pieces_from_vcs( env.pop("GIT_DIR", None) runner = functools.partial(runner, env=env) - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -282,10 +297,19 @@ def git_pieces_from_vcs( # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) + describe_out, rc = runner( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + f"{tag_prefix}[[:digit:]]*", + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -300,8 +324,7 @@ def git_pieces_from_vcs( pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") @@ -341,17 +364,16 @@ def git_pieces_from_vcs( dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -360,10 +382,12 @@ def git_pieces_from_vcs( if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -412,8 +436,7 @@ def render_pep440(pieces: Dict[str, Any]) -> str: rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -442,8 +465,7 @@ def render_pep440_branch(pieces: Dict[str, Any]) -> str: rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -604,11 +626,13 @@ def render_git_describe_long(pieces: Dict[str, Any]) -> str: def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } if not style or style == "default": style = "pep440" # the default @@ -632,9 +656,13 @@ def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } def get_versions() -> Dict[str, Any]: @@ -648,8 +676,7 @@ def get_versions() -> Dict[str, Any]: verbose = cfg.verbose try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass @@ -658,13 +685,16 @@ def get_versions() -> Dict[str, Any]: # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. - for _ in cfg.versionfile_source.split('/'): + for _ in cfg.versionfile_source.split("/"): root = os.path.dirname(root) except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) @@ -678,6 +708,10 @@ def get_versions() -> Dict[str, Any]: except NotThisMethod: pass - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/q2_usearch/plugin_setup.py b/q2_usearch/plugin_setup.py index 646be15..1dfaa4e 100644 --- a/q2_usearch/plugin_setup.py +++ b/q2_usearch/plugin_setup.py @@ -8,7 +8,7 @@ # ---------------------------------------------------------------------------- import qiime2.plugin -from qiime2.plugin import Citations, Plugin, Metadata, SemanticType +from qiime2.plugin import Citations, Plugin, SemanticType from q2_usearch import __version__ import q2_usearch._fastqx import q2_usearch._cluster @@ -261,4 +261,4 @@ }, name="Example microbiome analysis pipeline", description="A pipeline that follows recommended steps for microbiome analysis using USEARCH\n👉 https://drive5.com/usearch/manual/uparse_pipeline.html.\nfastx_uniques ➡️ cluster_otus ➡️ unoise3 ➡️ otutab", -) \ No newline at end of file +) diff --git a/q2_usearch/tests/test_fastx_truncate.py b/q2_usearch/tests/test_fastx_truncate.py index 39024ff..772e008 100644 --- a/q2_usearch/tests/test_fastx_truncate.py +++ b/q2_usearch/tests/test_fastx_truncate.py @@ -1,9 +1,6 @@ import os import pandas as pd import unittest -import tempfile -import subprocess -import filecmp from unittest.mock import patch from qiime2.plugin.testing import TestPluginBase from qiime2 import Artifact diff --git a/q2_usearch/tests/test_unoise.py b/q2_usearch/tests/test_unoise.py index b5bcc25..1ad0525 100644 --- a/q2_usearch/tests/test_unoise.py +++ b/q2_usearch/tests/test_unoise.py @@ -1,6 +1,5 @@ from qiime2.plugin.testing import TestPluginBase from unittest.mock import patch -import os from qiime2 import Artifact from q2_types.feature_data import DNAFASTAFormat, FeatureData, Sequence from q2_usearch._unoise import unoise3