Skip to content

Commit

Permalink
ruff & black
Browse files Browse the repository at this point in the history
  • Loading branch information
thanhleviet committed Jul 2, 2024
1 parent ab4dc63 commit 7341946
Show file tree
Hide file tree
Showing 14 changed files with 271 additions and 231 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ lint:
flake8

test: all
py.test
py.test -vvv

install: all
pip install .
Expand Down
3 changes: 2 additions & 1 deletion q2_usearch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@
del get_versions

from . import _version
__version__ = _version.get_versions()['version']

__version__ = _version.get_versions()["version"]
149 changes: 89 additions & 60 deletions q2_usearch/_chimera.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,71 +12,87 @@
from q2_types.feature_data import DNAFASTAFormat

from ._utils import run_command, _fasta_with_sizes

# from ._cluster_features import _fasta_with_sizes, run_command
# from ._format import UchimeStatsFmt


_uchime_defaults = {'dn': 1.4,
'mindiffs': 3,
'mindiv': 0.8,
'minh': 0.28,
'xn': 8.0}


def uchime_ref(sequences: DNAFASTAFormat,
table: biom.Table,
reference_sequences: DNAFASTAFormat,
dn: float = _uchime_defaults['dn'],
mindiffs: int = _uchime_defaults['mindiffs'],
mindiv: float = _uchime_defaults['mindiv'],
minh: float = _uchime_defaults['minh'],
xn: float = _uchime_defaults['xn'],
threads: int = 1) \
-> (DNAFASTAFormat, DNAFASTAFormat, UchimeStatsFmt):
cmd, chimeras, nonchimeras, uchime_stats = \
_uchime_ref(sequences, table, reference_sequences, dn, mindiffs,
mindiv, minh, xn, threads)
# Default values for the UCHIME-related keyword arguments (dn, mindiffs,
# mindiv, minh, xn) of the chimera-filtering functions in this module.
_uchime_defaults = {"dn": 1.4, "mindiffs": 3, "mindiv": 0.8, "minh": 0.28, "xn": 8.0}


def uchime_ref(
    sequences: DNAFASTAFormat,
    table: biom.Table,
    reference_sequences: DNAFASTAFormat,
    dn: float = _uchime_defaults["dn"],
    mindiffs: int = _uchime_defaults["mindiffs"],
    mindiv: float = _uchime_defaults["mindiv"],
    minh: float = _uchime_defaults["minh"],
    xn: float = _uchime_defaults["xn"],
    threads: int = 1,
) -> (DNAFASTAFormat, DNAFASTAFormat, UchimeStatsFmt):
    """Reference-based chimera filtering.

    Thin public wrapper around ``_uchime_ref``: every argument is forwarded
    unchanged and the command list the helper also returns is discarded.

    Returns
    -------
    tuple
        ``(chimeras, nonchimeras, uchime_stats)`` as produced by
        ``_uchime_ref``.
    """
    # The helper hands back the executed command first; callers of the
    # public function only need the three output artifacts.
    _cmd, chimeric, non_chimeric, stats = _uchime_ref(
        sequences,
        table,
        reference_sequences,
        dn,
        mindiffs,
        mindiv,
        minh,
        xn,
        threads,
    )
    return chimeric, non_chimeric, stats


def _uchime_ref(sequences, table, reference_sequences, dn, mindiffs,
mindiv, minh, xn, threads):
def _uchime_ref(
    sequences, table, reference_sequences, dn, mindiffs, mindiv, minh, xn, threads
):
    """Build and run the ``vsearch --uchime_ref`` command.

    This helper is separate from ``uchime_ref`` only to simplify testing:
    it returns the command list alongside the outputs.

    Returns
    -------
    tuple
        ``(cmd, chimeras, nonchimeras, uchime_stats)``.
    """
    chimeras = DNAFASTAFormat()
    nonchimeras = DNAFASTAFormat()
    uchime_stats = UchimeStatsFmt()

    with tempfile.NamedTemporaryFile() as sized_fasta:
        _fasta_with_sizes(str(sequences), sized_fasta.name, table)

        # Flags that take a value, in the exact order they are passed.
        valued_options = (
            ("--uchimeout", uchime_stats),
            ("--nonchimeras", nonchimeras),
            ("--chimeras", chimeras),
            ("--dn", dn),
            ("--mindiffs", mindiffs),
            ("--mindiv", mindiv),
            ("--minh", minh),
            ("--xn", xn),
            ("--db", reference_sequences),
            ("--qmask", "none"),  # ensures no lowercase DNA chars
        )

        cmd = ["vsearch", "--uchime_ref", sized_fasta.name]
        for flag, value in valued_options:
            cmd += [flag, str(value)]
        cmd += ["--xsize", "--threads", str(threads), "--fasta_width", "0"]

        # Must run while the temporary FASTA file still exists.
        run_command(cmd)

    return cmd, chimeras, nonchimeras, uchime_stats


def uchime3_denovo(sequences: DNAFASTAFormat,
table: biom.Table,
dn: float = _uchime_defaults['dn'],
mindiffs: int = _uchime_defaults['mindiffs'],
mindiv: float = _uchime_defaults['mindiv'],
minh: float = _uchime_defaults['minh'],
xn: float = _uchime_defaults['xn']) \
-> (DNAFASTAFormat, DNAFASTAFormat, UchimeStatsFmt):
cmd, chimeras, nonchimeras, uchime_stats = \
_uchime_denovo(sequences, table, dn, mindiffs, mindiv, minh, xn)
def uchime3_denovo(
    sequences: DNAFASTAFormat,
    table: biom.Table,
    dn: float = _uchime_defaults["dn"],
    mindiffs: int = _uchime_defaults["mindiffs"],
    mindiv: float = _uchime_defaults["mindiv"],
    minh: float = _uchime_defaults["minh"],
    xn: float = _uchime_defaults["xn"],
) -> (DNAFASTAFormat, DNAFASTAFormat, UchimeStatsFmt):
    """De novo chimera filtering.

    Thin public wrapper around ``_uchime3_denovo``: every argument is
    forwarded unchanged and the command list the helper also returns is
    discarded.

    Returns
    -------
    tuple
        ``(chimeras, nonchimeras, uchime_stats)`` as produced by
        ``_uchime3_denovo``.
    """
    # The helper in this module is named ``_uchime3_denovo``; the previous
    # call to ``_uchime_denovo`` referenced a name that is not defined here
    # and raised NameError on first use.
    _cmd, chimeras, nonchimeras, uchime_stats = _uchime3_denovo(
        sequences, table, dn, mindiffs, mindiv, minh, xn
    )
    return chimeras, nonchimeras, uchime_stats


Expand All @@ -87,19 +103,32 @@ def _uchime3_denovo(sequences, table, dn, mindiffs, mindiv, minh, xn):
uchime_stats = UchimeStatsFmt()
with tempfile.NamedTemporaryFile() as fasta_with_sizes:
_fasta_with_sizes(str(sequences), fasta_with_sizes.name, table)
cmd = ['usearch',
'-uchime3_denovo', fasta_with_sizes.name,
'-uchimeout', str(uchime_stats),
'--nonchimeras', str(nonchimeras),
'--chimeras', str(chimeras),
'--dn', str(dn),
'--mindiffs', str(mindiffs),
'--mindiv', str(mindiv),
'--minh', str(minh),
'--xn', str(xn),
'--qmask', 'none', # ensures no lowercase DNA chars
'--xsize',
'--fasta_width', '0']
cmd = [
"usearch",
"-uchime3_denovo",
fasta_with_sizes.name,
"-uchimeout",
str(uchime_stats),
"--nonchimeras",
str(nonchimeras),
"--chimeras",
str(chimeras),
"--dn",
str(dn),
"--mindiffs",
str(mindiffs),
"--mindiv",
str(mindiv),
"--minh",
str(minh),
"--xn",
str(xn),
"--qmask",
"none", # ensures no lowercase DNA chars
"--xsize",
"--fasta_width",
"0",
]
run_command(cmd)

return cmd, chimeras, nonchimeras, uchime_stats
21 changes: 9 additions & 12 deletions q2_usearch/_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,14 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import os
import tempfile
import sqlite3

import biom
import skbio
import pandas as pd
from qiime2 import Metadata

from q2_types.feature_data import DNAFASTAFormat
from ._utils import run_command, validate_params, USearchError
from ._utils import run_command, validate_params
import shlex

from ._format import USEARCHTextFile


def cluster_otus(
sequences: DNAFASTAFormat,
minsize: int = 2,
Expand All @@ -29,21 +23,24 @@ def cluster_otus(
) -> (DNAFASTAFormat, USEARCHTextFile): # type: ignore
otus_seqs = DNAFASTAFormat()
uparse_out = USEARCHTextFile()

validate_params([minsize, threads])

_relabel = f"-relabel {relabel}"
_minsize = f"-minsize {minsize}"
_cmd = f"usearch -cluster_otus {sequences} -otus {otus_seqs} -uparseout {uparse_out} {_relabel} {_minsize} -threads {threads}".strip()
cmd = shlex.split(_cmd)
run_command(cmd)
return otus_seqs, uparse_out


def cluster_fast():
    """Placeholder: nothing is implemented yet; always returns ``None``."""
    return None


def cluster_smallmem():
    """Placeholder: nothing is implemented yet; always returns ``None``."""
    return None


def cluster_mt():
    """Placeholder: nothing is implemented yet; always returns ``None``."""
    return None
Loading

0 comments on commit 7341946

Please sign in to comment.