From a5915229e27876576cc982f75d39dc838a87b470 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Mon, 28 Oct 2024 18:27:23 +1100 Subject: [PATCH] init new structure --- pyproject.toml | 2 +- src/tirmite/modules/build.py | 48 +++++++++++++++ src/tirmite/modules/classify.py | 50 ++++++++++++++++ src/tirmite/modules/compare.py | 7 +++ src/tirmite/modules/extract.py | 48 +++++++++++++++ src/tirmite/modules/find.py | 49 +++++++++++++++ src/tirmite/modules/merge.py | 44 ++++++++++++++ src/tirmite/modules/pair.py | 46 ++++++++++++++ src/tirmite/modules/report.py | 9 +++ src/tirmite/redirect.py | 102 ++++++++++++++++++++++++++++++++ 10 files changed, 404 insertions(+), 1 deletion(-) create mode 100644 src/tirmite/modules/build.py create mode 100644 src/tirmite/modules/classify.py create mode 100644 src/tirmite/modules/compare.py create mode 100644 src/tirmite/modules/extract.py create mode 100644 src/tirmite/modules/find.py create mode 100644 src/tirmite/modules/merge.py create mode 100644 src/tirmite/modules/pair.py create mode 100644 src/tirmite/modules/report.py diff --git a/pyproject.toml b/pyproject.toml index 6686da8..d496125 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,10 +33,10 @@ documentation = "https://github.com/adamtaranto/tirmite" repository = "https://github.com/adamtaranto/tirmite" [project.scripts] -#my-script = "package.module:function" tirmite = "tirmite.cmd_tirmite:main" tsplit-TIR = "tirmite.cmd_TIR:main" + # ... 
other project metadata fields as specified in: # https://packaging.python.org/en/latest/specifications/declaring-project-metadata/ # https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html diff --git a/src/tirmite/modules/build.py b/src/tirmite/modules/build.py new file mode 100644 index 0000000..4fab634 --- /dev/null +++ b/src/tirmite/modules/build.py @@ -0,0 +1,48 @@ +""" + +tirmite build + +Cluster, align TIRs and build HMM + + +Input: +- unaligned TIR sequences, or +- TIR alignment + +Output: +- TIR-HMM +- Report alignment to screen +- Report variation +- Report TIR stats: length, variation, identity, duplicates +- If high diversity suggest making sub-models + +""" + +import argparse +import logging + + +def mainArgs(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Build TIR-HMM from a collection of TIRsequences", + prog="tirmite build", + ) + parser.add_argument( + "--loglevel", + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="Set logging level.", + ) + args = parser.parse_args() + return args + + +def main(): + args = mainArgs() + logging.info("Running analysis") + + +""" + +""" diff --git a/src/tirmite/modules/classify.py b/src/tirmite/modules/classify.py new file mode 100644 index 0000000..26103ee --- /dev/null +++ b/src/tirmite/modules/classify.py @@ -0,0 +1,50 @@ +""" +tirmite classify + +cluster complete elements +search for known transposase domains +update gff with cluster labels + + +Input: +- Element json +- or element fasta +- Clustering identity threshold +- Domain database + +Output: +- Element JSON with cluster labels +- GFF with cluster labels + +""" + +import argparse +import logging + + +def mainArgs(): + """Parse command line arguments.""" + + parser = argparse.ArgumentParser( + description="Cluster elements on identity. Search for TPase domains. 
Apply Wicker classification.", + prog="tirmite classify", + ) + + parser.add_argument( + "--loglevel", + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="Set logging level.", + ) + args = parser.parse_args() + return args + + +def main(): + args = mainArgs() + logging.info("Running analysis") + + +""" + +""" diff --git a/src/tirmite/modules/compare.py b/src/tirmite/modules/compare.py new file mode 100644 index 0000000..9a99a2b --- /dev/null +++ b/src/tirmite/modules/compare.py @@ -0,0 +1,7 @@ +""" + +tirmite compare + +report unique hits in each set + +""" diff --git a/src/tirmite/modules/extract.py b/src/tirmite/modules/extract.py new file mode 100644 index 0000000..ed9161a --- /dev/null +++ b/src/tirmite/modules/extract.py @@ -0,0 +1,48 @@ +""" +tirmite extract + +Extract TIRs from list of elements + + +Input: +- TE sequence + +Output: +- TIR sequences as fasta +- Print sequences to screen +- Print as proportion of input sequence +- Report if TIRs are too short for meaningful HMM hits + + +""" +import argparse +import logging + + +def mainArgs(): + """Parse command line arguments.""" + + parser = argparse.ArgumentParser( + description=" Identify and extract TIR sequences from one or more candidate transposon sequences.", + prog="tirmite extract", + ) + + parser.add_argument( + "--loglevel", + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="Set logging level.", + ) + + args = parser.parse_args() + return args + + +def main(): + args = mainArgs() + logging.info("Running analysis") + + +""" + +""" diff --git a/src/tirmite/modules/find.py b/src/tirmite/modules/find.py new file mode 100644 index 0000000..2c9d9cc --- /dev/null +++ b/src/tirmite/modules/find.py @@ -0,0 +1,49 @@ +""" +tirmite find + +Query genome for HMM hits + + +Input: +- TIR HMM +- Path to genome +- Filtering options +- Genome index + +Output: +- Bedfile of hits coords with quality + strand +- Optional collection of unique hit 
sequences +- Optional add flanks to extraction + +""" + +import argparse +import logging + + +def mainArgs(): + """Parse command line arguments.""" + + parser = argparse.ArgumentParser( + description="Find TIR model matches in a genome.", + prog="tirmite find", + ) + + parser.add_argument( + "--loglevel", + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="Set logging level.", + ) + args = parser.parse_args() + return args + + +def main(): + args = mainArgs() + logging.info("Running analysis") + + +""" + +""" diff --git a/src/tirmite/modules/merge.py b/src/tirmite/modules/merge.py new file mode 100644 index 0000000..971a46c --- /dev/null +++ b/src/tirmite/modules/merge.py @@ -0,0 +1,44 @@ +""" +tirmite merge + +merge two sets of hits + + +Input: +- Multiple hit bedfiles (BLAST, HMM, other) + +Output: +- Bedfile of merged hits + +""" + +import argparse +import logging + + +def mainArgs(): + """Parse command line arguments.""" + + parser = argparse.ArgumentParser( + description="Merge TIR features from diff runs of search methods.", + prog="tirmite merge", + ) + + parser.add_argument( + "--loglevel", + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="Set logging level.", + ) + args = parser.parse_args() + return args + + +def main(): + args = mainArgs() + logging.info("Running analysis") + + +""" + +""" diff --git a/src/tirmite/modules/pair.py b/src/tirmite/modules/pair.py new file mode 100644 index 0000000..66f8e9c --- /dev/null +++ b/src/tirmite/modules/pair.py @@ -0,0 +1,46 @@ +""" +tirmite pair + +Take list of hits (HMM or BLAST) and perform pairing. 
+ + +Input: +- TIR hit file (BED) +- Genome path +- Index path + +Output: +- GFF annotation +- Element JSON file +- TIRs of Paired elements only +- FASTA of elements + +""" + +import argparse +import logging + + +def mainArgs(): + """Parse command line arguments.""" + + parser = argparse.ArgumentParser( + description="Run pairing procedure on TIR hits", + prog="tirmite pair", + ) + + parser.add_argument( + "--loglevel", + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="Set logging level.", + ) + args = parser.parse_args() + return args + + +logging.info("Running analysis") + +""" + +""" diff --git a/src/tirmite/modules/report.py b/src/tirmite/modules/report.py new file mode 100644 index 0000000..5c01dbd --- /dev/null +++ b/src/tirmite/modules/report.py @@ -0,0 +1,9 @@ +""" +tirmite report + +Output gffs for elements and TIRs, +output fasta, +report length distribution, +stats on found elements + +""" diff --git a/src/tirmite/redirect.py b/src/tirmite/redirect.py index 8cbec5d..151474d 100644 --- a/src/tirmite/redirect.py +++ b/src/tirmite/redirect.py @@ -3,3 +3,105 @@ # Call module and pass args # https://stackoverflow.com/questions/3781851/run-a-python-script-from-another-python-script-passing-in-arguments # Check that __name__ updates with redirect + +import sys + +JCVIHELP = "JCVI utility libraries {} [{}]\n".format(__version__, __copyright__) + + +class ActionDispatcher(object): + """ + This class will be invoked + a) when the base package is run via __main__, listing all MODULEs + b) when a directory is run via __main__, listing all SCRIPTs + c) when a script is run directly, listing all ACTIONs + + This is controlled through the meta variable, which is automatically + determined in get_meta(). 
+ """ + + def __init__(self, actions): + self.actions = actions + if not actions: + actions = [(None, None)] + self.valid_actions, self.action_helps = zip(*actions) + + def get_meta(self): + args = splitall(sys.argv[0])[-3:] + args[-1] = args[-1].replace(".py", "") + if args[-2] == "jcvi": + meta = "MODULE" + elif args[-1] == "__main__": + meta = "SCRIPT" + else: + meta = "ACTION" + return meta, args + + def print_help(self): + meta, args = self.get_meta() + if meta == "MODULE": + del args[0] + args[-1] = meta + elif meta == "SCRIPT": + args[-1] = meta + else: + args[-1] += " " + meta + + help = "Usage:\n python -m {0}\n\n\n".format(".".join(args)) + help += "Available {0}s:\n".format(meta) + max_action_len = max(len(action) for action, ah in self.actions) + for action, action_help in sorted(self.actions): + action = action.rjust(max_action_len + 4) + help += ( + " | ".join((action, action_help[0].upper() + action_help[1:])) + "\n" + ) + help += "\n" + JCVIHELP + + sys.stderr.write(help) + sys.exit(1) + + def dispatch(self, globals): + from difflib import get_close_matches + + meta = "ACTION" # function is only invoked for listing ACTIONs + if len(sys.argv) == 1: + self.print_help() + + action = sys.argv[1] + + if not action in self.valid_actions: + print("[error] {0} not a valid {1}\n".format(action, meta), file=sys.stderr) + alt = get_close_matches(action, self.valid_actions) + print( + "Did you mean one of these?\n\t{0}\n".format(", ".join(alt)), + file=sys.stderr, + ) + self.print_help() + + globals[action](sys.argv[2:]) + + +def splitall(path): + allparts = [] + while True: + path, p1 = op.split(path) + if not p1: + break + allparts.append(p1) + allparts = allparts[::-1] + return allparts + + +def main(): + actions = ( + ("tandem", "identify tandem gene groups within certain distance"), + ("ortholog", "run a combined synteny and RBH pipeline to call orthologs"), + ("group", "cluster the anchors into ortho-groups"), + ("omgprepare", "prepare weights file to run 
Sankoff OMG algorithm"), + ("omg", "generate a series of Sankoff OMG algorithm inputs"), + ("omgparse", "parse the OMG outputs to get gene lists"), + ("enrich", "enrich OMG output by pulling genes missed by OMG"), + ("layout", "layout the gene lists"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals())