Skip to content

Commit

Permalink
init new structure
Browse files Browse the repository at this point in the history
  • Loading branch information
Adamtaranto committed Oct 28, 2024
1 parent 995149f commit a591522
Show file tree
Hide file tree
Showing 10 changed files with 404 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ documentation = "https://github.com/adamtaranto/tirmite"
repository = "https://github.com/adamtaranto/tirmite"

[project.scripts]
#my-script = "package.module:function"
tirmite = "tirmite.cmd_tirmite:main"
tsplit-TIR = "tirmite.cmd_TIR:main"


# ... other project metadata fields as specified in:
# https://packaging.python.org/en/latest/specifications/declaring-project-metadata/
# https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html
Expand Down
48 changes: 48 additions & 0 deletions src/tirmite/modules/build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
tirmite build
Cluster, align TIRs and build HMM
Input:
- unaligned TIR sequences, or
- TIR alignment
Output:
- TIR-HMM
- Report alignment to screen
- Report variation
- Report TIR stats: length, variation, identity, duplicates
- If high diversity suggest making sub-models
"""

import argparse
import logging


def mainArgs(argv=None):
    """Parse command line arguments for ``tirmite build``.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing zero-argument callers behave as before.

    Returns:
        argparse.Namespace with a ``loglevel`` attribute set to one of
        "DEBUG", "INFO", "WARNING", "ERROR" or "CRITICAL".
    """
    parser = argparse.ArgumentParser(
        description="Build TIR-HMM from a collection of TIR sequences",
        prog="tirmite build",
    )
    parser.add_argument(
        "--loglevel",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        help="Set logging level.",
    )
    # Passing argv through lets callers and tests supply their own arguments.
    return parser.parse_args(argv)


def main():
    """Entry point for ``tirmite build``.

    Parses the command line, configures the root logger at the level
    requested via ``--loglevel`` and logs a start-up message.
    """
    args = mainArgs()
    # Apply the parsed level: with an unconfigured root logger (default
    # WARNING) the INFO message below would be silently dropped.
    logging.basicConfig(level=args.loglevel)
    logging.info("Running analysis")


"""
"""
50 changes: 50 additions & 0 deletions src/tirmite/modules/classify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
tirmite classify
cluster complete elements
search for known transposase domains
update gff with cluster labels
Input:
- Element json
- or element fasta
- Clustering identity threshold
- Domain database
Output:
- Element JSON with cluster labels
- GFF with cluster labels
"""

import argparse
import logging


def mainArgs(argv=None):
    """Parse command line arguments for ``tirmite classify``.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing zero-argument callers behave as before.

    Returns:
        argparse.Namespace with a ``loglevel`` attribute set to one of
        "DEBUG", "INFO", "WARNING", "ERROR" or "CRITICAL".
    """
    parser = argparse.ArgumentParser(
        description="Cluster elements on identity. Search for TPase domains. Apply Wicker classification.",
        prog="tirmite classify",
    )
    parser.add_argument(
        "--loglevel",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        help="Set logging level.",
    )
    # Passing argv through lets callers and tests supply their own arguments.
    return parser.parse_args(argv)


def main():
    """Entry point for ``tirmite classify``.

    Parses the command line, configures the root logger at the level
    requested via ``--loglevel`` and logs a start-up message.
    """
    args = mainArgs()
    # Apply the parsed level: with an unconfigured root logger (default
    # WARNING) the INFO message below would be silently dropped.
    logging.basicConfig(level=args.loglevel)
    logging.info("Running analysis")


"""
"""
7 changes: 7 additions & 0 deletions src/tirmite/modules/compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""
tirmite compare
report unique hits in each set
"""
48 changes: 48 additions & 0 deletions src/tirmite/modules/extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
tirmite extract
Extract TIRs from list of elements
Input:
- TE sequence
Output:
- TIR sequences as fasta
- Print sequences to screen
- Print as proportion of input sequence
- Report if TIRs are too short for meaningful HMM hits
"""
import argparse
import logging


def mainArgs(argv=None):
    """Parse command line arguments for ``tirmite extract``.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing zero-argument callers behave as before.

    Returns:
        argparse.Namespace with a ``loglevel`` attribute set to one of
        "DEBUG", "INFO", "WARNING", "ERROR" or "CRITICAL".
    """
    parser = argparse.ArgumentParser(
        description=" Identify and extract TIR sequences from one or more candidate transposon sequences.",
        prog="tirmite extract",
    )
    parser.add_argument(
        "--loglevel",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        help="Set logging level.",
    )
    # Passing argv through lets callers and tests supply their own arguments.
    return parser.parse_args(argv)


def main():
    """Entry point for ``tirmite extract``.

    Parses the command line, configures the root logger at the level
    requested via ``--loglevel`` and logs a start-up message.
    """
    args = mainArgs()
    # Apply the parsed level: with an unconfigured root logger (default
    # WARNING) the INFO message below would be silently dropped.
    logging.basicConfig(level=args.loglevel)
    logging.info("Running analysis")


"""
"""
49 changes: 49 additions & 0 deletions src/tirmite/modules/find.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
tirmite find
Query genome for HMM hits
Input:
- TIR HMM
- Path to genome
- Filtering options
- Genome index
Output:
- Bedfile of hits coords with quality + strand
- Optional collection of unique hit sequences
- Optional add flanks to extraction
"""

import argparse
import logging


def mainArgs(argv=None):
    """Parse command line arguments for ``tirmite find``.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing zero-argument callers behave as before.

    Returns:
        argparse.Namespace with a ``loglevel`` attribute set to one of
        "DEBUG", "INFO", "WARNING", "ERROR" or "CRITICAL".
    """
    parser = argparse.ArgumentParser(
        description="Find TIR model matches in a genome.",
        prog="tirmite find",
    )
    parser.add_argument(
        "--loglevel",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        help="Set logging level.",
    )
    # Passing argv through lets callers and tests supply their own arguments.
    return parser.parse_args(argv)


def main():
    """Entry point for ``tirmite find``.

    Parses the command line, configures the root logger at the level
    requested via ``--loglevel`` and logs a start-up message.
    """
    args = mainArgs()
    # Apply the parsed level: with an unconfigured root logger (default
    # WARNING) the INFO message below would be silently dropped.
    logging.basicConfig(level=args.loglevel)
    logging.info("Running analysis")


"""
"""
44 changes: 44 additions & 0 deletions src/tirmite/modules/merge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
tirmite merge
merge two sets of hits
Input:
- Multiple hit bedfiles (BLAST, HMM, other)
Output:
- Bedfile of merged hits
"""

import argparse
import logging


def mainArgs(argv=None):
    """Parse command line arguments for ``tirmite merge``.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing zero-argument callers behave as before.

    Returns:
        argparse.Namespace with a ``loglevel`` attribute set to one of
        "DEBUG", "INFO", "WARNING", "ERROR" or "CRITICAL".
    """
    parser = argparse.ArgumentParser(
        description="Merge TIR features from diff runs of search methods.",
        prog="tirmite merge",
    )
    parser.add_argument(
        "--loglevel",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        help="Set logging level.",
    )
    # Passing argv through lets callers and tests supply their own arguments.
    return parser.parse_args(argv)


def main():
    """Entry point for ``tirmite merge``.

    Parses the command line, configures the root logger at the level
    requested via ``--loglevel`` and logs a start-up message.
    """
    args = mainArgs()
    # Apply the parsed level: with an unconfigured root logger (default
    # WARNING) the INFO message below would be silently dropped.
    logging.basicConfig(level=args.loglevel)
    logging.info("Running analysis")


"""
"""
46 changes: 46 additions & 0 deletions src/tirmite/modules/pair.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
tirmite pair
Take list of hits (HMM or BLAST) and perform pairing.
Input:
- TIR hit file (BED)
- Genome path
- Index path
Output:
- GFF annotation
- Element JSON file
- TIRs of Paired elements only
- FASTA of elements
"""

import argparse
import logging


def mainArgs(argv=None):
    """Parse command line arguments for ``tirmite pair``.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing zero-argument callers behave as before.

    Returns:
        argparse.Namespace with a ``loglevel`` attribute set to one of
        "DEBUG", "INFO", "WARNING", "ERROR" or "CRITICAL".
    """
    parser = argparse.ArgumentParser(
        description="Run pairing procedure on TIR hits",
        prog="tirmite pair",
    )
    parser.add_argument(
        "--loglevel",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        help="Set logging level.",
    )
    # Passing argv through lets callers and tests supply their own arguments.
    return parser.parse_args(argv)


def main():
    """Entry point for ``tirmite pair``.

    Parses the command line, configures the root logger at the level
    requested via ``--loglevel`` and logs a start-up message.

    NOTE(review): the original file issued the log call as a bare
    statement with no enclosing ``main``; it is wrapped here so the log
    call no longer runs at import time and the module has an entry point.
    """
    args = mainArgs()
    # Apply the parsed level: with an unconfigured root logger (default
    # WARNING) the INFO message below would be silently dropped.
    logging.basicConfig(level=args.loglevel)
    logging.info("Running analysis")

"""
"""
9 changes: 9 additions & 0 deletions src/tirmite/modules/report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""
tirmite report
Output gffs for elements and TIRs,
output fasta,
report length distribution,
stats on found elements
"""
Loading

0 comments on commit a591522

Please sign in to comment.