-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
1 parent
820ac09
commit 99a9e13
Showing
27 changed files
with
4,614 additions
and
2,090 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
Metadata-Version: 1.1 | ||
Name: CLASHChimeras | ||
Version: 0.1b3 | ||
Summary: Python package to find chimeras in CRAC/CLASH and HITS-CLIP datasets | ||
Home-page: https://github.com/kashyapchhatbar/CLASHChimeras | ||
Author: Kashyap Chhatbar | ||
Author-email: [email protected] | ||
License: MIT | ||
Description: UNKNOWN | ||
Keywords: clash chimeras hybrids hits-clip bioinformatics | ||
Platform: UNKNOWN | ||
Classifier: Development Status :: 4 - Beta | ||
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics | ||
Classifier: Topic :: Scientific/Engineering :: Medical Science Apps. | ||
Classifier: License :: OSI Approved :: MIT License | ||
Classifier: Environment :: Console | ||
Classifier: Intended Audience :: Science/Research | ||
Classifier: Intended Audience :: Developers | ||
Classifier: Operating System :: POSIX :: Linux | ||
Classifier: Operating System :: MacOS | ||
Classifier: Programming Language :: Python :: 3 :: Only |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
LICENSE | ||
MANIFEST.in | ||
README.rst | ||
VERSION | ||
requirements.txt | ||
setup.cfg | ||
setup.py | ||
CLASHChimeras.egg-info/PKG-INFO | ||
CLASHChimeras.egg-info/SOURCES.txt | ||
CLASHChimeras.egg-info/dependency_links.txt | ||
CLASHChimeras.egg-info/requires.txt | ||
CLASHChimeras.egg-info/top_level.txt | ||
clashchimeras/__init__.py | ||
clashchimeras/align.py | ||
clashchimeras/download.py | ||
clashchimeras/find.py | ||
clashchimeras/initialize.py | ||
clashchimeras/log.py | ||
clashchimeras/methods.py | ||
clashchimeras/parsers.py | ||
clashchimeras/runners.py | ||
scripts/align-for-chimeras | ||
scripts/download-for-chimeras | ||
scripts/find-chimeras |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
alabaster | ||
ansicolors >= 1.0.2 | ||
beautifulsoup4 >= 4.3.2 | ||
biopython >= 1.65 | ||
coloredlogs >= 1.0.1 | ||
ftputil >= 2.2 | ||
pandas >= 0.16 | ||
progress >= 1.2 | ||
pyfaidx >= 0.3.4 | ||
requests >= 2.7.0 | ||
sphinx-argparse | ||
tabulate >= 0.7.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
clashchimeras |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,207 @@ | ||
import argparse | ||
import sys | ||
|
||
import clashchimeras.log | ||
from clashchimeras.initialize import Arguments | ||
from clashchimeras.parsers import SAM | ||
from clashchimeras.runners import Bowtie, Tophat | ||
|
||
|
||
class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
                      argparse.RawDescriptionHelpFormatter):
    """Help formatter used by the alignment command-line parser.

    Combines two stock argparse formatters: argument defaults are appended
    to each help entry and the description text is emitted without
    re-wrapping. No behaviour is added on top of the two base classes.
    """
|
||
|
||
def parseArguments():
    """Build the command-line parser for the alignment script.

    The parser is assembled from the argument groups produced by
    getRequiredArgs, getBowtie2Args, getTophatArgs, getOutputArgs and
    getOptionalArgs, in that display order.

    Returns:
        argparse.ArgumentParser: the configured parser. Nothing is parsed
        here; main() calls parse_args(), sets up logging and validates the
        arguments.
    """
    requiredArgs = getRequiredArgs()
    bowtieArgs = getBowtie2Args()
    tophatArgs = getTophatArgs()
    optionalArgs = getOptionalArgs()
    outputArgs = getOutputArgs()

    parser = argparse.ArgumentParser(
        parents=[requiredArgs, bowtieArgs, tophatArgs,
                 outputArgs, optionalArgs],
        formatter_class=CustomFormatter,
        description='Given a fastq file, this script executes '
        'bowtie2 and tophat aligners to generate alignment files '
        'necessary for detecting chimeras in the reads',
        usage='\n %(prog)s -i input.fastq -si '
        '/path/to/smallRNA_index -ti /path/to/targetRNA_index -o '
        'output -r bowtie2 \n %(prog)s -i input.fastq -gi '
        '/path/to/genome_index -tri /path/to/transcriptome_index '
        '-o output -r tophat \n \n \n '
        'To see detailed help, please run \n %(prog)s -h',
        add_help=True)

    # Only the parser object is handed back; parsing, logger selection and
    # validation belong to the caller, so no code may follow this return.
    return parser
|
||
|
||
def getRequiredArgs():
    """Return a parent parser holding the mandatory input option.

    The parser is created with add_help=False so it can be passed through
    the ``parents=`` argument of another ArgumentParser.

    Returns:
        argparse.ArgumentParser: parser with the 'Input arguments' group,
        which defines the required ``--input`` / ``-i`` option.
    """
    inputParser = argparse.ArgumentParser(add_help=False)
    inputGroup = inputParser.add_argument_group('Input arguments')

    inputGroup.add_argument(
        '--input', '-i',
        required=True,
        metavar='raw reads',
        help='Input file containing reads fastq')

    return inputParser
|
||
|
||
def getBowtie2Args():
    """Return a parent parser with the Bowtie2 index options.

    Both options are optional strings and default to None when omitted.

    Returns:
        argparse.ArgumentParser: parser (add_help=False) with the
        'Bowtie2 arguments' group defining ``--smallRNAIndex`` / ``-si``
        and ``--targetRNAIndex`` / ``-ti``.
    """
    indexParser = argparse.ArgumentParser(add_help=False)
    indexGroup = indexParser.add_argument_group('Bowtie2 arguments')

    indexGroup.add_argument(
        "--smallRNAIndex", "-si",
        help="Provide the smallRNA bowtie2 index (Usually\n"
             "resides in ~/db/CLASHChimeras or elsewhere if you have\n"
             "specified in --path -pa during initialize)")

    indexGroup.add_argument(
        "--targetRNAIndex", "-ti",
        help="Provide the targetRNA bowtie2 index (Usually\n"
             "resides in ~/db/CLASHChimeras or elsewhere if you have\n"
             "specified in --path -pa during initialize)")

    return indexParser
|
||
|
||
def getTophatArgs():
    """Return a parent parser with the Tophat index options.

    Both options are optional strings and default to None when omitted.

    Returns:
        argparse.ArgumentParser: parser (add_help=False) with the
        'Tophat arguments' group defining ``--genomeIndex`` / ``-gi`` and
        ``--transcriptomeIndex`` / ``-tri``.
    """
    indexParser = argparse.ArgumentParser(add_help=False)
    indexGroup = indexParser.add_argument_group('Tophat arguments')

    indexGroup.add_argument(
        "--genomeIndex", "-gi",
        help="Provide the genome bowtie2 index (Usually\n"
             "resides in ~/db/CLASHChimeras or elsewhere if you have\n"
             "specified in --path during initialize)")

    indexGroup.add_argument(
        "--transcriptomeIndex", "-tri",
        help="Provide the transcriptome index as specified\n"
             "in tophat --transcriptome-index")

    return indexParser
|
||
|
||
def getOutputArgs():
    """Return a parent parser holding the mandatory output prefix option.

    Returns:
        argparse.ArgumentParser: parser (add_help=False) with the 'Output'
        group, which defines the required ``--output`` / ``-o`` option.
    """
    outputParser = argparse.ArgumentParser(add_help=False)
    outputGroup = outputParser.add_argument_group('Output')

    outputGroup.add_argument(
        "--output", "-o",
        required=True,
        metavar='output prefix',
        help="The output name without extension (.sam .bam will\n"
             "be added)")

    return outputParser
|
||
|
||
def getOptionalArgs():
    """Return a parent parser with the optional alignment settings.

    Defines, in this order: ``--run``, ``--logLevel``, ``--gzip``,
    ``--bowtieExecutable``, ``--tophatExecutable``, ``--preset``,
    ``--tophatPreset``, ``--mismatch``, ``--reverseComplement``,
    ``--unaligned`` and ``--threads``.

    Returns:
        argparse.ArgumentParser: parser (add_help=False) with the
        'Optional arguments' group.
    """
    optionalParser = argparse.ArgumentParser(add_help=False)
    group = optionalParser.add_argument_group('Optional arguments')
    addOption = group.add_argument

    # Aligner selection and logging verbosity.
    addOption("--run", "-r",
              choices=['bowtie2', 'tophat'],
              default='bowtie2',
              help="Run the following aligner for raw reads")

    addOption("--logLevel", "-l",
              choices=['INFO', 'DEBUG', 'WARNING', 'ERROR'],
              default='INFO',
              help="Set logging level")

    addOption("--gzip", "-gz",
              action="store_true",
              help="Whether your input file is gzipped")

    # Explicit executable locations.
    addOption("--bowtieExecutable", "-be",
              help="Provide bowtie2 executable if it's not present\n"
                   "in your path")

    addOption("--tophatExecutable", "-te",
              help="Provide Tophat executable if it's not present in\n"
                   "your path")

    # Aligner presets.
    addOption("--preset", "-p",
              choices=['very-fast', 'fast', 'sensitive',
                       'very-sensitive', 'very-fast-local', 'fast-local',
                       'sensitive-local', 'very-sensitive-local'],
              default='sensitive-local',
              help="Provide preset for bowtie2")

    addOption("--tophatPreset", "-tp",
              choices=['very-fast', 'fast', 'sensitive',
                       'very-sensitive'],
              default='very-sensitive',
              help="Provide preset for Tophat")

    addOption("--mismatch", "-m",
              type=int,
              choices=[0, 1],
              default=1,
              help="Number of seed mismatches as represented in\n"
                   "bowtie2 as -N")

    addOption("--reverseComplement", "-rc",
              action="store_true",
              help="Align to reverse complement of reference as\n"
                   "represented in bowtie2 as --norc")

    addOption("--unaligned", "-un",
              action="store_true",
              help="Whether to keep unaligned reads in the output\n"
                   "sam file. Represented in bowtie2 as --no-unal")

    # No type= is given, so a value supplied on the command line stays a
    # string while the default is the integer 1.
    addOption("--threads", "-n",
              default=1,
              help="Specify the number of threads")

    return optionalParser
|
||
|
||
def main():
    """Parse the command line, validate it and run the selected aligner.

    Steps performed:
      1. Build the parser with parseArguments() and parse sys.argv.
      2. Create the 'root' logger through the clashchimeras.log factory
         that matches --logLevel (info_logger for any other value).
      3. Validate the parsed arguments via Arguments(args).validateAlign().
      4. For 'bowtie2': run Bowtie with output='smallRNA', load its
         outputSam into SAM, call filterPotentialChimeras on it and run
         Bowtie again with output='targetRNA' on the filtered result.
         For 'tophat': run Tophat once.
    """
    args = parseArguments().parse_args()

    # Resolve the factory lazily by name so that only the factory for the
    # requested level is ever looked up on clashchimeras.log.
    factoryNames = {
        'DEBUG': 'debug_logger',
        'WARNING': 'warning_logger',
        'ERROR': 'error_logger',
    }
    factoryName = factoryNames.get(args.logLevel, 'info_logger')
    logger = getattr(clashchimeras.log, factoryName)('root')

    validator = Arguments(args)
    validator.validateAlign()

    if args.run == 'bowtie2':
        bowtie = Bowtie(args=args)
        bowtie.run(output='smallRNA')
        smallRnaSam = SAM(fileName=bowtie.outputSam)
        filteredFasta = smallRnaSam.filterPotentialChimeras(
            target=bowtie.outputSam)
        bowtie.run(output='targetRNA', filtered=filteredFasta)

    if args.run == 'tophat':
        tophat = Tophat(args=args)
        tophat.run()


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
import argparse | ||
import os | ||
import textwrap | ||
|
||
import clashchimeras.log | ||
from clashchimeras.initialize import Releases, Index | ||
|
||
|
||
class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
                      argparse.RawDescriptionHelpFormatter):
    """Help formatter used by the download command-line parser.

    Combines two stock argparse formatters: argument defaults are appended
    to each help entry and the description text is emitted without
    re-wrapping. No behaviour is added on top of the two base classes.
    """
|
||
|
||
def parseArguments():
    """Build the command-line parser for the download script.

    The parser inherits its options from getRequiredArgs, getOutputArgs
    and getOptionalArgs (in that display order) and formats help with
    CustomFormatter.

    Returns:
        argparse.ArgumentParser: the configured parser; nothing is parsed
        here.
    """
    parentParsers = [getRequiredArgs(), getOutputArgs(), getOptionalArgs()]

    summary = textwrap.dedent(
        "Downloads required sequences and create bowtie2 indexes\n"
        "required for alignment")

    return argparse.ArgumentParser(
        parents=parentParsers,
        formatter_class=CustomFormatter,
        description=summary,
        usage='An example usage is: %(prog)s -gor "H.sapiens" -mor hsa',
        add_help=True)
|
||
|
||
def getRequiredArgs():
    """Return a parent parser with the two mandatory organism options.

    Returns:
        argparse.ArgumentParser: parser (add_help=False) with the
        'Required arguments' group defining ``--gencodeOrganism`` / ``-gor``
        and ``--mirbaseOrganism`` / ``-mor``.
    """
    organismParser = argparse.ArgumentParser(add_help=False)
    organismGroup = organismParser.add_argument_group('Required arguments')

    # NOTE: both options combine required=True with a default. argparse
    # rejects the command line when a required option is missing, so the
    # defaults are never applied to the parsed namespace.
    organismGroup.add_argument(
        "--gencodeOrganism", "-gor",
        choices=["H.sapiens", "M.musculus"],
        default="H.sapiens",
        required=True,
        help="Select model organism")

    organismGroup.add_argument(
        "--mirbaseOrganism", "-mor",
        default='hsa',
        required=True,
        help="Select organism to download microRNAs for")

    return organismParser
|
||
|
||
def getOutputArgs():
    """Return a parent parser with the download destination option.

    Returns:
        argparse.ArgumentParser: parser (add_help=False) with the 'Output'
        group, which defines ``--path`` / ``-pa`` (default
        '~/db/CLASHChimeras').
    """
    pathParser = argparse.ArgumentParser(add_help=False)
    pathGroup = pathParser.add_argument_group('Output')

    pathGroup.add_argument(
        "--path", "-pa",
        default='~/db/CLASHChimeras',
        metavar='path',
        help="Location where all the database files and\n"
             "indexes will be downloaded")

    return pathParser
|
||
|
||
def getOptionalArgs():
    """Return a parent parser with the optional download settings.

    Defines, in this order: ``--logLevel``, ``--bowtieExecutable``,
    ``--tophatExecutable`` and ``--miRNA``.

    Returns:
        argparse.ArgumentParser: parser (add_help=False) with the
        'Optional arguments' group.
    """
    optionalParser = argparse.ArgumentParser(add_help=False)
    group = optionalParser.add_argument_group('Optional arguments')
    addOption = group.add_argument

    addOption("--logLevel", "-l",
              choices=['INFO', 'DEBUG', 'WARNING', 'ERROR'],
              default='INFO',
              help="Set logging level")

    addOption("--bowtieExecutable", "-be",
              help="Provide bowtie2 executable if it's not present\n"
                   "in your path")

    addOption("--tophatExecutable", "-te",
              help="Provide Tophat executable if it's not present in\n"
                   "your path")

    addOption("--miRNA", "-mi",
              choices=['mature', 'hairpin'],
              default='hairpin',
              help="Which miRNA sequences to align")

    return optionalParser
|
||
|
||
def _createAndReportIndexes(root, logger):
    """Create the indexes under *root* and log every entry that was created.

    Args:
        root: value passed to Index(root=...).
        logger: logger used to report the created indexes.
    """
    index = Index(root=root)
    index.create()

    # Logger.warn is a deprecated alias of Logger.warning.
    # NOTE(review): assumes the clashchimeras.log factories return standard
    # logging.Logger objects - confirm.
    logger.warning('Please find the indexes created listed below..')
    logger.warning('Use them when you run align-for-chimeras')

    for created in index.created:
        logger.warning(created)


def main():
    """Parse the command line and build the indexes for both databases.

    Steps performed:
      1. Build the parser with parseArguments() and parse sys.argv.
      2. Create the 'root' logger through the clashchimeras.log factory
         that matches --logLevel (info_logger for any other value).
      3. Instantiate Releases with the chosen organisms and the
         user-expanded --path (presumably this is where the sequences are
         fetched - confirm in clashchimeras.initialize).
      4. Create and report the indexes rooted at Releases.gencodePath and
         then at Releases.mirbasePath.
    """
    parser = parseArguments()
    args = parser.parse_args()

    # Resolve the factory lazily by name so that only the factory for the
    # requested level is ever looked up on clashchimeras.log.
    factoryNames = {
        'DEBUG': 'debug_logger',
        'WARNING': 'warning_logger',
        'ERROR': 'error_logger',
    }
    factoryName = factoryNames.get(args.logLevel, 'info_logger')
    logger = getattr(clashchimeras.log, factoryName)('root')

    r = Releases(gencodeOrganism=args.gencodeOrganism,
                 mirbaseOrganism=args.mirbaseOrganism,
                 path=os.path.expanduser(args.path))

    # Same order as before: gencode first, then mirbase.
    _createAndReportIndexes(r.gencodePath, logger)
    _createAndReportIndexes(r.mirbasePath, logger)


if __name__ == "__main__":
    main()
Oops, something went wrong.