diff --git a/CLASHChimeras.egg-info/PKG-INFO b/CLASHChimeras.egg-info/PKG-INFO
new file mode 100644
index 0000000..39f039d
--- /dev/null
+++ b/CLASHChimeras.egg-info/PKG-INFO
@@ -0,0 +1,21 @@
+Metadata-Version: 1.1
+Name: CLASHChimeras
+Version: 0.1b3
+Summary: Python package to find chimeras in CRAC/CLASH and HITS-CLIP datasets
+Home-page: https://github.com/kashyapchhatbar/CLASHChimeras
+Author: Kashyap Chhatbar
+Author-email: kashyap.c@ed.ac.uk
+License: MIT
+Description: UNKNOWN
+Keywords: clash chimeras hybrids hits-clip bioinformatics
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Science/Research
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Operating System :: MacOS
+Classifier: Programming Language :: Python :: 3 :: Only
diff --git a/CLASHChimeras.egg-info/SOURCES.txt b/CLASHChimeras.egg-info/SOURCES.txt
new file mode 100644
index 0000000..e23791f
--- /dev/null
+++ b/CLASHChimeras.egg-info/SOURCES.txt
@@ -0,0 +1,24 @@
+LICENSE
+MANIFEST.in
+README.rst
+VERSION
+requirements.txt
+setup.cfg
+setup.py
+CLASHChimeras.egg-info/PKG-INFO
+CLASHChimeras.egg-info/SOURCES.txt
+CLASHChimeras.egg-info/dependency_links.txt
+CLASHChimeras.egg-info/requires.txt
+CLASHChimeras.egg-info/top_level.txt
+clashchimeras/__init__.py
+clashchimeras/align.py
+clashchimeras/download.py
+clashchimeras/find.py
+clashchimeras/initialize.py
+clashchimeras/log.py
+clashchimeras/methods.py
+clashchimeras/parsers.py
+clashchimeras/runners.py
+scripts/align-for-chimeras
+scripts/download-for-chimeras
+scripts/find-chimeras
\ No newline at end of file
diff --git a/CLASHChimeras.egg-info/dependency_links.txt b/CLASHChimeras.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/CLASHChimeras.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/CLASHChimeras.egg-info/requires.txt b/CLASHChimeras.egg-info/requires.txt
new file mode 100644
index 0000000..35f2e18
--- /dev/null
+++ b/CLASHChimeras.egg-info/requires.txt
@@ -0,0 +1,12 @@
+alabaster
+ansicolors >= 1.0.2
+beautifulsoup4 >= 4.3.2
+biopython >= 1.65
+coloredlogs >= 1.0.1
+ftputil >= 2.2
+pandas >= 0.16
+progress >= 1.2
+pyfaidx >= 0.3.4
+requests >= 2.7.0
+sphinx-argparse
+tabulate >= 0.7.5
diff --git a/CLASHChimeras.egg-info/top_level.txt b/CLASHChimeras.egg-info/top_level.txt
new file mode 100644
index 0000000..7ee7188
--- /dev/null
+++ b/CLASHChimeras.egg-info/top_level.txt
@@ -0,0 +1 @@
+clashchimeras
diff --git a/build/lib/clashchimeras/__init__.py b/build/lib/clashchimeras/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/build/lib/clashchimeras/align.py b/build/lib/clashchimeras/align.py
new file mode 100644
index 0000000..8f2efc9
--- /dev/null
+++ b/build/lib/clashchimeras/align.py
@@ -0,0 +1,207 @@
+import argparse
+import sys
+
+import clashchimeras.log
+from clashchimeras.initialize import Arguments
+from clashchimeras.parsers import SAM
+from clashchimeras.runners import Bowtie, Tophat
+
+
+class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
+                      argparse.RawDescriptionHelpFormatter):
+    pass
+
+
+def parseArguments():
+    requiredArgs = getRequiredArgs()
+    bowtieArgs = getBowtie2Args()
+    tophatArgs = getTophatArgs()
+    optionalArgs = getOptionalArgs()
+    outputArgs = getOutputArgs()
+
+    parser = argparse.ArgumentParser(
+        parents=[requiredArgs, bowtieArgs, tophatArgs,
+                 outputArgs, optionalArgs],
+        formatter_class=CustomFormatter,
+        description='Given a fastq file, this script executes '
+        'bowtie2 and tophat aligners to generate alignment files '
+        'necessary for detecting chimeras in the reads',
+        usage='\n %(prog)s -i input.fastq -si '
+        '/path/to/smallRNA_index -ti /path/to/targetRNA_index -o '
+        'output -r bowtie2 \n %(prog)s -i input.fastq -gi '
+        '/path/to/genome_index -tri /path/to/transcriptome_index '
+        '-o output -r tophat \n \n \n '
+        'To see detailed help, please run \n %(prog)s -h',
+        add_help=True)
+
+    return parser
+
+    args = parser.parse_args()
+
+    if args.logLevel == 'DEBUG':
+        logger = clashchimeras.log.debug_logger('root')
+    elif args.logLevel == 'WARNING':
+        logger = clashchimeras.log.warning_logger('root')
+    elif args.logLevel == 'ERROR':
+        logger = clashchimeras.log.error_logger('root')
+    else:
+        logger = clashchimeras.log.info_logger('root')
+
+    argCheck = Arguments(args, type='align')
+    argCheck.validateAlign()
+
+    return args
+
+
+def getRequiredArgs():
+    parser = argparse.ArgumentParser(add_help=False)
+
+    required = parser.add_argument_group('Input arguments')
+
+    required.add_argument('--input', '-i',
+                          help='Input file containing reads fastq',
+                          metavar='raw reads',
+                          required=True)
+
+    return parser
+
+
+def getBowtie2Args():
+    parser = argparse.ArgumentParser(add_help=False)
+
+    bowtieArgs = parser.add_argument_group('Bowtie2 arguments')
+
+    bowtieArgs.add_argument("--smallRNAIndex", "-si",
+                            help="""Provide the smallRNA bowtie2 index (Usually
+                        resides in ~/db/CLASHChimeras or elsewhere if you have
+                        specified in --path -pa during initialize)""")
+
+    bowtieArgs.add_argument("--targetRNAIndex", "-ti",
+                            help="""Provide the targetRNA bowtie2 index (Usually
+                        resides in ~/db/CLASHChimeras or elsewhere if you have
+                        specified in --path -pa during initialize)""")
+
+    return parser
+
+
+def getTophatArgs():
+    parser = argparse.ArgumentParser(add_help=False)
+
+    tophatArgs = parser.add_argument_group('Tophat arguments')
+
+    tophatArgs.add_argument("--genomeIndex", "-gi",
+                            help="""Provide the genome bowtie2 index (Usually
+                        resides in ~/db/CLASHChimeras or elsewhere if you have
+                        specified in --path during initialize)""")
+
+    tophatArgs.add_argument("--transcriptomeIndex", "-tri",
+                            help="""Provide the transcriptome index as specified
+                        in tophat --transcriptome-index""")
+
+    return parser
+
+
+def getOutputArgs():
+    parser = argparse.ArgumentParser(add_help=False)
+
+    output = parser.add_argument_group('Output')
+
+    output.add_argument("--output", "-o",
+                        help="""The output name without extension (.sam .bam will
+                      be added)""",
+                        metavar='output prefix',
+                        required=True)
+
+    return parser
+
+
+def getOptionalArgs():
+    parser = argparse.ArgumentParser(add_help=False)
+
+    optional = parser.add_argument_group('Optional arguments')
+
+    optional.add_argument("--run", "-r",
+                          help="Run the following aligner for raw reads",
+                          default='bowtie2',
+                          choices=['bowtie2', 'tophat'])
+
+    optional.add_argument("--logLevel", "-l",
+                          help="Set logging level",
+                          default='INFO',
+                          choices=['INFO', 'DEBUG', 'WARNING', 'ERROR'])
+
+    optional.add_argument("--gzip", "-gz", action="store_true",
+                          help="Whether your input file is gzipped")
+
+    optional.add_argument("--bowtieExecutable", "-be",
+                          help="""Provide bowtie2 executable if it's not present
+                        in your path""")
+
+    optional.add_argument("--tophatExecutable", "-te",
+                          help="""Provide Tophat executable if it's not present in
+                        your path""")
+
+    optional.add_argument("--preset", "-p", default='sensitive-local',
+                          choices=['very-fast', 'fast', 'sensitive',
+                                   'very-sensitive', 'very-fast-local', 'fast-local',
+                                   'sensitive-local', 'very-sensitive-local'],
+                          help="Provide preset for bowtie2")
+
+    optional.add_argument("--tophatPreset", "-tp",
+                          choices=['very-fast', 'fast', 'sensitive',
+                                   'very-sensitive'],
+                          default='very-sensitive',
+                          help="Provide preset for Tophat")
+
+    optional.add_argument("--mismatch", "-m", type=int,
+                          choices=[0, 1], default=1,
+                          help="""Number of seed mismatches as represented in
+                        bowtie2 as -N""")
+
+    optional.add_argument("--reverseComplement", "-rc",
+                          action="store_true",
+                          help="""Align to reverse complement of reference as
+                        represented in bowtie2 as --norc""")
+
+    optional.add_argument("--unaligned", "-un",
+                          action="store_true",
+                          help="""Whether to keep unaligned reads in the output
+                        sam file. Represented in bowtie2 as --no-unal""")
+
+    optional.add_argument("--threads", "-n", default=1,
+                          help="Specify the number of threads")
+
+    return parser
+
+
+def main():
+
+    parser = parseArguments()
+
+    args = parser.parse_args()
+
+    if args.logLevel == 'DEBUG':
+        logger = clashchimeras.log.debug_logger('root')
+    elif args.logLevel == 'WARNING':
+        logger = clashchimeras.log.warning_logger('root')
+    elif args.logLevel == 'ERROR':
+        logger = clashchimeras.log.error_logger('root')
+    else:
+        logger = clashchimeras.log.info_logger('root')
+
+    argCheck = Arguments(args)
+    argCheck.validateAlign()
+
+    if args.run == 'bowtie2':
+        b = Bowtie(args=args)
+        b.run(output='smallRNA')
+        s = SAM(fileName=b.outputSam)
+        filteredFasta = s.filterPotentialChimeras(target=b.outputSam)
+        b.run(output='targetRNA', filtered=filteredFasta)
+
+    if args.run == 'tophat':
+        t = Tophat(args=args)
+        t.run()
+
+if __name__ == "__main__":
+    main()
diff --git a/build/lib/clashchimeras/download.py b/build/lib/clashchimeras/download.py
new file mode 100644
index 0000000..ef767b6
--- /dev/null
+++ b/build/lib/clashchimeras/download.py
@@ -0,0 +1,126 @@
+import argparse
+import os
+import textwrap
+
+import clashchimeras.log
+from clashchimeras.initialize import Releases, Index
+
+
+class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
+                      argparse.RawDescriptionHelpFormatter):
+    pass
+
+
+def parseArguments():
+    requiredArgs = getRequiredArgs()
+    optionalArgs = getOptionalArgs()
+    outputArgs = getOutputArgs()
+
+    parser = argparse.ArgumentParser(
+        parents=[requiredArgs, outputArgs, optionalArgs],
+        formatter_class=CustomFormatter,
+        description=textwrap.dedent("""\
+              Downloads required sequences and create bowtie2 indexes
+              required for alignment"""),
+        usage='An example usage is: %(prog)s -gor "H.sapiens" -mor hsa',
+        add_help=True)
+
+    return parser
+
+
+def getRequiredArgs():
+    parser = argparse.ArgumentParser(add_help=False)
+
+    required = parser.add_argument_group('Required arguments')
+
+    required.add_argument("--gencodeOrganism", "-gor",
+                          default="H.sapiens", choices=["H.sapiens",
+                                                        "M.musculus"],
+                          help="""Select model organism""",
+                          required=True)
+
+    required.add_argument("--mirbaseOrganism", "-mor",
+                          default='hsa',
+                          help="""Select organism to download microRNAs for""",
+                          required=True)
+
+    return parser
+
+
+def getOutputArgs():
+    parser = argparse.ArgumentParser(add_help=False)
+
+    output = parser.add_argument_group('Output')
+
+    output.add_argument("--path", "-pa",
+                        help="""Location where all the database files and
+                      indexes will be downloaded""",
+                        default='~/db/CLASHChimeras',
+                        metavar='path')
+
+    return parser
+
+
+def getOptionalArgs():
+    parser = argparse.ArgumentParser(add_help=False)
+
+    optional = parser.add_argument_group('Optional arguments')
+
+    optional.add_argument("--logLevel", "-l",
+                          help="Set logging level",
+                          default='INFO',
+                          choices=['INFO', 'DEBUG', 'WARNING', 'ERROR'])
+
+    optional.add_argument("--bowtieExecutable", "-be",
+                          help="""Provide bowtie2 executable if it's not present
+                        in your path""")
+
+    optional.add_argument("--tophatExecutable", "-te",
+                          help="""Provide Tophat executable if it's not present in
+                        your path""")
+
+    optional.add_argument("--miRNA", "-mi",
+                          choices=['mature', 'hairpin'], default='hairpin',
+                          help="Which miRNA sequences to align")
+
+    return parser
+
+
+def main():
+    parser = parseArguments()
+
+    args = parser.parse_args()
+
+    if args.logLevel == 'DEBUG':
+        logger = clashchimeras.log.debug_logger('root')
+    elif args.logLevel == 'WARNING':
+        logger = clashchimeras.log.warning_logger('root')
+    elif args.logLevel == 'ERROR':
+        logger = clashchimeras.log.error_logger('root')
+    else:
+        logger = clashchimeras.log.info_logger('root')
+
+    r = Releases(gencodeOrganism=args.gencodeOrganism,
+                 mirbaseOrganism=args.mirbaseOrganism,
+                 path=os.path.expanduser(args.path))
+
+    i = Index(root=r.gencodePath)
+    i.create()
+
+    logger.warn('Please find the indexes created listed below..')
+    logger.warn('Use them when you run align-for-chimeras')
+
+    for _i in i.created:
+        logger.warn(_i)
+
+    i = Index(root=r.mirbasePath)
+    i.create()
+
+    logger.warn('Please find the indexes created listed below..')
+    logger.warn('Use them when you run align-for-chimeras')
+
+    for _i in i.created:
+        logger.warn(_i)
+
+if __name__ == "__main__":
+    main()
diff --git a/build/lib/clashchimeras/find.py b/build/lib/clashchimeras/find.py
new file mode 100644
index 0000000..025a0d3
--- /dev/null
+++ b/build/lib/clashchimeras/find.py
@@ -0,0 +1,218 @@
+import argparse
+import textwrap
+import sys
+
+from colors import yellow
+
+import clashchimeras.log
+import clashchimeras.methods
+from clashchimeras.parsers import GFF, GTF, Output, SAM
+from clashchimeras.initialize import Arguments
+
+
+class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
+                      argparse.RawDescriptionHelpFormatter):
+    pass
+
+
+def parseArguments():
+    requiredArgs = getRequiredArgs()
+    optionalArgs = getOptionalArgs()
+    outputArgs = getOutputArgs()
+
+    parser = argparse.ArgumentParser(
+        parents=[requiredArgs, outputArgs, optionalArgs],
+        formatter_class=CustomFormatter,
+
+        usage='An example usage is: %(prog)s -s smallRNA.sam -t '
+        'targetRNA.sam -o output',
+
+        description=textwrap.dedent('''\
+                Given two SAM files, this script tries to find chimeras that
+                are observed between a smallRNA and a targetRNA'''),
+
+        add_help=True)
+
+    return parser
+
+
+def getRequiredArgs():
+    parser = argparse.ArgumentParser(add_help=False)
+
+    required = parser.add_argument_group('Required arguments')
+
+    required.add_argument('--smallRNA', '-s',
+                          help='Provide smallRNA alignment SAM file',
+                          metavar='SAM file',
+                          required=True)
+    required.add_argument('--targetRNA', '-t',
+                          help='Provide targetRNA alignment SAM file',
+                          metavar='SAM file',
+                          required=True)
+
+    required.add_argument("--getGenomicLocationsSmallRNA", "-ggs",
+                          help="Do you want genomic locations for small RNA?",
+                          action="store_true")
+
+    required.add_argument("--getGenomicLocationsTargetRNA", "-ggt",
+                          help="Do you want genomic locations for target RNA?",
+                          action="store_true")
+
+    required.add_argument("--smallRNAAnnotation", "-sa",
+                          action="store",
+                          help="""Provide smallRNA annotation gtf(from Gencode)
+                        or gff3(from Mirbase). Only provide gtf from Gencode or
+                        gff3 from Mirbase. Does not support other gtf files""")
+
+    required.add_argument("--targetRNAAnnotation", "-ta",
+                          action="store",
+                          help="""Provide targetRNA annotation gtf(from Gencode).
+                        Only provide gtf from Gencode. Does not support other
+                        gtf files""")
+
+    return parser
+
+
+def getOutputArgs():
+    parser = argparse.ArgumentParser(add_help=False)
+
+    output = parser.add_argument_group('Output')
+
+    output.add_argument("--output", "-o",
+                        help="""The output name without extension (.bed .csv will
+                      be added)""",
+                        metavar='output prefix',
+                        required=True)
+
+    return parser
+
+
+def getOptionalArgs():
+    parser = argparse.ArgumentParser(add_help=False)
+
+    optional = parser.add_argument_group('Optional arguments')
+
+    optional.add_argument("--overlap", "-ov",
+                          help="""Maximum overlap to be set between two
+                        molecules when determining chimeras""",
+                          default=4)
+
+    optional.add_argument("--gap", "-ga",
+                          help="""Maximum gap (number of unaligned nucleotides) to
+                        allowed between two molecules within a chimera""",
+                          default=9)
+
+    optional.add_argument("--logLevel", "-l",
+                          help="Set logging level",
+                          default='INFO',
+                          choices=['INFO', 'DEBUG', 'WARNING', 'ERROR'])
+
+    return parser
+
+
+def main():
+    """The main function to execute when you run the program as a script
+    """
+    parser = parseArguments()
+
+    args = parser.parse_args()
+
+    if args.logLevel == 'DEBUG':
+        logger = clashchimeras.log.debug_logger('root')
+    elif args.logLevel == 'WARNING':
+        logger = clashchimeras.log.warning_logger('root')
+    elif args.logLevel == 'ERROR':
+        logger = clashchimeras.log.error_logger('root')
+    else:
+        logger = clashchimeras.log.info_logger('root')
+
+    argCheck = Arguments(args, type='find')
+    argCheck.validateFind()
+
+    smallRNA = SAM(fileName=args.smallRNA)
+    smallRNA.read()
+
+    targetRNA = SAM(fileName=args.targetRNA)
+    targetRNA.read()
+
+    if args.getGenomicLocationsSmallRNA:
+
+        if args.smallRNAAnnotation.endswith('gff3'):
+
+            smallRNAAnnotation = GFF(fileName=args.smallRNAAnnotation)
+            smallRNAAnnotation.read()
+
+        elif args.smallRNAAnnotation.endswith('gtf'):
+            smallRNAAnnotation = GTF(fileName=args.smallRNAAnnotation)
+            if 'tRNAs' in args.smallRNAAnnotation:
+                smallRNAAnnotation.read(featureType='tRNAscan')
+            else:
+                smallRNAAnnotation.read()
+
+    if args.getGenomicLocationsTargetRNA:
+        if args.targetRNAAnnotation.endswith('gtf'):
+            targetRNAAnnotation = GTF(fileName=args.targetRNAAnnotation)
+            targetRNAAnnotation.read()
+
+    output = Output(target=args.output,
+                    smallRNABed=args.getGenomicLocationsSmallRNA,
+                    targetRNABed=args.getGenomicLocationsTargetRNA,
+                    overlap=args.overlap,
+                    gap=args.gap)
+
+    common = set(smallRNA.records.keys()).intersection(targetRNA.records.keys())
+
+    logger.info('Determining chimeras')
+
+    for index, queryName in enumerate(common, start=1):
+        smallRNARecord = smallRNA.access(queryName)
+        targetRNARecord = targetRNA.access(queryName)
+
+        chimeraString = clashchimeras.methods.chimeraOrNot(
+            smallRNARecord.cigarString, targetRNARecord.cigarString,
+            overlap=args.overlap, gap=args.gap)
+
+        if chimeraString:
+
+            if args.getGenomicLocationsSmallRNA:
+
+                try:
+                    smallRNALocation = smallRNAAnnotation.coordinates(
+                        smallRNARecord.refId,
+                        start=smallRNARecord.start,
+                        end=smallRNARecord.end)
+                except IndexError as e:
+                    logger.error(e)
+                    logger.error('Are you sure you have the correct annotation file for '
+                                 'smallRNA?')
+                    logger.error('Please check --smallRNAAnnotation -sa and '
+                                 'and make sure they are matching with the alignment '
+                                 'file {}'.format(args.smallRNA))
+                    sys.exit()
+                output.writeSmallRNABed(queryName, smallRNALocation,
+                                        targetRNARecord.refId)
+
+            if args.getGenomicLocationsTargetRNA:
+                try:
+                    targetRNALocation = targetRNAAnnotation.coordinates(
+                        targetRNARecord.refId,
+                        start=targetRNARecord.start,
+                        end=targetRNARecord.end)
+                except IndexError as e:
+                    logger.error(e.msg)
+                    logger.error('Are you sure you have the correct annotation file for '
+                                 'targetRNA?')
+                    logger.error('Please check --targetRNAAnnotation -ta and '
+                                 'and make sure they are matching with the alignment '
+                                 'file {}'.format(args.targetRNA))
+                    sys.exit()
+
+                output.writeTargetRNABed(queryName, targetRNALocation,
+                                         smallRNARecord.refId)
+
+            output.write(queryName, smallRNARecord, targetRNARecord)
+
+    logger.info('Determining chimeras finished')
+
+if __name__ == "__main__":
+    main()
diff --git a/build/lib/clashchimeras/initialize.py b/build/lib/clashchimeras/initialize.py
new file mode 100644
index 0000000..17e275f
--- /dev/null
+++ b/build/lib/clashchimeras/initialize.py
@@ -0,0 +1,759 @@
+import glob
+import gzip
+import hashlib
+import logging
+import os
+import re
+import subprocess
+import sys
+import textwrap
+import tempfile
+from collections import defaultdict
+
+import ftputil
+import pandas as pd
+import requests
+from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord
+from colors import green
+from bs4 import BeautifulSoup
+from tabulate import tabulate
+
+from clashchimeras.parsers import Fasta
+
+logger = logging.getLogger('root')
+
+
+class Releases:
+
+    def __init__(self, gencodeOrganism='H.sapiens', mirbaseOrganism='hsa',
+                 path=os.path.join(os.path.expanduser("~"), '.CLASHchimeras')):
+        organisms = {'H.sapiens': 'http://www.gencodegenes.org/releases/',
+                     'M.musculus': 'http://www.gencodegenes.org/mouse_releases/'}
+        shortOrganisms = {'H.sapiens': 'human',
+                          'M.musculus': 'mouse'}
+        self.gencodeOrganism = gencodeOrganism
+        self.mirbaseOrganism = mirbaseOrganism
+        self.path = path
+        self.url = organisms[self.gencodeOrganism]
+        self.gencodeShortOrganism = shortOrganisms[self.gencodeOrganism]
+        self.mirbaseUrl = 'ftp://mirbase.org/pub/mirbase/CURRENT/README'
+        self.df = self.openUrl()
+        self.mirbaseDf = self.openMirbaseReadme()
+
+        self.selectGencodeRelease()
+
+        self.selectMirbaseRelease()
+
+        g = Gencode(release=self.gencodeRelease,
+                    path=self.gencodePath,
+                    ftpDir=self.gencodeFtpDir)
+
+        g.download()
+
+        m = Mirbase(release=self.mirbaseRelease,
+                    path=self.mirbasePath,
+                    ftpDir=self.mirbaseFtpDir,
+                    organism=self.mirbaseOrganism)
+
+        m.download()
+
+        m.process()
+
+    def openUrl(self):
+        r = requests.get(self.url)
+        data = r.text
+        soup = BeautifulSoup(data)
+        table = soup.find("table")
+        headings = [th.get_text().strip() for th in table.find("tr").find_all("th")]
+        dataset = defaultdict(dict)
+        for index, row in enumerate(table.find_all("tr")[1:]):
+            for i, j in zip(headings,
+                            (td.get_text().strip() for td in row.find_all("td"))):
+                dataset[index][i] = j
+
+        return pd.DataFrame(dataset).transpose()
+
+    def openMirbaseReadme(self):
+        with ftputil.FTPHost('mirbase.org', 'anonymous', 'anonymous') as fH:
+            fobj = fH.open('pub/mirbase/CURRENT/README')
+            store = False
+            dataset = defaultdict(dict)
+            index = 0
+            for line in fobj.readlines():
+                if store:
+                    row = line.strip().split()
+                    if len(row) == 3 and row[1][0].isdigit():
+                        dataset[index]['Version'] = row[0]
+                        dataset[index]['Date'] = row[1]
+                        dataset[index]['Entries'] = row[2]
+                        index += 1
+                if 'HISTORY' in line:
+                    store = True
+
+            return pd.DataFrame(dataset).transpose()
+
+    def downloadMirbaseRelease(self, key):
+        logger.warn('Are you sure??')
+        decision = input('Y/n: ')
+        if decision.lower() == 'y' or decision == '':
+
+            self.mirbaseRelease = self.mirbaseDf.loc[int(key)]['Version']
+
+            os.makedirs(os.path.join(self.path, 'Mirbase',
+                                     str(self.mirbaseRelease)), exist_ok=True)
+
+            self.mirbasePath = os.path.join(self.path, 'Mirbase',
+                                            str(self.mirbaseRelease))
+
+            self.mirbaseFtpDir = os.path.join('pub/mirbase', self.mirbaseRelease)
+
+            return True
+
+    def downloadGencodeRelease(self, key):
+        logger.warn('Are you sure??')
+        decision = input('Y/n: ')
+        if decision.lower() == 'y' or decision == '':
+
+            self.gencodeRelease = self.df.loc[int(key)]['GENCODE release']
+
+            os.makedirs(os.path.join(self.path, 'Gencode', self.gencodeOrganism,
+                                     str(self.gencodeRelease)), exist_ok=True)
+
+            self.gencodePath = os.path.join(self.path, 'Gencode',
+                                            self.gencodeOrganism,
+                                            str(self.gencodeRelease))
+
+            self.gencodeFtpDir = os.path.join('pub/gencode/Gencode_' +
+                                              self.gencodeShortOrganism,
+                                              'release_' + self.gencodeRelease)
+
+            return True
+
+    def selectGencodeRelease(self):
+        cols = self.df.columns
+
+        logger.info("Select the release that you want \n{}".format(
+                    tabulate(self.df[[cols[2], cols[0],
+                                      cols[1], cols[3]]],
+                             headers=['Index', cols[2], cols[0], cols[1], cols[3]],
+                             tablefmt="simple")))
+
+        logger.warn('Please bare in mind that the automatic download relies on '
+                    'regex search which is known to work for Gencode release 17 '
+                    'and higher ')
+
+        releaseKeys = self.df.index
+
+        while True:
+            logger.warn('Please select which Gencode release to use (select index ' +
+                        '[{}]): '.format(min(self.df.index)))
+            key = input('Index: ')
+            if key.isdigit() and int(key) in releaseKeys:
+                logger.warn('This will download the Gencode release {} which '
+                            'corresponds to Ensembl release {} and Genome assembly '
+                            'version {} freezed on {}'.format(
+                                self.df.loc[int(key)]['GENCODE release'],
+                                self.df.loc[int(key)]['Ensembl release'],
+                                self.df.loc[int(key)]['Genome assembly version'],
+                                self.df.loc[int(key)]['Freeze date *']))
+                if self.downloadGencodeRelease(key):
+                    break
+
+            elif key == '':
+                logger.warn('This will download the latest Gencode release {} which '
+                            'corresponds to Ensembl release {} and Genome assembly '
+                            'version {} freezed on {}'.format(
+                                self.df.loc[min(self.df.index)]['GENCODE release'],
+                                self.df.loc[min(self.df.index)]['Ensembl release'],
+                                self.df.loc[min(self.df.index)]['Genome assembly '
+                                                                'version'],
+                                self.df.loc[min(self.df.index)]['Freeze date *']))
+                if self.downloadGencodeRelease(min(self.df.index)):
+                    break
+            else:
+                logger.warn('Not a valid release index:')
+
+    def selectMirbaseRelease(self):
+        cols = self.mirbaseDf.columns
+
+        logger.info("Select the Mirbase release you want \n{}".format(
+                    tabulate(self.mirbaseDf, headers=['Index', cols[0], cols[1],
+                                                      cols[2]], tablefmt="simple")))
+
+        releaseKeys = self.mirbaseDf.index
+
+        while True:
+            logger.warn('Please select which Mirbase release to use (select index ' +
+                        '[{}]): '.format(max(self.mirbaseDf.index)))
+            key = input('Index: ')
+            if key.isdigit() and int(key) in releaseKeys:
+                logger.warn('This will download miRBase release {} which contains {} '
+                            'entries and released on {}'.format(
+                                self.mirbaseDf.loc[int(key)]['Version'],
+                                self.mirbaseDf.loc[int(key)]['Entries'],
+                                self.mirbaseDf.loc[int(key)]['Date']))
+                if self.downloadMirbaseRelease(key):
+                    break
+
+            elif key == '':
+                logger.warn('This will download miRBase release {} which contains {} '
+                            'entries and released on {}'.format(
+                                self.mirbaseDf.loc[max(self.mirbaseDf.index)]['Version'],
+                                self.mirbaseDf.loc[max(self.mirbaseDf.index)]['Entries'],
+                                self.mirbaseDf.loc[max(self.mirbaseDf.index)]['Date']))
+                if self.downloadMirbaseRelease(max(self.mirbaseDf.index)):
+                    break
+
+            else:
+                logger.warn('Not a valid release index:')
+
+
+class Arguments:
+
+    def __init__(self, args, type=None):
+        self.args = args
+        if type == 'align':
+            if args.genomeIndex:
+                self.args.genomeIndex = os.path.expanduser(args.genomeIndex)
+            if args.transcriptomeIndex:
+                self.args.transcriptomeIndex = os.path.expanduser(
+                    args.transcriptomeIndex)
+            if args.input:
+                self.args.input = os.path.expanduser(args.input)
+            if args.smallRNAIndex:
+                self.args.smallRNAIndex = os.path.expanduser(args.smallRNAIndex)
+            if self.args.targetRNAIndex:
+                self.args.targetRNAIndex = os.path.expanduser(args.targetRNAIndex)
+
+        if args.output:
+            self.args.output = os.path.expanduser(args.output)
+
+        if type == 'find':
+            if args.smallRNAAnnotation:
+                self.args.smallRNAAnnotation = os.path.expanduser(
+                    args.smallRNAAnnotation)
+            if args.targetRNAAnnotation:
+                self.args.targetRNAAnnotation = os.path.expanduser(
+                    args.targetRNAAnnotation)
+            if args.smallRNA:
+                self.args.smallRNA = os.path.expanduser(args.smallRNA)
+            if args.targetRNA:
+                self.args.targetRNA = os.path.expanduser(args.targetRNA)
+
+        self.exit = False
+
+    def hash(self, file):
+        sha256 = hashlib.sha256()
+        with open(file, 'r+b') as f:
+            while True:
+                buf = f.read(8192)
+                if not buf:
+                    break
+                sha256.update(buf)
+        return sha256.hexdigest()
+
+    def validateFind(self):
+        if not os.path.exists(self.args.smallRNA):
+            logger.error('{} not found...'.format(self.args.smallRNA))
+            self.exit = True
+        if not os.path.exists(self.args.targetRNA):
+            logger.error('{} not found...'.format(self.args.smallRNA))
+            self.exit = True
+        if self.args.getGenomicLocationsSmallRNA:
+            if not self.args.smallRNAAnnotation:
+                logger.error('--smallRNAAnnotation -sa not specified. If you want '
+                             'genomic locations for smallRNA, please enable '
+                             '--getGenomicLocationsSmallRNA -ggs and specify '
+                             'annotation file using --smallRNAAnnotation -sa')
+                self.exit = True
+            else:
+                if not os.path.exists(self.args.smallRNAAnnotation):
+                    logger.error('{} not found'.format(self.args.smallRNAAnnotation))
+                    self.exit = True
+        if self.args.getGenomicLocationsTargetRNA:
+            if not self.args.targetRNAAnnotation:
+                logger.error('--targetRNAAnnotation -ta not specified. If you want '
+                             'genomic locations for targetRNA, please enable '
+                             '--getGenomicLocationsTargetRNA -ggt and specify '
+                             'annotation file using --targetRNAAnnotation -ta')
+                self.exit = True
+            else:
+                if not os.path.exists(self.args.targetRNAAnnotation):
+                    logger.error('{} not found...'.format(self.args.targetRNAAnnotation))
+                    self.exit = True
+        if self.hash(self.args.smallRNA) == self.hash(self.args.targetRNA):
+            logger.error('CLASHChimeras does not detect chimeras between the same '
+                         'RNA type yet... Please hang in there, we are planning it '
+                         'in the feature')
+            self.exit = True
+
+        if self.exit:
+            logger.error('Exiting because of the above errors...')
+            sys.exit()
+        elif self.hash(self.args.smallRNA) == self.hash(self.args.targetRNA):
+            logger.error('CLASHChimeras does not detect chimeras between the same '
+                         'RNA type yet... Please hang in there, we are planning it '
+                         'in the feature')
+            sys.exit()
+
+    def validateAlign(self):
+        if not os.path.exists(self.args.input):
+            logger.error('{} not found...'.format(self.args.input))
+            self.exit = True
+        if self.args.run == 'bowtie2' and (not self.args.smallRNAIndex or
+                                           not self.args.targetRNAIndex):
+            logger.error('{}'.format('Please specify --smallRNAIndex -si and ' +
+                                     '--targetRNAIndex -ti properly...'))
+            self.exit = True
+        if self.args.smallRNAIndex:
+            bts = glob.glob(self.args.smallRNAIndex + '*.bt2')
+            if not len(bts) >= 6:
+                logger.error('Something wrong with the {}'.format(
+                    self.args.smallRNAIndex))
+                logger.error("Can't find all the .bt2 files for that index")
+                self.exit = True
+        if self.args.targetRNAIndex:
+            bts = glob.glob(self.args.targetRNAIndex + '*.bt2')
+            if not len(bts) >= 6:
+                logger.error('Something wrong with the {}'.format(
+                    self.args.targetRNAIndex))
+                logger.error("Can't find all the .bt2 files for that index")
+                self.exit = True
+        if self.args.genomeIndex:
+            bts = glob.glob(self.args.genomeIndex.strip() + '*.bt2')
+            if not len(bts) >= 6:
+                logger.error('Something wrong with the {}'.format(
+                    self.args.genomeIndex))
+                logger.error("Can't find all the .bt2 files for that index")
+                self.exit = True
+        if self.args.run == 'tophat' and not self.args.genomeIndex:
+            logger.error('Please provide genome index if you want to use tophat..')
+            self.exit = True
+        if self.exit:
+            sys.exit()
+
+
+class Gencode:
+    """This class makes sure the required files (sequences, annotation etc.) are
+    present and in working order. It generates sha256 checksums and autodownloads
+    the required files from Gencode Genes.
+    """
+
+    def __init__(self, release=None, user="anonymous", password=None,
+                 path=None, ftpDir=None):
+
+        self.downloaded = []
+        self.release = release
+        self.reList = ['^GRC.+\.genome\.fa',
+                       '^gencode.+chr_patch_hapl_scaff\.annotation\.gtf',
+                       '^gencode.+pc_transcripts\.fa',
+                       '^gencode.+tRNAs\.gtf',
+                       '^gencode.+lncRNA_transcripts\.fa']
+
+        self.host = "ftp.sanger.ac.uk"
+        self.user = user
+        self.path = path
+
+        self.files = []
+        self.otherFiles = {}
+        biotypes = ['snRNA', 'snoRNA', 'misc_RNA', 'tRNA']
+        for b in biotypes:
+            self.otherFiles[b] = 'gencode.v{}.{}_transcripts.fa'.format(self.release,
+                                                                        b)
+        if self.user == "anonymous":
+            self.password = "anonymous"
+        else:
+            self.password = password
+
+        ftp_host = ftputil.FTPHost(self.host, self.user, self.password)
+
+        self.dir = ftpDir
+        fileList = ftp_host.listdir(self.dir)
+        for file in fileList:
+            for rEx in self.reList:
+                if re.match(rEx, file):
+                    if not 'primary_assembly' in file:
+                        self.files.append(file)
+        ftp_host.close()
+        _files = self.files.copy()
+        _otherFiles = self.otherFiles.copy()
+
+        for file in _files:
+            logger.debug('Checking %s' % file)
+            _file = file.rpartition(".")[0]
+            if os.path.exists(os.path.join(self.path, _file + '.sha256sum')) and \
+                    os.path.exists(os.path.join(self.path, _file)):
+                with open(os.path.join(self.path, _file + '.sha256sum')) as f:
+                    s = f.readline().rstrip()
+                _s = self.hash(os.path.join(self.path, _file))
+                if s == _s:
+                    logger.info("%s is present and verified" % _file)
+                    self.files.remove(file)
+                else:
+                    logger.warn('%s is downloaded but doesnt match the sha256sum' % _file)
+                    logger.error('Will be downloaded again')
+            else:
+                logger.warn('%s will be downloaded' % _file)
+
+        for biotype, file in _otherFiles.items():
+            logger.debug('Checking %s' % file)
+            _file = file
+            if os.path.exists(os.path.join(self.path, _file + '.sha256sum')) and \
+                    os.path.exists(os.path.join(self.path, _file)):
+                with open(os.path.join(self.path, _file + '.sha256sum')) as f:
+                    s = f.readline().rstrip()
+                _s = self.hash(os.path.join(self.path, _file))
+                if s == _s:
+                    logger.info("%s is present and verified" % file)
+                    self.otherFiles.pop(biotype)
+                else:
+                    logger.warn('%s is generated but doesnt match the sha256sum' % file)
+                    logger.error('Will be generated again')
+            else:
+                logger.warn('%s will be generated' % file)
+
+    def download(self):
+
+        if len(self.files) == 0:
+            logger.info('Gencode files are downloaded and checksum verified...')
+        else:
+            with ftputil.FTPHost(self.host, self.user, self.password) as fH:
+                for file in self.files:
+                    logger.info('Downloading %s from ftp.sanger.ac.uk' % file)
+                    fH.download(self.dir + '/' + file, os.path.join(self.path, file),
+                                callback=None)
+                    self.downloaded.append(file)
+                    _file = file.rpartition(".")[0]
+                    p = subprocess.Popen(['gunzip', os.path.join(self.path, file)])
+                    p.communicate()
+                    sha256sum = self.hash(os.path.join(self.path, _file))
+                    with open(os.path.join(self.path, _file + '.sha256sum'), 'w') as wH:
+                        print(sha256sum, file=wH)
+                    logger.info('Downloading, extraction and hashing of %s finished' %
+                                file)
+
+        gtfFiles = glob.glob(os.path.join(self.path, "*.annotation.gtf"))
+
+        if len(gtfFiles) == 0:
+            logger.warn('This release does not contain annotation file or they are '
+                        'not grabbed by regex. Please download it manually from {'
+                        '}'.format(self.dir))
+            gtfFile = None
+        elif len(gtfFiles) == 2:
+            gtfFiles.sort()
+            gtfFile = gtfFiles[1]
+        elif len(gtfFiles) == 1:
+            gtfFile = gtfFiles[0]
+
+        genomeFiles = glob.glob(os.path.join(self.path, "*.genome.fa"))
+
+        if len(genomeFiles) == 0:
+            logger.warn('This release does not contain genome file or they are not '
+                        'grabbed by regex. Please download it manually from {'
+                        '}'.format(self.dir))
+            genomeFile = None
+        else:
+            genomeFile = genomeFiles[0]
+
+        tRNAgtfFiles = glob.glob(os.path.join(self.path, "*.tRNAs.gtf"))
+
+        if len(tRNAgtfFiles) == 0:
+            logger.warn(('This release does not contain tRNA annotation file or they '
+                         'are not grabbed by regex. Please download it manually from {'
+                         '}'.format(self.dir)))
+            tRNAgtfFile = None
+        else:
+            tRNAgtfFile = tRNAgtfFiles[0]
+
+        if len(self.otherFiles) == 0:
+            logger.info('Other fasta files are generated and checksum verified...')
+        else:
+            for biotype, file in self.otherFiles.items():
+                if biotype == 'tRNA' and (tRNAgtfFile and genomeFile):
+                    fasta = Fasta(genome=genomeFile, gtf=tRNAgtfFile)
+                    fasta.getBiotype(biotype='tRNA', output=os.path.join(self.path,
+                                                                         file))
+                    sha256sum = self.hash(os.path.join(self.path, file))
+                    with open(os.path.join(self.path, file + '.sha256sum'), 'w') as wH:
+                        print(sha256sum, file=wH)
+                    logger.info('Extraction and hashing of %s finished' % file)
+                elif gtfFile and genomeFile:
+                    fasta = Fasta(genome=genomeFile, gtf=gtfFile)
+                    fasta.getBiotype(biotype=biotype, output=os.path.join(self.path,
+                                                                          file))
+                    sha256sum = self.hash(os.path.join(self.path, file))
+                    with open(os.path.join(self.path, file + '.sha256sum'), 'w') as wH:
+                        print(sha256sum, file=wH)
+                    logger.info('Extraction and hashing of %s finished' % file)
+
+    def hash(self, file):
+        sha256 = hashlib.sha256()
+        with open(file, 'r+b') as f:
+            while True:
+                buf = f.read(8192)
+                if not buf:
+                    break
+                sha256.update(buf)
+        return sha256.hexdigest()
+
+
+class Mirbase:
+    """This class makes sure that requried files (sequences, annotations etc.)
+    from Mirbase are downloaded. It generates sha256 checksums and autodownloads
+    the required files
+    """
+
+    def __init__(self, release=None, user="anonymous", password=None,
+                 path=os.path.join(os.path.expanduser("~"), '.CLASHchimeras'),
+                 organism='hsa',
+                 ftpDir=None):
+        self.reList = ['^hairpin\.fa\.gz', '^mature\.fa\.gz']
+        organisms = {}
+        with ftputil.FTPHost('mirbase.org', 'anonymous', 'anonymous') as fH:
+            tH, tP = tempfile.mkstemp()
+            fH.download(os.path.join('pub/mirbase', release, 'organisms.txt.gz'), tP)
+            for i in gzip.open(tP):
+                ii = i.decode('utf-8').strip()
+                if not ii.startswith('#'):
+                    organisms[ii.split("\t")[0]] = ii.split("\t")[2]
+
+        self.shortOrganism = organism
+        self.organism = organisms.get(self.shortOrganism, None)
+        if not self.organism:
+            logger.error('{} is not a valid organism name in Mirbase'.format(
+                self.shortOrganism))
+            sys.exit()
+        self.host = 'mirbase.org'
+        self.release = release
+        self.user = user
+        self.path = path
+        self.files = []
+
+        if self.user == "anonymous":
+            self.password = "anonymous"
+        else:
+            self.password = password
+
+        ftp_host = ftputil.FTPHost(self.host, self.user, self.password)
+
+        self.dir = ftpDir
+
+        fileList = ftp_host.listdir(self.dir)
+
+        self.files.append('genomes/' + self.shortOrganism + '.gff3')
+        for file in fileList:
+            for rEx in self.reList:
+                if re.match(rEx, file):
+                    self.files.append(file)
+        ftp_host.close()
+        _files = self.files.copy()
+
+        for file in _files:
+            logger.debug('Checking %s' % file)
+            if file.endswith('gz'):
+                _file = file.rpartition(".")[0]
+            elif '/' in file:
+                _file = file.rpartition("/")[2]
+            if os.path.exists(os.path.join(self.path, _file + '.sha256sum')) and \
+                    os.path.exists(os.path.join(self.path, _file)):
+                with open(os.path.join(self.path, _file + '.sha256sum')) as f:
+                    s = f.readline().rstrip()
+                _s = self.hash(os.path.join(self.path, _file))
+                if s == _s:
+                    logger.info("%s is present and verified" % file)
+                    self.files.remove(file)
+                else:
+                    logger.warn('%s is downloaded but doesnt match the sha256sum' % file)
+                    logger.error('Must run download method')
+            else:
+                logger.warn('%s will be downloaded when you run download method' % file)
+
+    def download(self):
+
+        if len(self.files) == 0:
+            logger.debug('Mirbase files are downloaded and checksum verified...')
+        else:
+            with ftputil.FTPHost(self.host, self.user, self.password) as fH:
+                for file in self.files:
+                    logger.info('Downloading %s from mirbase.org' % file)
+                    if "/" in file:
+                        _file = file.rpartition("/")[2]
+                    else:
+                        _file = file
+                    fH.download(self.dir + '/' + file, os.path.join(self.path, _file),
+                                callback=None)
+                    if _file.endswith('.gz'):
+                        __file = _file.rpartition(".")[0]
+                        p = subprocess.Popen(['gunzip', os.path.join(self.path, __file)])
+                        p.communicate()
+                    else:
+                        __file = _file
+                    sha256sum = self.hash(os.path.join(self.path, __file))
+                    with open(os.path.join(self.path, __file + '.sha256sum'), 'w') as wH:
+                        print(sha256sum, file=wH)
+                    logger.debug('Downloading, extraction and hashing of %s finished' %
+                                 file)
+            self.files = []
+
+    def hash(self, file):
+        sha256 = hashlib.sha256()
+        with open(file, 'r+b') as f:
+            while True:
+                buf = f.read(8192)
+                if not buf:
+                    break
+                sha256.update(buf)
+        return sha256.hexdigest()
+
+    def process(self):
+        files = ['hairpin.fa', 'mature.fa']
+        for file in files:
+            temp = []
+            if os.path.exists(os.path.join(self.path, self.shortOrganism + '-' + file)) \
+                and os.path.exists(os.path.join(self.path,
+                                                self.shortOrganism + '-' + file + '.sha256sum')):
+                with open(os.path.join(self.path, file + '.sha256sum')) as f:
+                    s = f.readline().rstrip()
+                _s = self.hash(os.path.join(self.path, file))
+                if s == _s:
+                    logger.info("%s is present and verified" % file)
+                else:
+                    logger.warn('%s is downloaded but doesnt match the sha256sum' % file)
+            else:
+                logger.debug('Extrating %s sequences from %s' % (self.organism, file))
+                with open(os.path.join(self.path, file)) as iH:
+                    for rec in SeqIO.parse(iH, 'fasta'):
+                        if self.organism in rec.description:
+                            _temp_rec = SeqRecord(id=rec.id, description=rec.description,
+                                                  seq=rec.seq.back_transcribe())
+                            temp.append(_temp_rec)
+                SeqIO.write(temp, os.path.join(self.path, self.shortOrganism + '-' + file),
+                            'fasta')
+                with open(os.path.join(self.path,
+                                       self.shortOrganism + '-' + file + '.sha256sum'), 'w') as wH:
+                    print(self.hash(os.path.join(self.path,
+                                                 self.shortOrganism + '-' + file + '.sha256sum')), file=wH)
+
+
+class Index:
+
+    def __init__(self, root=os.path.join(os.path.expanduser("~"),
+                                         '.CLASHchimeras'), bowtieExecutable=None, tophatExecutable=None):
+
+        self.created = []
+
+        if bowtieExecutable is not None:
+            logger.info('Bowtie2 path {} provided and will be used'
+                        .format(bowtieExecutable))
+            self.bowtieExecutable = bowtieExecutable
+        else:
+            try:
+                bowtieVersion = float(subprocess.check_output(['bowtie2', '--version']
+                                                              ).decode('utf-8').split("\n")[0].split()[-1].rpartition(".")[0])
+                fullBowtieVersion = subprocess.check_output(['bowtie2', '--version']
+                                                            ).decode('utf-8').split("\n")[0].split()[-1]
+                if bowtieVersion < 2.2:
+                    logger.warn("Please update bowtie2, you can download it from")
+                    logger.warn("http://sourceforge.net/projects/bowtie-bio/files/bowtie2/")
+                else:
+                    logger.info("Bowtie2 version {} found and will be used"
+                                .format(fullBowtieVersion))
+                    self.bowtieExecutable = 'bowtie2'
+            except OSError as e:
+                if e.errno == os.errno.ENOENT:
+                    logger.error(textwrap.fill("""
+            Can't find Bowtie2 in your path. Please install it from
+            http://sourceforge.net/projects/bowtie-bio/files/bowtie2/
+            """))
+                else:
+                    logger.error('Something wrong with your bowtie2 command')
+
+        if tophatExecutable is not None:
+            logger.info('Tophat2 path {} provided and will be used'
+                        .format(tophatExecutable))
+            self.tophatExecutable = tophatExecutable
+        else:
+            try:
+                tophatVersion = float(subprocess.check_output(['tophat',
+                                                               '--version']).decode('utf-8').split(" v")[-1].rpartition(".")[0])
+                fullTophatVersion = subprocess.check_output(['tophat',
+                                                             '--version']).decode('utf-8').split(" v")[-1].rstrip()
+                if tophatVersion < 2:
+                    logger.warn("Please update tophat, you can download it from")
+                    logger.warn("http://ccb.jhu.edu/software/tophat/index.shtml")
+                else:
+                    logger.info("Tophat version {} found and will be used"
+                                .format(fullTophatVersion))
+                    self.tophatExecutable = 'tophat'
+            except OSError as e:
+                if e.errno == os.errno.ENOENT:
+                    logger.error("Can't find Tophat in your path. Please install it from")
+                    logger.error("http://ccb.jhu.edu/software/tophat/index.shtml")
+                else:
+                    logger.error('Something wrong with your tophat command')
+
+        self.root = root
+
+    def create(self):
+
+        stdoutHandle = open(os.path.join(self.root, 'CLASHChimeras-Index.stdout'),
+                            'w')
+        stderrHandle = open(os.path.join(self.root, 'CLASHChimeras-Index.stderr'),
+                            'w')
+        indexCreated = False
+        for fa in glob.glob(os.path.join(self.root, '*.fa')):
+            indexName = fa.rpartition(".fa")[0]
+            self.created.append('{} - {}'.format(indexName, green('Bowtie2',
+                                                                  style='bold')))
+            if 'genome.fa' in fa:
+                genomeIndex = fa.rpartition(".")[0]
+            if len(glob.glob(os.path.join(self.root, indexName + '*.bt2'))) == 6:
+                logger.info('Bowtie2 index for {} found'.format(fa))
+            else:
+                try:
+                    logger.info('Generating bowtie2 index for {}'.format(fa))
+                    indexCreated = True
+                    p = subprocess.Popen([self.bowtieExecutable + '-build', fa, indexName],
+                                         stdout=stdoutHandle, stderr=stderrHandle)
+                    p.communicate()
+                    logger.info('Bowtie2 index for {} is generated'.format(fa))
+                except OSError as e:
+                    if e.errno == os.errno.ENOENT:
+                        logger.error("Can't find bowtie2-build in your path...")
+                        logger.error("Please make sure bowtie2 is in your path and")
+                        logger.error("bowtie2-build can run from your shell")
+                    else:
+                        logger.error("Something wrong with your bowtie2-build command")
+
+        if indexCreated:
+            logger.warn('The stdout and stderr for bowtie2-build are stored in')
+            logger.warn('CLASHChimeras-Index.stdout and CLASHChimeras-Index.stderr')
+
+        for gtf in glob.glob(os.path.join(self.root, '*.annotation.gtf')):
+            indexName = gtf.rpartition("/")[-1].rpartition(".")[0]
+            self.created.append('{} - {}'.format(gtf.rpartition(".")[0], green(
+                                'Tophat2 Transcriptome', style='bold')))
+            if len(glob.glob(os.path.join(self.root, indexName + '*.bt2'))) == 6:
+                logger.info('Tophat2 transcriptome index found for {}'.format(gtf))
+            else:
+                try:
+                    indexCreated = True
+                    logger.info('Generating tophat transcriptome index for {}'
+                                .format(gtf))
+                    p = subprocess.Popen([self.tophatExecutable, '-G', gtf,
+                                          '--transcriptome-index', os.path.join(
+                                              self.root, indexName),
+                                          genomeIndex], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                    p.communicate()
+                    logger.info('Tophat2 transcriptome index generated for {}'
+                                .format(gtf))
+                except OSError as e:
+                    if e.errno == os.errno.ENOENT:
+                        logger.error("Can't find tophat in your path...")
+                        logger.error("Please make sure tophat is in your path")
+                    else:
+                        logger.error("Something wrong with your tophat command")
+
+        stdoutHandle.close()
+        stderrHandle.close()
diff --git a/build/lib/clashchimeras/log.py b/build/lib/clashchimeras/log.py
new file mode 100644
index 0000000..2a6b0ed
--- /dev/null
+++ b/build/lib/clashchimeras/log.py
@@ -0,0 +1,30 @@
+import logging
+import coloredlogs
+
+
+def debug_logger(name):
+    logger = logging.getLogger(name)
+    coloredlogs.install(level=logging.DEBUG, show_name=False,
+                        show_hostname=True)
+    return logger
+
+
+def info_logger(name):
+    logger = logging.getLogger(name)
+    coloredlogs.install(level=logging.INFO, show_name=False,
+                        show_hostname=True)
+    return logger
+
+
+def warning_logger(name):
+    logger = logging.getLogger(name)
+    coloredlogs.install(level=logging.WARNING, show_name=False,
+                        show_hostname=True)
+    return logger
+
+
+def error_logger(name):
+    logger = logging.getLogger(name)
+    coloredlogs.install(level=logging.ERROR, show_name=False,
+                        show_hostname=True)
+    return logger
diff --git a/build/lib/clashchimeras/methods.py b/build/lib/clashchimeras/methods.py
new file mode 100644
index 0000000..c52d45e
--- /dev/null
+++ b/build/lib/clashchimeras/methods.py
@@ -0,0 +1,85 @@
+import re
+
+from itertools import repeat
+
+
+def convertCigar(cigarString):
+    cigarAttributes = re.findall("\D", cigarString)
+    cigarNumbers = re.findall("\d+", cigarString)
+    convertedCigarString = ""
+    for index, attribute in enumerate(cigarAttributes):
+        for i in repeat(attribute, int(cigarNumbers[index])):
+            convertedCigarString += i
+    return convertedCigarString
+
+
+def chimeraOrNot(bitOne, bitTwo, overlap=4, gap=9):
+    combinedCigar = ""
+    for i, j in zip(bitOne, bitTwo):
+        if i == "M" and j == "M":
+            combinedCigar += "="
+        elif i == "S" and j == "S":
+            combinedCigar += "#"
+        elif i == "I" and j == "D":
+            combinedCigar += "-"
+        elif i == "D" and j == "I":
+            combinedCigar += "-"
+        elif i == "M" and j != "M":
+            combinedCigar += "{"
+        elif i != "M" and j == "M":
+            combinedCigar += "}"
+        elif i != "M" and j == "D":
+            combinedCigar += "-"
+        elif i == "D" and j != "M":
+            combinedCigar += "-"
+        elif i == "I" and j != "M":
+            combinedCigar += "+"
+        elif i != "M" and j == "I":
+            combinedCigar += "+"
+        else:
+            combinedCigar += "*"
+    numberOfMs = len(list(filter(lambda x: x == "=", combinedCigar)))
+    curlyStartStart = combinedCigar.find("{")
+    curlyStartEnd = combinedCigar.rfind("{")
+    curlyEndStart = combinedCigar.find("}")
+    curlyEndEnd = combinedCigar.rfind("}")
+    listCurlies = sorted([[curlyStartStart, curlyStartEnd], [curlyEndStart,
+                                                             curlyEndEnd]])
+    matches = combinedCigar.count("{") + combinedCigar.count("}") + \
+        combinedCigar.count("=") + combinedCigar.count("-") + \
+        combinedCigar.count("+")
+    if numberOfMs <= overlap and abs(listCurlies[0][1] - listCurlies[1][0]) \
+            <= gap and matches >= (0.75 * len(combinedCigar)):
+        return combinedCigar
+    else:
+        return None
+
+
+def findRegion(record_data):
+    if 'ENS' in record_data.refId and '|' in record_data.refId:
+        reference_id = record_data.refId
+        reference_start = int(record_data.start)
+        match_length = record_data.matchLength
+        regions = reference_id.split("|")[7:-1]
+        match_start = reference_start
+        match_end = reference_start + match_length
+        mixed = []
+
+        for region in regions:
+            region_name = region.split(":")[0]
+            region_start = int(region.split(":")[-1].split("-")[0])
+            region_end = int(region.split(":")[-1].split("-")[-1])
+            if match_start >= region_start and match_end <= region_end:
+                mixed.append(region_name)
+                break
+            if match_start >= region_start and match_start <= region_end:
+                mixed.append(region_name)
+            if match_end >= region_start and match_end <= region_end:
+                mixed.append(region_name)
+            if match_start <= region_start and match_end >= region_end:
+                mixed.append(region_name)
+        return '{}|{}|{}'.format(reference_id.split("|")[0],
+                                 reference_id.split("|")[5],
+                                 "-".join(mixed))
+    else:
+        return record_data.refId
diff --git a/build/lib/clashchimeras/parsers.py b/build/lib/clashchimeras/parsers.py
new file mode 100644
index 0000000..989c662
--- /dev/null
+++ b/build/lib/clashchimeras/parsers.py
@@ -0,0 +1,803 @@
+import csv
+import gzip
+import logging
+import mmap
+import os
+import sys
+import textwrap
+from collections import Counter, defaultdict
+from itertools import groupby
+from operator import itemgetter
+
+import pandas as pd
+import pyfaidx
+from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord
+from Bio.Seq import Seq
+
+import clashchimeras
+
+logger = logging.getLogger('root')
+
+
+class GFF:
+    """GFF file parser for mirbase gff3 file
+
+    This class uses memory-mapped file object to read a mirbase gff3 file. It
+    contains methods to read, process a gff3 file and return genomic coordinates
+
+    Attributes:
+      fileName: A mirbase gff3 file path
+    """
+
+    def __init__(self, fileName=None):
+        self.features = {}
+        self.fileName = fileName
+
+    def read(self, featureType='miRNA_primary_transcript'):
+        """Reads gff3 file provided during class initialization
+
+        Stores the byte positions of every feature in a dict object named
+        self.features
+
+        Keyword Args:
+          featureType: Feature type of a gff3 record, the third element of every
+            record in the file. Please change this if you want to store mature
+            form of microRNA, by default it uses primary transcript
+            (default 'miRNA_primary_transcript')
+        """
+        logger.info('Reading %s' % self.fileName)
+        self.fileHandle = open(self.fileName, 'r+b')
+        bytePosition = self.fileHandle.tell()
+        for line in self.fileHandle:
+            row = line.decode('utf-8').rstrip().split("\t")
+            if not row[0].startswith("#") and row[2] == featureType:
+                attributes = row[-1].split(";")
+                for attribute in attributes:
+                    if attribute.startswith('Name'):
+                        mirbase_name = attribute.split("=")[-1]
+                        self.features[mirbase_name] = bytePosition
+            bytePosition = self.fileHandle.tell()
+        self.fileHandle.close()
+        logger.debug('Reading %s finished' % self.fileName)
+
+    def process(self, name):
+        """A method to return a Record object providing genomic information
+
+        Args:
+          name: A valid miRNA_primary_transcript name
+
+        Returns:
+          An object Record containing scaffold, start, end, strand, mirbase_id and
+            mirbase_name as its variables for access
+        """
+        self.fileHandle = open(self.fileName, 'r+b')
+        self.mm = mmap.mmap(self.fileHandle.fileno(), 0)
+        self.mm.seek(self.features[name])
+        row = self.mm.readline().decode('utf-8').rstrip().split("\t")
+        attributes = row[-1].split(";")
+        for attribute in attributes:
+            if attribute.startswith("ID"):
+                _id = attribute.split("=")[-1]
+            elif attribute.startswith("Name"):
+                _name = attribute.split("=")[-1]
+        record = Record(scaffold=row[0], start=int(row[3]), end=int(row[4]),
+                        strand=row[6], mirbase_id=_id, mirbase_name=_name)
+        self.fileHandle.close()
+        return record
+
+    def coordinates(self, name, start=None, end=None):
+        """A method to return a bed record containing genomic coordinates for the
+          aligned segment
+
+        Keyword Args:
+          start: The alignment start position of the cDNA molecule or the relative
+            start of the particular molecule
+          end: The alignment end position in the cDNA molecule or the relative end
+            of the particular molecule
+
+        Args:
+          name: A valid miRNA_primary_transcript name
+
+        Returns:
+          A tuple of strings containing elements for a bed record
+        """
+        record = self.process(name)
+        if not start and not end:
+            start = 1
+            end = record.end - record.start + 1
+        positions = {}
+        match_positions = []
+
+        if record.strand == '+':
+            _start = 1
+            for relative, actual in enumerate(range(record.start - 1, record.end),
+                                              start=_start):
+                positions[relative] = actual
+            for pos in range(start, end + 1):
+                match_positions.append(positions[pos])
+            return [(record.scaffold, min(match_positions), max(match_positions) + 1,
+                     record.mirbase_name, 0, record.strand)]
+
+        elif record.strand == '-':
+            _start = 1
+            for relative, actual in enumerate(reversed(range(record.start - 1,
+                                                             record.end)), start=_start):
+                positions[relative] = actual
+            for pos in range(start, end + 1):
+                match_positions.append(positions[pos])
+            return [(record.scaffold, min(match_positions), max(match_positions) + 1,
+                     record.mirbase_name, 0, record.strand)]
+
+
+class GTF:
+    """GTF file parser for gencode gtf file
+
+    This class uses memory-mapped file object to read a gencode gtf file. It
+    contains methods to read, process a gtf file and return genomic coordinates
+
+    Attributes:
+      fileName: A gencode gtf file path
+    """
+
+    def __init__(self, fileName=None):
+        self.features = defaultdict(list)
+        self.biotypeFeatures = defaultdict(list)
+        self.geneFeatures = defaultdict(list)
+        self.fileName = fileName
+        self.geneIds = {}
+
+    def readBiotype(self, featureType='exon', biotype=None):
+        logger.info('Reading %s' % self.fileName)
+        self.fileHandle = open(self.fileName, 'r+b')
+        for line in self.fileHandle:
+            row = line.decode('utf-8').rstrip().split("\t")
+
+            if not row[0].startswith("#") and row[2] == featureType:
+                attributes = row[-1].split("; ")
+
+                havana_transcript = '-'
+                havana_gene = '-'
+                exon_number = '0'
+
+                for attribute in attributes:
+
+                    if attribute.startswith("transcript_id"):
+                        transcript_id = attribute.split(" ")[-1][1:-1]
+
+                    elif attribute.startswith("transcript_type"):
+                        transcript_type = attribute.split(" ")[-1][1:-1]
+
+                    elif attribute.startswith("exon_number"):
+                        exon_number = int(attribute.split(" ")[-1])
+
+                    elif attribute.startswith("havana_gene"):
+                        havana_gene = attribute.split(" ")[-1][1:-1]
+
+                    elif attribute.startswith("havana_transcript"):
+                        havana_transcript = attribute.split(" ")[-1][1:-2]
+
+                    elif attribute.startswith("gene_id"):
+                        gene_id = attribute.split(" ")[-1][1:-1]
+
+                    elif attribute.startswith("gene_name"):
+                        gene_name = attribute.split(" ")[-1][1:-1]
+
+                    elif attribute.startswith("transcript_name"):
+                        transcript_name = attribute.split(" ")[-1][1:-1]
+
+                if biotype == 'tRNA':
+                    if transcript_type == "tRNAscan":
+                        self.biotypeFeatures[transcript_id].append((exon_number, row[0],
+                                                                    int(row[3]), int(row[4]),
+                                                                    row[6], gene_id,
+                                                                    havana_gene,
+                                                                    havana_transcript,
+                                                                    transcript_name,
+                                                                    gene_name))
+                else:
+                    if transcript_type == biotype:
+                        self.biotypeFeatures[transcript_id].append((exon_number, row[0],
+                                                                    int(row[3]), int(row[4]),
+                                                                    row[6], gene_id,
+                                                                    havana_gene,
+                                                                    havana_transcript,
+                                                                    transcript_name,
+                                                                    gene_name))
+
+        self.fileHandle.close()
+
+    def read(self, featureType='exon'):
+        """Reads gtf file provided during class initialization
+
+        Stores the byte positions of every feature in a defaultdict(list) object
+        named self.features
+
+        Keyword Args:
+          featureType: Feature type of a gtf record, the third element of every
+            record in the file. Please change this if you want to get specific
+            records (e.g. 'UTR') (default 'exon')
+        """
+
+        logger.info('Reading %s' % self.fileName)
+        self.fileHandle = open(self.fileName, 'r+b')
+        bytePosition = self.fileHandle.tell()
+        for line in self.fileHandle:
+            row = line.decode('utf-8').rstrip().split("\t")
+            if not row[0].startswith("#") and row[2] == featureType:
+                attributes = row[-1].split("; ")
+                for attribute in attributes:
+                    if attribute.startswith("transcript_id"):
+                        transcript_id = attribute.split(" ")[-1][1:-1]
+                        self.features[transcript_id].append(bytePosition)
+                        self.geneIds[transcript_id] = gene_id
+                    if attribute.startswith("gene_id"):
+                        gene_id = attribute.split(" ")[-1][1:-1]
+            bytePosition = self.fileHandle.tell()
+        self.fileHandle.close()
+
+        self.fileHandle = open(self.fileName, 'r+b')
+        bytePosition = self.fileHandle.tell()
+        for line in self.fileHandle:
+            row = line.decode('utf-8').rstrip().split("\t")
+            if not row[0].startswith("#") and row[2] == featureType:
+                attributes = row[-1].split("; ")
+                for attribute in attributes:
+                    if attribute.startswith("gene_id"):
+                        gene_id = attribute.split(" ")[-1][1:-1]
+                        self.geneFeatures[gene_id].append(bytePosition)
+            bytePosition = self.fileHandle.tell()
+        self.fileHandle.close()
+
+        logger.debug('Reading %s finished' % self.fileName)
+
+    def process(self, name):
+        """A method to return a Record object providing genomic information
+
+        Args:
+          name: A valid gencode transcript_id
+
+        Returns:
+          An object Record containing scaffold, start, end, strand, mirbase_id and
+            mirbase_name as its variables for access
+        """
+        self.fileHandle = open(self.fileName, 'r+b')
+        self.mm = mmap.mmap(self.fileHandle.fileno(), 0)
+        positions = self.features[name]
+        for position in positions:
+            self.mm.seek(position)
+            row = self.mm.readline().decode('utf-8').rstrip().split("\t")
+            attributes = row[-1].split("; ")
+            _eid = '-'
+            _enb = '0'
+            for attribute in attributes:
+                if attribute.startswith("transcript_type"):
+                    _tt = attribute.split(" ")[-1][1:-1]
+                elif attribute.startswith("transcript_id"):
+                    _tid = attribute.split(" ")[-1][1:-1]
+                elif attribute.startswith("exon_id"):
+                    _eid = attribute.split(" ")[-1][1:-1]
+                elif attribute.startswith("exon_number"):
+                    _enb = int(attribute.split(" ")[-1])
+                elif attribute.startswith("gene_name"):
+                    _gn = attribute.split(" ")[-1][1:-1]
+            record = Record(scaffold=row[0], start=int(row[3]), end=int(row[4]),
+                            strand=row[6], transcript_type=_tt, transcript_id=_tid, exon_id=_eid,
+                            exon_number=_enb, gene_name=_gn)
+            yield record
+        self.fileHandle.close()
+
+    def geneExonicRegions(self, df):
+        """Given a DataFrame with the exon coordinates from Gencode for a single
+        gene, return the total number of coding regions in that gene.
+        """
+        scaffold = df.iloc[0].scaffold
+        strand = df.iloc[0].strand
+        gene_type = df.iloc[0].gene_type
+        gene_id = df.iloc[0].gene_id
+        gene_name = df.iloc[0].gene_name
+        start = df.start.min()
+        end = df.end.max()
+        bp = [False] * (end - start + 1)
+        for i in range(df.shape[0]):
+            s = df.iloc[i]['start'] - start
+            e = df.iloc[i]['end'] - start + 1
+            bp[s:e] = [True] * (e - s)
+        regions = list(range(start, end + 1))
+        groups = []
+
+        for i, j in groupby(bp):
+            groups.append((i, len(list(j))))
+        e_start = 0
+
+        for i in groups:
+            e_end = e_start + i[1]
+            if i[0]:
+                record = Record(scaffold=scaffold, start=regions[e_start],
+                                end=regions[e_end - 1], gene_type=gene_type, gene_id=gene_id,
+                                gene_name=gene_name, strand=strand)
+                yield record
+            e_start += i[1]
+
+    def geneProcess(self, name):
+        """A method to return a Record object providing genomic information
+
+        Args:
+          name: A valid gencode gene_id
+
+        Returns:
+          An object Record containing scaffold, start, end, strand, mirbase_id and
+            mirbase_name as its variables for access
+        """
+        self.fileHandle = open(self.fileName, 'r+b')
+        self.mm = mmap.mmap(self.fileHandle.fileno(), 0)
+        positions = self.geneFeatures[name]
+        exons = []
+        for position in positions:
+            self.mm.seek(position)
+            row = self.mm.readline().decode('utf-8').rstrip().split("\t")
+            attributes = row[-1].split("; ")
+            for attribute in attributes:
+                if attribute.startswith("gene_type"):
+                    _gt = attribute.split(" ")[-1][1:-1]
+                elif attribute.startswith("gene_id"):
+                    _gid = attribute.split(" ")[-1][1:-1]
+                elif attribute.startswith("gene_name"):
+                    _gn = attribute.split(" ")[-1][1:-1]
+            exons.append((row[0], int(row[3]), int(row[4]), row[6], _gt, _gid, _gn))
+        self.fileHandle.close()
+        exons_df = pd.DataFrame(exons, columns=['scaffold', 'start', 'end',
+                                                'strand', 'gene_type', 'gene_id', 'gene_name'])
+
+        for record in self.geneExonicRegions(exons_df):
+            yield record
+
+    def coordinates(self, name, start=None, end=None):
+        """A generator to return a bed record containing genomic coordinates for the
+          aligned segment
+
+        Keyword Args:
+          start: The alignment start position of the cDNA molecule or the relative
+            start of the particular molecule
+          end: The alignment end position in the cDNA molecule or the relative end
+            of the particular molecule
+
+        Args:
+          name: A valid miRNA_primary_transcript name
+
+        Returns:
+          A list of tuple(s) of strings containing elements for a bed record. There
+          may be more than one because of alternate splicing.
+        """
+        if "|" in name:
+            self.name = name.split("|")[0]
+        else:
+            self.name = name
+        positions = {}
+        match_positions = []
+        records = []
+        segments = []
+        result_segments = []
+        for record in self.process(self.name):
+            records.append(record)
+        records.sort(key=lambda x: int(x.exon_number))
+
+        if records[0].strand == '+':
+            _start = 1
+            for record in records:
+                for relative, actual in enumerate(range(record.start, record.end + 1),
+                                                  start=_start):
+                    positions[relative] = actual
+                _start = relative + 1
+            for pos in range(start, end):
+                match_positions.append(positions[pos])
+            for key, group in groupby(enumerate(match_positions),
+                                      lambda x: x[0] - x[-1]):
+                segment = list(map(itemgetter(1), group))
+                segments.append([segment[0], segment[-1]])
+            for segment in segments:
+                for record in records:
+                    if segment[0] >= record.start and segment[1] <= record.end:
+                        result_segments.append((record.scaffold, segment[0], segment[1],
+                                                record.transcript_id + '|' + record.gene_name, 0, record.strand))
+
+        elif records[0].strand == '-':
+            _start = 1
+            for record in records:
+                for relative, actual in enumerate(reversed(range(record.start,
+                                                                 record.end + 1)), start=_start):
+                    positions[relative] = actual
+                _start = relative + 1
+            for pos in range(start, end):
+                match_positions.append(positions[pos])
+            for key, group in groupby(enumerate(reversed(match_positions)),
+                                      lambda x: x[0] - x[-1]):
+                segment = list(map(itemgetter(1), group))
+                segments.append([segment[0], segment[-1]])
+            for segment in segments:
+                for record in records:
+                    if segment[0] >= record.start and segment[1] <= record.end:
+                        result_segments.append((record.scaffold, segment[0], segment[1],
+                                                record.transcript_id + '|' + record.gene_name, 0, record.strand))
+
+        if len(result_segments) == 0:
+            logger.debug('%s, %s, %s' % (name, start, end))
+            logger.debug('%s' % str(segments))
+            for r in records:
+                logger.debug('%s %s %s %s' % (r.scaffold, r.strand,
+                                              r.start, r.end))
+
+        return result_segments
+
+
+class SAM:
+    """SAM file parser for parsing bowtie2 generated files
+
+    This class uses memory-mapped file object to read a sam file
+
+    Attributes:
+      fileName: A sam file path
+    """
+
+    def __init__(self, fileName=None):
+        self.fileName = fileName
+        self.records = {}
+
+    def read(self, flag=0):
+        """Reads sam file provided during class initialization
+
+        Stores the byte position of every record based on the keyword arg flag
+        provided, to a dict object named self.records
+
+        Keyword Args:
+          flag: The SAM alignment flag for a record. For default, it uses the
+          primary alignment for every record and ignores secondary alignments
+          (default 0)
+        """
+        logger.info('Reading %s' % self.fileName)
+        self.fileHandle = open(self.fileName, 'r+b')
+        bytePosition = self.fileHandle.tell()
+        for line in self.fileHandle:
+            read = line.decode('utf-8').split("\t")
+            if not read[0].startswith("@") and read[1] == str(flag):
+                self.records[read[0]] = bytePosition
+            bytePosition = self.fileHandle.tell()
+        self.fileHandle.close()
+        logger.debug('Reading %s finished' % self.fileName)
+
+    def access(self, queryName):
+        """Provides random access of a record from the sam file
+
+        Args:
+          queryName: The query name of the read from the sam file
+
+        Returns:
+          A list generated after splitting the record line from sam file
+        """
+        self.fileHandle = open(self.fileName, 'r+b')
+        self.mm = mmap.mmap(self.fileHandle.fileno(), 0)
+        self.mm.seek(self.records[queryName])
+        row = self.mm.readline().decode('utf-8').rstrip().split("\t")
+        self.fileHandle.close()
+
+        return self.pretty(row)
+
+    def filterPotentialChimeras(self, min_length=30, flag=0, target=None):
+        """Generated a filtered fasta file from a sam file
+
+        This filtered fasta file contains reads that can be potentially chimeras.
+        The criteria for filtering is based on the minimum length
+
+        Keyword Args:
+          min_length: To be selected as a potential chimera, this is the minimum
+            read length (default 30)
+          flag: The SAM alignment flag describing the type of alignment (default 0)
+          target: The prefix for output file
+        """
+        logger.debug('Filtering {} for potential chimeras'.format(target))
+        target = '{}.filter.fasta'.format(target.rpartition(".")[0])
+        if os.path.exists(target):
+            logger.info('Skipping filtering for {}'.format(target))
+        else:
+            with open(target, 'w') as oH:
+                with open(self.fileName) as iH:
+                    for row in csv.reader(iH, delimiter="\t"):
+                        if not row[0].startswith('@') and row[1] == str(flag):
+                            if len(row[9]) >= 30:
+                                print(textwrap.fill('>%s' % row[0], width=80), file=oH)
+                                print(textwrap.fill('%s' % row[9], width=80), file=oH)
+            logger.debug('Filtering finished')
+        return target
+
+    def pretty(self, row):
+        refId = row[2]
+        start = int(row[3])
+        for i in row[10:]:
+            if i.startswith('MD'):
+                mismatchInfo = i
+        sequence = row[9]
+        cigar = row[5]
+        cigarString = clashchimeras.methods.convertCigar(row[5])
+        matchLength = cigarString.count("M") + cigarString.count("D")
+        end = start + matchLength - 1
+        record = Record(refId=refId, start=start, mismatchInfo=mismatchInfo,
+                        sequence=sequence, cigarString=cigarString, matchLength=matchLength,
+                        cigar=cigar, end=end)
+        return record
+
+
+class Output:
+    """Contains methods for writing output files
+
+    This class is used to generate every kind of output generated by this
+    package which includes plain text, ansi colored text and bed file
+
+    Attributes:
+      target: A prefix for output file which will be automatically followed by
+        extension (default 'wip')
+      overlap: Minimum overlap to be set between two molecules when determining
+        chimera (default 4)
+      gap: Maximum gap (number of unknown nucleotides) to be allowed between
+        two molecules within a chimera (default 9)
+    """
+
+    def __init__(self,
+                 target=None,
+                 smallRNABed=False,
+                 targetRNABed=False,
+                 overlap=4,
+                 gap=9):
+        self.target = target
+        self.overlap = overlap
+        self.gap = gap
+
+        if smallRNABed:
+            self.smallRNABedHandle = open('{}.smallRNA.bed'.format(self.target), 'w')
+            print('# BED locations of smallRNA part of the identified chimera',
+                  file=self.smallRNABedHandle)
+            self.smallRNABedCSV = csv.writer(self.smallRNABedHandle, delimiter="\t")
+            self.smallRNABedCSV.writerow(
+                ['# The name field represents the following:'])
+            self.smallRNABedCSV.writerow(
+                ['#  E.g. 201980-1-48|hsa-mir-100==PAPSS1'])
+            self.smallRNABedCSV.writerow(
+                ['#    201980-1-48 is the fasta identifier'])
+            self.smallRNABedCSV.writerow(
+                ["#      201980 is the unique identifier"])
+            self.smallRNABedCSV.writerow(
+                ["#      1 is the number of times that sequence was observed in raw "
+                 "fastq "])
+            self.smallRNABedCSV.writerow(
+                ["#      48 is the length of the sequence"])
+            self.smallRNABedCSV.writerow(
+                ['#    hsa-mir-100 represents the smallRNA transcript'])
+            self.smallRNABedCSV.writerow(
+                ['#    PAPSS1 represents the gene symbol for targetRNA transcript '
+                 'transcript '])
+        if targetRNABed:
+            self.targetRNABedHandle = open('{}.targetRNA.bed'.format(self.target),
+                                           'w')
+            self.targetRNABedCSV = csv.writer(self.targetRNABedHandle, delimiter="\t")
+            self.targetRNABedCSV.writerow(
+                ['# The name field represents the following:'])
+            self.targetRNABedCSV.writerow(
+                ['#  E.g. 136019-1-48|ENST00000375759.6|SPEN==hsa-mir-103a-2'])
+            self.targetRNABedCSV.writerow(
+                ['#    136019-1-48 is the fasta identifier'])
+            self.targetRNABedCSV.writerow(
+                ["#      136019 is the unique identifier"])
+            self.targetRNABedCSV.writerow(
+                ["#      1 is the number of times that sequence was observed in raw "
+                 "fastq "])
+            self.targetRNABedCSV.writerow(
+                ["#      48 is the length of the sequence"])
+            self.targetRNABedCSV.writerow(
+                ["#    ENST00000375759.6 is the targetRNA transcript identifier"])
+            self.targetRNABedCSV.writerow(
+                ['#    SPEN is the gene symbol for for targetRNA transcript '
+                 'ENST00000375759.6'])
+            self.targetRNABedCSV.writerow(
+                ['#    hsa-mir-103a-2 represents the smallRNA transcript '])
+
+        self.hybWriter = open('%s.chimeras.tsv' % self.target, 'w')
+        self.hybComments()
+
+    def hybComments(self):
+        print("# fasta Identifier: The identifier in <sample>.unique.fasta. ",
+              "#\tE.g. 123456-3-68 ",
+              "#\t123456 is the unique identifier",
+              "#\t3 is the number of times that sequence was observed in raw "
+              "fastq ",
+              "#\t68 is the length of the sequence", sep="\n", file=self.hybWriter)
+
+        print("# smallRNA: The cDNA ID of the type of RNA labelled as smallRNA in "
+              "the analysis",
+              "#\tE.g. hsa-let-7b (miRBase identifier)",
+              "#\tE.g. ENST00000619178.1|SNORD3D| (Gencode snoRNA identifier)",
+              sep="\n", file=self.hybWriter)
+
+        print("# smallRNA_start: cDNA alignment start position of the smallRNA "
+              "part of the chimera", file=self.hybWriter)
+
+        print("# smallRNA_MDtag: Showing the MD tag from the smallRNA SAM "
+              "alignment for the chimera",
+              "#\tSAM file format specification",
+              "#\thttp://samtools.github.io/hts-specs/SAMv1.pdf",
+              "#\tMD Z String for mismatching positions.Regex:[0-9]+((["
+              "A-Z]|\^[A-Z]+)[0-9]+)*9", sep="\n", file=self.hybWriter)
+
+        print('# smallRNA_cigar: Cigar string from the smallRNA SAM alignment for '
+              'the chimera',
+              "#\tSAM file format specification",
+              "#\thttp://samtools.github.io/hts-specs/SAMv1.pdf",
+              '#\tSee CIGAR in the file', sep="\n", file=self.hybWriter)
+
+        print('# arbitrary_chimera: The chimera representation indicating what '
+              'part of the sequence represents smallRNA and targetRNA',
+              '#\t{ is representing a match with smallRNA',
+              '#\t} is representing a match with targetRNA',
+              '#\t# is representing unaligned sequences (identified as --gap -ga)',
+              '#\t- is representing a deletion (D in cigar string)',
+              '#\t+ is representing a deletion (I in cigar string)',
+              '#\tE.g {{{{{{{{-{{{{{{{{{{{{{##}}}}}}}}}}+}}}}}}}}}}}}}}}}}}}}}}'
+              '#\tE.g The first 22 nucleotides are aligning to smallRNA cDNA',
+              '#\tE.g The last 33 nucleotides are aligning to targetRNA cDNA',
+              sep="\n", file=self.hybWriter)
+
+        print('# read_sequence: The actual sequence that is appeared in raw '
+              'reads', file=self.hybWriter)
+
+        print("# targetRNA: The cDNA ID of the type of RNA labelled as targetRNA "
+              "in "
+              "the analysis",
+              "#\tE.g. hsa-let-7b (miRBase identifier)",
+              "#\tE.g. ENST00000619178.1|SNORD3D| (Gencode snoRNA identifier)",
+              sep="\n", file=self.hybWriter)
+
+        print("# targetRNA_start: cDNA alignment start position of the targetRNA "
+              "part of the chimera", file=self.hybWriter)
+
+        print("# targetRNA_MDtag: Showing the MD tag from the targetRNA SAM "
+              "alignment for the chimera",
+              "#\tSAM file format specification",
+              "#\thttp://samtools.github.io/hts-specs/SAMv1.pdf",
+              "#\tMD Z String for mismatching positions.Regex:[0-9]+((["
+              "A-Z]|\^[A-Z]+)[0-9]+)*9", sep="\n", file=self.hybWriter)
+
+        print('# targetRNA_cigar: Cigar string from the targetRNA SAM alignment '
+              'for '
+              'the chimera',
+              "#\tSAM file format specification",
+              "#\thttp://samtools.github.io/hts-specs/SAMv1.pdf",
+              '#\tSee CIGAR in the file', sep="\n", file=self.hybWriter)
+
+        print("# fasta_Identifier", "smallRNA", "smallRNA_start", "smallRNA_MDtag",
+              "smallRNA_cigar", "arbitrary_chimera", "read_sequence", "targetRNA",
+              "targetRNA_start", "targetRNA_MDtag", "targetRNA_cigar", sep="\t",
+              file=self.hybWriter)
+
+    def writeTargetRNABed(self, query, targetRNASegments, smallRNA):
+        if "ENS" in smallRNA and "|" in smallRNA:
+            _smallRNA = smallRNA.split("|")[5]
+        else:
+            _smallRNA = smallRNA
+        for segment in targetRNASegments:
+            _segment = list(segment)
+            _segment[3] = query + "|" + _segment[3] + "==" + _smallRNA
+            self.targetRNABedCSV.writerow(_segment)
+
+    def writeSmallRNABed(self, query, smallRNASegments, targetRNA):
+        if "ENS" in targetRNA and "|" in targetRNA:
+            _targetRNA = targetRNA.split("|")[5]
+        else:
+            _targetRNA = targetRNA
+        for segment in smallRNASegments:
+            _segment = list(segment)
+            _segment[3] = query + "|" + _segment[3] + "==" + _targetRNA
+            self.smallRNABedCSV.writerow(_segment)
+
+    def write(self, queryName, smallRNA, targetRNA):
+        chimeraString = clashchimeras.methods.chimeraOrNot(smallRNA.cigarString,
+                                                           targetRNA.cigarString, overlap=self.overlap, gap=self.gap)
+        smallRNARegion = clashchimeras.methods.findRegion(smallRNA)
+        targetRNARegion = clashchimeras.methods.findRegion(targetRNA)
+        print(queryName, smallRNARegion, smallRNA.start, smallRNA.mismatchInfo,
+              smallRNA.cigar, chimeraString, smallRNA.sequence,
+              targetRNARegion, targetRNA.start,
+              targetRNA.mismatchInfo, targetRNA.cigar, sep="\t", file=self.hybWriter)
+
+    def __del__(self):
+        self.hybWriter.close()
+
+
+class Fasta:
+
+    def __init__(self, genome=None, gtf=None):
+        self.genome = genome
+        self.gtf = gtf
+        self.faidx = pyfaidx.Fasta(self.genome)
+
+    def getBiotype(self, output=None, biotype=None):
+        self.sequences = []
+        g = GTF(fileName=self.gtf)
+        if biotype == 'tRNA':
+            g.readBiotype(biotype=biotype, featureType='tRNAscan')
+        else:
+            g.readBiotype(biotype=biotype)
+        for transcript_id, exons in g.biotypeFeatures.items():
+            temp_seq = ''
+            exons.sort(key=itemgetter(0))
+            for exon in exons:
+                if exon[4] == '-':
+                    temp_seq += (-self.faidx[exon[1]][exon[2] - 1:exon[3]]).seq
+                elif exon[4] == '+':
+                    temp_seq += self.faidx[exon[1]][exon[2] - 1:exon[3]].seq
+
+            _id = '{}|{}|{}|{}|{}|{}|{}'.format(transcript_id,
+                                                exons[0][5],
+                                                exons[0][6],
+                                                exons[0][7],
+                                                exons[0][8],
+                                                exons[0][9],
+                                                len(temp_seq))
+
+            temp_rec = SeqRecord(seq=Seq(temp_seq), id=_id,
+                                 description='')
+
+            self.sequences.append(temp_rec)
+
+        if not output:
+            logger.error('Please provide output file..')
+            sys.exit()
+        else:
+            logger.info('Writing {}'.format(output))
+            SeqIO.write(self.sequences, output, 'fasta')
+
+
+class Fastq:
+
+    def __init__(self, fileName=None, compressed=False):
+        self.fileName = fileName
+        self.compressed = compressed
+        self.n = 4
+        self.sequences = Counter()
+        self.uniqueOutput = fileName.rpartition(".")[0] + '.unique.fasta'
+
+    def recordIterator(self):
+        record = []
+        record_length = 0
+        for line in self.fileHandle:
+            if record_length == self.n:
+                yield record
+                record_length = 0
+                record = []
+            record.append(line.decode().rstrip())
+            record_length += 1
+        yield record
+
+    def createUnique(self):
+        if self.compressed:
+            self.fileHandle = gzip.open(self.fileName, 'rb')
+        else:
+            self.fileHandle = open(self.fileName, 'rb')
+        logger.info('Reading {}'.format(self.fileName))
+        for record in self.recordIterator():
+            self.sequences[record[1]] += 1
+        logger.info('Writing {}'.format(self.uniqueOutput))
+        with open(self.uniqueOutput, 'w') as wH:
+            for index, (sequence, counts) in enumerate(sorted(self.sequences.items(),
+                                                              key=itemgetter(1), reverse=True), start=1):
+                print('>{}-{}-{}'.format(index, counts, len(sequence)), file=wH)
+                print(textwrap.fill(sequence, width=80), file=wH)
+        logger.debug('Finished writing {}'.format(self.uniqueOutput))
+        self.fileHandle.close()
+
+
+class Record:
+    """A custom object (preferred over dict) for easy access using variables
+
+    It's a dependency for GTF and GFF classes
+    """
+
+    def __init__(self, **kwds):
+        self.__dict__.update(kwds)
diff --git a/build/lib/clashchimeras/runners.py b/build/lib/clashchimeras/runners.py
new file mode 100644
index 0000000..9f0a033
--- /dev/null
+++ b/build/lib/clashchimeras/runners.py
@@ -0,0 +1,179 @@
+import hashlib
+import logging
+import os
+import subprocess
+
+from clashchimeras.parsers import Fastq
+
+logger = logging.getLogger('root')
+
+
+class Bowtie:
+    """Class for calling bowtie2 aligner
+    """
+
+    def __init__(self, args=None):
+        self.parameters = []
+        self.inputFile = args.input
+        self.compressed = args.gzip
+        self.bowtieExecutable = args.bowtieExecutable
+        self.uniqueInput = self.inputFile.rpartition(".")[0] + '.unique.fasta'
+        if os.path.exists(self.uniqueInput) and os.path.exists(
+                self.uniqueInput + '.sha256sum'):
+            with open(self.uniqueInput + '.sha256sum') as f:
+                s = f.readline().rstrip()
+            _s = self.hash(self.uniqueInput)
+            if s == _s:
+                logger.info(
+                    '{} is present and verified'.format(self.uniqueInput))
+            else:
+                logger.warn('{} is present but checksum verification '
+                            'failed'.format(self.uniqueInput))
+                logger.warn(
+                    '{} will be generated again'.format(self.uniqueInput))
+        else:
+            _f = Fastq(fileName=self.inputFile, compressed=self.compressed)
+            _f.createUnique()
+            sha256sum = self.hash(self.uniqueInput)
+            with open(self.uniqueInput + '.sha256sum', 'w') as wH:
+                print(sha256sum, file=wH)
+        if not args.reverseComplement:
+            self.parameters.append('--norc')
+        self.parameters.append('-N')
+        self.parameters.append(str(args.mismatch))
+        if not args.unaligned:
+            self.parameters.append('--no-unal')
+        self.parameters.append('-p')
+        self.parameters.append(str(args.threads))
+        self.parameters.append('--' + args.preset)
+        self.smallRNAIndex = args.smallRNAIndex
+        self.targetRNAIndex = args.targetRNAIndex
+        self.output = args.output
+        self.runAligner = True
+
+    def hash(self, file):
+        sha256 = hashlib.sha256()
+        with open(file, 'r+b') as f:
+            while True:
+                buf = f.read(8192)
+                if not buf:
+                    break
+                sha256.update(buf)
+        return sha256.hexdigest()
+
+    def run(self, output=None, filtered=None):
+        self.bowtie2 = []
+        if output == 'smallRNA':
+            self.outputSam = '{}.{}.sam'.format(self.output, 'smallRNA')
+        elif output == 'targetRNA':
+            self.outputSam = '{}.{}.sam'.format(self.output, 'targetRNA')
+        if not self.bowtieExecutable:
+            self.bowtie2.append('bowtie2')
+        else:
+            self.bowtie2.append(self.bowtieExecutable)
+        self.bowtie2.append('-f')
+        if filtered:
+            self.bowtie2.append(filtered)
+        else:
+            self.bowtie2.append(self.uniqueInput)
+        self.bowtie2.append('-x')
+        if output == 'smallRNA':
+            self.bowtie2.append('"' + self.smallRNAIndex + '"')
+        elif output == 'targetRNA':
+            self.bowtie2.append('"' + self.targetRNAIndex + '"')
+        self.bowtie2 = self.bowtie2 + self.parameters
+        self.bowtie2.append('-S')
+        self.bowtie2.append(self.outputSam)
+
+        if os.path.exists(self.outputSam) and os.path.exists(
+                self.outputSam + '.sha256sum'):
+            with open(self.outputSam + '.sha256sum') as f:
+                s = f.readline().rstrip()
+            _s = self.hash(self.outputSam)
+            if s == _s:
+                logger.info(
+                    '{} is present and verified'.format(self.outputSam))
+                self.runAligner = False
+            else:
+                logger.warn('{} is present but checksum verification '
+                            'failed'.format(self.outputSam))
+                logger.warn('{} will be generated again')
+                self.runAligner = True
+
+        if self.runAligner:
+            logger.info(
+                'Executing bowtie2 and generating {}'.format(self.outputSam))
+            logger.info('The complete command for bowtie2 is \n{}'.format(
+                        " ".join(self.bowtie2)))
+            bowtie2Process = subprocess.Popen(self.bowtie2,
+                                              stdout=subprocess.PIPE,
+                                              stderr=subprocess.PIPE)
+            bowtie2Stdout, bowtie2Stderr = bowtie2Process.communicate()
+            logger.warn(bowtie2Stdout.decode('utf-8'))
+            logger.warn(bowtie2Stderr.decode('utf-8'))
+            if os.path.exists(self.outputSam):
+                logger.info('Bowtie2 execution finished and {} '
+                            'generated'.format(self.outputSam))
+                sha256sum = self.hash(self.outputSam)
+                with open(self.outputSam + '.sha256sum', 'w') as wH:
+                    print(sha256sum, file=wH)
+            else:
+                logger.error('Something went wrong with your bowtie2 command.')
+                logger.error(
+                    'Please check your bowtie2 stderr and report a bug....')
+
+
+class Tophat:
+
+    def __init__(self, args=None):
+        self.inputFile = args.input
+        if args.genomeIndex:
+            args.genomeIndex = os.path.expanduser(args.genomeIndex)
+        if args.transcriptomeIndex:
+            args.transcriptomeIndex = os.path.expanduser(
+                args.transcriptomeIndex)
+        if args.output:
+            args.output = os.path.expanduser(args.output)
+        if args.input:
+            args.input = os.path.expanduser(args.input)
+
+        self.tophatOutput = args.output
+        self.genomeIndex = args.genomeIndex
+
+        self.tophatRun = []
+        if args.tophatExecutable is not None:
+            self.tophatExecutable = args.tophatExecutable
+        else:
+            self.tophatExecutable = 'tophat'
+        self.tophatRun.append(self.tophatExecutable)
+        self.tophatRun.append('--b2-' + args.tophatPreset)
+        self.tophatRun.append('--b2-N')
+        self.tophatRun.append(str(args.mismatch))
+
+        if args.transcriptomeIndex:
+            self.tophatRun.append('--transcriptome-index=' +
+                                  args.transcriptomeIndex.strip())
+        else:
+            logger.warn('Transcriptome index is not provided for tophat')
+        self.tophatRun.append('-p')
+        self.tophatRun.append(str(args.threads))
+        self.tophatRun.append('-o')
+        self.tophatRun.append(self.tophatOutput)
+        self.tophatRun.append(self.genomeIndex)
+        self.tophatRun.append(self.inputFile)
+
+    def run(self):
+        logger.info('Executing Tophat2 for {}'.format(self.inputFile))
+        logger.info('The complete command for tophat is \n{}'.format(" ".join(
+            self.tophatRun)))
+        logger.info('The stdout and stderr for tophat run will be '
+                    'dumped after its finished')
+        tophatProcess = subprocess.Popen(self.tophatRun,
+                                         stdout=subprocess.PIPE,
+                                         stderr=subprocess.PIPE)
+        tophatStderr, tophatStdout = tophatProcess.communicate()
+        logger.warn('Tophat stderr')
+        logger.info(tophatStderr.decode("utf-8"))
+        logger.warn('Tophat stdout')
+        logger.info(tophatStdout.decode("utf-8"))
+        logger.info('Execution of Tophat2 finished')
diff --git a/build/scripts-3.5/align-for-chimeras b/build/scripts-3.5/align-for-chimeras
new file mode 100755
index 0000000..d3291f8
--- /dev/null
+++ b/build/scripts-3.5/align-for-chimeras
@@ -0,0 +1,5 @@
+#!/usr/bin/python3
+
+import clashchimeras.align
+
+clashchimeras.align.main()
diff --git a/build/scripts-3.5/download-for-chimeras b/build/scripts-3.5/download-for-chimeras
new file mode 100755
index 0000000..1dfe575
--- /dev/null
+++ b/build/scripts-3.5/download-for-chimeras
@@ -0,0 +1,5 @@
+#!/usr/bin/python3
+
+import clashchimeras.download
+
+clashchimeras.download.main()
diff --git a/build/scripts-3.5/find-chimeras b/build/scripts-3.5/find-chimeras
new file mode 100755
index 0000000..53e0116
--- /dev/null
+++ b/build/scripts-3.5/find-chimeras
@@ -0,0 +1,5 @@
+#!/usr/bin/python3
+
+import clashchimeras.find
+
+clashchimeras.find.main()
diff --git a/clashchimeras/align.py b/clashchimeras/align.py
index addf1a0..8f2efc9 100644
--- a/clashchimeras/align.py
+++ b/clashchimeras/align.py
@@ -6,199 +6,202 @@
 from clashchimeras.parsers import SAM
 from clashchimeras.runners import Bowtie, Tophat
 
+
 class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
                       argparse.RawDescriptionHelpFormatter):
-  pass
+    pass
+
 
 def parseArguments():
-  requiredArgs = getRequiredArgs()
-  bowtieArgs = getBowtie2Args()
-  tophatArgs = getTophatArgs()
-  optionalArgs = getOptionalArgs()
-  outputArgs = getOutputArgs()
-
-  parser = argparse.ArgumentParser(
-              parents=[requiredArgs, bowtieArgs, tophatArgs,
-                       outputArgs, optionalArgs],
-              formatter_class=CustomFormatter,
-              description='Given a fastq file, this script executes '
-              'bowtie2 and tophat aligners to generate alignment files '
-              'necessary for detecting chimeras in the reads',
-              usage='\n %(prog)s -i input.fastq -si '
-                    '/path/to/smallRNA_index -ti /path/to/targetRNA_index -o '
-                    'output -r bowtie2 \n %(prog)s -i input.fastq -gi '
-                    '/path/to/genome_index -tri /path/to/transcriptome_index '
-                    '-o output -r tophat \n \n \n '
-                    'To see detailed help, please run \n %(prog)s -h',
-              add_help=True)
-
-  return parser
-
-  args = parser.parse_args()
-
-  if args.logLevel == 'DEBUG':
-    logger = clashchimeras.log.debug_logger('root')
-  elif args.logLevel == 'WARNING':
-    logger = clashchimeras.log.warning_logger('root')
-  elif args.logLevel == 'ERROR':
-    logger = clashchimeras.log.error_logger('root')
-  else:
-    logger = clashchimeras.log.info_logger('root')
-
-  argCheck = Arguments(args, type='align')
-  argCheck.validateAlign()
-
-  return args
+    requiredArgs = getRequiredArgs()
+    bowtieArgs = getBowtie2Args()
+    tophatArgs = getTophatArgs()
+    optionalArgs = getOptionalArgs()
+    outputArgs = getOutputArgs()
+
+    parser = argparse.ArgumentParser(
+        parents=[requiredArgs, bowtieArgs, tophatArgs,
+                 outputArgs, optionalArgs],
+        formatter_class=CustomFormatter,
+        description='Given a fastq file, this script executes '
+        'bowtie2 and tophat aligners to generate alignment files '
+        'necessary for detecting chimeras in the reads',
+        usage='\n %(prog)s -i input.fastq -si '
+        '/path/to/smallRNA_index -ti /path/to/targetRNA_index -o '
+        'output -r bowtie2 \n %(prog)s -i input.fastq -gi '
+        '/path/to/genome_index -tri /path/to/transcriptome_index '
+        '-o output -r tophat \n \n \n '
+        'To see detailed help, please run \n %(prog)s -h',
+        add_help=True)
+
+    return parser
+
+    args = parser.parse_args()
+
+    if args.logLevel == 'DEBUG':
+        logger = clashchimeras.log.debug_logger('root')
+    elif args.logLevel == 'WARNING':
+        logger = clashchimeras.log.warning_logger('root')
+    elif args.logLevel == 'ERROR':
+        logger = clashchimeras.log.error_logger('root')
+    else:
+        logger = clashchimeras.log.info_logger('root')
+
+    argCheck = Arguments(args, type='align')
+    argCheck.validateAlign()
+
+    return args
+
 
 def getRequiredArgs():
-  parser = argparse.ArgumentParser(add_help=False)
+    parser = argparse.ArgumentParser(add_help=False)
 
-  required = parser.add_argument_group('Input arguments')
+    required = parser.add_argument_group('Input arguments')
 
-  required.add_argument('--input', '-i',
-                        help='Input file containing reads fastq',
-                        metavar='raw reads',
-                        required=True)
+    required.add_argument('--input', '-i',
+                          help='Input file containing reads fastq',
+                          metavar='raw reads',
+                          required=True)
 
-  return parser
+    return parser
 
-def getBowtie2Args():
-  parser = argparse.ArgumentParser(add_help=False)
 
-  bowtieArgs = parser.add_argument_group('Bowtie2 arguments')
+def getBowtie2Args():
+    parser = argparse.ArgumentParser(add_help=False)
 
+    bowtieArgs = parser.add_argument_group('Bowtie2 arguments')
 
-  bowtieArgs.add_argument("--smallRNAIndex", "-si",
-                        help="""Provide the smallRNA bowtie2 index (Usually
+    bowtieArgs.add_argument("--smallRNAIndex", "-si",
+                            help="""Provide the smallRNA bowtie2 index (Usually
                         resides in ~/db/CLASHChimeras or elsewhere if you have
                         specified in --path -pa during initialize)""")
 
-  bowtieArgs.add_argument("--targetRNAIndex", "-ti",
-                        help="""Provide the targetRNA bowtie2 index (Usually
+    bowtieArgs.add_argument("--targetRNAIndex", "-ti",
+                            help="""Provide the targetRNA bowtie2 index (Usually
                         resides in ~/db/CLASHChimeras or elsewhere if you have
                         specified in --path -pa during initialize)""")
 
-  return parser
+    return parser
+
 
 def getTophatArgs():
-  parser = argparse.ArgumentParser(add_help=False)
+    parser = argparse.ArgumentParser(add_help=False)
 
-  tophatArgs = parser.add_argument_group('Tophat arguments')
+    tophatArgs = parser.add_argument_group('Tophat arguments')
 
-  tophatArgs.add_argument("--genomeIndex", "-gi",
-                        help="""Provide the genome bowtie2 index (Usually
+    tophatArgs.add_argument("--genomeIndex", "-gi",
+                            help="""Provide the genome bowtie2 index (Usually
                         resides in ~/db/CLASHChimeras or elsewhere if you have
                         specified in --path during initialize)""")
 
-  tophatArgs.add_argument("--transcriptomeIndex", "-tri",
-                        help="""Provide the transcriptome index as specified
+    tophatArgs.add_argument("--transcriptomeIndex", "-tri",
+                            help="""Provide the transcriptome index as specified
                         in tophat --transcriptome-index""")
 
-  return parser
+    return parser
 
 
 def getOutputArgs():
-  parser = argparse.ArgumentParser(add_help=False)
+    parser = argparse.ArgumentParser(add_help=False)
 
-  output = parser.add_argument_group('Output')
+    output = parser.add_argument_group('Output')
 
-  output.add_argument("--output", "-o",
-                      help="""The output name without extension (.sam .bam will
+    output.add_argument("--output", "-o",
+                        help="""The output name without extension (.sam .bam will
                       be added)""",
-                      metavar='output prefix',
-                      required=True)
+                        metavar='output prefix',
+                        required=True)
 
-  return parser
+    return parser
 
 
 def getOptionalArgs():
-  parser = argparse.ArgumentParser(add_help=False)
+    parser = argparse.ArgumentParser(add_help=False)
 
-  optional = parser.add_argument_group('Optional arguments')
+    optional = parser.add_argument_group('Optional arguments')
 
-  optional.add_argument("--run", "-r",
-                        help="Run the following aligner for raw reads",
-                        default='bowtie2',
-                        choices=['bowtie2', 'tophat'])
+    optional.add_argument("--run", "-r",
+                          help="Run the following aligner for raw reads",
+                          default='bowtie2',
+                          choices=['bowtie2', 'tophat'])
 
-  optional.add_argument("--logLevel", "-l",
-                        help="Set logging level",
-                        default='INFO',
-                        choices=['INFO', 'DEBUG', 'WARNING', 'ERROR'])
+    optional.add_argument("--logLevel", "-l",
+                          help="Set logging level",
+                          default='INFO',
+                          choices=['INFO', 'DEBUG', 'WARNING', 'ERROR'])
 
-  optional.add_argument("--gzip", "-gz", action="store_true",
-                        help="Whether your input file is gzipped")
+    optional.add_argument("--gzip", "-gz", action="store_true",
+                          help="Whether your input file is gzipped")
 
-  optional.add_argument("--bowtieExecutable", "-be",
-                        help="""Provide bowtie2 executable if it's not present
+    optional.add_argument("--bowtieExecutable", "-be",
+                          help="""Provide bowtie2 executable if it's not present
                         in your path""")
 
-  optional.add_argument("--tophatExecutable", "-te",
-                        help="""Provide Tophat executable if it's not present in
+    optional.add_argument("--tophatExecutable", "-te",
+                          help="""Provide Tophat executable if it's not present in
                         your path""")
 
-  optional.add_argument("--preset", "-p", default='sensitive-local',
-                        choices=['very-fast', 'fast', 'sensitive',
-                        'very-sensitive', 'very-fast-local', 'fast-local',
-                        'sensitive-local', 'very-sensitive-local'],
-                        help="Provide preset for bowtie2")
-
-  optional.add_argument("--tophatPreset", "-tp",
-                        choices=['very-fast', 'fast', 'sensitive',
-                        'very-sensitive'],
-                        default='very-sensitive',
-                        help="Provide preset for Tophat")
-
-  optional.add_argument("--mismatch", "-m", type=int,
-                        choices=[0, 1], default=1,
-                        help="""Number of seed mismatches as represented in
+    optional.add_argument("--preset", "-p", default='sensitive-local',
+                          choices=['very-fast', 'fast', 'sensitive',
+                                   'very-sensitive', 'very-fast-local', 'fast-local',
+                                   'sensitive-local', 'very-sensitive-local'],
+                          help="Provide preset for bowtie2")
+
+    optional.add_argument("--tophatPreset", "-tp",
+                          choices=['very-fast', 'fast', 'sensitive',
+                                   'very-sensitive'],
+                          default='very-sensitive',
+                          help="Provide preset for Tophat")
+
+    optional.add_argument("--mismatch", "-m", type=int,
+                          choices=[0, 1], default=1,
+                          help="""Number of seed mismatches as represented in
                         bowtie2 as -N""")
 
-  optional.add_argument("--reverseComplement", "-rc",
-                        action="store_true",
-                        help="""Align to reverse complement of reference as
+    optional.add_argument("--reverseComplement", "-rc",
+                          action="store_true",
+                          help="""Align to reverse complement of reference as
                         represented in bowtie2 as --norc""")
 
-  optional.add_argument("--unaligned", "-un",
-                        action="store_true",
-                        help="""Whether to keep unaligned reads in the output
+    optional.add_argument("--unaligned", "-un",
+                          action="store_true",
+                          help="""Whether to keep unaligned reads in the output
                         sam file. Represented in bowtie2 as --no-unal""")
 
-  optional.add_argument("--threads", "-n", default=1,
-                        help="Specify the number of threads")
+    optional.add_argument("--threads", "-n", default=1,
+                          help="Specify the number of threads")
 
-  return parser
+    return parser
 
 
 def main():
 
-  parser = parseArguments()
-
-  args = parser.parse_args()
+    parser = parseArguments()
 
+    args = parser.parse_args()
 
-  if args.logLevel == 'DEBUG':
-    logger = clashchimeras.log.debug_logger('root')
-  elif args.logLevel == 'WARNING':
-    logger = clashchimeras.log.warning_logger('root')
-  elif args.logLevel == 'ERROR':
-    logger = clashchimeras.log.error_logger('root')
-  else:
-    logger = clashchimeras.log.info_logger('root')
+    if args.logLevel == 'DEBUG':
+        logger = clashchimeras.log.debug_logger('root')
+    elif args.logLevel == 'WARNING':
+        logger = clashchimeras.log.warning_logger('root')
+    elif args.logLevel == 'ERROR':
+        logger = clashchimeras.log.error_logger('root')
+    else:
+        logger = clashchimeras.log.info_logger('root')
 
-  argCheck = Arguments(args)
-  argCheck.validateAlign()
+    argCheck = Arguments(args)
+    argCheck.validateAlign()
 
-  if args.run == 'bowtie2':
-    b = Bowtie(args=args)
-    b.run(output='smallRNA')
-    s = SAM(fileName=b.outputSam)
-    filteredFasta = s.filterPotentialChimeras(target=b.outputSam)
-    b.run(output='targetRNA', filtered=filteredFasta)
+    if args.run == 'bowtie2':
+        b = Bowtie(args=args)
+        b.run(output='smallRNA')
+        s = SAM(fileName=b.outputSam)
+        filteredFasta = s.filterPotentialChimeras(target=b.outputSam)
+        b.run(output='targetRNA', filtered=filteredFasta)
 
-  if args.run == 'tophat' :
-    t = Tophat(args=args)
-    t.run()
+    if args.run == 'tophat':
+        t = Tophat(args=args)
+        t.run()
 
 if __name__ == "__main__":
-   main()
+    main()
diff --git a/clashchimeras/download.py b/clashchimeras/download.py
index ddb9b64..ef767b6 100644
--- a/clashchimeras/download.py
+++ b/clashchimeras/download.py
@@ -5,117 +5,122 @@
 import clashchimeras.log
 from clashchimeras.initialize import Releases, Index
 
+
 class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
                       argparse.RawDescriptionHelpFormatter):
-  pass
+    pass
+
 
 def parseArguments():
-  requiredArgs = getRequiredArgs()
-  optionalArgs = getOptionalArgs()
-  outputArgs = getOutputArgs()
-
-  parser = argparse.ArgumentParser(
-              parents=[requiredArgs, outputArgs, optionalArgs],
-              formatter_class=CustomFormatter,
-              description=textwrap.dedent("""\
+    requiredArgs = getRequiredArgs()
+    optionalArgs = getOptionalArgs()
+    outputArgs = getOutputArgs()
+
+    parser = argparse.ArgumentParser(
+        parents=[requiredArgs, outputArgs, optionalArgs],
+        formatter_class=CustomFormatter,
+        description=textwrap.dedent("""\
               Downloads required sequences and create bowtie2 indexes
               required for alignment"""),
-              usage='An example usage is: %(prog)s -gor "H.sapiens" -mor hsa',
-              add_help=True)
+        usage='An example usage is: %(prog)s -gor "H.sapiens" -mor hsa',
+        add_help=True)
+
+    return parser
 
-  return parser
 
 def getRequiredArgs():
-  parser = argparse.ArgumentParser(add_help=False)
+    parser = argparse.ArgumentParser(add_help=False)
 
-  required = parser.add_argument_group('Required arguments')
+    required = parser.add_argument_group('Required arguments')
 
-  required.add_argument("--gencodeOrganism", "-gor",
-                        default="H.sapiens", choices=["H.sapiens",
-                        "M.musculus"],
-                        help="""Select model organism""",
-                        required=True)
+    required.add_argument("--gencodeOrganism", "-gor",
+                          default="H.sapiens", choices=["H.sapiens",
+                                                        "M.musculus"],
+                          help="""Select model organism""",
+                          required=True)
 
-  required.add_argument("--mirbaseOrganism", "-mor",
-                        default='hsa',
-                        help="""Select organism to download microRNAs for""",
-                        required=True)
+    required.add_argument("--mirbaseOrganism", "-mor",
+                          default='hsa',
+                          help="""Select organism to download microRNAs for""",
+                          required=True)
+
+    return parser
 
-  return parser
 
 def getOutputArgs():
-  parser = argparse.ArgumentParser(add_help=False)
+    parser = argparse.ArgumentParser(add_help=False)
 
-  output = parser.add_argument_group('Output')
+    output = parser.add_argument_group('Output')
 
-  output.add_argument("--path", "-pa",
-                      help="""Location where all the database files and
+    output.add_argument("--path", "-pa",
+                        help="""Location where all the database files and
                       indexes will be downloaded""",
-                      default='~/db/CLASHChimeras',
-                      metavar='path')
+                        default='~/db/CLASHChimeras',
+                        metavar='path')
+
+    return parser
 
-  return parser
 
 def getOptionalArgs():
-  parser = argparse.ArgumentParser(add_help=False)
+    parser = argparse.ArgumentParser(add_help=False)
 
-  optional = parser.add_argument_group('Optional arguments')
+    optional = parser.add_argument_group('Optional arguments')
 
-  optional.add_argument("--logLevel", "-l",
-                        help="Set logging level",
-                        default='INFO',
-                        choices=['INFO', 'DEBUG', 'WARNING', 'ERROR'])
+    optional.add_argument("--logLevel", "-l",
+                          help="Set logging level",
+                          default='INFO',
+                          choices=['INFO', 'DEBUG', 'WARNING', 'ERROR'])
 
-  optional.add_argument("--bowtieExecutable", "-be",
-                        help="""Provide bowtie2 executable if it's not present
+    optional.add_argument("--bowtieExecutable", "-be",
+                          help="""Provide bowtie2 executable if it's not present
                         in your path""")
 
-  optional.add_argument("--tophatExecutable", "-te",
-                        help="""Provide Tophat executable if it's not present in
+    optional.add_argument("--tophatExecutable", "-te",
+                          help="""Provide Tophat executable if it's not present in
                         your path""")
 
-  optional.add_argument("--miRNA", "-mi",
-                        choices=['mature', 'hairpin'], default='hairpin',
-                        help="Which miRNA sequences to align")
+    optional.add_argument("--miRNA", "-mi",
+                          choices=['mature', 'hairpin'], default='hairpin',
+                          help="Which miRNA sequences to align")
 
-  return parser
+    return parser
 
 
 def main():
-  parser = parseArguments()
+    parser = parseArguments()
 
-  args = parser.parse_args()
+    args = parser.parse_args()
 
-  if args.logLevel == 'DEBUG':
-    logger = clashchimeras.log.debug_logger('root')
-  elif args.logLevel == 'WARNING':
-    logger = clashchimeras.log.warning_logger('root')
-  elif args.logLevel == 'ERROR':
-    logger = clashchimeras.log.error_logger('root')
-  else:
-    logger = clashchimeras.log.info_logger('root')
+    if args.logLevel == 'DEBUG':
+        logger = clashchimeras.log.debug_logger('root')
+    elif args.logLevel == 'WARNING':
+        logger = clashchimeras.log.warning_logger('root')
+    elif args.logLevel == 'ERROR':
+        logger = clashchimeras.log.error_logger('root')
+    else:
+        logger = clashchimeras.log.info_logger('root')
 
-  r = Releases(gencodeOrganism=args.gencodeOrganism,
-           mirbaseOrganism=args.mirbaseOrganism,
-           path=os.path.expanduser(args.path))
+    r = Releases(gencodeOrganism=args.gencodeOrganism,
+                 mirbaseOrganism=args.mirbaseOrganism,
+                 path=os.path.expanduser(args.path))
 
-  i = Index(root=r.gencodePath)
-  i.create()
+    i = Index(root=r.gencodePath)
+    i.create()
 
-  logger.warn('Please find the indexes created listed below..')
-  logger.warn('Use them when you run align-for-chimeras')
+    logger.warn('Please find the indexes created listed below..')
+    logger.warn('Use them when you run align-for-chimeras')
 
-  for _i in i.created:
-    logger.warn(_i)
+    for _i in i.created:
+        logger.warn(_i)
 
-  i = Index(root=r.mirbasePath)
-  i.create()
+    i = Index(root=r.mirbasePath)
+    i.create()
 
-  logger.warn('Please find the indexes created listed below..')
-  logger.warn('Use them when you run align-for-chimeras')
+    logger.warn('Please find the indexes created listed below..')
+    logger.warn('Use them when you run align-for-chimeras')
 
-  for _i in i.created:
-    logger.warn(_i)
+    for _i in i.created:
+        logger.warn(_i)
 
 if __name__ == "__main__":
-   main()
+    main()
diff --git a/clashchimeras/find.py b/clashchimeras/find.py
index f5a4302..025a0d3 100644
--- a/clashchimeras/find.py
+++ b/clashchimeras/find.py
@@ -3,223 +3,216 @@
 import sys
 
 from colors import yellow
-from progress.bar import Bar
 
 import clashchimeras.log
 import clashchimeras.methods
 from clashchimeras.parsers import GFF, GTF, Output, SAM
 from clashchimeras.initialize import Arguments
 
+
 class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
                       argparse.RawDescriptionHelpFormatter):
-  pass
+    pass
+
 
 def parseArguments():
-  requiredArgs = getRequiredArgs()
-  optionalArgs = getOptionalArgs()
-  outputArgs = getOutputArgs()
+    requiredArgs = getRequiredArgs()
+    optionalArgs = getOptionalArgs()
+    outputArgs = getOutputArgs()
 
-  parser = argparse.ArgumentParser(
-              parents=[requiredArgs, outputArgs, optionalArgs],
-              formatter_class=CustomFormatter,
+    parser = argparse.ArgumentParser(
+        parents=[requiredArgs, outputArgs, optionalArgs],
+        formatter_class=CustomFormatter,
 
-              usage='An example usage is: %(prog)s -s smallRNA.sam -t '
-              'targetRNA.sam -o output',
+        usage='An example usage is: %(prog)s -s smallRNA.sam -t '
+        'targetRNA.sam -o output',
 
-              description=textwrap.dedent('''\
+        description=textwrap.dedent('''\
                 Given two SAM files, this script tries to find chimeras that
                 are observed between a smallRNA and a targetRNA'''),
 
-              add_help=True)
+        add_help=True)
 
-  return parser
-
-def getRequiredArgs():
-  parser = argparse.ArgumentParser(add_help=False)
+    return parser
 
-  required = parser.add_argument_group('Required arguments')
-
-  required.add_argument('--smallRNA', '-s',
-                        help='Provide smallRNA alignment SAM file',
-                        metavar='SAM file',
-                        required=True)
-  required.add_argument('--targetRNA', '-t',
-                        help='Provide targetRNA alignment SAM file',
-                        metavar='SAM file',
-                        required=True)
 
-  required.add_argument("--getGenomicLocationsSmallRNA", "-ggs",
-                        help="Do you want genomic locations for small RNA?",
-                        action="store_true")
-
-  required.add_argument("--getGenomicLocationsTargetRNA", "-ggt",
-                        help="Do you want genomic locations for target RNA?",
-                        action="store_true")
-
-  required.add_argument("--smallRNAAnnotation", "-sa",
-                        action="store",
-                        help="""Provide smallRNA annotation gtf(from Gencode)
+def getRequiredArgs():
+    parser = argparse.ArgumentParser(add_help=False)
+
+    required = parser.add_argument_group('Required arguments')
+
+    required.add_argument('--smallRNA', '-s',
+                          help='Provide smallRNA alignment SAM file',
+                          metavar='SAM file',
+                          required=True)
+    required.add_argument('--targetRNA', '-t',
+                          help='Provide targetRNA alignment SAM file',
+                          metavar='SAM file',
+                          required=True)
+
+    required.add_argument("--getGenomicLocationsSmallRNA", "-ggs",
+                          help="Do you want genomic locations for small RNA?",
+                          action="store_true")
+
+    required.add_argument("--getGenomicLocationsTargetRNA", "-ggt",
+                          help="Do you want genomic locations for target RNA?",
+                          action="store_true")
+
+    required.add_argument("--smallRNAAnnotation", "-sa",
+                          action="store",
+                          help="""Provide smallRNA annotation gtf(from Gencode)
                         or gff3(from Mirbase). Only provide gtf from Gencode or
                         gff3 from Mirbase. Does not support other gtf files""")
 
-  required.add_argument("--targetRNAAnnotation", "-ta",
-                        action="store",
-                        help="""Provide targetRNA annotation gtf(from Gencode).
+    required.add_argument("--targetRNAAnnotation", "-ta",
+                          action="store",
+                          help="""Provide targetRNA annotation gtf(from Gencode).
                         Only provide gtf from Gencode. Does not support other
                         gtf files""")
 
-  return parser
+    return parser
+
 
 def getOutputArgs():
-  parser = argparse.ArgumentParser(add_help=False)
+    parser = argparse.ArgumentParser(add_help=False)
 
-  output = parser.add_argument_group('Output')
+    output = parser.add_argument_group('Output')
 
-  output.add_argument("--output", "-o",
-                      help="""The output name without extension (.bed .csv will
+    output.add_argument("--output", "-o",
+                        help="""The output name without extension (.bed .csv will
                       be added)""",
-                      metavar='output prefix',
-                      required=True)
+                        metavar='output prefix',
+                        required=True)
+
+    return parser
 
-  return parser
 
 def getOptionalArgs():
-  parser = argparse.ArgumentParser(add_help=False)
+    parser = argparse.ArgumentParser(add_help=False)
 
-  optional = parser.add_argument_group('Optional arguments')
+    optional = parser.add_argument_group('Optional arguments')
 
-  optional.add_argument("--overlap", "-ov",
-                        help="""Maximum overlap to be set between two
+    optional.add_argument("--overlap", "-ov",
+                          help="""Maximum overlap to be set between two
                         molecules when determining chimeras""",
-                        default=4)
+                          default=4)
 
-  optional.add_argument("--gap", "-ga",
-                        help="""Maximum gap (number of unaligned nucleotides) to
+    optional.add_argument("--gap", "-ga",
+                          help="""Maximum gap (number of unaligned nucleotides) to
                         allowed between two molecules within a chimera""",
-                        default=9)
+                          default=9)
 
-  optional.add_argument("--logLevel", "-l",
-                        help="Set logging level",
-                        default='INFO',
-                        choices=['INFO', 'DEBUG', 'WARNING', 'ERROR'])
+    optional.add_argument("--logLevel", "-l",
+                          help="Set logging level",
+                          default='INFO',
+                          choices=['INFO', 'DEBUG', 'WARNING', 'ERROR'])
 
-  return parser
+    return parser
 
 
 def main():
-  """The main function to execute when you run the program as a script
-  """
-  parser = parseArguments()
-
-  args = parser.parse_args()
-
-  if args.logLevel == 'DEBUG':
-    logger = clashchimeras.log.debug_logger('root')
-  elif args.logLevel == 'WARNING':
-    logger = clashchimeras.log.warning_logger('root')
-  elif args.logLevel == 'ERROR':
-    logger = clashchimeras.log.error_logger('root')
-  else:
-    logger = clashchimeras.log.info_logger('root')
-
-  argCheck = Arguments(args, type='find')
-  argCheck.validateFind()
-
-  smallRNA = SAM(fileName=args.smallRNA)
-  smallRNA.read()
-
-  targetRNA = SAM(fileName=args.targetRNA)
-  targetRNA.read()
-
-  if args.getGenomicLocationsSmallRNA:
-
-    if args.smallRNAAnnotation.endswith('gff3'):
-
-      smallRNAAnnotation = GFF(fileName=args.smallRNAAnnotation)
-      smallRNAAnnotation.read()
-
-    elif args.smallRNAAnnotation.endswith('gtf'):
-      smallRNAAnnotation = GTF(fileName=args.smallRNAAnnotation)
-      if 'tRNAs' in args.smallRNAAnnotation:
-        smallRNAAnnotation.read(featureType='tRNAscan')
-      else:
-        smallRNAAnnotation.read()
-
-  if args.getGenomicLocationsTargetRNA:
-    if args.targetRNAAnnotation.endswith('gtf'):
-      targetRNAAnnotation = GTF(fileName=args.targetRNAAnnotation)
-      targetRNAAnnotation.read()
-
-  output = Output(target=args.output,
-                  smallRNABed=args.getGenomicLocationsSmallRNA,
-                  targetRNABed=args.getGenomicLocationsTargetRNA,
-                  overlap=args.overlap,
-                  gap=args.gap)
-
-  common = set(smallRNA.records.keys()).intersection(targetRNA.records.keys())
-
-  logger.info('Determining chimeras')
-
-  updateInterval = int(len(common)/100)
-
-  if updateInterval == 0:
-    updateInterval = 1
-  pbar = Bar(width=80, max=100, fill=yellow(u"█"),
-    suffix="%(percent)d%% - %(eta)ds")
-
-  for index, queryName in enumerate(common, start=1):
-    smallRNARecord = smallRNA.access(queryName)
-    targetRNARecord = targetRNA.access(queryName)
-
-    chimeraString = clashchimeras.methods.chimeraOrNot(
-      smallRNARecord.cigarString, targetRNARecord.cigarString,
-      overlap=args.overlap, gap=args.gap)
-
-    if chimeraString:
-
-      if args.getGenomicLocationsSmallRNA:
-
-        try:
-          smallRNALocation = smallRNAAnnotation.coordinates(
-                              smallRNARecord.refId,
-                              start=smallRNARecord.start,
-                              end=smallRNARecord.end)
-        except IndexError as e:
-          logger.error(e)
-          logger.error('Are you sure you have the correct annotation file for '
-                       'smallRNA?')
-          logger.error('Please check --smallRNAAnnotation -sa and '
-                       'and make sure they are matching with the alignment '
-                       'file {}'.format(args.smallRNA))
-          sys.exit()
-        output.writeSmallRNABed(queryName, smallRNALocation,
-                                targetRNARecord.refId)
-
-      if args.getGenomicLocationsTargetRNA:
-        try:
-          targetRNALocation = targetRNAAnnotation.coordinates(
-                              targetRNARecord.refId,
-                              start=targetRNARecord.start,
-                              end=targetRNARecord.end)
-        except IndexError as e:
-          logger.error(e.msg)
-          logger.error('Are you sure you have the correct annotation file for '
-                       'targetRNA?')
-          logger.error('Please check --targetRNAAnnotation -ta and '
-                       'and make sure they are matching with the alignment '
-                       'file {}'.format(args.targetRNA))
-          sys.exit()
-
-        output.writeTargetRNABed(queryName, targetRNALocation,
-                                 smallRNARecord.refId)
-
-      output.write(queryName, smallRNARecord, targetRNARecord)
-
-
-    if index % updateInterval == 0:
-      pbar.next()
-  pbar.finish()
-  logger.info('Determining chimeras finished')
+    """The main function to execute when you run the program as a script
+    """
+    parser = parseArguments()
+
+    args = parser.parse_args()
+
+    if args.logLevel == 'DEBUG':
+        logger = clashchimeras.log.debug_logger('root')
+    elif args.logLevel == 'WARNING':
+        logger = clashchimeras.log.warning_logger('root')
+    elif args.logLevel == 'ERROR':
+        logger = clashchimeras.log.error_logger('root')
+    else:
+        logger = clashchimeras.log.info_logger('root')
+
+    argCheck = Arguments(args, type='find')
+    argCheck.validateFind()
+
+    smallRNA = SAM(fileName=args.smallRNA)
+    smallRNA.read()
+
+    targetRNA = SAM(fileName=args.targetRNA)
+    targetRNA.read()
+
+    if args.getGenomicLocationsSmallRNA:
+
+        if args.smallRNAAnnotation.endswith('gff3'):
+
+            smallRNAAnnotation = GFF(fileName=args.smallRNAAnnotation)
+            smallRNAAnnotation.read()
+
+        elif args.smallRNAAnnotation.endswith('gtf'):
+            smallRNAAnnotation = GTF(fileName=args.smallRNAAnnotation)
+            if 'tRNAs' in args.smallRNAAnnotation:
+                smallRNAAnnotation.read(featureType='tRNAscan')
+            else:
+                smallRNAAnnotation.read()
+
+    if args.getGenomicLocationsTargetRNA:
+        if args.targetRNAAnnotation.endswith('gtf'):
+            targetRNAAnnotation = GTF(fileName=args.targetRNAAnnotation)
+            targetRNAAnnotation.read()
+
+    output = Output(target=args.output,
+                    smallRNABed=args.getGenomicLocationsSmallRNA,
+                    targetRNABed=args.getGenomicLocationsTargetRNA,
+                    overlap=args.overlap,
+                    gap=args.gap)
+
+    common = set(smallRNA.records.keys()).intersection(targetRNA.records.keys())
+
+    logger.info('Determining chimeras')
+
+    for index, queryName in enumerate(common, start=1):
+        smallRNARecord = smallRNA.access(queryName)
+        targetRNARecord = targetRNA.access(queryName)
+
+        chimeraString = clashchimeras.methods.chimeraOrNot(
+            smallRNARecord.cigarString, targetRNARecord.cigarString,
+            overlap=args.overlap, gap=args.gap)
+
+        if chimeraString:
+
+            if args.getGenomicLocationsSmallRNA:
+
+                try:
+                    smallRNALocation = smallRNAAnnotation.coordinates(
+                        smallRNARecord.refId,
+                        start=smallRNARecord.start,
+                        end=smallRNARecord.end)
+                except IndexError as e:
+                    logger.error(e)
+                    logger.error('Are you sure you have the correct annotation file for '
+                                 'smallRNA?')
+                    logger.error('Please check --smallRNAAnnotation -sa and '
+                                 'and make sure they are matching with the alignment '
+                                 'file {}'.format(args.smallRNA))
+                    sys.exit()
+                output.writeSmallRNABed(queryName, smallRNALocation,
+                                        targetRNARecord.refId)
+
+            if args.getGenomicLocationsTargetRNA:
+                try:
+                    targetRNALocation = targetRNAAnnotation.coordinates(
+                        targetRNARecord.refId,
+                        start=targetRNARecord.start,
+                        end=targetRNARecord.end)
+                except IndexError as e:
+                    logger.error(e.msg)
+                    logger.error('Are you sure you have the correct annotation file for '
+                                 'targetRNA?')
+                    logger.error('Please check --targetRNAAnnotation -ta and '
+                                 'and make sure they are matching with the alignment '
+                                 'file {}'.format(args.targetRNA))
+                    sys.exit()
+
+                output.writeTargetRNABed(queryName, targetRNALocation,
+                                         smallRNARecord.refId)
+
+            output.write(queryName, smallRNARecord, targetRNARecord)
+
+    logger.info('Determining chimeras finished')
 
 if __name__ == "__main__":
-   main()
+    main()
diff --git a/clashchimeras/initialize.py b/clashchimeras/initialize.py
index d25c58e..17e275f 100644
--- a/clashchimeras/initialize.py
+++ b/clashchimeras/initialize.py
@@ -23,729 +23,737 @@
 
 logger = logging.getLogger('root')
 
+
 class Releases:
-  def __init__(self, gencodeOrganism='H.sapiens', mirbaseOrganism='hsa',
-               path=os.path.join(os.path.expanduser("~"),'.CLASHchimeras')):
-    organisms = {'H.sapiens': 'http://www.gencodegenes.org/releases/',
-                 'M.musculus': 'http://www.gencodegenes.org/mouse_releases/'}
-    shortOrganisms = {'H.sapiens': 'human',
-                      'M.musculus': 'mouse'}
-    self.gencodeOrganism = gencodeOrganism
-    self.mirbaseOrganism = mirbaseOrganism
-    self.path = path
-    self.url = organisms[self.gencodeOrganism]
-    self.gencodeShortOrganism = shortOrganisms[self.gencodeOrganism]
-    self.mirbaseUrl = 'ftp://mirbase.org/pub/mirbase/CURRENT/README'
-    self.df = self.openUrl()
-    self.mirbaseDf = self.openMirbaseReadme()
-
-    self.selectGencodeRelease()
-
-    self.selectMirbaseRelease()
-
-    g = Gencode(release=self.gencodeRelease,
-                path=self.gencodePath,
-                ftpDir=self.gencodeFtpDir)
-
-    g.download()
-
-    m = Mirbase(release=self.mirbaseRelease,
-                path=self.mirbasePath,
-                ftpDir=self.mirbaseFtpDir,
-                organism=self.mirbaseOrganism)
-
-    m.download()
-
-    m.process()
-
-  def openUrl(self):
-    r = requests.get(self.url)
-    data = r.text
-    soup = BeautifulSoup(data)
-    table = soup.find("table")
-    headings = [th.get_text().strip() for th in table.find("tr").find_all("th")]
-    dataset = defaultdict(dict)
-    for index, row in enumerate(table.find_all("tr")[1:]):
-      for i, j in zip(headings,
-        (td.get_text().strip() for td in row.find_all("td"))):
-        dataset[index][i] = j
-
-    return pd.DataFrame(dataset).transpose()
-
-  def openMirbaseReadme(self):
-    with ftputil.FTPHost('mirbase.org', 'anonymous', 'anonymous') as fH:
-      fobj = fH.open('pub/mirbase/CURRENT/README')
-      store = False
-      dataset = defaultdict(dict)
-      index = 0
-      for line in fobj.readlines():
-        if store:
-          row = line.strip().split()
-          if len(row) == 3 and row[1][0].isdigit():
-            dataset[index]['Version'] = row[0]
-            dataset[index]['Date'] = row[1]
-            dataset[index]['Entries'] = row[2]
-            index += 1
-        if 'HISTORY' in line:
-          store = True
-
-      return pd.DataFrame(dataset).transpose()
-
-  def downloadMirbaseRelease(self, key):
-    logger.warn('Are you sure??')
-    decision = input('Y/n: ')
-    if decision.lower() == 'y' or decision == '':
-
-      self.mirbaseRelease = self.mirbaseDf.loc[int(key)]['Version']
-
-      os.makedirs(os.path.join(self.path, 'Mirbase',
-                  str(self.mirbaseRelease)), exist_ok=True)
-
-      self.mirbasePath = os.path.join(self.path, 'Mirbase',
-                                      str(self.mirbaseRelease))
-
-      self.mirbaseFtpDir = os.path.join('pub/mirbase', self.mirbaseRelease)
-
-      return True
-
-  def downloadGencodeRelease(self, key):
-    logger.warn('Are you sure??')
-    decision = input('Y/n: ')
-    if decision.lower() == 'y' or decision == '':
-
-      self.gencodeRelease = self.df.loc[int(key)]['GENCODE release']
-
-      os.makedirs(os.path.join(self.path, 'Gencode', self.gencodeOrganism,
-                  str(self.gencodeRelease)), exist_ok=True)
-
-      self.gencodePath = os.path.join(self.path, 'Gencode',
-                                      self.gencodeOrganism,
-                                      str(self.gencodeRelease))
-
-      self.gencodeFtpDir = os.path.join('pub/gencode/Gencode_'+\
-                                        self.gencodeShortOrganism,
-                                        'release_'+self.gencodeRelease)
-
-      return True
-
-  def selectGencodeRelease(self):
-    cols = self.df.columns
-
-    logger.info("Select the release that you want \n{}".format(
-                tabulate(self.df[[cols[2], cols[0],
-                  cols[1], cols[3]]],
-                  headers=['Index', cols[2], cols[0], cols[1], cols[3]],
-                  tablefmt="simple")))
-
-    logger.warn('Please bare in mind that the automatic download relies on '
-                'regex search which is known to work for Gencode release 17 '
-                'and higher ')
-
-    releaseKeys = self.df.index
-
-    while True:
-      logger.warn('Please select which Gencode release to use (select index '+\
-                  '[{}]): '.format(min(self.df.index)))
-      key = input('Index: ')
-      if key.isdigit() and int(key) in releaseKeys:
-        logger.warn('This will download the Gencode release {} which '
-                    'corresponds to Ensembl release {} and Genome assembly '
-                    'version {} freezed on {}'.format(
-                    self.df.loc[int(key)]['GENCODE release'],
-                    self.df.loc[int(key)]['Ensembl release'],
-                    self.df.loc[int(key)]['Genome assembly version'],
-                    self.df.loc[int(key)]['Freeze date *']))
-        if self.downloadGencodeRelease(key):
-          break
-
-      elif key == '':
-        logger.warn('This will download the latest Gencode release {} which '
-                    'corresponds to Ensembl release {} and Genome assembly '
-                    'version {} freezed on {}'.format(
-                    self.df.loc[min(self.df.index)]['GENCODE release'],
-                    self.df.loc[min(self.df.index)]['Ensembl release'],
-                    self.df.loc[min(self.df.index)]['Genome assembly '
-                                                    'version'],
-                    self.df.loc[min(self.df.index)]['Freeze date *']))
-        if self.downloadGencodeRelease(min(self.df.index)):
-          break
-      else:
-        logger.warn('Not a valid release index:')
-
-  def selectMirbaseRelease(self):
-    cols = self.mirbaseDf.columns
-
-    logger.info("Select the Mirbase release you want \n{}".format(
-                tabulate(self.mirbaseDf, headers=['Index', cols[0], cols[1],
-                cols[2]], tablefmt="simple")))
-
-    releaseKeys = self.mirbaseDf.index
-
-    while True:
-      logger.warn('Please select which Mirbase release to use (select index '+\
-                  '[{}]): '.format(max(self.mirbaseDf.index)))
-      key = input('Index: ')
-      if key.isdigit() and int(key) in releaseKeys:
-        logger.warn('This will download miRBase release {} which contains {} '
-                    'entries and released on {}'.format(
-                    self.mirbaseDf.loc[int(key)]['Version'],
-                    self.mirbaseDf.loc[int(key)]['Entries'],
-                    self.mirbaseDf.loc[int(key)]['Date']))
-        if self.downloadMirbaseRelease(key):
-          break
-
-      elif key == '':
-        logger.warn('This will download miRBase release {} which contains {} '
-                    'entries and released on {}'.format(
-                    self.mirbaseDf.loc[max(self.mirbaseDf.index)]['Version'],
-                    self.mirbaseDf.loc[max(self.mirbaseDf.index)]['Entries'],
-                    self.mirbaseDf.loc[max(self.mirbaseDf.index)]['Date']))
-        if self.downloadMirbaseRelease(max(self.mirbaseDf.index)):
-          break
-
-      else:
-        logger.warn('Not a valid release index:')
+
+    def __init__(self, gencodeOrganism='H.sapiens', mirbaseOrganism='hsa',
+                 path=os.path.join(os.path.expanduser("~"), '.CLASHchimeras')):
+        organisms = {'H.sapiens': 'http://www.gencodegenes.org/releases/',
+                     'M.musculus': 'http://www.gencodegenes.org/mouse_releases/'}
+        shortOrganisms = {'H.sapiens': 'human',
+                          'M.musculus': 'mouse'}
+        self.gencodeOrganism = gencodeOrganism
+        self.mirbaseOrganism = mirbaseOrganism
+        self.path = path
+        self.url = organisms[self.gencodeOrganism]
+        self.gencodeShortOrganism = shortOrganisms[self.gencodeOrganism]
+        self.mirbaseUrl = 'ftp://mirbase.org/pub/mirbase/CURRENT/README'
+        self.df = self.openUrl()
+        self.mirbaseDf = self.openMirbaseReadme()
+
+        self.selectGencodeRelease()
+
+        self.selectMirbaseRelease()
+
+        g = Gencode(release=self.gencodeRelease,
+                    path=self.gencodePath,
+                    ftpDir=self.gencodeFtpDir)
+
+        g.download()
+
+        m = Mirbase(release=self.mirbaseRelease,
+                    path=self.mirbasePath,
+                    ftpDir=self.mirbaseFtpDir,
+                    organism=self.mirbaseOrganism)
+
+        m.download()
+
+        m.process()
+
+    def openUrl(self):
+        r = requests.get(self.url)
+        data = r.text
+        soup = BeautifulSoup(data)
+        table = soup.find("table")
+        headings = [th.get_text().strip() for th in table.find("tr").find_all("th")]
+        dataset = defaultdict(dict)
+        for index, row in enumerate(table.find_all("tr")[1:]):
+            for i, j in zip(headings,
+                            (td.get_text().strip() for td in row.find_all("td"))):
+                dataset[index][i] = j
+
+        return pd.DataFrame(dataset).transpose()
+
+    def openMirbaseReadme(self):
+        with ftputil.FTPHost('mirbase.org', 'anonymous', 'anonymous') as fH:
+            fobj = fH.open('pub/mirbase/CURRENT/README')
+            store = False
+            dataset = defaultdict(dict)
+            index = 0
+            for line in fobj.readlines():
+                if store:
+                    row = line.strip().split()
+                    if len(row) == 3 and row[1][0].isdigit():
+                        dataset[index]['Version'] = row[0]
+                        dataset[index]['Date'] = row[1]
+                        dataset[index]['Entries'] = row[2]
+                        index += 1
+                if 'HISTORY' in line:
+                    store = True
+
+            return pd.DataFrame(dataset).transpose()
+
+    def downloadMirbaseRelease(self, key):
+        logger.warn('Are you sure??')
+        decision = input('Y/n: ')
+        if decision.lower() == 'y' or decision == '':
+
+            self.mirbaseRelease = self.mirbaseDf.loc[int(key)]['Version']
+
+            os.makedirs(os.path.join(self.path, 'Mirbase',
+                                     str(self.mirbaseRelease)), exist_ok=True)
+
+            self.mirbasePath = os.path.join(self.path, 'Mirbase',
+                                            str(self.mirbaseRelease))
+
+            self.mirbaseFtpDir = os.path.join('pub/mirbase', self.mirbaseRelease)
+
+            return True
+
+    def downloadGencodeRelease(self, key):
+        logger.warn('Are you sure??')
+        decision = input('Y/n: ')
+        if decision.lower() == 'y' or decision == '':
+
+            self.gencodeRelease = self.df.loc[int(key)]['GENCODE release']
+
+            os.makedirs(os.path.join(self.path, 'Gencode', self.gencodeOrganism,
+                                     str(self.gencodeRelease)), exist_ok=True)
+
+            self.gencodePath = os.path.join(self.path, 'Gencode',
+                                            self.gencodeOrganism,
+                                            str(self.gencodeRelease))
+
+            self.gencodeFtpDir = os.path.join('pub/gencode/Gencode_' +
+                                              self.gencodeShortOrganism,
+                                              'release_' + self.gencodeRelease)
+
+            return True
+
+    def selectGencodeRelease(self):
+        cols = self.df.columns
+
+        logger.info("Select the release that you want \n{}".format(
+                    tabulate(self.df[[cols[2], cols[0],
+                                      cols[1], cols[3]]],
+                             headers=['Index', cols[2], cols[0], cols[1], cols[3]],
+                             tablefmt="simple")))
+
+        logger.warn('Please bare in mind that the automatic download relies on '
+                    'regex search which is known to work for Gencode release 17 '
+                    'and higher ')
+
+        releaseKeys = self.df.index
+
+        while True:
+            logger.warn('Please select which Gencode release to use (select index ' +
+                        '[{}]): '.format(min(self.df.index)))
+            key = input('Index: ')
+            if key.isdigit() and int(key) in releaseKeys:
+                logger.warn('This will download the Gencode release {} which '
+                            'corresponds to Ensembl release {} and Genome assembly '
+                            'version {} freezed on {}'.format(
+                                self.df.loc[int(key)]['GENCODE release'],
+                                self.df.loc[int(key)]['Ensembl release'],
+                                self.df.loc[int(key)]['Genome assembly version'],
+                                self.df.loc[int(key)]['Freeze date *']))
+                if self.downloadGencodeRelease(key):
+                    break
+
+            elif key == '':
+                logger.warn('This will download the latest Gencode release {} which '
+                            'corresponds to Ensembl release {} and Genome assembly '
+                            'version {} freezed on {}'.format(
+                                self.df.loc[min(self.df.index)]['GENCODE release'],
+                                self.df.loc[min(self.df.index)]['Ensembl release'],
+                                self.df.loc[min(self.df.index)]['Genome assembly '
+                                                                'version'],
+                                self.df.loc[min(self.df.index)]['Freeze date *']))
+                if self.downloadGencodeRelease(min(self.df.index)):
+                    break
+            else:
+                logger.warn('Not a valid release index:')
+
+    def selectMirbaseRelease(self):
+        cols = self.mirbaseDf.columns
+
+        logger.info("Select the Mirbase release you want \n{}".format(
+                    tabulate(self.mirbaseDf, headers=['Index', cols[0], cols[1],
+                                                      cols[2]], tablefmt="simple")))
+
+        releaseKeys = self.mirbaseDf.index
+
+        while True:
+            logger.warn('Please select which Mirbase release to use (select index ' +
+                        '[{}]): '.format(max(self.mirbaseDf.index)))
+            key = input('Index: ')
+            if key.isdigit() and int(key) in releaseKeys:
+                logger.warn('This will download miRBase release {} which contains {} '
+                            'entries and released on {}'.format(
+                                self.mirbaseDf.loc[int(key)]['Version'],
+                                self.mirbaseDf.loc[int(key)]['Entries'],
+                                self.mirbaseDf.loc[int(key)]['Date']))
+                if self.downloadMirbaseRelease(key):
+                    break
+
+            elif key == '':
+                logger.warn('This will download miRBase release {} which contains {} '
+                            'entries and released on {}'.format(
+                                self.mirbaseDf.loc[max(self.mirbaseDf.index)]['Version'],
+                                self.mirbaseDf.loc[max(self.mirbaseDf.index)]['Entries'],
+                                self.mirbaseDf.loc[max(self.mirbaseDf.index)]['Date']))
+                if self.downloadMirbaseRelease(max(self.mirbaseDf.index)):
+                    break
+
+            else:
+                logger.warn('Not a valid release index:')
+
 
 class Arguments:
-  def __init__(self, args, type=None):
-    self.args = args
-    if type == 'align':
-      if args.genomeIndex:
-        self.args.genomeIndex = os.path.expanduser(args.genomeIndex)
-      if args.transcriptomeIndex:
-        self.args.transcriptomeIndex = os.path.expanduser(
-          args.transcriptomeIndex)
-      if args.input:
-        self.args.input = os.path.expanduser(args.input)
-      if args.smallRNAIndex:
-        self.args.smallRNAIndex = os.path.expanduser(args.smallRNAIndex)
-      if self.args.targetRNAIndex:
-        self.args.targetRNAIndex = os.path.expanduser(args.targetRNAIndex)
-
-    if args.output:
-      self.args.output = os.path.expanduser(args.output)
-
-    if type == 'find':
-      if args.smallRNAAnnotation:
-        self.args.smallRNAAnnotation = os.path.expanduser(
-          args.smallRNAAnnotation)
-      if args.targetRNAAnnotation:
-        self.args.targetRNAAnnotation = os.path.expanduser(
-          args.targetRNAAnnotation)
-      if args.smallRNA:
-        self.args.smallRNA = os.path.expanduser(args.smallRNA)
-      if args.targetRNA:
-        self.args.targetRNA = os.path.expanduser(args.targetRNA)
-
-    self.exit = False
-
-  def hash(self, file):
-    sha256 = hashlib.sha256()
-    with open(file, 'r+b') as f:
-      while True:
-        buf = f.read(8192)
-        if not buf:
-          break
-        sha256.update(buf)
-    return sha256.hexdigest()
-
-  def validateFind(self):
-    if not os.path.exists(self.args.smallRNA):
-      logger.error('{} not found...'.format(self.args.smallRNA))
-      self.exit = True
-    if not os.path.exists(self.args.targetRNA):
-      logger.error('{} not found...'.format(self.args.smallRNA))
-      self.exit = True
-    if self.args.getGenomicLocationsSmallRNA:
-      if not self.args.smallRNAAnnotation:
-        logger.error('--smallRNAAnnotation -sa not specified. If you want '
-                     'genomic locations for smallRNA, please enable '
-                     '--getGenomicLocationsSmallRNA -ggs and specify '
-                     'annotation file using --smallRNAAnnotation -sa')
-        self.exit = True
-      else:
-        if not os.path.exists(self.args.smallRNAAnnotation):
-          logger.error('{} not found'.format(self.args.smallRNAAnnotation))
-          self.exit = True
-    if self.args.getGenomicLocationsTargetRNA:
-      if not self.args.targetRNAAnnotation:
-        logger.error('--targetRNAAnnotation -ta not specified. If you want '
-                     'genomic locations for targetRNA, please enable '
-                     '--getGenomicLocationsTargetRNA -ggt and specify '
-                     'annotation file using --targetRNAAnnotation -ta')
-        self.exit = True
-      else:
-        if not os.path.exists(self.args.targetRNAAnnotation):
-          logger.error('{} not found...'.format(self.args.targetRNAAnnotation))
-          self.exit = True
-    if self.hash(self.args.smallRNA) == self.hash(self.args.targetRNA):
-      logger.error('CLASHChimeras does not detect chimeras between the same '
-                   'RNA type yet... Please hang in there, we are planning it '
-                   'in the feature')
-      self.exit = True
-
-    if self.exit:
-      logger.error('Exiting because of the above errors...')
-      sys.exit()
-    elif self.hash(self.args.smallRNA) == self.hash(self.args.targetRNA):
-      logger.error('CLASHChimeras does not detect chimeras between the same '
-                   'RNA type yet... Please hang in there, we are planning it '
-                   'in the feature')
-      sys.exit()
-
-  def validateAlign(self):
-    if not os.path.exists(self.args.input):
-      logger.error('{} not found...'.format(self.args.input))
-      self.exit = True
-    if self.args.run == 'bowtie2' and (not self.args.smallRNAIndex or \
-      not self.args.targetRNAIndex):
-      logger.error('{}'.format('Please specify --smallRNAIndex -si and '+\
-                                 '--targetRNAIndex -ti properly...'))
-      self.exit = True
-    if self.args.smallRNAIndex:
-      bts = glob.glob(self.args.smallRNAIndex+'*.bt2')
-      if not len(bts) >= 6:
-        logger.error('Something wrong with the {}'.format(
-          self.args.smallRNAIndex))
-        logger.error("Can't find all the .bt2 files for that index")
-        self.exit=True
-    if self.args.targetRNAIndex:
-      bts = glob.glob(self.args.targetRNAIndex+'*.bt2')
-      if not len(bts) >= 6:
-        logger.error('Something wrong with the {}'.format(
-          self.args.targetRNAIndex))
-        logger.error("Can't find all the .bt2 files for that index")
-        self.exit=True
-    if self.args.genomeIndex:
-      bts = glob.glob(self.args.genomeIndex.strip()+'*.bt2')
-      if not len(bts) >= 6:
-        logger.error('Something wrong with the {}'.format(
-          self.args.genomeIndex))
-        logger.error("Can't find all the .bt2 files for that index")
-        self.exit=True
-    if self.args.run == 'tophat' and not self.args.genomeIndex:
-      logger.error('Please provide genome index if you want to use tophat..')
-      self.exit = True
-    if self.exit:
-      sys.exit()
+
+    def __init__(self, args, type=None):
+        self.args = args
+        if type == 'align':
+            if args.genomeIndex:
+                self.args.genomeIndex = os.path.expanduser(args.genomeIndex)
+            if args.transcriptomeIndex:
+                self.args.transcriptomeIndex = os.path.expanduser(
+                    args.transcriptomeIndex)
+            if args.input:
+                self.args.input = os.path.expanduser(args.input)
+            if args.smallRNAIndex:
+                self.args.smallRNAIndex = os.path.expanduser(args.smallRNAIndex)
+            if self.args.targetRNAIndex:
+                self.args.targetRNAIndex = os.path.expanduser(args.targetRNAIndex)
+
+        if args.output:
+            self.args.output = os.path.expanduser(args.output)
+
+        if type == 'find':
+            if args.smallRNAAnnotation:
+                self.args.smallRNAAnnotation = os.path.expanduser(
+                    args.smallRNAAnnotation)
+            if args.targetRNAAnnotation:
+                self.args.targetRNAAnnotation = os.path.expanduser(
+                    args.targetRNAAnnotation)
+            if args.smallRNA:
+                self.args.smallRNA = os.path.expanduser(args.smallRNA)
+            if args.targetRNA:
+                self.args.targetRNA = os.path.expanduser(args.targetRNA)
+
+        self.exit = False
+
+    def hash(self, file):
+        sha256 = hashlib.sha256()
+        with open(file, 'r+b') as f:
+            while True:
+                buf = f.read(8192)
+                if not buf:
+                    break
+                sha256.update(buf)
+        return sha256.hexdigest()
+
+    def validateFind(self):
+        if not os.path.exists(self.args.smallRNA):
+            logger.error('{} not found...'.format(self.args.smallRNA))
+            self.exit = True
+        if not os.path.exists(self.args.targetRNA):
+            logger.error('{} not found...'.format(self.args.smallRNA))
+            self.exit = True
+        if self.args.getGenomicLocationsSmallRNA:
+            if not self.args.smallRNAAnnotation:
+                logger.error('--smallRNAAnnotation -sa not specified. If you want '
+                             'genomic locations for smallRNA, please enable '
+                             '--getGenomicLocationsSmallRNA -ggs and specify '
+                             'annotation file using --smallRNAAnnotation -sa')
+                self.exit = True
+            else:
+                if not os.path.exists(self.args.smallRNAAnnotation):
+                    logger.error('{} not found'.format(self.args.smallRNAAnnotation))
+                    self.exit = True
+        if self.args.getGenomicLocationsTargetRNA:
+            if not self.args.targetRNAAnnotation:
+                logger.error('--targetRNAAnnotation -ta not specified. If you want '
+                             'genomic locations for targetRNA, please enable '
+                             '--getGenomicLocationsTargetRNA -ggt and specify '
+                             'annotation file using --targetRNAAnnotation -ta')
+                self.exit = True
+            else:
+                if not os.path.exists(self.args.targetRNAAnnotation):
+                    logger.error('{} not found...'.format(self.args.targetRNAAnnotation))
+                    self.exit = True
+        if self.hash(self.args.smallRNA) == self.hash(self.args.targetRNA):
+            logger.error('CLASHChimeras does not detect chimeras between the same '
+                         'RNA type yet... Please hang in there, we are planning it '
+                         'in the feature')
+            self.exit = True
+
+        if self.exit:
+            logger.error('Exiting because of the above errors...')
+            sys.exit()
+        elif self.hash(self.args.smallRNA) == self.hash(self.args.targetRNA):
+            logger.error('CLASHChimeras does not detect chimeras between the same '
+                         'RNA type yet... Please hang in there, we are planning it '
+                         'in the feature')
+            sys.exit()
+
+    def validateAlign(self):
+        if not os.path.exists(self.args.input):
+            logger.error('{} not found...'.format(self.args.input))
+            self.exit = True
+        if self.args.run == 'bowtie2' and (not self.args.smallRNAIndex or
+                                           not self.args.targetRNAIndex):
+            logger.error('{}'.format('Please specify --smallRNAIndex -si and ' +
+                                     '--targetRNAIndex -ti properly...'))
+            self.exit = True
+        if self.args.smallRNAIndex:
+            bts = glob.glob(self.args.smallRNAIndex + '*.bt2')
+            if not len(bts) >= 6:
+                logger.error('Something wrong with the {}'.format(
+                    self.args.smallRNAIndex))
+                logger.error("Can't find all the .bt2 files for that index")
+                self.exit = True
+        if self.args.targetRNAIndex:
+            bts = glob.glob(self.args.targetRNAIndex + '*.bt2')
+            if not len(bts) >= 6:
+                logger.error('Something wrong with the {}'.format(
+                    self.args.targetRNAIndex))
+                logger.error("Can't find all the .bt2 files for that index")
+                self.exit = True
+        if self.args.genomeIndex:
+            bts = glob.glob(self.args.genomeIndex.strip() + '*.bt2')
+            if not len(bts) >= 6:
+                logger.error('Something wrong with the {}'.format(
+                    self.args.genomeIndex))
+                logger.error("Can't find all the .bt2 files for that index")
+                self.exit = True
+        if self.args.run == 'tophat' and not self.args.genomeIndex:
+            logger.error('Please provide genome index if you want to use tophat..')
+            self.exit = True
+        if self.exit:
+            sys.exit()
+
 
 class Gencode:
-  """This class makes sure the required files (sequences, annotation etc.) are
-  present and in working order. It generates sha256 checksums and autodownloads
-  the required files from Gencode Genes.
-  """
-  def __init__(self, release=None, user="anonymous", password=None,
-    path=None, ftpDir=None):
-
-    self.downloaded = []
-    self.release = release
-    self.reList = ['^GRC.+\.genome\.fa',
-                  '^gencode.+chr_patch_hapl_scaff\.annotation\.gtf',
-                  '^gencode.+pc_transcripts\.fa',
-                  '^gencode.+tRNAs\.gtf',
-                  '^gencode.+lncRNA_transcripts\.fa']
-
-    self.host = "ftp.sanger.ac.uk"
-    self.user = user
-    self.path = path
-
-    self.files = []
-    self.otherFiles = {}
-    biotypes = ['snRNA', 'snoRNA', 'misc_RNA', 'tRNA']
-    for b in biotypes:
-      self.otherFiles[b] = 'gencode.v{}.{}_transcripts.fa'.format(self.release,
-                                                                  b)
-    if self.user == "anonymous": self.password = "anonymous"
-    else: self.password = password
-
-    ftp_host = ftputil.FTPHost(self.host, self.user, self.password)
-
-    self.dir = ftpDir
-    fileList = ftp_host.listdir(self.dir)
-    for file in fileList:
-      for rEx in self.reList:
-        if re.match(rEx, file):
-          if not 'primary_assembly' in file:
-            self.files.append(file)
-    ftp_host.close()
-    _files = self.files.copy()
-    _otherFiles = self.otherFiles.copy()
-
-    for file in _files:
-      logger.debug('Checking %s' % file)
-      _file = file.rpartition(".")[0]
-      if os.path.exists(os.path.join(self.path, _file+'.sha256sum')) and \
-        os.path.exists(os.path.join(self.path, _file)):
-        with open(os.path.join(self.path, _file+'.sha256sum')) as f:
-          s = f.readline().rstrip()
-        _s = self.hash(os.path.join(self.path, _file))
-        if s == _s:
-          logger.info("%s is present and verified" % _file)
-          self.files.remove(file)
+    """This class makes sure the required files (sequences, annotation etc.) are
+    present and in working order. It generates sha256 checksums and autodownloads
+    the required files from Gencode Genes.
+    """
+
+    def __init__(self, release=None, user="anonymous", password=None,
+                 path=None, ftpDir=None):
+
+        self.downloaded = []
+        self.release = release
+        self.reList = ['^GRC.+\.genome\.fa',
+                       '^gencode.+chr_patch_hapl_scaff\.annotation\.gtf',
+                       '^gencode.+pc_transcripts\.fa',
+                       '^gencode.+tRNAs\.gtf',
+                       '^gencode.+lncRNA_transcripts\.fa']
+
+        self.host = "ftp.sanger.ac.uk"
+        self.user = user
+        self.path = path
+
+        self.files = []
+        self.otherFiles = {}
+        biotypes = ['snRNA', 'snoRNA', 'misc_RNA', 'tRNA']
+        for b in biotypes:
+            self.otherFiles[b] = 'gencode.v{}.{}_transcripts.fa'.format(self.release,
+                                                                        b)
+        if self.user == "anonymous":
+            self.password = "anonymous"
+        else:
+            self.password = password
+
+        ftp_host = ftputil.FTPHost(self.host, self.user, self.password)
+
+        self.dir = ftpDir
+        fileList = ftp_host.listdir(self.dir)
+        for file in fileList:
+            for rEx in self.reList:
+                if re.match(rEx, file):
+                    if not 'primary_assembly' in file:
+                        self.files.append(file)
+        ftp_host.close()
+        _files = self.files.copy()
+        _otherFiles = self.otherFiles.copy()
+
+        for file in _files:
+            logger.debug('Checking %s' % file)
+            _file = file.rpartition(".")[0]
+            if os.path.exists(os.path.join(self.path, _file + '.sha256sum')) and \
+                    os.path.exists(os.path.join(self.path, _file)):
+                with open(os.path.join(self.path, _file + '.sha256sum')) as f:
+                    s = f.readline().rstrip()
+                _s = self.hash(os.path.join(self.path, _file))
+                if s == _s:
+                    logger.info("%s is present and verified" % _file)
+                    self.files.remove(file)
+                else:
+                    logger.warn('%s is downloaded but doesnt match the sha256sum' % _file)
+                    logger.error('Will be downloaded again')
+            else:
+                logger.warn('%s will be downloaded' % _file)
+
+        for biotype, file in _otherFiles.items():
+            logger.debug('Checking %s' % file)
+            _file = file
+            if os.path.exists(os.path.join(self.path, _file + '.sha256sum')) and \
+                    os.path.exists(os.path.join(self.path, _file)):
+                with open(os.path.join(self.path, _file + '.sha256sum')) as f:
+                    s = f.readline().rstrip()
+                _s = self.hash(os.path.join(self.path, _file))
+                if s == _s:
+                    logger.info("%s is present and verified" % file)
+                    self.otherFiles.pop(biotype)
+                else:
+                    logger.warn('%s is generated but doesnt match the sha256sum' % file)
+                    logger.error('Will be generated again')
+            else:
+                logger.warn('%s will be generated' % file)
+
+    def download(self):
+
+        if len(self.files) == 0:
+            logger.info('Gencode files are downloaded and checksum verified...')
+        else:
+            with ftputil.FTPHost(self.host, self.user, self.password) as fH:
+                for file in self.files:
+                    logger.info('Downloading %s from ftp.sanger.ac.uk' % file)
+                    fH.download(self.dir + '/' + file, os.path.join(self.path, file),
+                                callback=None)
+                    self.downloaded.append(file)
+                    _file = file.rpartition(".")[0]
+                    p = subprocess.Popen(['gunzip', os.path.join(self.path, file)])
+                    p.communicate()
+                    sha256sum = self.hash(os.path.join(self.path, _file))
+                    with open(os.path.join(self.path, _file + '.sha256sum'), 'w') as wH:
+                        print(sha256sum, file=wH)
+                    logger.info('Downloading, extraction and hashing of %s finished' %
+                                file)
+
+        gtfFiles = glob.glob(os.path.join(self.path, "*.annotation.gtf"))
+
+        if len(gtfFiles) == 0:
+            logger.warn('This release does not contain annotation file or they are '
+                        'not grabbed by regex. Please download it manually from {'
+                        '}'.format(self.dir))
+            gtfFile = None
+        elif len(gtfFiles) == 2:
+            gtfFiles.sort()
+            gtfFile = gtfFiles[1]
+        elif len(gtfFiles) == 1:
+            gtfFile = gtfFiles[0]
+
+        genomeFiles = glob.glob(os.path.join(self.path, "*.genome.fa"))
+
+        if len(genomeFiles) == 0:
+            logger.warn('This release does not contain genome file or they are not '
+                        'grabbed by regex. Please download it manually from {'
+                        '}'.format(self.dir))
+            genomeFile = None
+        else:
+            genomeFile = genomeFiles[0]
+
+        tRNAgtfFiles = glob.glob(os.path.join(self.path, "*.tRNAs.gtf"))
+
+        if len(tRNAgtfFiles) == 0:
+            logger.warn(('This release does not contain tRNA annotation file or they '
+                         'are not grabbed by regex. Please download it manually from {'
+                         '}'.format(self.dir)))
+            tRNAgtfFile = None
         else:
-          logger.warn('%s is downloaded but doesnt match the sha256sum' % _file)
-          logger.error('Will be downloaded again')
-      else:
-        logger.warn('%s will be downloaded' % _file)
-
-    for biotype, file in _otherFiles.items():
-      logger.debug('Checking %s' % file)
-      _file = file
-      if os.path.exists(os.path.join(self.path, _file+'.sha256sum')) and \
-        os.path.exists(os.path.join(self.path, _file)):
-        with open(os.path.join(self.path, _file+'.sha256sum')) as f:
-          s = f.readline().rstrip()
-        _s = self.hash(os.path.join(self.path, _file))
-        if s == _s:
-          logger.info("%s is present and verified" % file)
-          self.otherFiles.pop(biotype)
+            tRNAgtfFile = tRNAgtfFiles[0]
+
+        if len(self.otherFiles) == 0:
+            logger.info('Other fasta files are generated and checksum verified...')
         else:
-          logger.warn('%s is generated but doesnt match the sha256sum' % file)
-          logger.error('Will be generated again')
-      else:
-        logger.warn('%s will be generated' % file)
-
-
-  def download(self):
-
-    if len(self.files) == 0:
-      logger.info('Gencode files are downloaded and checksum verified...')
-    else:
-      with ftputil.FTPHost(self.host, self.user, self.password) as fH:
-        for file in self.files:
-          logger.info('Downloading %s from ftp.sanger.ac.uk' % file)
-          fH.download(self.dir+'/'+file, os.path.join(self.path, file),
-            callback=None)
-          self.downloaded.append(file)
-          _file = file.rpartition(".")[0]
-          p = subprocess.Popen(['gunzip', os.path.join(self.path, file)])
-          p.communicate()
-          sha256sum = self.hash(os.path.join(self.path, _file))
-          with open(os.path.join(self.path, _file+'.sha256sum'), 'w') as wH:
-            print(sha256sum, file=wH)
-          logger.info('Downloading, extraction and hashing of %s finished' % \
-            file)
-
-    gtfFiles = glob.glob(os.path.join(self.path, "*.annotation.gtf"))
-
-    if len(gtfFiles) == 0:
-      logger.warn('This release does not contain annotation file or they are '
-                  'not grabbed by regex. Please download it manually from {'
-                  '}'.format(self.dir))
-      gtfFile = None
-    elif len(gtfFiles) == 2:
-      gtfFiles.sort()
-      gtfFile = gtfFiles[1]
-    elif len(gtfFiles) == 1:
-      gtfFile = gtfFiles[0]
-
-    genomeFiles = glob.glob(os.path.join(self.path, "*.genome.fa"))
-
-    if len(genomeFiles) == 0:
-      logger.warn('This release does not contain genome file or they are not '
-                  'grabbed by regex. Please download it manually from {'
-                  '}'.format(self.dir))
-      genomeFile = None
-    else:
-      genomeFile = genomeFiles[0]
-
-
-    tRNAgtfFiles = glob.glob(os.path.join(self.path, "*.tRNAs.gtf"))
-
-    if len(tRNAgtfFiles) == 0:
-      logger.warn(('This release does not contain tRNA annotation file or they '
-                   'are not grabbed by regex. Please download it manually from {'
-                  '}'.format(self.dir)))
-      tRNAgtfFile = None
-    else:
-      tRNAgtfFile = tRNAgtfFiles[0]
-
-
-    if len(self.otherFiles) == 0:
-      logger.info('Other fasta files are generated and checksum verified...')
-    else:
-      for biotype, file in self.otherFiles.items():
-        if biotype == 'tRNA' and (tRNAgtfFile and genomeFile):
-          fasta = Fasta(genome=genomeFile, gtf=tRNAgtfFile)
-          fasta.getBiotype(biotype='tRNA', output=os.path.join(self.path,
-                                                                   file))
-          sha256sum = self.hash(os.path.join(self.path, file))
-          with open(os.path.join(self.path, file+'.sha256sum'), 'w') as wH:
-            print(sha256sum, file=wH)
-          logger.info('Extraction and hashing of %s finished' % file)
-        elif gtfFile and genomeFile:
-          fasta = Fasta(genome=genomeFile, gtf=gtfFile)
-          fasta.getBiotype(biotype=biotype, output=os.path.join(self.path,
-                                                                file))
-          sha256sum = self.hash(os.path.join(self.path, file))
-          with open(os.path.join(self.path, file+'.sha256sum'), 'w') as wH:
-            print(sha256sum, file=wH)
-          logger.info('Extraction and hashing of %s finished' % file)
-
-
-  def hash(self, file):
-    sha256 = hashlib.sha256()
-    with open(file, 'r+b') as f:
-      while True:
-        buf = f.read(8192)
-        if not buf:
-          break
-        sha256.update(buf)
-    return sha256.hexdigest()
+            for biotype, file in self.otherFiles.items():
+                if biotype == 'tRNA' and (tRNAgtfFile and genomeFile):
+                    fasta = Fasta(genome=genomeFile, gtf=tRNAgtfFile)
+                    fasta.getBiotype(biotype='tRNA', output=os.path.join(self.path,
+                                                                         file))
+                    sha256sum = self.hash(os.path.join(self.path, file))
+                    with open(os.path.join(self.path, file + '.sha256sum'), 'w') as wH:
+                        print(sha256sum, file=wH)
+                    logger.info('Extraction and hashing of %s finished' % file)
+                elif gtfFile and genomeFile:
+                    fasta = Fasta(genome=genomeFile, gtf=gtfFile)
+                    fasta.getBiotype(biotype=biotype, output=os.path.join(self.path,
+                                                                          file))
+                    sha256sum = self.hash(os.path.join(self.path, file))
+                    with open(os.path.join(self.path, file + '.sha256sum'), 'w') as wH:
+                        print(sha256sum, file=wH)
+                    logger.info('Extraction and hashing of %s finished' % file)
+
+    def hash(self, file):
+        sha256 = hashlib.sha256()
+        with open(file, 'r+b') as f:
+            while True:
+                buf = f.read(8192)
+                if not buf:
+                    break
+                sha256.update(buf)
+        return sha256.hexdigest()
+
 
 class Mirbase:
-  """This class makes sure that requried files (sequences, annotations etc.)
-  from Mirbase are downloaded. It generates sha256 checksums and autodownloads
-  the required files
-  """
-
-  def __init__(self, release=None, user="anonymous", password=None,
-    path=os.path.join(os.path.expanduser("~"),'.CLASHchimeras'),
-    organism='hsa',
-    ftpDir=None):
-    self.reList = ['^hairpin\.fa\.gz', '^mature\.fa\.gz']
-    organisms = {}
-    with ftputil.FTPHost('mirbase.org', 'anonymous', 'anonymous') as fH:
-      tH, tP = tempfile.mkstemp()
-      fH.download(os.path.join('pub/mirbase', release, 'organisms.txt.gz'), tP)
-      for i in gzip.open(tP):
-        ii = i.decode('utf-8').strip()
-        if not ii.startswith('#'):
-          organisms[ii.split("\t")[0]] = ii.split("\t")[2]
-
-    self.shortOrganism = organism
-    self.organism = organisms.get(self.shortOrganism, None)
-    if not self.organism:
-      logger.error('{} is not a valid organism name in Mirbase'.format(
-        self.shortOrganism))
-      sys.exit()
-    self.host = 'mirbase.org'
-    self.release = release
-    self.user = user
-    self.path = path
-    self.files = []
-
-    if self.user == "anonymous": self.password = "anonymous"
-    else: self.password = password
-
-    ftp_host = ftputil.FTPHost(self.host, self.user, self.password)
-
-    self.dir = ftpDir
-
-    fileList = ftp_host.listdir(self.dir)
-
-    self.files.append('genomes/'+self.shortOrganism+'.gff3')
-    for file in fileList:
-      for rEx in self.reList:
-        if re.match(rEx, file):
-          self.files.append(file)
-    ftp_host.close()
-    _files = self.files.copy()
-
-    for file in _files:
-      logger.debug('Checking %s' % file)
-      if file.endswith('gz'):
-        _file = file.rpartition(".")[0]
-      elif '/' in file:
-        _file = file.rpartition("/")[2]
-      if os.path.exists(os.path.join(self.path, _file+'.sha256sum')) and \
-        os.path.exists(os.path.join(self.path, _file)):
-        with open(os.path.join(self.path, _file+'.sha256sum')) as f:
-          s = f.readline().rstrip()
-        _s = self.hash(os.path.join(self.path, _file))
-        if s == _s:
-          logger.info("%s is present and verified" % file)
-          self.files.remove(file)
+    """This class makes sure that requried files (sequences, annotations etc.)
+    from Mirbase are downloaded. It generates sha256 checksums and autodownloads
+    the required files
+    """
+
+    def __init__(self, release=None, user="anonymous", password=None,
+                 path=os.path.join(os.path.expanduser("~"), '.CLASHchimeras'),
+                 organism='hsa',
+                 ftpDir=None):
+        self.reList = ['^hairpin\.fa\.gz', '^mature\.fa\.gz']
+        organisms = {}
+        with ftputil.FTPHost('mirbase.org', 'anonymous', 'anonymous') as fH:
+            tH, tP = tempfile.mkstemp()
+            fH.download(os.path.join('pub/mirbase', release, 'organisms.txt.gz'), tP)
+            for i in gzip.open(tP):
+                ii = i.decode('utf-8').strip()
+                if not ii.startswith('#'):
+                    organisms[ii.split("\t")[0]] = ii.split("\t")[2]
+
+        self.shortOrganism = organism
+        self.organism = organisms.get(self.shortOrganism, None)
+        if not self.organism:
+            logger.error('{} is not a valid organism name in Mirbase'.format(
+                self.shortOrganism))
+            sys.exit()
+        self.host = 'mirbase.org'
+        self.release = release
+        self.user = user
+        self.path = path
+        self.files = []
+
+        if self.user == "anonymous":
+            self.password = "anonymous"
         else:
-          logger.warn('%s is downloaded but doesnt match the sha256sum' % file)
-          logger.error('Must run download method')
-      else:
-        logger.warn('%s will be downloaded when you run download method' % file)
-
-
-  def download(self):
-
-    if len(self.files) == 0:
-      logger.debug('Mirbase files are downloaded and checksum verified...')
-    else:
-      with ftputil.FTPHost(self.host, self.user, self.password) as fH:
-        for file in self.files:
-          logger.info('Downloading %s from mirbase.org' % file)
-          if "/" in file:
-            _file = file.rpartition("/")[2]
-          else:
-            _file = file
-          fH.download(self.dir+'/'+file, os.path.join(self.path, _file),
-            callback=None)
-          if _file.endswith('.gz'):
-            __file = _file.rpartition(".")[0]
-            p = subprocess.Popen(['gunzip', os.path.join(self.path, __file)])
-            p.communicate()
-          else:
-            __file = _file
-          sha256sum = self.hash(os.path.join(self.path, __file))
-          with open(os.path.join(self.path, __file+'.sha256sum'), 'w') as wH:
-            print(sha256sum, file=wH)
-          logger.debug('Downloading, extraction and hashing of %s finished' % \
-            file)
-      self.files = []
-
-  def hash(self, file):
-    sha256 = hashlib.sha256()
-    with open(file, 'r+b') as f:
-      while True:
-        buf = f.read(8192)
-        if not buf:
-          break
-        sha256.update(buf)
-    return sha256.hexdigest()
-
-  def process(self):
-    files = ['hairpin.fa', 'mature.fa']
-    for file in files:
-      temp = []
-      if os.path.exists(os.path.join(self.path, self.shortOrganism+'-'+file)) \
-       and os.path.exists(os.path.join(self.path,
-       self.shortOrganism+'-'+file+'.sha256sum')):
-        with open(os.path.join(self.path, file+'.sha256sum')) as f:
-          s = f.readline().rstrip()
-        _s = self.hash(os.path.join(self.path, file))
-        if s == _s:
-          logger.info("%s is present and verified" % file)
+            self.password = password
+
+        ftp_host = ftputil.FTPHost(self.host, self.user, self.password)
+
+        self.dir = ftpDir
+
+        fileList = ftp_host.listdir(self.dir)
+
+        self.files.append('genomes/' + self.shortOrganism + '.gff3')
+        for file in fileList:
+            for rEx in self.reList:
+                if re.match(rEx, file):
+                    self.files.append(file)
+        ftp_host.close()
+        _files = self.files.copy()
+
+        for file in _files:
+            logger.debug('Checking %s' % file)
+            if file.endswith('gz'):
+                _file = file.rpartition(".")[0]
+            elif '/' in file:
+                _file = file.rpartition("/")[2]
+            if os.path.exists(os.path.join(self.path, _file + '.sha256sum')) and \
+                    os.path.exists(os.path.join(self.path, _file)):
+                with open(os.path.join(self.path, _file + '.sha256sum')) as f:
+                    s = f.readline().rstrip()
+                _s = self.hash(os.path.join(self.path, _file))
+                if s == _s:
+                    logger.info("%s is present and verified" % file)
+                    self.files.remove(file)
+                else:
+                    logger.warn('%s is downloaded but doesnt match the sha256sum' % file)
+                    logger.error('Must run download method')
+            else:
+                logger.warn('%s will be downloaded when you run download method' % file)
+
+    def download(self):
+
+        if len(self.files) == 0:
+            logger.debug('Mirbase files are downloaded and checksum verified...')
         else:
-          logger.warn('%s is downloaded but doesnt match the sha256sum' % file)
-      else:
-        logger.debug('Extrating %s sequences from %s' % (self.organism, file))
-        with open(os.path.join(self.path, file)) as iH:
-          for rec in SeqIO.parse(iH, 'fasta'):
-            if self.organism in rec.description:
-              _temp_rec = SeqRecord(id=rec.id, description=rec.description,
-                seq=rec.seq.back_transcribe())
-              temp.append(_temp_rec)
-        SeqIO.write(temp, os.path.join(self.path, self.shortOrganism+'-'+file),
-          'fasta')
-        with open(os.path.join(self.path,
-          self.shortOrganism+'-'+file+'.sha256sum'), 'w') as wH:
-          print(self.hash(os.path.join(self.path,
-            self.shortOrganism+'-'+file+'.sha256sum')), file=wH)
+            with ftputil.FTPHost(self.host, self.user, self.password) as fH:
+                for file in self.files:
+                    logger.info('Downloading %s from mirbase.org' % file)
+                    if "/" in file:
+                        _file = file.rpartition("/")[2]
+                    else:
+                        _file = file
+                    fH.download(self.dir + '/' + file, os.path.join(self.path, _file),
+                                callback=None)
+                    if _file.endswith('.gz'):
+                        __file = _file.rpartition(".")[0]
+                        p = subprocess.Popen(['gunzip', os.path.join(self.path, __file)])
+                        p.communicate()
+                    else:
+                        __file = _file
+                    sha256sum = self.hash(os.path.join(self.path, __file))
+                    with open(os.path.join(self.path, __file + '.sha256sum'), 'w') as wH:
+                        print(sha256sum, file=wH)
+                    logger.debug('Downloading, extraction and hashing of %s finished' %
+                                 file)
+            self.files = []
+
+    def hash(self, file):
+        sha256 = hashlib.sha256()
+        with open(file, 'r+b') as f:
+            while True:
+                buf = f.read(8192)
+                if not buf:
+                    break
+                sha256.update(buf)
+        return sha256.hexdigest()
+
+    def process(self):
+        files = ['hairpin.fa', 'mature.fa']
+        for file in files:
+            temp = []
+            if os.path.exists(os.path.join(self.path, self.shortOrganism + '-' + file)) \
+                and os.path.exists(os.path.join(self.path,
+                                                self.shortOrganism + '-' + file + '.sha256sum')):
+                with open(os.path.join(self.path, file + '.sha256sum')) as f:
+                    s = f.readline().rstrip()
+                _s = self.hash(os.path.join(self.path, file))
+                if s == _s:
+                    logger.info("%s is present and verified" % file)
+                else:
+                    logger.warn('%s is downloaded but doesnt match the sha256sum' % file)
+            else:
+                logger.debug('Extrating %s sequences from %s' % (self.organism, file))
+                with open(os.path.join(self.path, file)) as iH:
+                    for rec in SeqIO.parse(iH, 'fasta'):
+                        if self.organism in rec.description:
+                            _temp_rec = SeqRecord(id=rec.id, description=rec.description,
+                                                  seq=rec.seq.back_transcribe())
+                            temp.append(_temp_rec)
+                SeqIO.write(temp, os.path.join(self.path, self.shortOrganism + '-' + file),
+                            'fasta')
+                with open(os.path.join(self.path,
+                                       self.shortOrganism + '-' + file + '.sha256sum'), 'w') as wH:
+                    print(self.hash(os.path.join(self.path,
+                                                 self.shortOrganism + '-' + file + '.sha256sum')), file=wH)
+
 
 class Index:
 
-  def __init__(self, root=os.path.join(os.path.expanduser("~"),
-    '.CLASHchimeras'), bowtieExecutable=None, tophatExecutable=None):
-
-    self.created = []
-
-    if bowtieExecutable is not None:
-      logger.info('Bowtie2 path {} provided and will be used'\
-        .format(bowtieExecutable))
-      self.bowtieExecutable = bowtieExecutable
-    else:
-      try:
-        bowtieVersion = float(subprocess.check_output(['bowtie2', '--version']
-          ).decode('utf-8').split("\n")[0].split()[-1].rpartition(".")[0])
-        fullBowtieVersion = subprocess.check_output(['bowtie2', '--version']
-          ).decode('utf-8').split("\n")[0].split()[-1]
-        if bowtieVersion < 2.2:
-          logger.warn("Please update bowtie2, you can download it from")
-          logger.warn("http://sourceforge.net/projects/bowtie-bio/files/bowtie2/")
+    def __init__(self, root=os.path.join(os.path.expanduser("~"),
+                                         '.CLASHchimeras'), bowtieExecutable=None, tophatExecutable=None):
+
+        self.created = []
+
+        if bowtieExecutable is not None:
+            logger.info('Bowtie2 path {} provided and will be used'
+                        .format(bowtieExecutable))
+            self.bowtieExecutable = bowtieExecutable
         else:
-          logger.info("Bowtie2 version {} found and will be used"\
-            .format(fullBowtieVersion))
-          self.bowtieExecutable = 'bowtie2'
-      except OSError as e:
-        if e.errno == os.errno.ENOENT:
-          logger.error(textwrap.fill("""
+            try:
+                bowtieVersion = float(subprocess.check_output(['bowtie2', '--version']
+                                                              ).decode('utf-8').split("\n")[0].split()[-1].rpartition(".")[0])
+                fullBowtieVersion = subprocess.check_output(['bowtie2', '--version']
+                                                            ).decode('utf-8').split("\n")[0].split()[-1]
+                if bowtieVersion < 2.2:
+                    logger.warn("Please update bowtie2, you can download it from")
+                    logger.warn("http://sourceforge.net/projects/bowtie-bio/files/bowtie2/")
+                else:
+                    logger.info("Bowtie2 version {} found and will be used"
+                                .format(fullBowtieVersion))
+                    self.bowtieExecutable = 'bowtie2'
+            except OSError as e:
+                if e.errno == os.errno.ENOENT:
+                    logger.error(textwrap.fill("""
             Can't find Bowtie2 in your path. Please install it from
             http://sourceforge.net/projects/bowtie-bio/files/bowtie2/
             """))
+                else:
+                    logger.error('Something wrong with your bowtie2 command')
+
+        if tophatExecutable is not None:
+            logger.info('Tophat2 path {} provided and will be used'
+                        .format(tophatExecutable))
+            self.tophatExecutable = tophatExecutable
         else:
-          logger.error('Something wrong with your bowtie2 command')
-
-    if tophatExecutable is not None:
-      logger.info('Tophat2 path {} provided and will be used'\
-        .format(tophatExecutable))
-      self.tophatExecutable = tophatExecutable
-    else:
-      try:
-        tophatVersion = float(subprocess.check_output(['tophat',
-          '--version']).decode('utf-8').split(" v")[-1].rpartition(".")[0])
-        fullTophatVersion = subprocess.check_output(['tophat',
-          '--version']).decode('utf-8').split(" v")[-1].rstrip()
-        if tophatVersion < 2:
-          logger.warn("Please update tophat, you can download it from")
-          logger.warn("http://ccb.jhu.edu/software/tophat/index.shtml")
-        else:
-          logger.info("Tophat version {} found and will be used"\
-            .format(fullTophatVersion))
-          self.tophatExecutable = 'tophat'
-      except OSError as e:
-        if e.errno == os.errno.ENOENT:
-          logger.error("Can't find Tophat in your path. Please install it from")
-          logger.error("http://ccb.jhu.edu/software/tophat/index.shtml")
-        else:
-          logger.error('Something wrong with your tophat command')
-
-    self.root = root
-
-  def create(self):
-
-    stdoutHandle = open(os.path.join(self.root, 'CLASHChimeras-Index.stdout'),
-                        'w')
-    stderrHandle = open(os.path.join(self.root, 'CLASHChimeras-Index.stderr'),
-                        'w')
-    indexCreated = False
-    for fa in glob.glob(os.path.join(self.root, '*.fa')):
-      indexName = fa.rpartition(".fa")[0]
-      self.created.append('{} - {}'.format(indexName, green('Bowtie2',
-                                                          style='bold')))
-      if 'genome.fa' in fa:
-        genomeIndex = fa.rpartition(".")[0]
-      if len(glob.glob(os.path.join(self.root, indexName+'*.bt2'))) == 6:
-        logger.info('Bowtie2 index for {} found'.format(fa))
-      else:
-        try:
-          logger.info('Generating bowtie2 index for {}'.format(fa))
-          indexCreated = True
-          p = subprocess.Popen([self.bowtieExecutable+'-build', fa, indexName],
-            stdout=stdoutHandle, stderr=stderrHandle)
-          p.communicate()
-          logger.info('Bowtie2 index for {} is generated'.format(fa))
-        except OSError as e:
-          if e.errno == os.errno.ENOENT:
-            logger.error("Can't find bowtie2-build in your path...")
-            logger.error("Please make sure bowtie2 is in your path and")
-            logger.error("bowtie2-build can run from your shell")
-          else:
-            logger.error("Something wrong with your bowtie2-build command")
-
-    if indexCreated:
-      logger.warn('The stdout and stderr for bowtie2-build are stored in')
-      logger.warn('CLASHChimeras-Index.stdout and CLASHChimeras-Index.stderr')
-
-    for gtf in glob.glob(os.path.join(self.root, '*.annotation.gtf')):
-      indexName = gtf.rpartition("/")[-1].rpartition(".")[0]
-      self.created.append('{} - {}'.format(gtf.rpartition(".")[0], green(
-                          'Tophat2 Transcriptome', style='bold')))
-      if len(glob.glob(os.path.join(self.root, indexName+'*.bt2'))) == 6:
-        logger.info('Tophat2 transcriptome index found for {}'.format(gtf))
-      else:
-        try:
-          indexCreated = True
-          logger.info('Generating tophat transcriptome index for {}'\
-            .format(gtf))
-          p = subprocess.Popen([self.tophatExecutable, '-G', gtf,
-            '--transcriptome-index', os.path.join(self.root, indexName),
-            genomeIndex], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-          p.communicate()
-          logger.info('Tophat2 transcriptome index generated for {}'\
-            .format(gtf))
-        except OSError as e:
-          if e.errno == os.errno.ENOENT:
-            logger.error("Can't find tophat in your path...")
-            logger.error("Please make sure tophat is in your path")
-          else:
-            logger.error("Something wrong with your tophat command")
-
-    stdoutHandle.close()
-    stderrHandle.close()
+            try:
+                tophatVersion = float(subprocess.check_output(['tophat',
+                                                               '--version']).decode('utf-8').split(" v")[-1].rpartition(".")[0])
+                fullTophatVersion = subprocess.check_output(['tophat',
+                                                             '--version']).decode('utf-8').split(" v")[-1].rstrip()
+                if tophatVersion < 2:
+                    logger.warn("Please update tophat, you can download it from")
+                    logger.warn("http://ccb.jhu.edu/software/tophat/index.shtml")
+                else:
+                    logger.info("Tophat version {} found and will be used"
+                                .format(fullTophatVersion))
+                    self.tophatExecutable = 'tophat'
+            except OSError as e:
+                if e.errno == os.errno.ENOENT:
+                    logger.error("Can't find Tophat in your path. Please install it from")
+                    logger.error("http://ccb.jhu.edu/software/tophat/index.shtml")
+                else:
+                    logger.error('Something wrong with your tophat command')
+
+        self.root = root
+
+    def create(self):
+
+        stdoutHandle = open(os.path.join(self.root, 'CLASHChimeras-Index.stdout'),
+                            'w')
+        stderrHandle = open(os.path.join(self.root, 'CLASHChimeras-Index.stderr'),
+                            'w')
+        indexCreated = False
+        for fa in glob.glob(os.path.join(self.root, '*.fa')):
+            indexName = fa.rpartition(".fa")[0]
+            self.created.append('{} - {}'.format(indexName, green('Bowtie2',
+                                                                  style='bold')))
+            if 'genome.fa' in fa:
+                genomeIndex = fa.rpartition(".")[0]
+            if len(glob.glob(os.path.join(self.root, indexName + '*.bt2'))) == 6:
+                logger.info('Bowtie2 index for {} found'.format(fa))
+            else:
+                try:
+                    logger.info('Generating bowtie2 index for {}'.format(fa))
+                    indexCreated = True
+                    p = subprocess.Popen([self.bowtieExecutable + '-build', fa, indexName],
+                                         stdout=stdoutHandle, stderr=stderrHandle)
+                    p.communicate()
+                    logger.info('Bowtie2 index for {} is generated'.format(fa))
+                except OSError as e:
+                    if e.errno == os.errno.ENOENT:
+                        logger.error("Can't find bowtie2-build in your path...")
+                        logger.error("Please make sure bowtie2 is in your path and")
+                        logger.error("bowtie2-build can run from your shell")
+                    else:
+                        logger.error("Something wrong with your bowtie2-build command")
+
+        if indexCreated:
+            logger.warn('The stdout and stderr for bowtie2-build are stored in')
+            logger.warn('CLASHChimeras-Index.stdout and CLASHChimeras-Index.stderr')
+
+        for gtf in glob.glob(os.path.join(self.root, '*.annotation.gtf')):
+            indexName = gtf.rpartition("/")[-1].rpartition(".")[0]
+            self.created.append('{} - {}'.format(gtf.rpartition(".")[0], green(
+                                'Tophat2 Transcriptome', style='bold')))
+            if len(glob.glob(os.path.join(self.root, indexName + '*.bt2'))) == 6:
+                logger.info('Tophat2 transcriptome index found for {}'.format(gtf))
+            else:
+                try:
+                    indexCreated = True
+                    logger.info('Generating tophat transcriptome index for {}'
+                                .format(gtf))
+                    p = subprocess.Popen([self.tophatExecutable, '-G', gtf,
+                                          '--transcriptome-index', os.path.join(
+                                              self.root, indexName),
+                                          genomeIndex], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                    p.communicate()
+                    logger.info('Tophat2 transcriptome index generated for {}'
+                                .format(gtf))
+                except OSError as e:
+                    if e.errno == os.errno.ENOENT:
+                        logger.error("Can't find tophat in your path...")
+                        logger.error("Please make sure tophat is in your path")
+                    else:
+                        logger.error("Something wrong with your tophat command")
+
+        stdoutHandle.close()
+        stderrHandle.close()
diff --git a/clashchimeras/log.py b/clashchimeras/log.py
index e4e24d7..2a6b0ed 100644
--- a/clashchimeras/log.py
+++ b/clashchimeras/log.py
@@ -1,26 +1,30 @@
 import logging
 import coloredlogs
 
+
 def debug_logger(name):
-  logger = logging.getLogger(name)
-  coloredlogs.install(level=logging.DEBUG, show_name=False,
-    show_hostname=True)
-  return logger
+    logger = logging.getLogger(name)
+    coloredlogs.install(level=logging.DEBUG, show_name=False,
+                        show_hostname=True)
+    return logger
+
 
 def info_logger(name):
-  logger = logging.getLogger(name)
-  coloredlogs.install(level=logging.INFO, show_name=False,
-    show_hostname=True)
-  return logger
+    logger = logging.getLogger(name)
+    coloredlogs.install(level=logging.INFO, show_name=False,
+                        show_hostname=True)
+    return logger
+
 
 def warning_logger(name):
-  logger = logging.getLogger(name)
-  coloredlogs.install(level=logging.WARNING, show_name=False,
-    show_hostname=True)
-  return logger
+    logger = logging.getLogger(name)
+    coloredlogs.install(level=logging.WARNING, show_name=False,
+                        show_hostname=True)
+    return logger
+
 
 def error_logger(name):
-  logger = logging.getLogger(name)
-  coloredlogs.install(level=logging.ERROR, show_name=False,
-    show_hostname=True)
-  return logger
+    logger = logging.getLogger(name)
+    coloredlogs.install(level=logging.ERROR, show_name=False,
+                        show_hostname=True)
+    return logger
diff --git a/clashchimeras/methods.py b/clashchimeras/methods.py
index 78eb739..c52d45e 100644
--- a/clashchimeras/methods.py
+++ b/clashchimeras/methods.py
@@ -2,70 +2,84 @@
 
 from itertools import repeat
 
+
 def convertCigar(cigarString):
-  cigarAttributes = re.findall("\D", cigarString)
-  cigarNumbers = re.findall("\d+", cigarString)
-  convertedCigarString = ""
-  for index, attribute in enumerate(cigarAttributes):
-    for i in repeat(attribute, int(cigarNumbers[index])):
-      convertedCigarString += i
-  return convertedCigarString
+    cigarAttributes = re.findall("\D", cigarString)
+    cigarNumbers = re.findall("\d+", cigarString)
+    convertedCigarString = ""
+    for index, attribute in enumerate(cigarAttributes):
+        for i in repeat(attribute, int(cigarNumbers[index])):
+            convertedCigarString += i
+    return convertedCigarString
+
 
 def chimeraOrNot(bitOne, bitTwo, overlap=4, gap=9):
-  combinedCigar = ""
-  for i, j in zip(bitOne, bitTwo):
-    if i == "M" and j == "M": combinedCigar += "="
-    elif i == "S" and j == "S": combinedCigar += "#"
-    elif i == "I" and j == "D": combinedCigar += "-"
-    elif i == "D" and j == "I": combinedCigar += "-"
-    elif i == "M" and j != "M": combinedCigar += "{"
-    elif i != "M" and j == "M": combinedCigar += "}"
-    elif i != "M" and j == "D": combinedCigar += "-"
-    elif i == "D" and j != "M": combinedCigar += "-"
-    elif i == "I" and j != "M": combinedCigar += "+"
-    elif i != "M" and j == "I": combinedCigar += "+"
-    else: combinedCigar += "*"
-  numberOfMs = len(list(filter(lambda x: x == "=", combinedCigar)))
-  curlyStartStart = combinedCigar.find("{")
-  curlyStartEnd = combinedCigar.rfind("{")
-  curlyEndStart = combinedCigar.find("}")
-  curlyEndEnd = combinedCigar.rfind("}")
-  listCurlies = sorted([[curlyStartStart, curlyStartEnd], [curlyEndStart,
-    curlyEndEnd]])
-  matches = combinedCigar.count("{") + combinedCigar.count("}") + \
-    combinedCigar.count("=") + combinedCigar.count("-") + \
-    combinedCigar.count("+")
-  if numberOfMs <= overlap and abs(listCurlies[0][1] - listCurlies[1][0]) \
-    <= gap and matches >= (0.75 * len(combinedCigar)):
-    return combinedCigar
-  else:
-    return None
+    combinedCigar = ""
+    for i, j in zip(bitOne, bitTwo):
+        if i == "M" and j == "M":
+            combinedCigar += "="
+        elif i == "S" and j == "S":
+            combinedCigar += "#"
+        elif i == "I" and j == "D":
+            combinedCigar += "-"
+        elif i == "D" and j == "I":
+            combinedCigar += "-"
+        elif i == "M" and j != "M":
+            combinedCigar += "{"
+        elif i != "M" and j == "M":
+            combinedCigar += "}"
+        elif i != "M" and j == "D":
+            combinedCigar += "-"
+        elif i == "D" and j != "M":
+            combinedCigar += "-"
+        elif i == "I" and j != "M":
+            combinedCigar += "+"
+        elif i != "M" and j == "I":
+            combinedCigar += "+"
+        else:
+            combinedCigar += "*"
+    numberOfMs = len(list(filter(lambda x: x == "=", combinedCigar)))
+    curlyStartStart = combinedCigar.find("{")
+    curlyStartEnd = combinedCigar.rfind("{")
+    curlyEndStart = combinedCigar.find("}")
+    curlyEndEnd = combinedCigar.rfind("}")
+    listCurlies = sorted([[curlyStartStart, curlyStartEnd], [curlyEndStart,
+                                                             curlyEndEnd]])
+    matches = combinedCigar.count("{") + combinedCigar.count("}") + \
+        combinedCigar.count("=") + combinedCigar.count("-") + \
+        combinedCigar.count("+")
+    if numberOfMs <= overlap and abs(listCurlies[0][1] - listCurlies[1][0]) \
+            <= gap and matches >= (0.75 * len(combinedCigar)):
+        return combinedCigar
+    else:
+        return None
+
 
 def findRegion(record_data):
-  if 'ENS' in record_data.refId and '|' in record_data.refId:
-    reference_id = record_data.refId
-    reference_start = int(record_data.start)
-    match_length = record_data.matchLength
-    regions = reference_id.split("|")[7:-1]
-    match_start = reference_start
-    match_end = reference_start + match_length
-    mixed = []
+    if 'ENS' in record_data.refId and '|' in record_data.refId:
+        reference_id = record_data.refId
+        reference_start = int(record_data.start)
+        match_length = record_data.matchLength
+        regions = reference_id.split("|")[7:-1]
+        match_start = reference_start
+        match_end = reference_start + match_length
+        mixed = []
 
-    for region in regions:
-      region_name = region.split(":")[0]
-      region_start = int(region.split(":")[-1].split("-")[0])
-      region_end = int(region.split(":")[-1].split("-")[-1])
-      if match_start >= region_start and match_end <= region_end:
-        mixed.append(region_name)
-        break
-      if match_start >= region_start and match_start <= region_end:
-        mixed.append(region_name)
-      if match_end >= region_start and match_end <= region_end:
-        mixed.append(region_name)
-      if match_start <= region_start and match_end >= region_end:
-        mixed.append(region_name)
-    return '{}|{}|{}'.format(reference_id.split("|")[0],
-                             reference_id.split("|")[5],
-                             "-".join(mixed))
-  else:
-    return record_data.refId
+        for region in regions:
+            region_name = region.split(":")[0]
+            region_start = int(region.split(":")[-1].split("-")[0])
+            region_end = int(region.split(":")[-1].split("-")[-1])
+            if match_start >= region_start and match_end <= region_end:
+                mixed.append(region_name)
+                break
+            if match_start >= region_start and match_start <= region_end:
+                mixed.append(region_name)
+            if match_end >= region_start and match_end <= region_end:
+                mixed.append(region_name)
+            if match_start <= region_start and match_end >= region_end:
+                mixed.append(region_name)
+        return '{}|{}|{}'.format(reference_id.split("|")[0],
+                                 reference_id.split("|")[5],
+                                 "-".join(mixed))
+    else:
+        return record_data.refId
diff --git a/clashchimeras/parsers.py b/clashchimeras/parsers.py
index 21eff2e..989c662 100644
--- a/clashchimeras/parsers.py
+++ b/clashchimeras/parsers.py
@@ -19,779 +19,785 @@
 
 logger = logging.getLogger('root')
 
-class GFF:
-  """GFF file parser for mirbase gff3 file
-
-  This class uses memory-mapped file object to read a mirbase gff3 file. It
-  contains methods to read, process a gff3 file and return genomic coordinates
-
-  Attributes:
-    fileName: A mirbase gff3 file path
-  """
-
-  def __init__(self, fileName=None):
-    self.features = {}
-    self.fileName = fileName
 
-  def read(self, featureType='miRNA_primary_transcript'):
-    """Reads gff3 file provided during class initialization
+class GFF:
+    """GFF file parser for mirbase gff3 file
 
-    Stores the byte positions of every feature in a dict object named
-    self.features
+    This class uses memory-mapped file object to read a mirbase gff3 file. It
+    contains methods to read, process a gff3 file and return genomic coordinates
 
-    Keyword Args:
-      featureType: Feature type of a gff3 record, the third element of every
-        record in the file. Please change this if you want to store mature
-        form of microRNA, by default it uses primary transcript
-        (default 'miRNA_primary_transcript')
+    Attributes:
+      fileName: A mirbase gff3 file path
     """
-    logger.info('Reading %s' % self.fileName)
-    self.fileHandle = open(self.fileName, 'r+b')
-    bytePosition = self.fileHandle.tell()
-    for line in self.fileHandle:
-      row = line.decode('utf-8').rstrip().split("\t")
-      if not row[0].startswith("#") and row[2] == featureType:
+
+    def __init__(self, fileName=None):
+        self.features = {}
+        self.fileName = fileName
+
+    def read(self, featureType='miRNA_primary_transcript'):
+        """Reads gff3 file provided during class initialization
+
+        Stores the byte positions of every feature in a dict object named
+        self.features
+
+        Keyword Args:
+          featureType: Feature type of a gff3 record, the third element of every
+            record in the file. Please change this if you want to store mature
+            form of microRNA, by default it uses primary transcript
+            (default 'miRNA_primary_transcript')
+        """
+        logger.info('Reading %s' % self.fileName)
+        self.fileHandle = open(self.fileName, 'r+b')
+        bytePosition = self.fileHandle.tell()
+        for line in self.fileHandle:
+            row = line.decode('utf-8').rstrip().split("\t")
+            if not row[0].startswith("#") and row[2] == featureType:
+                attributes = row[-1].split(";")
+                for attribute in attributes:
+                    if attribute.startswith('Name'):
+                        mirbase_name = attribute.split("=")[-1]
+                        self.features[mirbase_name] = bytePosition
+            bytePosition = self.fileHandle.tell()
+        self.fileHandle.close()
+        logger.debug('Reading %s finished' % self.fileName)
+
+    def process(self, name):
+        """A method to return a Record object providing genomic information
+
+        Args:
+          name: A valid miRNA_primary_transcript name
+
+        Returns:
+          An object Record containing scaffold, start, end, strand, mirbase_id and
+            mirbase_name as its variables for access
+        """
+        self.fileHandle = open(self.fileName, 'r+b')
+        self.mm = mmap.mmap(self.fileHandle.fileno(), 0)
+        self.mm.seek(self.features[name])
+        row = self.mm.readline().decode('utf-8').rstrip().split("\t")
         attributes = row[-1].split(";")
         for attribute in attributes:
-          if attribute.startswith('Name'):
-            mirbase_name = attribute.split("=")[-1]
-            self.features[mirbase_name] = bytePosition
-      bytePosition = self.fileHandle.tell()
-    self.fileHandle.close()
-    logger.debug('Reading %s finished' % self.fileName)
-
-  def process(self, name):
-    """A method to return a Record object providing genomic information
-
-    Args:
-      name: A valid miRNA_primary_transcript name
-
-    Returns:
-      An object Record containing scaffold, start, end, strand, mirbase_id and
-        mirbase_name as its variables for access
-    """
-    self.fileHandle = open(self.fileName, 'r+b')
-    self.mm = mmap.mmap(self.fileHandle.fileno(), 0)
-    self.mm.seek(self.features[name])
-    row = self.mm.readline().decode('utf-8').rstrip().split("\t")
-    attributes = row[-1].split(";")
-    for attribute in attributes:
-      if attribute.startswith("ID"):
-        _id = attribute.split("=")[-1]
-      elif attribute.startswith("Name"):
-        _name = attribute.split("=")[-1]
-    record = Record(scaffold=row[0], start=int(row[3]), end=int(row[4]),
-      strand=row[6], mirbase_id=_id, mirbase_name=_name)
-    self.fileHandle.close()
-    return record
-
-
-  def coordinates(self, name, start=None, end=None):
-    """A method to return a bed record containing genomic coordinates for the
-      aligned segment
-
-    Keyword Args:
-      start: The alignment start position of the cDNA molecule or the relative
-        start of the particular molecule
-      end: The alignment end position in the cDNA molecule or the relative end
-        of the particular molecule
-
-    Args:
-      name: A valid miRNA_primary_transcript name
-
-    Returns:
-      A tuple of strings containing elements for a bed record
-    """
-    record = self.process(name)
-    if not start and not end:
-      start = 1
-      end = record.end - record.start + 1
-    positions = {}
-    match_positions = []
-
-    if record.strand == '+':
-      _start = 1
-      for relative, actual in enumerate(range(record.start-1, record.end), \
-        start=_start):
-        positions[relative] = actual
-      for pos in range(start, end+1):
-        match_positions.append(positions[pos])
-      return [(record.scaffold, min(match_positions), max(match_positions)+1,
-        record.mirbase_name, 0, record.strand)]
-
-    elif record.strand == '-':
-      _start = 1
-      for relative, actual in enumerate(reversed(range(record.start-1, \
-        record.end)), start=_start):
-        positions[relative] = actual
-      for pos in range(start, end+1):
-        match_positions.append(positions[pos])
-      return [(record.scaffold, min(match_positions), max(match_positions)+1,
-        record.mirbase_name, 0, record.strand)]
-
+            if attribute.startswith("ID"):
+                _id = attribute.split("=")[-1]
+            elif attribute.startswith("Name"):
+                _name = attribute.split("=")[-1]
+        record = Record(scaffold=row[0], start=int(row[3]), end=int(row[4]),
+                        strand=row[6], mirbase_id=_id, mirbase_name=_name)
+        self.fileHandle.close()
+        return record
+
+    def coordinates(self, name, start=None, end=None):
+        """A method to return a bed record containing genomic coordinates for the
+          aligned segment
+
+        Keyword Args:
+          start: The alignment start position of the cDNA molecule or the relative
+            start of the particular molecule
+          end: The alignment end position in the cDNA molecule or the relative end
+            of the particular molecule
+
+        Args:
+          name: A valid miRNA_primary_transcript name
+
+        Returns:
+          A tuple of strings containing elements for a bed record
+        """
+        record = self.process(name)
+        if not start and not end:
+            start = 1
+            end = record.end - record.start + 1
+        positions = {}
+        match_positions = []
+
+        if record.strand == '+':
+            _start = 1
+            for relative, actual in enumerate(range(record.start - 1, record.end),
+                                              start=_start):
+                positions[relative] = actual
+            for pos in range(start, end + 1):
+                match_positions.append(positions[pos])
+            return [(record.scaffold, min(match_positions), max(match_positions) + 1,
+                     record.mirbase_name, 0, record.strand)]
+
+        elif record.strand == '-':
+            _start = 1
+            for relative, actual in enumerate(reversed(range(record.start - 1,
+                                                             record.end)), start=_start):
+                positions[relative] = actual
+            for pos in range(start, end + 1):
+                match_positions.append(positions[pos])
+            return [(record.scaffold, min(match_positions), max(match_positions) + 1,
+                     record.mirbase_name, 0, record.strand)]
 
 
 class GTF:
-  """GTF file parser for gencode gtf file
-
-  This class uses memory-mapped file object to read a gencode gtf file. It
-  contains methods to read, process a gtf file and return genomic coordinates
+    """GTF file parser for gencode gtf file
 
-  Attributes:
-    fileName: A gencode gtf file path
-  """
+    This class uses memory-mapped file object to read a gencode gtf file. It
+    contains methods to read, process a gtf file and return genomic coordinates
 
-  def __init__(self, fileName=None):
-    self.features = defaultdict(list)
-    self.biotypeFeatures = defaultdict(list)
-    self.geneFeatures = defaultdict(list)
-    self.fileName = fileName
-    self.geneIds = {}
-
-  def readBiotype(self, featureType='exon', biotype=None):
-    logger.info('Reading %s' % self.fileName)
-    self.fileHandle = open(self.fileName, 'r+b')
-    for line in self.fileHandle:
-      row = line.decode('utf-8').rstrip().split("\t")
+    Attributes:
+      fileName: A gencode gtf file path
+    """
 
-      if not row[0].startswith("#") and row[2] == featureType:
-        attributes = row[-1].split("; ")
+    def __init__(self, fileName=None):
+        self.features = defaultdict(list)
+        self.biotypeFeatures = defaultdict(list)
+        self.geneFeatures = defaultdict(list)
+        self.fileName = fileName
+        self.geneIds = {}
+
+    def readBiotype(self, featureType='exon', biotype=None):
+        logger.info('Reading %s' % self.fileName)
+        self.fileHandle = open(self.fileName, 'r+b')
+        for line in self.fileHandle:
+            row = line.decode('utf-8').rstrip().split("\t")
+
+            if not row[0].startswith("#") and row[2] == featureType:
+                attributes = row[-1].split("; ")
+
+                havana_transcript = '-'
+                havana_gene = '-'
+                exon_number = '0'
+
+                for attribute in attributes:
+
+                    if attribute.startswith("transcript_id"):
+                        transcript_id = attribute.split(" ")[-1][1:-1]
+
+                    elif attribute.startswith("transcript_type"):
+                        transcript_type = attribute.split(" ")[-1][1:-1]
+
+                    elif attribute.startswith("exon_number"):
+                        exon_number = int(attribute.split(" ")[-1])
+
+                    elif attribute.startswith("havana_gene"):
+                        havana_gene = attribute.split(" ")[-1][1:-1]
+
+                    elif attribute.startswith("havana_transcript"):
+                        havana_transcript = attribute.split(" ")[-1][1:-2]
+
+                    elif attribute.startswith("gene_id"):
+                        gene_id = attribute.split(" ")[-1][1:-1]
+
+                    elif attribute.startswith("gene_name"):
+                        gene_name = attribute.split(" ")[-1][1:-1]
+
+                    elif attribute.startswith("transcript_name"):
+                        transcript_name = attribute.split(" ")[-1][1:-1]
+
+                if biotype == 'tRNA':
+                    if transcript_type == "tRNAscan":
+                        self.biotypeFeatures[transcript_id].append((exon_number, row[0],
+                                                                    int(row[3]), int(row[4]),
+                                                                    row[6], gene_id,
+                                                                    havana_gene,
+                                                                    havana_transcript,
+                                                                    transcript_name,
+                                                                    gene_name))
+                else:
+                    if transcript_type == biotype:
+                        self.biotypeFeatures[transcript_id].append((exon_number, row[0],
+                                                                    int(row[3]), int(row[4]),
+                                                                    row[6], gene_id,
+                                                                    havana_gene,
+                                                                    havana_transcript,
+                                                                    transcript_name,
+                                                                    gene_name))
+
+        self.fileHandle.close()
+
+    def read(self, featureType='exon'):
+        """Reads gtf file provided during class initialization
+
+        Stores the byte positions of every feature in a defaultdict(list) object
+        named self.features
+
+        Keyword Args:
+          featureType: Feature type of a gtf record, the third element of every
+            record in the file. Please change this if you want to get specific
+            records (e.g. 'UTR') (default 'exon')
+        """
+
+        logger.info('Reading %s' % self.fileName)
+        self.fileHandle = open(self.fileName, 'r+b')
+        bytePosition = self.fileHandle.tell()
+        for line in self.fileHandle:
+            row = line.decode('utf-8').rstrip().split("\t")
+            if not row[0].startswith("#") and row[2] == featureType:
+                attributes = row[-1].split("; ")
+                for attribute in attributes:
+                    if attribute.startswith("transcript_id"):
+                        transcript_id = attribute.split(" ")[-1][1:-1]
+                        self.features[transcript_id].append(bytePosition)
+                        self.geneIds[transcript_id] = gene_id
+                    if attribute.startswith("gene_id"):
+                        gene_id = attribute.split(" ")[-1][1:-1]
+            bytePosition = self.fileHandle.tell()
+        self.fileHandle.close()
+
+        self.fileHandle = open(self.fileName, 'r+b')
+        bytePosition = self.fileHandle.tell()
+        for line in self.fileHandle:
+            row = line.decode('utf-8').rstrip().split("\t")
+            if not row[0].startswith("#") and row[2] == featureType:
+                attributes = row[-1].split("; ")
+                for attribute in attributes:
+                    if attribute.startswith("gene_id"):
+                        gene_id = attribute.split(" ")[-1][1:-1]
+                        self.geneFeatures[gene_id].append(bytePosition)
+            bytePosition = self.fileHandle.tell()
+        self.fileHandle.close()
+
+        logger.debug('Reading %s finished' % self.fileName)
+
+    def process(self, name):
+        """A method to return a Record object providing genomic information
+
+        Args:
+          name: A valid gencode transcript_id
+
+        Returns:
+          An object Record containing scaffold, start, end, strand, mirbase_id and
+            mirbase_name as its variables for access
+        """
+        self.fileHandle = open(self.fileName, 'r+b')
+        self.mm = mmap.mmap(self.fileHandle.fileno(), 0)
+        positions = self.features[name]
+        for position in positions:
+            self.mm.seek(position)
+            row = self.mm.readline().decode('utf-8').rstrip().split("\t")
+            attributes = row[-1].split("; ")
+            _eid = '-'
+            _enb = '0'
+            for attribute in attributes:
+                if attribute.startswith("transcript_type"):
+                    _tt = attribute.split(" ")[-1][1:-1]
+                elif attribute.startswith("transcript_id"):
+                    _tid = attribute.split(" ")[-1][1:-1]
+                elif attribute.startswith("exon_id"):
+                    _eid = attribute.split(" ")[-1][1:-1]
+                elif attribute.startswith("exon_number"):
+                    _enb = int(attribute.split(" ")[-1])
+                elif attribute.startswith("gene_name"):
+                    _gn = attribute.split(" ")[-1][1:-1]
+            record = Record(scaffold=row[0], start=int(row[3]), end=int(row[4]),
+                            strand=row[6], transcript_type=_tt, transcript_id=_tid, exon_id=_eid,
+                            exon_number=_enb, gene_name=_gn)
+            yield record
+        self.fileHandle.close()
+
+    def geneExonicRegions(self, df):
+        """Given a DataFrame with the exon coordinates from Gencode for a single
+        gene, return the total number of coding regions in that gene.
+        """
+        scaffold = df.iloc[0].scaffold
+        strand = df.iloc[0].strand
+        gene_type = df.iloc[0].gene_type
+        gene_id = df.iloc[0].gene_id
+        gene_name = df.iloc[0].gene_name
+        start = df.start.min()
+        end = df.end.max()
+        bp = [False] * (end - start + 1)
+        for i in range(df.shape[0]):
+            s = df.iloc[i]['start'] - start
+            e = df.iloc[i]['end'] - start + 1
+            bp[s:e] = [True] * (e - s)
+        regions = list(range(start, end + 1))
+        groups = []
+
+        for i, j in groupby(bp):
+            groups.append((i, len(list(j))))
+        e_start = 0
+
+        for i in groups:
+            e_end = e_start + i[1]
+            if i[0]:
+                record = Record(scaffold=scaffold, start=regions[e_start],
+                                end=regions[e_end - 1], gene_type=gene_type, gene_id=gene_id,
+                                gene_name=gene_name, strand=strand)
+                yield record
+            e_start += i[1]
+
+    def geneProcess(self, name):
+        """A method to return a Record object providing genomic information
+
+        Args:
+          name: A valid gencode gene_id
+
+        Returns:
+          An object Record containing scaffold, start, end, strand, mirbase_id and
+            mirbase_name as its variables for access
+        """
+        self.fileHandle = open(self.fileName, 'r+b')
+        self.mm = mmap.mmap(self.fileHandle.fileno(), 0)
+        positions = self.geneFeatures[name]
+        exons = []
+        for position in positions:
+            self.mm.seek(position)
+            row = self.mm.readline().decode('utf-8').rstrip().split("\t")
+            attributes = row[-1].split("; ")
+            for attribute in attributes:
+                if attribute.startswith("gene_type"):
+                    _gt = attribute.split(" ")[-1][1:-1]
+                elif attribute.startswith("gene_id"):
+                    _gid = attribute.split(" ")[-1][1:-1]
+                elif attribute.startswith("gene_name"):
+                    _gn = attribute.split(" ")[-1][1:-1]
+            exons.append((row[0], int(row[3]), int(row[4]), row[6], _gt, _gid, _gn))
+        self.fileHandle.close()
+        exons_df = pd.DataFrame(exons, columns=['scaffold', 'start', 'end',
+                                                'strand', 'gene_type', 'gene_id', 'gene_name'])
+
+        for record in self.geneExonicRegions(exons_df):
+            yield record
+
+    def coordinates(self, name, start=None, end=None):
+        """A generator to return a bed record containing genomic coordinates for the
+          aligned segment
+
+        Keyword Args:
+          start: The alignment start position of the cDNA molecule or the relative
+            start of the particular molecule
+          end: The alignment end position in the cDNA molecule or the relative end
+            of the particular molecule
+
+        Args:
+          name: A valid miRNA_primary_transcript name
+
+        Returns:
+          A list of tuple(s) of strings containing elements for a bed record. There
+          may be more than one because of alternate splicing.
+        """
+        if "|" in name:
+            self.name = name.split("|")[0]
+        else:
+            self.name = name
+        positions = {}
+        match_positions = []
+        records = []
+        segments = []
+        result_segments = []
+        for record in self.process(self.name):
+            records.append(record)
+        records.sort(key=lambda x: int(x.exon_number))
+
+        if records[0].strand == '+':
+            _start = 1
+            for record in records:
+                for relative, actual in enumerate(range(record.start, record.end + 1),
+                                                  start=_start):
+                    positions[relative] = actual
+                _start = relative + 1
+            for pos in range(start, end):
+                match_positions.append(positions[pos])
+            for key, group in groupby(enumerate(match_positions),
+                                      lambda x: x[0] - x[-1]):
+                segment = list(map(itemgetter(1), group))
+                segments.append([segment[0], segment[-1]])
+            for segment in segments:
+                for record in records:
+                    if segment[0] >= record.start and segment[1] <= record.end:
+                        result_segments.append((record.scaffold, segment[0], segment[1],
+                                                record.transcript_id + '|' + record.gene_name, 0, record.strand))
+
+        elif records[0].strand == '-':
+            _start = 1
+            for record in records:
+                for relative, actual in enumerate(reversed(range(record.start,
+                                                                 record.end + 1)), start=_start):
+                    positions[relative] = actual
+                _start = relative + 1
+            for pos in range(start, end):
+                match_positions.append(positions[pos])
+            for key, group in groupby(enumerate(reversed(match_positions)),
+                                      lambda x: x[0] - x[-1]):
+                segment = list(map(itemgetter(1), group))
+                segments.append([segment[0], segment[-1]])
+            for segment in segments:
+                for record in records:
+                    if segment[0] >= record.start and segment[1] <= record.end:
+                        result_segments.append((record.scaffold, segment[0], segment[1],
+                                                record.transcript_id + '|' + record.gene_name, 0, record.strand))
+
+        if len(result_segments) == 0:
+            logger.debug('%s, %s, %s' % (name, start, end))
+            logger.debug('%s' % str(segments))
+            for r in records:
+                logger.debug('%s %s %s %s' % (r.scaffold, r.strand,
+                                              r.start, r.end))
+
+        return result_segments
 
-        havana_transcript = '-'
-        havana_gene = '-'
-        exon_number = '0'
 
-        for attribute in attributes:
+class SAM:
+    """SAM file parser for parsing bowtie2 generated files
 
-          if attribute.startswith("transcript_id"):
-            transcript_id = attribute.split(" ")[-1][1:-1]
+    This class uses memory-mapped file object to read a sam file
 
-          elif attribute.startswith("transcript_type"):
-            transcript_type = attribute.split(" ")[-1][1:-1]
+    Attributes:
+      fileName: A sam file path
+    """
 
-          elif attribute.startswith("exon_number"):
-            exon_number = int(attribute.split(" ")[-1])
+    def __init__(self, fileName=None):
+        self.fileName = fileName
+        self.records = {}
+
+    def read(self, flag=0):
+        """Reads sam file provided during class initialization
+
+        Stores the byte position of every record based on the keyword arg flag
+        provided, to a dict object named self.records
+
+        Keyword Args:
+          flag: The SAM alignment flag for a record. For default, it uses the
+          primary alignment for every record and ignores secondary alignments
+          (default 0)
+        """
+        logger.info('Reading %s' % self.fileName)
+        self.fileHandle = open(self.fileName, 'r+b')
+        bytePosition = self.fileHandle.tell()
+        for line in self.fileHandle:
+            read = line.decode('utf-8').split("\t")
+            if not read[0].startswith("@") and read[1] == str(flag):
+                self.records[read[0]] = bytePosition
+            bytePosition = self.fileHandle.tell()
+        self.fileHandle.close()
+        logger.debug('Reading %s finished' % self.fileName)
+
+    def access(self, queryName):
+        """Provides random access of a record from the sam file
+
+        Args:
+          queryName: The query name of the read from the sam file
+
+        Returns:
+          A list generated after splitting the record line from sam file
+        """
+        self.fileHandle = open(self.fileName, 'r+b')
+        self.mm = mmap.mmap(self.fileHandle.fileno(), 0)
+        self.mm.seek(self.records[queryName])
+        row = self.mm.readline().decode('utf-8').rstrip().split("\t")
+        self.fileHandle.close()
+
+        return self.pretty(row)
+
+    def filterPotentialChimeras(self, min_length=30, flag=0, target=None):
+        """Generated a filtered fasta file from a sam file
+
+        This filtered fasta file contains reads that can be potentially chimeras.
+        The criteria for filtering is based on the minimum length
+
+        Keyword Args:
+          min_length: To be selected as a potential chimera, this is the minimum
+            read length (default 30)
+          flag: The SAM alignment flag describing the type of alignment (default 0)
+          target: The prefix for output file
+        """
+        logger.debug('Filtering {} for potential chimeras'.format(target))
+        target = '{}.filter.fasta'.format(target.rpartition(".")[0])
+        if os.path.exists(target):
+            logger.info('Skipping filtering for {}'.format(target))
+        else:
+            with open(target, 'w') as oH:
+                with open(self.fileName) as iH:
+                    for row in csv.reader(iH, delimiter="\t"):
+                        if not row[0].startswith('@') and row[1] == str(flag):
+                            if len(row[9]) >= 30:
+                                print(textwrap.fill('>%s' % row[0], width=80), file=oH)
+                                print(textwrap.fill('%s' % row[9], width=80), file=oH)
+            logger.debug('Filtering finished')
+        return target
+
+    def pretty(self, row):
+        refId = row[2]
+        start = int(row[3])
+        for i in row[10:]:
+            if i.startswith('MD'):
+                mismatchInfo = i
+        sequence = row[9]
+        cigar = row[5]
+        cigarString = clashchimeras.methods.convertCigar(row[5])
+        matchLength = cigarString.count("M") + cigarString.count("D")
+        end = start + matchLength - 1
+        record = Record(refId=refId, start=start, mismatchInfo=mismatchInfo,
+                        sequence=sequence, cigarString=cigarString, matchLength=matchLength,
+                        cigar=cigar, end=end)
+        return record
 
-          elif attribute.startswith("havana_gene"):
-            havana_gene = attribute.split(" ")[-1][1:-1]
 
-          elif attribute.startswith("havana_transcript"):
-            havana_transcript = attribute.split(" ")[-1][1:-2]
+class Output:
+    """Contains methods for writing output files
+
+    This class is used to generate every kind of output generated by this
+    package which includes plain text, ansi colored text and bed file
+
+    Attributes:
+      target: A prefix for output file which will be automatically followed by
+        extension (default 'wip')
+      overlap: Minimum overlap to be set between two molecules when determining
+        chimera (default 4)
+      gap: Maximum gap (number of unknown nucleotides) to be allowed between
+        two molecules within a chimera (default 9)
+    """
 
-          elif attribute.startswith("gene_id"):
-            gene_id = attribute.split(" ")[-1][1:-1]
+    def __init__(self,
+                 target=None,
+                 smallRNABed=False,
+                 targetRNABed=False,
+                 overlap=4,
+                 gap=9):
+        self.target = target
+        self.overlap = overlap
+        self.gap = gap
+
+        if smallRNABed:
+            self.smallRNABedHandle = open('{}.smallRNA.bed'.format(self.target), 'w')
+            print('# BED locations of smallRNA part of the identified chimera',
+                  file=self.smallRNABedHandle)
+            self.smallRNABedCSV = csv.writer(self.smallRNABedHandle, delimiter="\t")
+            self.smallRNABedCSV.writerow(
+                ['# The name field represents the following:'])
+            self.smallRNABedCSV.writerow(
+                ['#  E.g. 201980-1-48|hsa-mir-100==PAPSS1'])
+            self.smallRNABedCSV.writerow(
+                ['#    201980-1-48 is the fasta identifier'])
+            self.smallRNABedCSV.writerow(
+                ["#      201980 is the unique identifier"])
+            self.smallRNABedCSV.writerow(
+                ["#      1 is the number of times that sequence was observed in raw "
+                 "fastq "])
+            self.smallRNABedCSV.writerow(
+                ["#      48 is the length of the sequence"])
+            self.smallRNABedCSV.writerow(
+                ['#    hsa-mir-100 represents the smallRNA transcript'])
+            self.smallRNABedCSV.writerow(
+                ['#    PAPSS1 represents the gene symbol for targetRNA transcript '
+                 'transcript '])
+        if targetRNABed:
+            self.targetRNABedHandle = open('{}.targetRNA.bed'.format(self.target),
+                                           'w')
+            self.targetRNABedCSV = csv.writer(self.targetRNABedHandle, delimiter="\t")
+            self.targetRNABedCSV.writerow(
+                ['# The name field represents the following:'])
+            self.targetRNABedCSV.writerow(
+                ['#  E.g. 136019-1-48|ENST00000375759.6|SPEN==hsa-mir-103a-2'])
+            self.targetRNABedCSV.writerow(
+                ['#    136019-1-48 is the fasta identifier'])
+            self.targetRNABedCSV.writerow(
+                ["#      136019 is the unique identifier"])
+            self.targetRNABedCSV.writerow(
+                ["#      1 is the number of times that sequence was observed in raw "
+                 "fastq "])
+            self.targetRNABedCSV.writerow(
+                ["#      48 is the length of the sequence"])
+            self.targetRNABedCSV.writerow(
+                ["#    ENST00000375759.6 is the targetRNA transcript identifier"])
+            self.targetRNABedCSV.writerow(
+                ['#    SPEN is the gene symbol for for targetRNA transcript '
+                 'ENST00000375759.6'])
+            self.targetRNABedCSV.writerow(
+                ['#    hsa-mir-103a-2 represents the smallRNA transcript '])
+
+        self.hybWriter = open('%s.chimeras.tsv' % self.target, 'w')
+        self.hybComments()
+
+    def hybComments(self):
+        print("# fasta Identifier: The identifier in <sample>.unique.fasta. ",
+              "#\tE.g. 123456-3-68 ",
+              "#\t123456 is the unique identifier",
+              "#\t3 is the number of times that sequence was observed in raw "
+              "fastq ",
+              "#\t68 is the length of the sequence", sep="\n", file=self.hybWriter)
+
+        print("# smallRNA: The cDNA ID of the type of RNA labelled as smallRNA in "
+              "the analysis",
+              "#\tE.g. hsa-let-7b (miRBase identifier)",
+              "#\tE.g. ENST00000619178.1|SNORD3D| (Gencode snoRNA identifier)",
+              sep="\n", file=self.hybWriter)
+
+        print("# smallRNA_start: cDNA alignment start position of the smallRNA "
+              "part of the chimera", file=self.hybWriter)
+
+        print("# smallRNA_MDtag: Showing the MD tag from the smallRNA SAM "
+              "alignment for the chimera",
+              "#\tSAM file format specification",
+              "#\thttp://samtools.github.io/hts-specs/SAMv1.pdf",
+              "#\tMD Z String for mismatching positions.Regex:[0-9]+((["
+              "A-Z]|\^[A-Z]+)[0-9]+)*9", sep="\n", file=self.hybWriter)
+
+        print('# smallRNA_cigar: Cigar string from the smallRNA SAM alignment for '
+              'the chimera',
+              "#\tSAM file format specification",
+              "#\thttp://samtools.github.io/hts-specs/SAMv1.pdf",
+              '#\tSee CIGAR in the file', sep="\n", file=self.hybWriter)
+
+        print('# arbitrary_chimera: The chimera representation indicating what '
+              'part of the sequence represents smallRNA and targetRNA',
+              '#\t{ is representing a match with smallRNA',
+              '#\t} is representing a match with targetRNA',
+              '#\t# is representing unaligned sequences (identified as --gap -ga)',
+              '#\t- is representing a deletion (D in cigar string)',
+              '#\t+ is representing a deletion (I in cigar string)',
+              '#\tE.g {{{{{{{{-{{{{{{{{{{{{{##}}}}}}}}}}+}}}}}}}}}}}}}}}}}}}}}}'
+              '#\tE.g The first 22 nucleotides are aligning to smallRNA cDNA',
+              '#\tE.g The last 33 nucleotides are aligning to targetRNA cDNA',
+              sep="\n", file=self.hybWriter)
+
+        print('# read_sequence: The actual sequence that is appeared in raw '
+              'reads', file=self.hybWriter)
+
+        print("# targetRNA: The cDNA ID of the type of RNA labelled as targetRNA "
+              "in "
+              "the analysis",
+              "#\tE.g. hsa-let-7b (miRBase identifier)",
+              "#\tE.g. ENST00000619178.1|SNORD3D| (Gencode snoRNA identifier)",
+              sep="\n", file=self.hybWriter)
+
+        print("# targetRNA_start: cDNA alignment start position of the targetRNA "
+              "part of the chimera", file=self.hybWriter)
+
+        print("# targetRNA_MDtag: Showing the MD tag from the targetRNA SAM "
+              "alignment for the chimera",
+              "#\tSAM file format specification",
+              "#\thttp://samtools.github.io/hts-specs/SAMv1.pdf",
+              "#\tMD Z String for mismatching positions.Regex:[0-9]+((["
+              "A-Z]|\^[A-Z]+)[0-9]+)*9", sep="\n", file=self.hybWriter)
+
+        print('# targetRNA_cigar: Cigar string from the targetRNA SAM alignment '
+              'for '
+              'the chimera',
+              "#\tSAM file format specification",
+              "#\thttp://samtools.github.io/hts-specs/SAMv1.pdf",
+              '#\tSee CIGAR in the file', sep="\n", file=self.hybWriter)
+
+        print("# fasta_Identifier", "smallRNA", "smallRNA_start", "smallRNA_MDtag",
+              "smallRNA_cigar", "arbitrary_chimera", "read_sequence", "targetRNA",
+              "targetRNA_start", "targetRNA_MDtag", "targetRNA_cigar", sep="\t",
+              file=self.hybWriter)
+
+    def writeTargetRNABed(self, query, targetRNASegments, smallRNA):
+        if "ENS" in smallRNA and "|" in smallRNA:
+            _smallRNA = smallRNA.split("|")[5]
+        else:
+            _smallRNA = smallRNA
+        for segment in targetRNASegments:
+            _segment = list(segment)
+            _segment[3] = query + "|" + _segment[3] + "==" + _smallRNA
+            self.targetRNABedCSV.writerow(_segment)
+
+    def writeSmallRNABed(self, query, smallRNASegments, targetRNA):
+        if "ENS" in targetRNA and "|" in targetRNA:
+            _targetRNA = targetRNA.split("|")[5]
+        else:
+            _targetRNA = targetRNA
+        for segment in smallRNASegments:
+            _segment = list(segment)
+            _segment[3] = query + "|" + _segment[3] + "==" + _targetRNA
+            self.smallRNABedCSV.writerow(_segment)
+
+    def write(self, queryName, smallRNA, targetRNA):
+        chimeraString = clashchimeras.methods.chimeraOrNot(smallRNA.cigarString,
+                                                           targetRNA.cigarString, overlap=self.overlap, gap=self.gap)
+        smallRNARegion = clashchimeras.methods.findRegion(smallRNA)
+        targetRNARegion = clashchimeras.methods.findRegion(targetRNA)
+        print(queryName, smallRNARegion, smallRNA.start, smallRNA.mismatchInfo,
+              smallRNA.cigar, chimeraString, smallRNA.sequence,
+              targetRNARegion, targetRNA.start,
+              targetRNA.mismatchInfo, targetRNA.cigar, sep="\t", file=self.hybWriter)
+
+    def __del__(self):
+        self.hybWriter.close()
 
-          elif attribute.startswith("gene_name"):
-            gene_name = attribute.split(" ")[-1][1:-1]
 
-          elif attribute.startswith("transcript_name"):
-            transcript_name = attribute.split(" ")[-1][1:-1]
+class Fasta:
 
+    def __init__(self, genome=None, gtf=None):
+        self.genome = genome
+        self.gtf = gtf
+        self.faidx = pyfaidx.Fasta(self.genome)
 
+    def getBiotype(self, output=None, biotype=None):
+        self.sequences = []
+        g = GTF(fileName=self.gtf)
         if biotype == 'tRNA':
-          if transcript_type == "tRNAscan":
-            self.biotypeFeatures[transcript_id].append((exon_number, row[0],
-                                                      int(row[3]), int(row[4]),
-                                                      row[6], gene_id,
-                                                      havana_gene,
-                                                      havana_transcript,
-                                                      transcript_name,
-                                                      gene_name))
+            g.readBiotype(biotype=biotype, featureType='tRNAscan')
         else:
-          if transcript_type == biotype:
-            self.biotypeFeatures[transcript_id].append((exon_number, row[0],
-                                                      int(row[3]), int(row[4]),
-                                                      row[6], gene_id,
-                                                      havana_gene,
-                                                      havana_transcript,
-                                                      transcript_name,
-                                                      gene_name))
-
-    self.fileHandle.close()
-
-  def read(self, featureType='exon'):
-    """Reads gtf file provided during class initialization
-
-    Stores the byte positions of every feature in a defaultdict(list) object
-    named self.features
-
-    Keyword Args:
-      featureType: Feature type of a gtf record, the third element of every
-        record in the file. Please change this if you want to get specific
-        records (e.g. 'UTR') (default 'exon')
-    """
-
-    logger.info('Reading %s' % self.fileName)
-    self.fileHandle = open(self.fileName, 'r+b')
-    bytePosition = self.fileHandle.tell()
-    for line in self.fileHandle:
-      row = line.decode('utf-8').rstrip().split("\t")
-      if not row[0].startswith("#") and row[2] == featureType:
-        attributes = row[-1].split("; ")
-        for attribute in attributes:
-          if attribute.startswith("transcript_id"):
-            transcript_id = attribute.split(" ")[-1][1:-1]
-            self.features[transcript_id].append(bytePosition)
-            self.geneIds[transcript_id] = gene_id
-          if attribute.startswith("gene_id"):
-            gene_id = attribute.split(" ")[-1][1:-1]
-      bytePosition = self.fileHandle.tell()
-    self.fileHandle.close()
-
-    self.fileHandle = open(self.fileName, 'r+b')
-    bytePosition = self.fileHandle.tell()
-    for line in self.fileHandle:
-      row = line.decode('utf-8').rstrip().split("\t")
-      if not row[0].startswith("#") and row[2] == featureType:
-        attributes = row[-1].split("; ")
-        for attribute in attributes:
-          if attribute.startswith("gene_id"):
-            gene_id = attribute.split(" ")[-1][1:-1]
-            self.geneFeatures[gene_id].append(bytePosition)
-      bytePosition = self.fileHandle.tell()
-    self.fileHandle.close()
+            g.readBiotype(biotype=biotype)
+        for transcript_id, exons in g.biotypeFeatures.items():
+            temp_seq = ''
+            exons.sort(key=itemgetter(0))
+            for exon in exons:
+                if exon[4] == '-':
+                    temp_seq += (-self.faidx[exon[1]][exon[2] - 1:exon[3]]).seq
+                elif exon[4] == '+':
+                    temp_seq += self.faidx[exon[1]][exon[2] - 1:exon[3]].seq
+
+            _id = '{}|{}|{}|{}|{}|{}|{}'.format(transcript_id,
+                                                exons[0][5],
+                                                exons[0][6],
+                                                exons[0][7],
+                                                exons[0][8],
+                                                exons[0][9],
+                                                len(temp_seq))
+
+            temp_rec = SeqRecord(seq=Seq(temp_seq), id=_id,
+                                 description='')
+
+            self.sequences.append(temp_rec)
+
+        if not output:
+            logger.error('Please provide output file..')
+            sys.exit()
+        else:
+            logger.info('Writing {}'.format(output))
+            SeqIO.write(self.sequences, output, 'fasta')
 
-    logger.debug('Reading %s finished' % self.fileName)
 
-  def process(self, name):
-    """A method to return a Record object providing genomic information
+class Fastq:
 
-    Args:
-      name: A valid gencode transcript_id
+    def __init__(self, fileName=None, compressed=False):
+        self.fileName = fileName
+        self.compressed = compressed
+        self.n = 4
+        self.sequences = Counter()
+        self.uniqueOutput = fileName.rpartition(".")[0] + '.unique.fasta'
 
-    Returns:
-      An object Record containing scaffold, start, end, strand, mirbase_id and
-        mirbase_name as its variables for access
-    """
-    self.fileHandle = open(self.fileName, 'r+b')
-    self.mm = mmap.mmap(self.fileHandle.fileno(), 0)
-    positions = self.features[name]
-    for position in positions:
-      self.mm.seek(position)
-      row = self.mm.readline().decode('utf-8').rstrip().split("\t")
-      attributes = row[-1].split("; ")
-      _eid = '-'
-      _enb = '0'
-      for attribute in attributes:
-        if attribute.startswith("transcript_type"):
-          _tt = attribute.split(" ")[-1][1:-1]
-        elif attribute.startswith("transcript_id"):
-          _tid = attribute.split(" ")[-1][1:-1]
-        elif attribute.startswith("exon_id"):
-          _eid = attribute.split(" ")[-1][1:-1]
-        elif attribute.startswith("exon_number"):
-          _enb = int(attribute.split(" ")[-1])
-        elif attribute.startswith("gene_name"):
-          _gn = attribute.split(" ")[-1][1:-1]
-      record = Record(scaffold=row[0], start=int(row[3]), end=int(row[4]),
-        strand=row[6], transcript_type=_tt, transcript_id=_tid, exon_id=_eid,
-        exon_number=_enb, gene_name=_gn)
-      yield record
-    self.fileHandle.close()
-
-  def geneExonicRegions(self, df):
-    """Given a DataFrame with the exon coordinates from Gencode for a single
-    gene, return the total number of coding regions in that gene.
-    """
-    scaffold = df.iloc[0].scaffold
-    strand = df.iloc[0].strand
-    gene_type = df.iloc[0].gene_type
-    gene_id = df.iloc[0].gene_id
-    gene_name = df.iloc[0].gene_name
-    start = df.start.min()
-    end = df.end.max()
-    bp = [False] * (end - start + 1)
-    for i in range(df.shape[0]):
-      s = df.iloc[i]['start'] - start
-      e = df.iloc[i]['end'] - start + 1
-      bp[s:e] = [True] * (e - s)
-    regions = list(range(start, end+1))
-    groups = []
-
-    for i, j in groupby(bp):
-      groups.append((i, len(list(j))))
-    e_start = 0
-
-    for i in groups:
-      e_end = e_start + i[1]
-      if i[0]:
-        record = Record(scaffold=scaffold, start=regions[e_start],
-          end=regions[e_end-1], gene_type=gene_type, gene_id=gene_id,
-          gene_name=gene_name, strand=strand)
+    def recordIterator(self):
+        record = []
+        record_length = 0
+        for line in self.fileHandle:
+            if record_length == self.n:
+                yield record
+                record_length = 0
+                record = []
+            record.append(line.decode().rstrip())
+            record_length += 1
         yield record
-      e_start += i[1]
-
-  def geneProcess(self, name):
-    """A method to return a Record object providing genomic information
-
-    Args:
-      name: A valid gencode gene_id
-
-    Returns:
-      An object Record containing scaffold, start, end, strand, mirbase_id and
-        mirbase_name as its variables for access
-    """
-    self.fileHandle = open(self.fileName, 'r+b')
-    self.mm = mmap.mmap(self.fileHandle.fileno(), 0)
-    positions = self.geneFeatures[name]
-    exons = []
-    for position in positions:
-      self.mm.seek(position)
-      row = self.mm.readline().decode('utf-8').rstrip().split("\t")
-      attributes = row[-1].split("; ")
-      for attribute in attributes:
-        if attribute.startswith("gene_type"):
-          _gt = attribute.split(" ")[-1][1:-1]
-        elif attribute.startswith("gene_id"):
-          _gid = attribute.split(" ")[-1][1:-1]
-        elif attribute.startswith("gene_name"):
-          _gn = attribute.split(" ")[-1][1:-1]
-      exons.append((row[0], int(row[3]), int(row[4]), row[6], _gt, _gid, _gn))
-    self.fileHandle.close()
-    exons_df = pd.DataFrame(exons, columns = ['scaffold', 'start', 'end',
-      'strand', 'gene_type', 'gene_id', 'gene_name'])
-
-    for record in self.geneExonicRegions(exons_df):
-      yield record
-
-  def coordinates(self, name, start=None, end=None):
-    """A generator to return a bed record containing genomic coordinates for the
-      aligned segment
-
-    Keyword Args:
-      start: The alignment start position of the cDNA molecule or the relative
-        start of the particular molecule
-      end: The alignment end position in the cDNA molecule or the relative end
-        of the particular molecule
-
-    Args:
-      name: A valid miRNA_primary_transcript name
-
-    Returns:
-      A list of tuple(s) of strings containing elements for a bed record. There
-      may be more than one because of alternate splicing.
-    """
-    if "|" in name:
-      self.name = name.split("|")[0]
-    else:
-      self.name = name
-    positions = {}
-    match_positions = []
-    records = []
-    segments = []
-    result_segments = []
-    for record in self.process(self.name):
-      records.append(record)
-    records.sort(key=lambda x: int(x.exon_number))
-
-    if records[0].strand == '+':
-      _start = 1
-      for record in records:
-        for relative, actual in enumerate(range(record.start, record.end+1),\
-          start=_start):
-          positions[relative] = actual
-        _start = relative + 1
-      for pos in range(start, end):
-        match_positions.append(positions[pos])
-      for key, group in groupby(enumerate(match_positions),
-        lambda x: x[0] - x[-1]):
-        segment = list(map(itemgetter(1), group))
-        segments.append([segment[0], segment[-1]])
-      for segment in segments:
-        for record in records:
-          if segment[0] >= record.start and segment[1] <= record.end:
-            result_segments.append((record.scaffold, segment[0], segment[1],
-              record.transcript_id + '|' + record.gene_name, 0, record.strand))
-
-    elif records[0].strand == '-':
-      _start = 1
-      for record in records:
-        for relative, actual in enumerate(reversed(range(record.start, \
-          record.end+1)), start=_start):
-          positions[relative] = actual
-        _start = relative + 1
-      for pos in range(start, end):
-        match_positions.append(positions[pos])
-      for key, group in groupby(enumerate(reversed(match_positions)),
-        lambda x: x[0] - x[-1]):
-        segment = list(map(itemgetter(1), group))
-        segments.append([segment[0], segment[-1]])
-      for segment in segments:
-        for record in records:
-          if segment[0] >= record.start and segment[1] <= record.end:
-            result_segments.append((record.scaffold, segment[0], segment[1],
-              record.transcript_id + '|' + record.gene_name, 0, record.strand))
-
-    if len(result_segments) == 0:
-      logger.debug('%s, %s, %s' % (name, start, end))
-      logger.debug('%s' % str(segments))
-      for r in records:
-        logger.debug('%s %s %s %s' % (r.scaffold, r.strand,
-          r.start, r.end))
-
-    return result_segments
-
-class SAM:
-  """SAM file parser for parsing bowtie2 generated files
-
-  This class uses memory-mapped file object to read a sam file
-
-  Attributes:
-    fileName: A sam file path
-  """
-
-  def __init__(self, fileName=None):
-    self.fileName = fileName
-    self.records = {}
-
-  def read(self, flag=0):
-    """Reads sam file provided during class initialization
-
-    Stores the byte position of every record based on the keyword arg flag
-    provided, to a dict object named self.records
-
-    Keyword Args:
-      flag: The SAM alignment flag for a record. For default, it uses the
-      primary alignment for every record and ignores secondary alignments
-      (default 0)
-    """
-    logger.info('Reading %s' % self.fileName)
-    self.fileHandle = open(self.fileName, 'r+b')
-    bytePosition = self.fileHandle.tell()
-    for line in self.fileHandle:
-      read = line.decode('utf-8').split("\t")
-      if not read[0].startswith("@") and read[1] == str(flag):
-        self.records[read[0]] = bytePosition
-      bytePosition = self.fileHandle.tell()
-    self.fileHandle.close()
-    logger.debug('Reading %s finished' % self.fileName)
-
-  def access(self, queryName):
-    """Provides random access of a record from the sam file
-
-    Args:
-      queryName: The query name of the read from the sam file
-
-    Returns:
-      A list generated after splitting the record line from sam file
-    """
-    self.fileHandle = open(self.fileName, 'r+b')
-    self.mm = mmap.mmap(self.fileHandle.fileno(), 0)
-    self.mm.seek(self.records[queryName])
-    row = self.mm.readline().decode('utf-8').rstrip().split("\t")
-    self.fileHandle.close()
 
-    return self.pretty(row)
+    def createUnique(self):
+        if self.compressed:
+            self.fileHandle = gzip.open(self.fileName, 'rb')
+        else:
+            self.fileHandle = open(self.fileName, 'rb')
+        logger.info('Reading {}'.format(self.fileName))
+        for record in self.recordIterator():
+            self.sequences[record[1]] += 1
+        logger.info('Writing {}'.format(self.uniqueOutput))
+        with open(self.uniqueOutput, 'w') as wH:
+            for index, (sequence, counts) in enumerate(sorted(self.sequences.items(),
+                                                              key=itemgetter(1), reverse=True), start=1):
+                print('>{}-{}-{}'.format(index, counts, len(sequence)), file=wH)
+                print(textwrap.fill(sequence, width=80), file=wH)
+        logger.debug('Finished writing {}'.format(self.uniqueOutput))
+        self.fileHandle.close()
 
-  def filterPotentialChimeras(self, min_length=30, flag=0, target=None):
-    """Generated a filtered fasta file from a sam file
 
-    This filtered fasta file contains reads that can be potentially chimeras.
-    The criteria for filtering is based on the minimum length
+class Record:
+    """A custom object (preferred over dict) for easy access using variables
 
-    Keyword Args:
-      min_length: To be selected as a potential chimera, this is the minimum
-        read length (default 30)
-      flag: The SAM alignment flag describing the type of alignment (default 0)
-      target: The prefix for output file
+    It's a dependency for GTF and GFF classes
     """
-    logger.debug('Filtering {} for potential chimeras'.format(target))
-    target = '{}.filter.fasta'.format(target.rpartition(".")[0])
-    if os.path.exists(target):
-      logger.info('Skipping filtering for {}'.format(target))
-    else:
-      with open(target, 'w') as oH:
-        with open(self.fileName) as iH:
-          for row in csv.reader(iH, delimiter="\t"):
-            if not row[0].startswith('@') and row[1] == str(flag):
-              if len(row[9]) >= 30:
-                print(textwrap.fill('>%s' % row[0], width=80), file=oH)
-                print(textwrap.fill('%s' % row[9], width=80), file=oH)
-      logger.debug('Filtering finished')
-    return target
-
-  def pretty(self, row):
-    refId = row[2]
-    start = int(row[3])
-    for i in row[10:]:
-      if i.startswith('MD'): mismatchInfo = i
-    sequence = row[9]
-    cigar = row[5]
-    cigarString = clashchimeras.methods.convertCigar(row[5])
-    matchLength = cigarString.count("M") + cigarString.count("D")
-    end = start + matchLength - 1
-    record = Record(refId=refId, start=start, mismatchInfo=mismatchInfo,
-      sequence=sequence, cigarString=cigarString, matchLength=matchLength,
-      cigar=cigar, end=end)
-    return record
-
-class Output:
-  """Contains methods for writing output files
-
-  This class is used to generate every kind of output generated by this
-  package which includes plain text, ansi colored text and bed file
-
-  Attributes:
-    target: A prefix for output file which will be automatically followed by
-      extension (default 'wip')
-    overlap: Minimum overlap to be set between two molecules when determining
-      chimera (default 4)
-    gap: Maximum gap (number of unknown nucleotides) to be allowed between
-      two molecules within a chimera (default 9)
-  """
-
-  def __init__(self,
-               target=None,
-               smallRNABed=False,
-               targetRNABed=False,
-               overlap=4,
-               gap=9):
-    self.target = target
-    self.overlap = overlap
-    self.gap = gap
-
-    if smallRNABed:
-      self.smallRNABedHandle = open('{}.smallRNA.bed'.format(self.target), 'w')
-      print('# BED locations of smallRNA part of the identified chimera',
-            file=self.smallRNABedHandle)
-      self.smallRNABedCSV = csv.writer(self.smallRNABedHandle, delimiter="\t")
-      self.smallRNABedCSV.writerow(
-      ['# The name field represents the following:'])
-      self.smallRNABedCSV.writerow(
-      ['#  E.g. 201980-1-48|hsa-mir-100==PAPSS1'])
-      self.smallRNABedCSV.writerow(
-      ['#    201980-1-48 is the fasta identifier'])
-      self.smallRNABedCSV.writerow(
-      ["#      201980 is the unique identifier"])
-      self.smallRNABedCSV.writerow(
-      ["#      1 is the number of times that sequence was observed in raw "
-      "fastq "])
-      self.smallRNABedCSV.writerow(
-      ["#      48 is the length of the sequence"])
-      self.smallRNABedCSV.writerow(
-      ['#    hsa-mir-100 represents the smallRNA transcript'])
-      self.smallRNABedCSV.writerow(
-      ['#    PAPSS1 represents the gene symbol for targetRNA transcript '
-      'transcript '])
-    if targetRNABed:
-      self.targetRNABedHandle = open('{}.targetRNA.bed'.format(self.target),
-                                     'w')
-      self.targetRNABedCSV = csv.writer(self.targetRNABedHandle, delimiter="\t")
-      self.targetRNABedCSV.writerow(
-      ['# The name field represents the following:'])
-      self.targetRNABedCSV.writerow(
-      ['#  E.g. 136019-1-48|ENST00000375759.6|SPEN==hsa-mir-103a-2'])
-      self.targetRNABedCSV.writerow(
-      ['#    136019-1-48 is the fasta identifier'])
-      self.targetRNABedCSV.writerow(
-      ["#      136019 is the unique identifier"])
-      self.targetRNABedCSV.writerow(
-      ["#      1 is the number of times that sequence was observed in raw "
-      "fastq "])
-      self.targetRNABedCSV.writerow(
-      ["#      48 is the length of the sequence"])
-      self.targetRNABedCSV.writerow(
-      ["#    ENST00000375759.6 is the targetRNA transcript identifier"])
-      self.targetRNABedCSV.writerow(
-      ['#    SPEN is the gene symbol for for targetRNA transcript '
-      'ENST00000375759.6'])
-      self.targetRNABedCSV.writerow(
-      ['#    hsa-mir-103a-2 represents the smallRNA transcript '])
-
-    self.hybWriter = open('%s.chimeras.tsv' % self.target, 'w')
-    self.hybComments()
-
-  def hybComments(self):
-    print("# fasta Identifier: The identifier in <sample>.unique.fasta. ",
-          "#\tE.g. 123456-3-68 ",
-          "#\t123456 is the unique identifier",
-          "#\t3 is the number of times that sequence was observed in raw "
-          "fastq ",
-          "#\t68 is the length of the sequence", sep="\n", file=self.hybWriter)
-
-    print("# smallRNA: The cDNA ID of the type of RNA labelled as smallRNA in "
-          "the analysis",
-          "#\tE.g. hsa-let-7b (miRBase identifier)",
-          "#\tE.g. ENST00000619178.1|SNORD3D| (Gencode snoRNA identifier)",
-          sep="\n", file=self.hybWriter)
-
-    print("# smallRNA_start: cDNA alignment start position of the smallRNA "
-          "part of the chimera", file=self.hybWriter)
-
-    print("# smallRNA_MDtag: Showing the MD tag from the smallRNA SAM "
-          "alignment for the chimera",
-          "#\tSAM file format specification",
-          "#\thttp://samtools.github.io/hts-specs/SAMv1.pdf",
-          "#\tMD Z String for mismatching positions.Regex:[0-9]+((["
-          "A-Z]|\^[A-Z]+)[0-9]+)*9", sep="\n", file=self.hybWriter)
-
-    print('# smallRNA_cigar: Cigar string from the smallRNA SAM alignment for '
-          'the chimera',
-          "#\tSAM file format specification",
-          "#\thttp://samtools.github.io/hts-specs/SAMv1.pdf",
-          '#\tSee CIGAR in the file', sep="\n", file=self.hybWriter)
-
-    print('# arbitrary_chimera: The chimera representation indicating what '
-          'part of the sequence represents smallRNA and targetRNA',
-          '#\t{ is representing a match with smallRNA',
-          '#\t} is representing a match with targetRNA',
-          '#\t# is representing unaligned sequences (identified as --gap -ga)',
-          '#\t- is representing a deletion (D in cigar string)',
-          '#\t+ is representing a deletion (I in cigar string)',
-          '#\tE.g {{{{{{{{-{{{{{{{{{{{{{##}}}}}}}}}}+}}}}}}}}}}}}}}}}}}}}}}'
-          '#\tE.g The first 22 nucleotides are aligning to smallRNA cDNA',
-          '#\tE.g The last 33 nucleotides are aligning to targetRNA cDNA',
-          sep="\n", file=self.hybWriter)
-
-    print('# read_sequence: The actual sequence that is appeared in raw '
-          'reads', file=self.hybWriter)
-
-    print("# targetRNA: The cDNA ID of the type of RNA labelled as targetRNA "
-          "in "
-      "the analysis",
-      "#\tE.g. hsa-let-7b (miRBase identifier)",
-      "#\tE.g. ENST00000619178.1|SNORD3D| (Gencode snoRNA identifier)",
-      sep="\n", file=self.hybWriter)
-
-    print("# targetRNA_start: cDNA alignment start position of the targetRNA "
-          "part of the chimera", file=self.hybWriter)
-
-    print("# targetRNA_MDtag: Showing the MD tag from the targetRNA SAM "
-          "alignment for the chimera",
-          "#\tSAM file format specification",
-          "#\thttp://samtools.github.io/hts-specs/SAMv1.pdf",
-          "#\tMD Z String for mismatching positions.Regex:[0-9]+((["
-          "A-Z]|\^[A-Z]+)[0-9]+)*9", sep="\n", file=self.hybWriter)
-
-    print('# targetRNA_cigar: Cigar string from the targetRNA SAM alignment '
-          'for '
-          'the chimera',
-          "#\tSAM file format specification",
-          "#\thttp://samtools.github.io/hts-specs/SAMv1.pdf",
-          '#\tSee CIGAR in the file', sep="\n", file=self.hybWriter)
-
-    print("# fasta_Identifier", "smallRNA", "smallRNA_start", "smallRNA_MDtag",
-          "smallRNA_cigar", "arbitrary_chimera", "read_sequence", "targetRNA",
-          "targetRNA_start", "targetRNA_MDtag", "targetRNA_cigar", sep="\t",
-          file=self.hybWriter)
-
-  def writeTargetRNABed(self, query, targetRNASegments, smallRNA):
-    if "ENS" in smallRNA and "|" in smallRNA:
-      _smallRNA = smallRNA.split("|")[5]
-    else:
-      _smallRNA = smallRNA
-    for segment in targetRNASegments:
-      _segment = list(segment)
-      _segment[3] = query + "|" + _segment[3] + "==" + _smallRNA
-      self.targetRNABedCSV.writerow(_segment)
-
-  def writeSmallRNABed(self, query, smallRNASegments, targetRNA):
-    if "ENS" in targetRNA and "|" in targetRNA:
-      _targetRNA = targetRNA.split("|")[5]
-    else:
-      _targetRNA = targetRNA
-    for segment in smallRNASegments:
-      _segment = list(segment)
-      _segment[3] = query + "|" + _segment[3] + "==" + _targetRNA
-      self.smallRNABedCSV.writerow(_segment)
-
-  def write(self, queryName, smallRNA, targetRNA):
-    chimeraString = clashchimeras.methods.chimeraOrNot(smallRNA.cigarString,
-      targetRNA.cigarString, overlap=self.overlap, gap=self.gap)
-    smallRNARegion = clashchimeras.methods.findRegion(smallRNA)
-    targetRNARegion = clashchimeras.methods.findRegion(targetRNA)
-    print(queryName, smallRNARegion, smallRNA.start, smallRNA.mismatchInfo,
-      smallRNA.cigar, chimeraString, smallRNA.sequence,
-      targetRNARegion, targetRNA.start,
-      targetRNA.mismatchInfo, targetRNA.cigar, sep="\t", file=self.hybWriter)
-
-  def __del__(self):
-    self.hybWriter.close()
-
-class Fasta:
-  def __init__(self, genome=None, gtf=None):
-    self.genome = genome
-    self.gtf = gtf
-    self.faidx = pyfaidx.Fasta(self.genome)
-
-  def getBiotype(self, output=None, biotype=None):
-    self.sequences = []
-    g = GTF(fileName=self.gtf)
-    if biotype == 'tRNA':
-      g.readBiotype(biotype=biotype, featureType='tRNAscan')
-    else:
-      g.readBiotype(biotype=biotype)
-    for transcript_id, exons in g.biotypeFeatures.items():
-      temp_seq = ''
-      exons.sort(key=itemgetter(0))
-      for exon in exons:
-        if exon[4] == '-':
-          temp_seq += (-self.faidx[exon[1]][exon[2]-1:exon[3]]).seq
-        elif exon[4] == '+':
-          temp_seq += self.faidx[exon[1]][exon[2]-1:exon[3]].seq
-
-      _id = '{}|{}|{}|{}|{}|{}|{}'.format(transcript_id,
-                                          exons[0][5],
-                                          exons[0][6],
-                                          exons[0][7],
-                                          exons[0][8],
-                                          exons[0][9],
-                                          len(temp_seq))
-
-      temp_rec = SeqRecord(seq=Seq(temp_seq), id=_id,
-        description='')
-
-      self.sequences.append(temp_rec)
-
-    if not output:
-      logger.error('Please provide output file..')
-      sys.exit()
-    else:
-      logger.info('Writing {}'.format(output))
-      SeqIO.write(self.sequences, output, 'fasta')
-
-
-class Fastq:
-  def __init__(self, fileName=None, compressed=False):
-    self.fileName = fileName
-    self.compressed = compressed
-    self.n = 4
-    self.sequences = Counter()
-    self.uniqueOutput = fileName.rpartition(".")[0] + '.unique.fasta'
-
-  def recordIterator(self):
-    record = []
-    record_length = 0
-    for line in self.fileHandle:
-      if record_length == self.n:
-        yield record
-        record_length = 0
-        record = []
-      record.append(line.decode().rstrip())
-      record_length += 1
-    yield record
-
-  def createUnique(self):
-    if self.compressed:
-      self.fileHandle = gzip.open(self.fileName, 'rb')
-    else:
-      self.fileHandle = open(self.fileName, 'rb')
-    logger.info('Reading {}'.format(self.fileName))
-    for record in self.recordIterator():
-      self.sequences[record[1]] += 1
-    logger.info('Writing {}'.format(self.uniqueOutput))
-    with open(self.uniqueOutput, 'w')  as wH:
-      for index, (sequence, counts) in enumerate(sorted(self.sequences.items(),
-        key=itemgetter(1), reverse=True), start=1):
-        print('>{}-{}-{}'.format(index, counts, len(sequence)), file=wH)
-        print(textwrap.fill(sequence, width=80), file=wH)
-    logger.debug('Finished writing {}'.format(self.uniqueOutput))
-    self.fileHandle.close()
-
-class Record:
-  """A custom object (preferred over dict) for easy access using variables
 
-  It's a dependency for GTF and GFF classes
-  """
-  def __init__(self, **kwds):
-    self.__dict__.update(kwds)
+    def __init__(self, **kwds):
+        self.__dict__.update(kwds)
diff --git a/clashchimeras/runners.py b/clashchimeras/runners.py
index 4ef0cb1..9f0a033 100644
--- a/clashchimeras/runners.py
+++ b/clashchimeras/runners.py
@@ -1,4 +1,3 @@
-import glob
 import hashlib
 import logging
 import os
@@ -8,164 +7,173 @@
 
 logger = logging.getLogger('root')
 
+
 class Bowtie:
-  """Class for calling bowtie2 aligner
-  """
-
-  def __init__(self, args=None):
-    self.parameters = []
-    self.inputFile = args.input
-    self.compressed = args.gzip
-    self.bowtieExecutable = args.bowtieExecutable
-    self.uniqueInput = self.inputFile.rpartition(".")[0] + '.unique.fasta'
-    if os.path.exists(self.uniqueInput) and os.path.exists(
-      self.uniqueInput+'.sha256sum'):
-      with open(self.uniqueInput+'.sha256sum') as f:
-        s = f.readline().rstrip()
-      _s = self.hash(self.uniqueInput)
-      if s == _s:
-        logger.info('{} is present and verified'.format(self.uniqueInput))
-      else:
-        logger.warn('{} is present but checksum verification failed'.format(
-          self.uniqueInput))
-        logger.warn('{} will be generated again'.format(self.uniqueInput))
-    else:
-      _f = Fastq(fileName=self.inputFile, compressed=self.compressed)
-      _f.createUnique()
-      sha256sum = self.hash(self.uniqueInput)
-      with open(self.uniqueInput+'.sha256sum', 'w') as wH:
-        print(sha256sum, file=wH)
-    if not args.reverseComplement:
-      self.parameters.append('--norc')
-    self.parameters.append('-N')
-    self.parameters.append(str(args.mismatch))
-    if not args.unaligned:
-      self.parameters.append('--no-unal')
-    self.parameters.append('-p')
-    self.parameters.append(str(args.threads))
-    self.parameters.append('--'+args.preset)
-    self.smallRNAIndex = args.smallRNAIndex
-    self.targetRNAIndex = args.targetRNAIndex
-    self.output = args.output
-    self.runAligner = True
-
-  def hash(self, file):
-    sha256 = hashlib.sha256()
-    with open(file, 'r+b') as f:
-      while True:
-        buf = f.read(8192)
-        if not buf:
-          break
-        sha256.update(buf)
-    return sha256.hexdigest()
-
-  def run(self, output=None, filtered=None):
-    self.bowtie2 = []
-    if output=='smallRNA':
-      self.outputSam = '{}.{}.sam'.format(self.output, 'smallRNA')
-    elif output=='targetRNA':
-      self.outputSam = '{}.{}.sam'.format(self.output, 'targetRNA')
-    if not self.bowtieExecutable:
-      self.bowtie2.append('bowtie2')
-    else:
-      self.bowtie2.append(self.bowtieExecutable)
-    self.bowtie2.append('-f')
-    if filtered:
-      self.bowtie2.append(filtered)
-    else:
-      self.bowtie2.append(self.uniqueInput)
-    self.bowtie2.append('-x')
-    if output == 'smallRNA':
-      self.bowtie2.append('"'+self.smallRNAIndex+'"')
-    elif output == 'targetRNA':
-      self.bowtie2.append('"'+self.targetRNAIndex+'"')
-    self.bowtie2 = self.bowtie2 + self.parameters
-    self.bowtie2.append('-S')
-    self.bowtie2.append(self.outputSam)
-
-    if os.path.exists(self.outputSam) and os.path.exists(
-      self.outputSam+'.sha256sum'):
-      with open(self.outputSam+'.sha256sum') as f:
-        s = f.readline().rstrip()
-      _s = self.hash(self.outputSam)
-      if s == _s:
-        logger.info('{} is present and verified'.format(self.outputSam))
-        self.runAligner = False
-      else:
-        logger.warn('{} is present but checksum verification failed'.format(
-          self.outputSam))
-        logger.warn('{} will be generated again')
+    """Class for calling bowtie2 aligner
+    """
+
+    def __init__(self, args=None):
+        self.parameters = []
+        self.inputFile = args.input
+        self.compressed = args.gzip
+        self.bowtieExecutable = args.bowtieExecutable
+        self.uniqueInput = self.inputFile.rpartition(".")[0] + '.unique.fasta'
+        if os.path.exists(self.uniqueInput) and os.path.exists(
+                self.uniqueInput + '.sha256sum'):
+            with open(self.uniqueInput + '.sha256sum') as f:
+                s = f.readline().rstrip()
+            _s = self.hash(self.uniqueInput)
+            if s == _s:
+                logger.info(
+                    '{} is present and verified'.format(self.uniqueInput))
+            else:
+                logger.warn('{} is present but checksum verification '
+                            'failed'.format(self.uniqueInput))
+                logger.warn(
+                    '{} will be generated again'.format(self.uniqueInput))
+        else:
+            _f = Fastq(fileName=self.inputFile, compressed=self.compressed)
+            _f.createUnique()
+            sha256sum = self.hash(self.uniqueInput)
+            with open(self.uniqueInput + '.sha256sum', 'w') as wH:
+                print(sha256sum, file=wH)
+        if not args.reverseComplement:
+            self.parameters.append('--norc')
+        self.parameters.append('-N')
+        self.parameters.append(str(args.mismatch))
+        if not args.unaligned:
+            self.parameters.append('--no-unal')
+        self.parameters.append('-p')
+        self.parameters.append(str(args.threads))
+        self.parameters.append('--' + args.preset)
+        self.smallRNAIndex = args.smallRNAIndex
+        self.targetRNAIndex = args.targetRNAIndex
+        self.output = args.output
         self.runAligner = True
 
-    if self.runAligner:
-      logger.info('Executing bowtie2 and generating {}'.format(self.outputSam))
-      logger.info('The complete command for bowtie2 is \n{}'.format(
-                  " ".join(self.bowtie2)))
-      bowtie2Process = subprocess.Popen(self.bowtie2, stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE)
-      bowtie2Stdout, bowtie2Stderr = bowtie2Process.communicate()
-      logger.warn(bowtie2Stdout.decode('utf-8'))
-      logger.warn(bowtie2Stderr.decode('utf-8'))
-      if os.path.exists(self.outputSam):
-        logger.info('Bowtie2 execution finished and {} generated'.format(
-          self.outputSam))
-        sha256sum = self.hash(self.outputSam)
-        with open(self.outputSam+'.sha256sum', 'w') as wH:
-          print(sha256sum, file=wH)
-      else:
-        logger.error('Something went wrong with your bowtie2 command.')
-        logger.error('Please check your bowtie2 stderr and report a bug....')
+    def hash(self, file):
+        sha256 = hashlib.sha256()
+        with open(file, 'r+b') as f:
+            while True:
+                buf = f.read(8192)
+                if not buf:
+                    break
+                sha256.update(buf)
+        return sha256.hexdigest()
+
+    def run(self, output=None, filtered=None):
+        self.bowtie2 = []
+        if output == 'smallRNA':
+            self.outputSam = '{}.{}.sam'.format(self.output, 'smallRNA')
+        elif output == 'targetRNA':
+            self.outputSam = '{}.{}.sam'.format(self.output, 'targetRNA')
+        if not self.bowtieExecutable:
+            self.bowtie2.append('bowtie2')
+        else:
+            self.bowtie2.append(self.bowtieExecutable)
+        self.bowtie2.append('-f')
+        if filtered:
+            self.bowtie2.append(filtered)
+        else:
+            self.bowtie2.append(self.uniqueInput)
+        self.bowtie2.append('-x')
+        if output == 'smallRNA':
+            self.bowtie2.append('"' + self.smallRNAIndex + '"')
+        elif output == 'targetRNA':
+            self.bowtie2.append('"' + self.targetRNAIndex + '"')
+        self.bowtie2 = self.bowtie2 + self.parameters
+        self.bowtie2.append('-S')
+        self.bowtie2.append(self.outputSam)
+
+        if os.path.exists(self.outputSam) and os.path.exists(
+                self.outputSam + '.sha256sum'):
+            with open(self.outputSam + '.sha256sum') as f:
+                s = f.readline().rstrip()
+            _s = self.hash(self.outputSam)
+            if s == _s:
+                logger.info(
+                    '{} is present and verified'.format(self.outputSam))
+                self.runAligner = False
+            else:
+                logger.warn('{} is present but checksum verification '
+                            'failed'.format(self.outputSam))
+                logger.warn('{} will be generated again')
+                self.runAligner = True
+
+        if self.runAligner:
+            logger.info(
+                'Executing bowtie2 and generating {}'.format(self.outputSam))
+            logger.info('The complete command for bowtie2 is \n{}'.format(
+                        " ".join(self.bowtie2)))
+            bowtie2Process = subprocess.Popen(self.bowtie2,
+                                              stdout=subprocess.PIPE,
+                                              stderr=subprocess.PIPE)
+            bowtie2Stdout, bowtie2Stderr = bowtie2Process.communicate()
+            logger.warn(bowtie2Stdout.decode('utf-8'))
+            logger.warn(bowtie2Stderr.decode('utf-8'))
+            if os.path.exists(self.outputSam):
+                logger.info('Bowtie2 execution finished and {} '
+                            'generated'.format(self.outputSam))
+                sha256sum = self.hash(self.outputSam)
+                with open(self.outputSam + '.sha256sum', 'w') as wH:
+                    print(sha256sum, file=wH)
+            else:
+                logger.error('Something went wrong with your bowtie2 command.')
+                logger.error(
+                    'Please check your bowtie2 stderr and report a bug....')
 
 
 class Tophat:
-  def __init__(self, args=None):
-    self.inputFile = args.input
-    if args.genomeIndex:
-      args.genomeIndex = os.path.expanduser(args.genomeIndex)
-    if args.transcriptomeIndex:
-      args.transcriptomeIndex = os.path.expanduser(
-        args.transcriptomeIndex)
-    if args.output:
-      args.output = os.path.expanduser(args.output)
-    if args.input:
-      args.input = os.path.expanduser(args.input)
-
-    self.tophatOutput = args.output
-    self.genomeIndex = args.genomeIndex
-
-    self.tophatRun = []
-    if args.tophatExecutable is not None:
-      self.tophatExecutable = args.tophatExecutable
-    else:
-      self.tophatExecutable = 'tophat'
-    self.tophatRun.append(self.tophatExecutable)
-    self.tophatRun.append('--b2-'+args.tophatPreset)
-    self.tophatRun.append('--b2-N')
-    self.tophatRun.append(str(args.mismatch))
-
-    if args.transcriptomeIndex:
-      self.tophatRun.append('--transcriptome-index='+args.transcriptomeIndex
-                            .strip())
-    else:
-      logger.warn('Transcriptome index is not provided for tophat')
-    self.tophatRun.append('-p')
-    self.tophatRun.append(str(args.threads))
-    self.tophatRun.append('-o')
-    self.tophatRun.append(self.tophatOutput)
-    self.tophatRun.append(self.genomeIndex)
-    self.tophatRun.append(self.inputFile)
-
-  def run(self):
-    logger.info('Executing Tophat2 for {}'.format(self.inputFile))
-    logger.info('The complete command for tophat is \n{}'.format(" ".join(
-      self.tophatRun)))
-    logger.info('The stdout and stderr for tophat run will be dumped after '
-                'its finished')
-    tophatProcess = subprocess.Popen(self.tophatRun, stdout=subprocess.PIPE,
-      stderr=subprocess.PIPE)
-    tophatStderr, tophatStdout = tophatProcess.communicate()
-    logger.warn('Tophat stderr')
-    logger.info(tophatStderr.decode("utf-8"))
-    logger.warn('Tophat stdout')
-    logger.info(tophatStdout.decode("utf-8"))
-    logger.info('Execution of Tophat2 finished')
+
+    def __init__(self, args=None):
+        self.inputFile = args.input
+        if args.genomeIndex:
+            args.genomeIndex = os.path.expanduser(args.genomeIndex)
+        if args.transcriptomeIndex:
+            args.transcriptomeIndex = os.path.expanduser(
+                args.transcriptomeIndex)
+        if args.output:
+            args.output = os.path.expanduser(args.output)
+        if args.input:
+            args.input = os.path.expanduser(args.input)
+
+        self.tophatOutput = args.output
+        self.genomeIndex = args.genomeIndex
+
+        self.tophatRun = []
+        if args.tophatExecutable is not None:
+            self.tophatExecutable = args.tophatExecutable
+        else:
+            self.tophatExecutable = 'tophat'
+        self.tophatRun.append(self.tophatExecutable)
+        self.tophatRun.append('--b2-' + args.tophatPreset)
+        self.tophatRun.append('--b2-N')
+        self.tophatRun.append(str(args.mismatch))
+
+        if args.transcriptomeIndex:
+            self.tophatRun.append('--transcriptome-index=' +
+                                  args.transcriptomeIndex.strip())
+        else:
+            logger.warn('Transcriptome index is not provided for tophat')
+        self.tophatRun.append('-p')
+        self.tophatRun.append(str(args.threads))
+        self.tophatRun.append('-o')
+        self.tophatRun.append(self.tophatOutput)
+        self.tophatRun.append(self.genomeIndex)
+        self.tophatRun.append(self.inputFile)
+
+    def run(self):
+        logger.info('Executing Tophat2 for {}'.format(self.inputFile))
+        logger.info('The complete command for tophat is \n{}'.format(" ".join(
+            self.tophatRun)))
+        logger.info('The stdout and stderr for tophat run will be '
+                    'dumped after its finished')
+        tophatProcess = subprocess.Popen(self.tophatRun,
+                                         stdout=subprocess.PIPE,
+                                         stderr=subprocess.PIPE)
+        tophatStderr, tophatStdout = tophatProcess.communicate()
+        logger.warn('Tophat stderr')
+        logger.info(tophatStderr.decode("utf-8"))
+        logger.warn('Tophat stdout')
+        logger.info(tophatStdout.decode("utf-8"))
+        logger.info('Execution of Tophat2 finished')
diff --git a/dist/CLASHChimeras-0.1b3-py3.5.egg b/dist/CLASHChimeras-0.1b3-py3.5.egg
new file mode 100644
index 0000000..b232367
Binary files /dev/null and b/dist/CLASHChimeras-0.1b3-py3.5.egg differ
diff --git a/setup.py b/setup.py
index cd44dfb..813c429 100644
--- a/setup.py
+++ b/setup.py
@@ -1,59 +1,61 @@
+import logging
 import os
 import sys
-import logging
 
 from setuptools import setup
 
 logging.basicConfig(level=logging.DEBUG,
-  format='[%(asctime)s] - %(name)s - %(levelname)s - %(message)s'
-)
+                    format='[%(asctime)s] - %(name)s - %(levelname)s - '
+                    '%(message)s')
 logger = logging.getLogger('CLASHChimeras')
 
 major, minor = sys.version_info.major, sys.version_info.minor
 if major == 3 and minor < 4:
-  logger.warn('This package is tested with Python 3.4')
-  logger.warn('However it should work with Python version 3.x {}'.format(
-      ' but should you face any problems, please install Python >= 3.4'
-  ))
+    logger.warn('This package is tested with Python 3.4')
+    logger.warn('However it should work with Python version 3.x {}'.format(
+        ' but should you face any problems, please install Python >= 3.4'
+    ))
 elif major < 3:
-  logger.error('Please install Python version >= 3 to use this package')
-  sys.exit(2)
+    logger.error('Please install Python version >= 3 to use this package')
+    sys.exit(2)
 
-requirements = open(os.path.join(os.path.dirname(__file__), 'requirements.txt')).readlines()
-version = open(os.path.join(os.path.dirname(__file__), 'VERSION')).readline().rstrip()
+requirements = open(os.path.join(os.path.dirname(
+    __file__), 'requirements.txt')).readlines()
+version = open(os.path.join(os.path.dirname(
+    __file__), 'VERSION')).readline().rstrip()
 
 setup(
-  name = "CLASHChimeras",
-  version = version,
-  author = "Kashyap Chhatbar",
-  author_email = "kashyap.c@ed.ac.uk",
-  description = "Python package to find chimeras in CRAC/CLASH and"
-                " HITS-CLIP datasets",
-  install_requires=requirements,
-  url = 'https://github.com/kashyapchhatbar/CLASHChimeras',
+    name="CLASHChimeras",
+    version=version,
+    author="Kashyap Chhatbar",
+    author_email="kashyap.c@ed.ac.uk",
+    description="Python package to find chimeras in CRAC/CLASH and"
+    " HITS-CLIP datasets",
+    install_requires=requirements,
+    url='https://github.com/kashyapchhatbar/CLASHChimeras',
 
-  classifiers=['Development Status :: 4 - Beta',
+    classifiers=['Development Status :: 4 - Beta',
 
-               'Topic :: Scientific/Engineering :: Bio-Informatics',
-               'Topic :: Scientific/Engineering :: Medical Science Apps.',
+                 'Topic :: Scientific/Engineering :: Bio-Informatics',
+                 'Topic :: Scientific/Engineering :: Medical Science Apps.',
 
-               'License :: OSI Approved :: MIT License',
+                 'License :: OSI Approved :: MIT License',
 
-               'Environment :: Console',
-               'Intended Audience :: Science/Research',
-               'Intended Audience :: Developers',
+                 'Environment :: Console',
+                 'Intended Audience :: Science/Research',
+                 'Intended Audience :: Developers',
 
-               'Operating System :: POSIX :: Linux',
-               'Operating System :: MacOS',
+                 'Operating System :: POSIX :: Linux',
+                 'Operating System :: MacOS',
 
-               'Programming Language :: Python :: 3 :: Only',
-               ],
+                 'Programming Language :: Python :: 3 :: Only',
+                 ],
 
-  keywords='clash chimeras hybrids hits-clip bioinformatics',
+    keywords='clash chimeras hybrids hits-clip bioinformatics',
 
-  license='MIT',
+    license='MIT',
 
-  py_modules = ['clashchimeras.align',
+    py_modules=['clashchimeras.align',
                 'clashchimeras.download',
                 'clashchimeras.find',
                 'clashchimeras.initialize',
@@ -62,7 +64,7 @@
                 'clashchimeras.parsers',
                 'clashchimeras.runners'],
 
-  scripts = ['scripts/align-for-chimeras',
+    scripts=['scripts/align-for-chimeras',
              'scripts/download-for-chimeras',
              'scripts/find-chimeras']