diff --git a/XICRA_pip/XICRA/config/software/dependencies.csv b/XICRA_pip/XICRA/config/software/dependencies.csv index c7146e7..43af1d0 100644 --- a/XICRA_pip/XICRA/config/software/dependencies.csv +++ b/XICRA_pip/XICRA/config/software/dependencies.csv @@ -6,7 +6,7 @@ Rscript,--version,.*version\s([0-9\.]+).*,3.5.1,Rscript java,,,na,java python,--version,([0-9\.]+),3.6,python perl,--version,v([0-9]\.[0-9]+\.[0-9]),5.18.1,perl -make,--version,GNU Make ([0-9]\.[0-9].*),4.0,make +make,--version,GNU Make ([0-9]\.[0-9].*),4,make git,--version,git version ([0-9]\.[0-9]+\.[0-9]),2.1.0,git fastqjoin,,,na,fastq-join sRNAbench,-h,([0-9\.]+),1.5,sRNAbench.jar @@ -14,4 +14,5 @@ miRTop,--version,([0-9\.]+).*,0.4.23,mirtop optimir,,,na,optimir miraligner,,,na,miraligner.jar STAR,--version,([0-9\.]+).*,2.6.1,STAR -featureCounts,-v,v([0-9\.]+).*,1.5.1,featureCounts \ No newline at end of file +featureCounts,-v,v([0-9\.]+).*,1.5.1,featureCounts +MINTmap,,,na,MINTmap diff --git a/XICRA_pip/XICRA/modules/tRNA.py b/XICRA_pip/XICRA/modules/tRNA.py index e10db25..70b0205 100644 --- a/XICRA_pip/XICRA/modules/tRNA.py +++ b/XICRA_pip/XICRA/modules/tRNA.py @@ -27,7 +27,6 @@ from XICRA.modules import help_XICRA from XICRA.scripts import generate_DE from XICRA.scripts import MINTMap_caller -from HCGB.functions import fasta_functions ############################################## def run_tRNA(options): @@ -116,6 +115,102 @@ def run_tRNA(options): ## species print ("+ Species provided:", options.species) + ## set database path if necessary + if not (options.database): + install_path = os.path.dirname(os.path.realpath(__file__)) + options.database = os.path.join(install_path, "db_files") + else: + options.database = os.path.abspath(options.database) + + ## generate output folder, if necessary + if not options.project: + print ("\n+ Create output folder(s):") + functions.files_functions.create_folder(outdir) + + ## for samples + outdir_dict = functions.files_functions.outdir_project(outdir, 
options.project, pd_samples_retrieved, "tRNA", options.debug) + + ## optimize threads + name_list = set(pd_samples_retrieved["new_name"].tolist()) + threads_job = functions.main_functions.optimize_threads(options.threads, len(name_list)) ## threads optimization + max_workers_int = int(options.threads/threads_job) + + ## to FIX: MINTmap requires to chdir to folder to create results + max_workers_int = 1 + + ## debug message + if (Debug): + print (colored("**DEBUG: options.threads " + str(options.threads) + " **", 'yellow')) + print (colored("**DEBUG: max_workers " + str(max_workers_int) + " **", 'yellow')) + print (colored("**DEBUG: cpu_here " + str(threads_job) + " **", 'yellow')) + + print ("+ Create a tRNA analysis for each sample retrieved...") + + ## call tRNA_analysis: + ## Get user software selection: mintmap, ... + + ## dictionary results + global results_df + results_df = pd.DataFrame(columns=("name", "soft", "type", "filename")) + # Group dataframe by sample name + sample_frame = pd_samples_retrieved.groupby(["new_name"]) + + ## send for each sample + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers_int) as executor: + commandsSent = { executor.submit(tRNA_analysis, + sorted(cluster["sample"].tolist()), + outdir_dict[name], name, threads_job, + options.soft_name, options.species, + options.database, Debug): name for name, cluster in sample_frame } + + for cmd2 in concurrent.futures.as_completed(commandsSent): + details = commandsSent[cmd2] + try: + data = cmd2.result() + except Exception as exc: + print ('***ERROR:') + print (cmd2) + print('%r generated an exception: %s' % (details, exc)) + + print ("\n\n+ tRNA analysis is finished...") + print ("+ Let's summarize all results...") + + ## outdir + outdir_report = functions.files_functions.create_subfolder("report", outdir) + expression_folder = functions.files_functions.create_subfolder("tRNA", outdir_report) + + ## debugging messages + if options.debug: + print (results_df) + + ## merge 
all parse gtf files created + print ("+ Summarize tRNA analysis for all samples...") + generate_DE.generate_DE(results_df, options.debug, expression_folder, default_name="tRNA_expression-") + + print ("\n*************** Finish *******************") + start_time_partial = functions.time_functions.timestamp(start_time_total) + print ("\n+ Exiting tRNA module.") + return() + + +######################################### +def tRNA_analysis(reads, folder, name, threads, soft_list, species, database, Debug): + ## + for soft in soft_list: + if (soft == "mintmap"): + ## create mintmap + MINTmap_folder = functions.files_functions.create_subfolder('mintmap', folder) + code_success = MINTMap_caller.MINTmap_caller(MINTmap_folder, reads, name, threads, species, Debug) + + if not code_success: + print ('** Some error ocurred during MINTmap analysis for sample %s...' %name) + return () + + ## save results in dataframe + filename_amb = os.path.join(MINTmap_folder, 'mintmap_parse', name + '_amb.tsv') + filename_exc = os.path.join(MINTmap_folder, 'mintmap_parse', name + '_exc.tsv') + results_df.loc[len(results_df)] = name, soft, "amb", filename_amb + results_df.loc[len(results_df)] = name, soft, "exc", filename_exc \ No newline at end of file diff --git a/XICRA_pip/XICRA/scripts/MINTMap_caller.py b/XICRA_pip/XICRA/scripts/MINTMap_caller.py index b1c7288..b234f06 100644 --- a/XICRA_pip/XICRA/scripts/MINTMap_caller.py +++ b/XICRA_pip/XICRA/scripts/MINTMap_caller.py @@ -1,157 +1,208 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - -#=============================================================================== -# def MINTmap(reads, folder, file_name, num_threads): -# MINTmap = config['EXECUTABLES']['MINTmap_folder'] + 'MINTmap.pl' -# MINTmap_table = config['EXECUTABLES']['MINTmap_folder'] + 'LookupTable.tRFs.MINTmap_v1.txt' -# MINTmap_tRNAseq = config['EXECUTABLES']['MINTmap_folder'] + 'tRNAspace.Spliced.Sequences.MINTmap_v1.fa' -# MINTmap_tRF = 
config['EXECUTABLES']['MINTmap_folder'] + 'OtherAnnotations.MINTmap_v1.txt' -# MINTmap_MINTplates = config['EXECUTABLES']['MINTmap_folder'] + 'MINTplates/' -# results = [] -# command2sent = [] -# -# ## open file -# output_file = open(file_name, 'a') -# output_file.write("\nMINTmap:\n") -# -# for jread in reads: -# for prefix in prefix_list: -# if paired_end: -# sample_search = re.search(r"(%s)\_(\d{1,2})\_(.*)" % prefix, jread) -# else: -# name_sample = os.path.basename(jread) -# name_dir = os.path.dirname(jread) -# sample_search = re.search(r"(.*)\_trimmed\.fastq", name_sample) -# -# if sample_search: -# if paired_end: -# outdir = sample_search.group(1) + "_" + sample_search.group(2) -# else: -# outdir = sample_search.group(1) -# -# sample_folder = folder + '/' + outdir + '/' -# results.append(sample_folder) -# logfile = sample_folder + outdir + '_logfile.txt' -# if (os.path.isdir(sample_folder)): -# print ('\tMINTmap analysis for sample %s already exists' %outdir) -# else: -# #MINTmap.pl -f trimmedfastqfile [-p outputprefix] [-l lookuptable] [-s tRNAsequences] [-o tRFtypes] [-d customRPM] [-a assembly] [-j MINTplatesPath] [-h] -# fol = functions.create_subfolder(outdir, folder) -# cmd = 'perl '+ MINTmap + ' -f %s -p %s -l %s -s %s -o %s -j %s > %s' %(jread, sample_folder + outdir, MINTmap_table, MINTmap_tRNAseq, MINTmap_tRF, MINTmap_MINTplates, logfile) -# # get command -# command2sent.append(cmd) -# # print into file -# output_file.write(cmd) -# output_file.write('\n') -# -# #sent commands on threads -# output_file.close() -# -# #print ("Commands:") -# #print (len(command2sent)) -# functions.sender(command2sent, num_threads) -# -# results = set(results) ## BUG: if single-end option, it sends as many as prefixes each command -# #print ("results:") -# #print (len(results)) -# -# return results -# ############### -# -# ############### -# def tRFs_analysis(path, count, reads, time_partial, output_file, num_threads): -# ############################## -# ####### Step: 
sRNAbench ###### -# ############################## -# print ("\n+ Run MINTmap: ") -# name_MINTmap_folder = str(count) + '.1.tRFs_MINTmap' -# MINTmap_folder = functions.create_subfolder(name_MINTmap_folder, path) -# results = MINTmap(reads, MINTmap_folder, output_file, num_threads) -# -# print ("\n+ Get MINTmap matrix: ") -# name_MINTmap_matrix = str(count) + '.2.tRFs_matrix' -# MINTmap_matrix_folder = functions.create_subfolder(name_MINTmap_matrix, path) -# -# for folder in results: -# files = os.listdir(folder) -# for item in files: -# if 'countsmeta' in item: -# continue -# if item.endswith('html'): -# continue -# if 'ambigu' in item: -# parse_tRF(folder, item, MINTmap_matrix_folder, 'ambiguous') -# elif 'exclu' in item: -# parse_tRF(folder, item, MINTmap_matrix_folder, 'exclusive') -# -# ## functions.timestamp -# time_partial = functions.timestamp(time_partial) -# -# ############### -# def parse_tRF(path, fileGiven, matrix_folder, ident): -# pathFile = path + '/' + fileGiven -# sample_search = re.search('(.*)\-MINTmap_v1.*', fileGiven) -# if sample_search: -# sample_name = sample_search.group(1) -# #sample_folder = matrix_folder + '/' + sample_name -# skip = 0 -# tsv_file = matrix_folder + '/' + sample_name + '_' + ident + '.tsv' -# if os.path.isfile(tsv_file): -# print ('\tMatrix for ', sample_name , ' (' + ident + ') is already generated') -# skip = 1 -# if skip == 0: -# ## Open file -# fil = open(tsv_file, 'w') -# string2write = 'type\tsample_name\tident\tname\tvariant\tUID\tseq\texpression\n' -# fil.write(string2write) -# ## Read file -# expression_file = open(pathFile) -# expression_text = expression_file.read() -# expression_lines = expression_text.splitlines() -# for line in expression_lines: -# if not line.startswith('MINTbase'): -# UID = line.split('\t')[0] -# seq = line.split('\t')[1] -# variant = line.split('\t')[2] -# expression = line.split('\t')[3] -# tRNA_name = line.split('\t')[-1].split(',')[0] -# -# # tRF-31-87R8WP9N1EWJ0 
TCCCTGGTGGTCTAGTGGTTAGGATTCGGCG 5'-tRF 921 7026.67 452.60 na trna77_GluCTC_6_+_28949976_28950047@1.31.31, trna80_GluCTC_1_-_161417018_161417089@1.31.31 -# tRNA_search = re.search(r"trna.{1,3}\_(.{6})\_(.{1,2})\_.*", tRNA_name) -# tRNA_family = 'na' -# if tRNA_search: -# tRNA_family = tRNA_search.group(1) -# if (tRNA_search.group(2) == 'MT'): -# tRNA_family = tRNA_family + '_MT' -# -# -# string2write = 'tRFs\t'+ sample_name + '\t' + ident + '\t' + tRNA_family +'\t' + variant +'\t' + UID + '\t' + seq + '\t' + expression + '\n' -# fil.write(string2write) -# -# fil.close() -# ############### -#=============================================================================== +#!/usr/bin/env python3 +########################################################## +## Jose F. Sanchez, Marta Lopez & Lauro Sumoy ## +## Copyright (C) 2019-2021 Lauro Sumoy Lab, IGTP, Spain ## +########################################################## +''' +Calls MINTMap to create tRNA profile +''' +## useful imports +import time +import io +import os +import re +import sys +from sys import argv +from io import open +from termcolor import colored + +## import my modules +from HCGB import functions +from XICRA.config import set_config +import HCGB.functions.aesthetics_functions as HCGB_aes + +############################ +def MINTmap_caller(MINTmap_folder, reads, name, num_threads, species, Debug): + # check if previously generated and succeeded + filename_stamp = MINTmap_folder + '/.success_all' + if os.path.isfile(filename_stamp): + stamp = functions.time_functions.read_time_stamp(filename_stamp) + print (colored("\tA previous command generated results on: %s [%s -- %s]" %(stamp, name, 'MINTmap'), 'yellow')) + return True + + else: + # Call MINTMap_analysis + code_returned = MINTMap_analysis(MINTmap_folder, reads, name, num_threads, species, Debug) + if code_returned: + functions.time_functions.print_time_stamp(filename_stamp) + return True + else: + print ('** Sample %s failed...' 
%name) + return(False) + +############################ +def MINTMap_analysis(path_folder, reads, name, num_threads, species, Debug): + + ## check species + species_code="" + if species=="hsa": + species_code="default" + else: + print (colored("** ERROR: Not available yet. No mapping bundle available for species " + species, 'red')) + exit() + + ## debug messages + if Debug: + HCGB_aes.debug_message("species: " + species, "yellow") + HCGB_aes.debug_message("species_code: " + species_code, "yellow") + + ## ATTENTION: MINTmap needs to chdir to output folder + path_here = os.getcwd() + + ## debug messages + if Debug: + HCGB_aes.debug_message("path_here: " + path_here, "yellow") + + filename_stamp = path_folder + '/.success_mintmap' + if os.path.isfile(filename_stamp): + stamp = functions.time_functions.read_time_stamp(filename_stamp) + print (colored("\tA previous command generated results on: %s [%s -- %s]" %(stamp, name, 'MINTmap call'), 'yellow')) + else: + # Call MINTMap_analysis + print ("\n+ Run MINTmap: ") + codeReturn = MINTmap(reads, path_folder, name, num_threads, species_code, Debug) + os.chdir(path_here) + + if not codeReturn: + print ('** Sample %s failed...' 
%name) + return False + + ## create time stamp + functions.time_functions.print_time_stamp(filename_stamp) + + ## Get MINTmap matrix + MINTmap_matrix_folder = functions.files_functions.create_subfolder("mintmap_parse", path_folder) + + files = os.listdir(path_folder) + for item in files: + abs_path_file = os.path.abspath(os.path.join(path_folder, item)) + + ## debug messages + if Debug: + HCGB_aes.debug_message("abs_path_file: " + abs_path_file, "yellow") + + ## parse them + if 'countsmeta' in item: + continue + if item.endswith('html'): + continue + if 'ambigu' in item: + amb_file = parse_tRF(abs_path_file, name, MINTmap_matrix_folder, 'amb', Debug) + elif 'exclu' in item: + exc_file = parse_tRF(abs_path_file, name, MINTmap_matrix_folder, 'exc', Debug) + + ## debug messages + if Debug: + HCGB_aes.debug_message("exc_file: " + exc_file, "yellow") + HCGB_aes.debug_message("amb_file: " + amb_file, "yellow") + + + if functions.files_functions.is_non_zero_file(amb_file) and functions.files_functions.is_non_zero_file(exc_file): + filename_stamp = path_folder + '/.success_all' + functions.time_functions.print_time_stamp(filename_stamp) + + + return(True) + +############## +def parse_tRF(pathFile, sample_name, matrix_folder, ident, Debug): + + ## tsv file name + tsv_file = os.path.join(matrix_folder, sample_name + "_" + ident + '.tsv') + + ## time stamp + filename_stamp = matrix_folder + '/.success_parse' + if os.path.isfile(filename_stamp) and functions.files_functions.is_non_zero_file(tsv_file): + stamp = functions.time_functions.read_time_stamp(filename_stamp) + print (colored("\tA previous command generated results on: %s [%s -- %s]" %(stamp, sample_name, 'MINTmap - parse'), 'yellow')) + else: + ## Open file + fil = open(tsv_file, 'w') + string2write = 'UID\tRead\ttRNA\tvariant\tident\texpression\tsoft\n' + fil.write(string2write) + ## Read file + expression_file = open(pathFile) + expression_text = expression_file.read() + expression_lines = 
expression_text.splitlines() + + ## debug messages + if Debug: + HCGB_aes.debug_message("MINTmap file: " + pathFile, "yellow") + + + for line in expression_lines: + + # ------------------------------ # + # Example line: + # tRF-31-87R8WP9N1EWJ0 TCCCTGGTGGTCTAGTGGTTAGGATTCGGCG 5'-tRF 921 7026.67 452.60 na trna77_GluCTC_6_+_28949976_28950047@1.31.31, trna80_GluCTC_1_-_161417018_161417089@1.31.31 + # ------------------------------ # + + if not line.startswith('#'): + if not line.startswith('License Plate'): + UID = line.split('\t')[0] ## License Plate + seq = line.split('\t')[1] ## tRF sequence + variant = line.split('\t')[2] ## tRF type + expression = line.split('\t')[3] ## unnormalized counts + ### there are other RPM counts taking into account several things such as total base pairs, reads, etc. We would use raw counts + + ## Get tRNA name + tRNA_name = line.split('\t')[-1].split(',')[0] + tRNA_search = re.search(r"trna.{1,3}\_(.{6})\_(.{1,2})\_.*", tRNA_name) + tRNA_family = 'na' + if tRNA_search: + tRNA_family = tRNA_search.group(1) + if (tRNA_search.group(2) == 'MT'): + tRNA_family = tRNA_family + '_MT' + + + string2write = UID + '\t' + seq + '\t' + tRNA_family +'\t' + variant +'\t' + ident + '\t' + expression + '\tmintmap\n' + + ## debug messages + if Debug: + HCGB_aes.debug_message(string2write, "yellow") + + fil.write(string2write) + + fil.close() + + return(tsv_file) + +############## +def MINTmap(reads, outpath, name, num_threads, species_code, Debug): + + outpath = os.path.abspath(outpath) + functions.files_functions.create_folder(outpath) + + ## change path where the results are required + os.chdir(outpath) + + mintmap_exe = set_config.get_exe("MINTmap", Debug=Debug) + logfile = os.path.join(outpath, 'MINTmap.log') + + ## output + outpath_file = os.path.join(outpath, name) + + if (len(reads) > 1): + print (colored("** ERROR: Only 1 fastq file is allowed please joined reads before...", 'red')) + exit() + + ## species bundle + if species_code == "default": 
+ ## create command: use default mapping bundle provided with MINTmap + cmd = '%s -p %s %s 2> %s' %(mintmap_exe, name, reads[0], logfile) + else: + ## create command: use specific mapping bundle path + cmd = '%s -p %s -m %s %s 2> %s' %(mintmap_exe, name, species_code, reads[0], logfile) + + return(functions.system_call_functions.system_call(cmd)) + \ No newline at end of file diff --git a/XICRA_pip/XICRA/scripts/generate_DE.py b/XICRA_pip/XICRA/scripts/generate_DE.py index 3cf8769..50fc48c 100644 --- a/XICRA_pip/XICRA/scripts/generate_DE.py +++ b/XICRA_pip/XICRA/scripts/generate_DE.py @@ -16,13 +16,15 @@ from HCGB import functions #################### -def generate_DE(dataframe_results, Debug, outfolder): +def generate_DE(dataframe_results, Debug, outfolder, default_name='miRNA_expression-'): """Builds final expression matrices comparing all samples. Generates three .csv for each software used: miRNA_expression-soft_name_dup.csv: counts with duplicated reads for each sample miRNA_expression-soft_name.csv: final matrix, counts of each isomiR (with miRNA and variant info) without duplicated reads - miRNA_expression-soft_name_seq.csv: table with the miRTop identifier and the corresponding DNA sequence + miRNA_expression-soft_name_seq.csv: table with the miRTop identifier and the corresponding DNA sequence + + miRNA_expression- is the default analysis but other can be provided such as tRNA, piRNA, etc :param dataframe_results: dataframe with the paths of the outputs of each sample and software :param Debug: display complete log @@ -61,7 +63,7 @@ def generate_DE(dataframe_results, Debug, outfolder): all_data_filtered, all_data_duplicated = discard_UID_duplicated(all_data) ## dump data in folder provided - csv_outfile = os.path.join(outfolder, 'miRNA_expression-' + soft_name) + csv_outfile = os.path.join(outfolder, default_name + soft_name) all_data_filtered.to_csv(csv_outfile + ".csv", quoting=csv.QUOTE_NONNUMERIC) all_data_duplicated.to_csv(csv_outfile + '_dup.csv', 
quoting=csv.QUOTE_NONNUMERIC) all_seqs.to_csv(csv_outfile + '_seq.csv', quoting=csv.QUOTE_NONNUMERIC) @@ -115,7 +117,7 @@ def discard_UID_duplicated(df_data): return (clean_data_expression, duplicates_expression) #################### -def generate_matrix(dict_files, soft_name, Debug): +def generate_matrix(dict_files, soft_name, Debug, type_analysis="miRNA"): """ """ ###################################################### @@ -144,33 +146,57 @@ def generate_matrix(dict_files, soft_name, Debug): print ('\t - Information not available for sample: ', sample) continue - ## get info, generate unique name and merge for samples - ## header of tsv files: - ## UID Read miRNA Variant iso_5p iso_3p iso_add3p iso_snp sRNAbench - - data['Variant'].fillna('NA', inplace=True) - data['unique_id'] = data.apply(lambda data: data['miRNA'] + '&' + data['Variant'] + '&' + data['UID'], axis=1) - - ## parse according to software - if (soft_name == 'srnabench'): - ## sRNAbench mirtop creates a column id with sRNAbench instead of sample name - new_data = data.filter(['unique_id', 'sRNAbench'], axis=1) - new_data = new_data.set_index('unique_id') - new_data = new_data.rename(columns={'sRNAbench': sample}) - - if (soft_name == 'optimir'): - ## OptimiR mirtop creates a column containing sample name and other tags (trim, joined, fastq...) - regex=re.compile(sample + '.*') - search_list = list(filter(regex.match, data.columns.values.tolist())) - new_data = data.filter(['unique_id', search_list[0]], axis=1) - new_data = new_data.set_index('unique_id') - new_data = new_data.rename(columns={search_list[0]: sample}) - - if (soft_name == 'miraligner'): - ## miraligner mirtop creates a column containing sample name and other tags (trim, joined, fastq...) 
- new_data = data.filter(['unique_id', sample], axis=1) - new_data = new_data.set_index('unique_id') + #### + if type_analysis=="miRNA": + + ## ------------------------------------------ ## + ## Create matrix for miRNA results + ## ------------------------------------------ ## + + ## get info, generate unique name and merge for samples + ## header of tsv files: + ## UID Read miRNA Variant iso_5p iso_3p iso_add3p iso_snp sRNAbench + + data['Variant'].fillna('NA', inplace=True) + data['unique_id'] = data.apply(lambda data: data['miRNA'] + '&' + data['Variant'] + '&' + data['UID'], axis=1) + + ## parse according to software + if (soft_name == 'srnabench'): + ## sRNAbench mirtop creates a column id with sRNAbench instead of sample name + new_data = data.filter(['unique_id', 'sRNAbench'], axis=1) + new_data = new_data.set_index('unique_id') + new_data = new_data.rename(columns={'sRNAbench': sample}) + + if (soft_name == 'optimir'): + ## OptimiR mirtop creates a column containing sample name and other tags (trim, joined, fastq...) + regex=re.compile(sample + '.*') + search_list = list(filter(regex.match, data.columns.values.tolist())) + new_data = data.filter(['unique_id', search_list[0]], axis=1) + new_data = new_data.set_index('unique_id') + new_data = new_data.rename(columns={search_list[0]: sample}) + + if (soft_name == 'miraligner'): + ## miraligner mirtop creates a column containing sample name and other tags (trim, joined, fastq...) 
+ new_data = data.filter(['unique_id', sample], axis=1) + new_data = new_data.set_index('unique_id') + + #### + elif type_analysis=="tRNA": + ## ------------------------------------------ ## + ## Create matrix for tRNA results + ## ------------------------------------------ ## + ## UID Read tRNA variant ident expression soft\n' + + data['variant'].fillna('NA', inplace=True) + data['unique_id'] = data.apply(lambda data: data['tRNA'] + '&' + data['variant'] + '&' + data['UID'], axis=1) + + ## parse according to software + if (soft_name == 'mintmap'): + new_data = data.filter(['unique_id', 'mintmap'], axis=1) + new_data = new_data.set_index('unique_id') + new_data = new_data.rename(columns={'mintmap': sample}) + ## sequence information seq_data = data.filter(['UID', 'Read'], axis=1) seq_data = seq_data.set_index('UID') diff --git a/XICRA_pip/main/XICRA b/XICRA_pip/main/XICRA index b1fc8d5..3018ef8 100644 --- a/XICRA_pip/main/XICRA +++ b/XICRA_pip/main/XICRA @@ -299,6 +299,7 @@ in_out_group_tRNA.add_argument("--noTrim", action='store_true', help="Use non-tr options_group_tRNA = subparser_tRNA.add_argument_group("Options") options_group_tRNA.add_argument("-t", "--threads", type=int, help="Number of CPUs to use [Default: 2].", default=2) options_group_tRNA.add_argument("--species", help="Species tag ID [Default: hsa (Homo sapiens)].", default='hsa') +options_group_tRNA.add_argument("--database", help="Path to store tRNA annotation files downloaded: GtRNAdb, etc") software_group_tRNA = subparser_tRNA.add_argument_group("Software") software_group_tRNA.add_argument("--software", dest='soft_name', nargs='*',