Skip to content

Commit

Permalink
add MINTmap analysis in XICRA
Browse files Browse the repository at this point in the history
  • Loading branch information
JFsanchezherrero committed Oct 18, 2021
1 parent ad21169 commit cb16406
Show file tree
Hide file tree
Showing 5 changed files with 364 additions and 190 deletions.
5 changes: 3 additions & 2 deletions XICRA_pip/XICRA/config/software/dependencies.csv
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ Rscript,--version,.*version\s([0-9\.]+).*,3.5.1,Rscript
java,,,na,java
python,--version,([0-9\.]+),3.6,python
perl,--version,v([0-9]\.[0-9]+\.[0-9]),5.18.1,perl
make,--version,GNU Make ([0-9]\.[0-9].*),4.0,make
make,--version,GNU Make ([0-9]\.[0-9].*),4,make
git,--version,git version ([0-9]\.[0-9]+\.[0-9]),2.1.0,git
fastqjoin,,,na,fastq-join
sRNAbench,-h,([0-9\.]+),1.5,sRNAbench.jar
miRTop,--version,([0-9\.]+).*,0.4.23,mirtop
optimir,,,na,optimir
miraligner,,,na,miraligner.jar
STAR,--version,([0-9\.]+).*,2.6.1,STAR
featureCounts,-v,v([0-9\.]+).*,1.5.1,featureCounts
featureCounts,-v,v([0-9\.]+).*,1.5.1,featureCounts
MINTmap,,,na,MINTmap
97 changes: 96 additions & 1 deletion XICRA_pip/XICRA/modules/tRNA.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
from XICRA.modules import help_XICRA
from XICRA.scripts import generate_DE
from XICRA.scripts import MINTMap_caller
from HCGB.functions import fasta_functions

##############################################
def run_tRNA(options):
Expand Down Expand Up @@ -116,6 +115,102 @@ def run_tRNA(options):
## species
print ("+ Species provided:", options.species)

## set database path if necessary
if not (options.database):
install_path = os.path.dirname(os.path.realpath(__file__))
options.database = os.path.join(install_path, "db_files")
else:
options.database = os.path.abspath(options.database)

## generate output folder, if necessary
if not options.project:
print ("\n+ Create output folder(s):")
functions.files_functions.create_folder(outdir)

## for samples
outdir_dict = functions.files_functions.outdir_project(outdir, options.project, pd_samples_retrieved, "tRNA", options.debug)

## optimize threads
name_list = set(pd_samples_retrieved["new_name"].tolist())
threads_job = functions.main_functions.optimize_threads(options.threads, len(name_list)) ## threads optimization
max_workers_int = int(options.threads/threads_job)

## to FIX: MINTmap requires to chdir to folder to create results
max_workers_int = 1

## debug message
if (Debug):
print (colored("**DEBUG: options.threads " + str(options.threads) + " **", 'yellow'))
print (colored("**DEBUG: max_workers " + str(max_workers_int) + " **", 'yellow'))
print (colored("**DEBUG: cpu_here " + str(threads_job) + " **", 'yellow'))

print ("+ Create a tRNA analysis for each sample retrieved...")

## call tRNA_analysis:
## Get user software selection: mintmap, ...

## dictionary results
global results_df
results_df = pd.DataFrame(columns=("name", "soft", "type", "filename"))

# Group dataframe by sample name
sample_frame = pd_samples_retrieved.groupby(["new_name"])

## send for each sample
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers_int) as executor:
commandsSent = { executor.submit(tRNA_analysis,
sorted(cluster["sample"].tolist()),
outdir_dict[name], name, threads_job,
options.soft_name, options.species,
options.database, Debug): name for name, cluster in sample_frame }

for cmd2 in concurrent.futures.as_completed(commandsSent):
details = commandsSent[cmd2]
try:
data = cmd2.result()
except Exception as exc:
print ('***ERROR:')
print (cmd2)
print('%r generated an exception: %s' % (details, exc))

print ("\n\n+ tRNA analysis is finished...")
print ("+ Let's summarize all results...")

## outdir
outdir_report = functions.files_functions.create_subfolder("report", outdir)
expression_folder = functions.files_functions.create_subfolder("tRNA", outdir_report)

## debugging messages
if options.debug:
print (results_df)

## merge all parse gtf files created
print ("+ Summarize tRNA analysis for all samples...")
generate_DE.generate_DE(results_df, options.debug, expression_folder, default_name="tRNA_expression-")

print ("\n*************** Finish *******************")
start_time_partial = functions.time_functions.timestamp(start_time_total)
print ("\n+ Exiting tRNA module.")
return()


#########################################
def tRNA_analysis(reads, folder, name, threads, soft_list, species, database, Debug):
    """Run the requested tRNA software on one sample and register its outputs.

    For every entry of ``soft_list`` equal to ``"mintmap"`` a ``mintmap``
    subfolder is created under ``folder`` and ``MINTMap_caller.MINTmap_caller``
    is invoked there. On success, two rows (types ``"amb"`` and ``"exc"``) are
    appended to the module-level ``results_df`` dataframe, pointing at the
    expected ``<name>_amb.tsv`` / ``<name>_exc.tsv`` files inside the
    ``mintmap_parse`` directory of that subfolder.

    Parameters:
        reads: read file(s) of the sample, forwarded to the MINTmap caller.
        folder: output folder of the sample.
        name: sample name; used for messages and result file names.
        threads: number of threads forwarded to the MINTmap caller.
        soft_list: iterable of software names; only "mintmap" is handled,
            any other entry is silently skipped.
        species: species identifier forwarded to the MINTmap caller.
        database: accepted for interface compatibility; not used in this
            function.
        Debug: debug flag forwarded to the MINTmap caller.

    Returns:
        An empty tuple as soon as a MINTmap run reports failure (remaining
        entries of ``soft_list`` are not processed); ``None`` otherwise.
    """
    for soft in soft_list:
        ## only MINTmap is supported here: skip anything else
        if soft != "mintmap":
            continue

        ## dedicated folder for this tool inside the sample folder
        mintmap_dir = functions.files_functions.create_subfolder('mintmap', folder)
        succeeded = MINTMap_caller.MINTmap_caller(mintmap_dir, reads, name, threads, species, Debug)

        ## stop right away for this sample if the caller reported a failure
        if not succeeded:
            print ('** Some error ocurred during MINTmap analysis for sample %s...' %name)
            return ()

        ## register both expected result tables in the shared dataframe
        ## (results_df is the module-level global declared by run_tRNA)
        for kind in ("amb", "exc"):
            parsed_file = os.path.join(mintmap_dir, 'mintmap_parse', name + '_' + kind + '.tsv')
            results_df.loc[len(results_df)] = name, soft, kind, parsed_file

Loading

0 comments on commit cb16406

Please sign in to comment.