diff --git a/XICRA_pip/XICRA/config/python/python_requirement_summary.txt b/XICRA_pip/XICRA/config/python/python_requirement_summary.txt index 122d672..f189a1d 100644 --- a/XICRA_pip/XICRA/config/python/python_requirement_summary.txt +++ b/XICRA_pip/XICRA/config/python/python_requirement_summary.txt @@ -10,4 +10,4 @@ HCGB future networkx numpy -gitdir +gitdir \ No newline at end of file diff --git a/XICRA_pip/XICRA/modules/citation.py b/XICRA_pip/XICRA/modules/citation.py index b5814b5..9fb13ab 100644 --- a/XICRA_pip/XICRA/modules/citation.py +++ b/XICRA_pip/XICRA/modules/citation.py @@ -1,12 +1,6 @@ #usr/bin/env python ## useful imports -import time -import io -import os - -## import functions -from HCGB import functions ################ def run(options): diff --git a/XICRA_pip/XICRA/modules/config.py b/XICRA_pip/XICRA/modules/config.py index 57ed3d8..99cb8e3 100644 --- a/XICRA_pip/XICRA/modules/config.py +++ b/XICRA_pip/XICRA/modules/config.py @@ -5,9 +5,6 @@ ########################################################## ## useful imports -import time -import io -import os import sys from termcolor import colored from distutils.version import LooseVersion @@ -21,7 +18,6 @@ ################ def run_config(options): ## init time - start_time_total = time.time() ## debugging messages global Debug diff --git a/XICRA_pip/XICRA/modules/database.py b/XICRA_pip/XICRA/modules/database.py index 1b1f314..5db9edc 100644 --- a/XICRA_pip/XICRA/modules/database.py +++ b/XICRA_pip/XICRA/modules/database.py @@ -15,24 +15,15 @@ import os import sys import re -import time from io import open -import shutil -import concurrent.futures -import pandas as pd from termcolor import colored ## import my modules -from HCGB import sampleParser -from HCGB import functions -import HCGB.functions.main_functions as HCGB_main -import HCGB.functions.files_functions as HCGB_files import HCGB.functions.aesthetics_functions as HCGB_aes +import HCGB.functions.files_functions as HCGB_files +import HCGB.functions.main_functions as HCGB_main +import HCGB.functions.time_functions as HCGB_time -from XICRA.config import set_config -from XICRA.modules import help_XICRA -from XICRA.scripts import generate_DE -from XICRA.scripts import MINTMap_caller ############################################## def run_db(options): @@ -49,10 +40,10 @@ def miRNA_db(options): Debug = options.debug options.miRNA_db = os.path.join(options.database, "miRNA_db") - functions.files_functions.create_folder(options.miRNA_db) + HCGB_files.create_folder(options.miRNA_db) ## First check if already provided files - list_files = functions.main_functions.get_fullpath_list(options.miRNA_db, options.debug) + list_files = HCGB_main.get_fullpath_list(options.miRNA_db, options.debug) ## Check for files from miRBase: miRBase_files = ["hsa.gff3", "hairpin.fa", "mature.fa", "miRNA.str"] @@ -74,8 +65,8 @@ def miRNA_db(options): download_data=True miRBase_files_dict[file_req] = "" else: - file_retrieved = functions.main_functions.retrieve_matching_files(options.miRNA_db, file_req, options.debug, starts=False) - if functions.files_functions.is_non_zero_file(file_retrieved[0]): + file_retrieved = HCGB_main.retrieve_matching_files(options.miRNA_db, file_req, options.debug, starts=False) + if HCGB_main.is_non_zero_file(file_retrieved[0]): miRBase_files_dict[file_req] = file_retrieved[0] else: miRBase_files_dict[file_req] = "" @@ -98,7 +89,7 @@ def miRNA_db(options): print (colored("\t** Download it form miRBase", 'green')) file_name = options.species + ".gff3" ftp_site1 = "https://www.mirbase.org/ftp/CURRENT/genomes/" - options.miRNA_gff = functions.main_functions.urllib_request(options.miRNA_db, ftp_site1, file_name, Debug) + options.miRNA_gff = HCGB_main.urllib_request(options.miRNA_db, ftp_site1, file_name, Debug) else: if (options.miRNA_gff): @@ -116,7 +107,7 @@ def miRNA_db(options): if Debug: print (colored("\t** ATTENTION: No hairpin fasta file provided", 'yellow')) print (colored("\t** Download it form miRBase", 'green')) - options.hairpinFasta = functions.main_functions.urllib_request(options.miRNA_db, ftp_site, "hairpin.fa.gz", Debug) + options.hairpinFasta = HCGB_main.urllib_request(options.miRNA_db, ftp_site, "hairpin.fa.gz", Debug) else: if (options.hairpinFasta): @@ -135,7 +126,7 @@ def miRNA_db(options): if Debug: print (colored("\t** ATTENTION: No mature miRNA fasta file provided", 'yellow')) print (colored("\t** Download it form miRBase", 'green')) - options.matureFasta = functions.main_functions.urllib_request(options.miRNA_db, ftp_site, "mature.fa.gz", Debug) + options.matureFasta = HCGB_main.urllib_request(options.miRNA_db, ftp_site, "mature.fa.gz", Debug) else: if (options.matureFasta): @@ -153,7 +144,7 @@ def miRNA_db(options): if Debug: print (colored("\t** ATTENTION: No miRBase_str file provided", 'yellow')) print (colored("\t** Download it form miRBase", 'green')) - options.miRBase_str = functions.main_functions.urllib_request(options.miRNA_db, ftp_site, "miRNA.str.gz", Debug) + options.miRBase_str = HCGB_main.urllib_request(options.miRNA_db, ftp_site, "miRNA.str.gz", Debug) ## extract else: @@ -178,7 +169,7 @@ def tRNA_db(database, debug): # We can try to check if it correctly generated... ## TODO print ("+ Create folder to store several databases: ", database) - functions.files_functions.create_folder(database) + HCGB_files.create_folder(database) tRNA_db = os.path.join(database, "tRNA_db") if os.path.isdir(tRNA_db): @@ -188,7 +179,7 @@ def tRNA_db(database, debug): ## If missing, download them, if all files ok, return! print ("+ Create folder to store tRNA information: ", tRNA_db) - functions.files_functions.create_folder(tRNA_db) + HCGB_files.create_folder(tRNA_db) @@ -196,7 +187,7 @@ def tRNA_db(database, debug): return (tRNA_db) ############################################## -def check_tRNA_db(path2test): +def check_tRNA_db(path2test, debug=False): ## Check for # LookupTable.tRFs.MINTmap_v2.txt # OtherAnnotations.MINTmap_v2.txt @@ -204,9 +195,9 @@ def check_tRNA_db(path2test): # tables.cfg ## First check if already provided files - list_files = functions.main_functions.get_fullpath_list(path2test, debug) + list_files = HCGB_main.get_fullpath_list(path2test, debug) - print() + print(list_files) return True ############################################## @@ -214,16 +205,16 @@ def piRNA_db(database, piRNA_db, debug): print ("+ Create folder to store several databases: ", database) - functions.files_functions.create_folder(database) + HCGB_files.create_folder(database) if not piRNA_db: piRNA_db = os.path.join(database, "piRNA_db") print ("+ Create folder to store piRNA information: ", piRNA_db, debug) - functions.files_functions.create_folder(piRNA_db) + HCGB_files.create_folder(piRNA_db) ## First check if already provided files - list_files = functions.main_functions.get_fullpath_list(piRNA_db, debug) + list_files = HCGB_main.get_fullpath_list(piRNA_db, debug) ## Check for files: @@ -354,7 +345,7 @@ def piRNA_info(database_folder, species_name="hsa", Debug=False): ## check if previously trimmed and succeeded if os.path.isfile(ncRNA_timestamp): - stamp = functions.time_functions.read_time_stamp(ncRNA_timestamp) + stamp = HCGB_time.read_time_stamp(ncRNA_timestamp) print (colored("\tA previous command generated results on: %s [%s]" %(stamp, 'merged ncRNA'), 'yellow')) else: diff --git a/XICRA_pip/XICRA/modules/help_XICRA.py b/XICRA_pip/XICRA/modules/help_XICRA.py index 98f64cf..f892084 100644 --- a/XICRA_pip/XICRA/modules/help_XICRA.py +++ b/XICRA_pip/XICRA/modules/help_XICRA.py @@ -7,7 +7,8 @@ Help messages for different scripts, modules """ from termcolor import colored -from HCGB import functions +import HCGB.functions.aesthetics_functions as HCGB_aes + ############### def help_fastq_format(): @@ -15,7 +16,7 @@ def help_fastq_format(): Explanation of fastq format details. """ - functions.aesthetics_functions.boxymcboxface("Name format for samples") + HCGB_aes.boxymcboxface("Name format for samples") print ("Format for fastq files can be:\n") @@ -29,23 +30,23 @@ def help_fastq_format(): print ("\nThe input file names should be structured considering the following aspects:") print ("\n") - functions.aesthetics_functions.print_sepLine("*",20,"red") + HCGB_aes.print_sepLine("*",20,"red") print ("Length limitation") - functions.aesthetics_functions.print_sepLine("*",20,"red") + HCGB_aes.print_sepLine("*",20,"red") print ("There is a limitation for the sample ID ('name') of 25 characters.") print (colored("** XICRA provides an option to rename samples if necessary: module prep option --rename **", 'yellow')) print ("\n") - functions.aesthetics_functions.print_sepLine("*",15,"red") + HCGB_aes.print_sepLine("*",15,"red") print ("Extensions:") - functions.aesthetics_functions.print_sepLine("*",15,"red") + HCGB_aes.print_sepLine("*",15,"red") print("The suported extensions are:\n") print ("- name_L00x_R2.fastq\tname_L00x_R2.fq\n- name_L00x_R2.fastq.gz\tname_L00x_R2.fq.gz") print ("\n") - functions.aesthetics_functions.print_sepLine("*",20,"red") + HCGB_aes.print_sepLine("*",20,"red") print ("Single-end files") - functions.aesthetics_functions.print_sepLine("*",20,"red") + HCGB_aes.print_sepLine("*",20,"red") print("It is possible to provide NGS single-end files although some steps of the process could not be accomplished") print("using single-end files.\n") print ("- name.fastq.gz") @@ -54,27 +55,27 @@ def help_fastq_format(): print (colored('** Use option --single-end in the different XICRA modules. **', 'yellow')) print ("\n") - functions.aesthetics_functions.print_sepLine("*",20,"red") + HCGB_aes.print_sepLine("*",20,"red") print ("Paired-end files") - functions.aesthetics_functions.print_sepLine("*",20,"red") + HCGB_aes.print_sepLine("*",20,"red") print ("Paired-end files are full supported. The format for these files are:\n") print ("- name_1.fastq.gz, name_2.fastq.gz") print ("- name_R1.fastq.gz, name_R2.fastq.gz") print (colored('** No parameter is needed in to specify this kind of files. **', 'yellow')) print ("\n") - functions.aesthetics_functions.print_sepLine("*",20,"red") + HCGB_aes.print_sepLine("*",20,"red") print ("Lane information") - functions.aesthetics_functions.print_sepLine("*",20,"red") + HCGB_aes.print_sepLine("*",20,"red") print("Files might contain lane information (L00x and/or 00x). XICRA") print("supports these names as long as follow these examples:") print("- name_L00x_R1.fastq.gz, name_L00x_R2.fastq.gz") print("- name_L00x_1.fastq.gz, name_L00x_2.fastq.gz") print ("\n") - functions.aesthetics_functions.print_sepLine("*",20,"red") + HCGB_aes.print_sepLine("*",20,"red") print ("Name extensions") - functions.aesthetics_functions.print_sepLine("*",20,"red") + HCGB_aes.print_sepLine("*",20,"red") print("It can also be the case that the reads of a sample are divided in different files.") print("In those cases, the files should contain a name final extension: ") print("- name1_L001_R1_001.fastq.gz, name1_L001_R2_001.fastq.gz") @@ -83,16 +84,16 @@ def help_fastq_format(): print("- name1_L002_R1_002.fastq.gz, name1_L002_R2_002.fastq.gz") print ("\n") - functions.aesthetics_functions.print_sepLine("*",20,"red") + HCGB_aes.print_sepLine("*",20,"red") print ("Extra information") - functions.aesthetics_functions.print_sepLine("*",20,"red") + HCGB_aes.print_sepLine("*",20,"red") print("In some cases, files might contain other extra information. In the following example,") print("XYZ is the extra information:") print("- name1_L001_XYZ_R1_001.fastq.gz, name1_L001_XYZ_R2_001.fastq.gz") print("- name1_L001_XYZ_R1_002.fastq.gz, name1_L001_XYZ_R2_002.fastq.gz") print ("\n") - functions.aesthetics_functions.boxymcboxface("Sample identification") + HCGB_aes.boxymcboxface("Sample identification") print ("XICRA will store the names of all the input files. After that, it will identify the samples.") print ("It can be the case that more than one file belong to the same sample. In order to pass this information") @@ -100,9 +101,9 @@ def help_fastq_format(): print ("input file names:") print ("\n") - functions.aesthetics_functions.print_sepLine("*",55,"red") + HCGB_aes.print_sepLine("*",55,"red") print ("Option --include_lane:") - functions.aesthetics_functions.print_sepLine("*",55,"red") + HCGB_aes.print_sepLine("*",55,"red") print ("If you want to include lane tags (L00X, 00X) into each each sample name (differentiate samples considering the lane):") print (colored('** Use option --include_lane within each module and the lane tag will also be used to identify samples. **\n', 'yellow')) print("However, if you want to consider as a single sample the different lanes, you need to merge the") @@ -127,9 +128,9 @@ def help_fastq_format(): print("\t identify one sample, merging all the corresponding files:") print("\t - Sample 1: sample1_R1, sample1_R2\n") - functions.aesthetics_functions.print_sepLine("*",55,"red") + HCGB_aes.print_sepLine("*",55,"red") print ("Option --include_all:") - functions.aesthetics_functions.print_sepLine("*",55,"red") + HCGB_aes.print_sepLine("*",55,"red") print("In some cases, files might contain other extra information and it is necessary to use all") print("file name to identify samples:") print (colored('** If that is the case use --include_all in al modules. **\n', 'yellow')) diff --git a/XICRA_pip/XICRA/modules/map.py b/XICRA_pip/XICRA/modules/map.py index 028c076..3f2a163 100644 --- a/XICRA_pip/XICRA/modules/map.py +++ b/XICRA_pip/XICRA/modules/map.py @@ -16,7 +16,8 @@ from XICRA.scripts import STAR_caller, multiQC_report from HCGB import sampleParser -from HCGB.functions import time_functions, files_functions +import HCGB.functions.time_functions as HCGB_time +import HCGB.functions.files_functions as HCGB_files ## set to use as a module ## allow multiple software to map @@ -65,7 +66,7 @@ def mapReads_module_STAR(options, pd_samples_retrieved, outdir_dict, Debug, ## options STAR_exe = set_config.get_exe("STAR", Debug=Debug) cwd_folder = os.path.abspath("./") - folder=files_functions.create_subfolder('STAR_files', cwd_folder) + folder=HCGB_files.create_subfolder('STAR_files', cwd_folder) ## For many samples it will have to load genome index in memory every time. ## For a unique sample it will not matter. Take care genome might stay in memory. @@ -95,7 +96,7 @@ def mapReads_module_STAR(options, pd_samples_retrieved, outdir_dict, Debug, #STAR_caller.load_Genome(folder, STAR_exe, options.genomeDir, options.threads) ## functions.time_functions.timestamp - start_time_partial = time_functions.timestamp(start_time_partial) + start_time_partial = HCGB_time.timestamp(start_time_partial) print ("+ Mapping sequencing reads for each sample retrieved...") @@ -118,13 +119,13 @@ def mapReads_module_STAR(options, pd_samples_retrieved, outdir_dict, Debug, print ("\n\n+ Mapping reads has finished...") ## functions.time_functions.timestamp - start_time_partial = time_functions.timestamp(start_time_partial) + start_time_partial = HCGB_time.timestamp(start_time_partial) ## remove reference genome from memory #STAR_caller.remove_Genome(STAR_exe, options.genomeDir, folder, options.threads) ## functions.time_functions.timestamp - start_time_partial = time_functions.timestamp(start_time_partial) + start_time_partial = HCGB_time.timestamp(start_time_partial) ## retrieve mapping files if options.detached: @@ -146,7 +147,7 @@ def mapReads_module_STAR(options, pd_samples_retrieved, outdir_dict, Debug, print ("+ No report generation...") else: print ("\n+ Generating a report using MultiQC module.") - outdir_report = files_functions.create_subfolder("report", outdir) + outdir_report = HCGB_files.create_subfolder("report", outdir) ## get subdirs generated and call multiQC report module givenList = [] @@ -162,7 +163,7 @@ def mapReads_module_STAR(options, pd_samples_retrieved, outdir_dict, Debug, print (my_outdir_list) print ("\n") - map_report = files_functions.create_subfolder("STAR", outdir_report) + map_report = HCGB_files.create_subfolder("STAR", outdir_report) multiQC_report.multiQC_module_call(my_outdir_list, "STAR", map_report,"-dd 2") print ('\n+ A summary HTML report of each sample is generated in folder: %s' %map_report) @@ -202,7 +203,7 @@ def mapReads_caller_STAR(files, folder, name, threads, STAR_exe, genomeDir, limi ## check if previously joined and succeeded filename_stamp = folder + '/.success' if os.path.isfile(filename_stamp): - stamp = time_functions.read_time_stamp(filename_stamp) + stamp = HCGB_time.read_time_stamp(filename_stamp) print (colored("\tA previous command generated results on: %s [%s -- %s]" %(stamp, name, 'STAR'), 'yellow')) else: ## @@ -221,7 +222,7 @@ def mapReads_caller_STAR(files, folder, name, threads, STAR_exe, genomeDir, limi code_returned = STAR_caller.mapReads("LoadAndKeep", files, folder, name, STAR_exe, genomeDir, limitRAM_option, threads, Debug, multimapping) if (code_returned): - time_functions.print_time_stamp(filename_stamp) + HCGB_time.print_time_stamp(filename_stamp) else: print ("+ Mapping sample %s failed..." %name) diff --git a/XICRA_pip/XICRA/modules/tRNA.py b/XICRA_pip/XICRA/modules/tRNA.py index 0335a1e..dde4033 100644 --- a/XICRA_pip/XICRA/modules/tRNA.py +++ b/XICRA_pip/XICRA/modules/tRNA.py @@ -18,7 +18,12 @@ ## import my modules from HCGB import sampleParser -from HCGB import functions +import HCGB.functions.aesthetics_functions as HCGB_aes +import HCGB.functions.time_functions as HCGB_time +import HCGB.functions.files_functions as HCGB_files +import HCGB.functions.info_functions as HCGB_info +import HCGB.functions.main_functions as HCGB_main + from XICRA.modules import help_XICRA from XICRA.scripts import generate_DE from XICRA.scripts import MINTMap_caller @@ -59,10 +64,10 @@ def run_tRNA(options): else: options.pair = True - functions.aesthetics_functions.pipeline_header('XICRA') - functions.aesthetics_functions.boxymcboxface("tRNA analysis") + HCGB_aes.pipeline_header('XICRA') + HCGB_aes.boxymcboxface("tRNA analysis") print ("--------- Starting Process ---------") - functions.time_functions.print_time() + HCGB_time.print_time() ## absolute path for in & out input_dir = os.path.abspath(options.input) @@ -120,7 +125,7 @@ def run_tRNA(options): options.database = os.path.abspath(options.database) print ("+ Create folder to store results: ", options.database) - functions.files_functions.create_folder(options.database) + HCGB_files.create_folder(options.database) ## TODO ## use -m option with database provided @@ -134,14 +139,14 @@ def run_tRNA(options): ## generate output folder, if necessary if not options.project: print ("\n+ Create output folder(s):") - functions.files_functions.create_folder(outdir) + HCGB_files.create_folder(outdir) ## for samples - outdir_dict = functions.files_functions.outdir_project(outdir, options.project, pd_samples_retrieved, "tRNA", options.debug) + outdir_dict = HCGB_files.outdir_project(outdir, options.project, pd_samples_retrieved, "tRNA", options.debug) ## optimize threads name_list = set(pd_samples_retrieved["new_name"].tolist()) - threads_job = functions.main_functions.optimize_threads(options.threads, len(name_list)) ## threads optimization + threads_job = HCGB_main.optimize_threads(options.threads, len(name_list)) ## threads optimization max_workers_int = int(options.threads/threads_job) ## to FIX: MINTmap requires to chdir to folder to create results @@ -187,8 +192,8 @@ def run_tRNA(options): print ("+ Let's summarize all results...") ## outdir - outdir_report = functions.files_functions.create_subfolder("report", outdir) - expression_folder = functions.files_functions.create_subfolder("tRNA", outdir_report) + outdir_report = HCGB_files.create_subfolder("report", outdir) + expression_folder = HCGB_files.create_subfolder("tRNA", outdir_report) ## debugging messages if options.debug: @@ -213,7 +218,7 @@ def run_tRNA(options): generate_DE.generate_DE(results_df, options.debug, expression_folder, type_analysis="tRNA") print ("\n*************** Finish *******************") - functions.time_functions.timestamp(start_time_total) + HCGB_time.timestamp(start_time_total) print ("\n+ Exiting tRNA module.") return() @@ -225,7 +230,7 @@ def tRNA_analysis(reads, folder, name, threads, soft_list, species, database, De for soft in soft_list: if (soft == "mintmap"): ## create mintmap - MINTmap_folder = functions.files_functions.create_subfolder('mintmap', folder) + MINTmap_folder = HCGB_files.create_subfolder('mintmap', folder) code_success = MINTMap_caller.MINTmap_caller(MINTmap_folder, reads, name, threads, species, database, Debug) if not code_success: