Skip to content

Commit

Permalink
discarding unnecessary lines
Browse files Browse the repository at this point in the history
  • Loading branch information
Jose Sanchez committed Oct 16, 2024
1 parent 1153971 commit dedd654
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 82 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ HCGB
future
networkx
numpy
gitdir
gitdir
6 changes: 0 additions & 6 deletions XICRA_pip/XICRA/modules/citation.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
#usr/bin/env python

## useful imports
import time
import io
import os

## import functions
from HCGB import functions

################
def run(options):
Expand Down
4 changes: 0 additions & 4 deletions XICRA_pip/XICRA/modules/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
##########################################################

## useful imports
import time
import io
import os
import sys
from termcolor import colored
from distutils.version import LooseVersion
Expand All @@ -21,7 +18,6 @@
################
def run_config(options):
## init time
start_time_total = time.time()

## debugging messages
global Debug
Expand Down
49 changes: 20 additions & 29 deletions XICRA_pip/XICRA/modules/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,15 @@
import os
import sys
import re
import time
from io import open
import shutil
import concurrent.futures
import pandas as pd
from termcolor import colored

## import my modules
from HCGB import sampleParser
from HCGB import functions
import HCGB.functions.main_functions as HCGB_main
import HCGB.functions.files_functions as HCGB_files
import HCGB.functions.aesthetics_functions as HCGB_aes
import HCGB.functions.files_functions as HCGB_files
import HCGB.functions.main_functions as HCGB_main
import HCGB.functions.time_functions as HCGB_time

from XICRA.config import set_config
from XICRA.modules import help_XICRA
from XICRA.scripts import generate_DE
from XICRA.scripts import MINTMap_caller

##############################################
def run_db(options):
Expand All @@ -49,10 +40,10 @@ def miRNA_db(options):
Debug = options.debug

options.miRNA_db = os.path.join(options.database, "miRNA_db")
functions.files_functions.create_folder(options.miRNA_db)
HCGB_files.create_folder(options.miRNA_db)

## First check if already provided files
list_files = functions.main_functions.get_fullpath_list(options.miRNA_db, options.debug)
list_files = HCGB_main.get_fullpath_list(options.miRNA_db, options.debug)

## Check for files from miRBase:
miRBase_files = ["hsa.gff3", "hairpin.fa", "mature.fa", "miRNA.str"]
Expand All @@ -74,8 +65,8 @@ def miRNA_db(options):
download_data=True
miRBase_files_dict[file_req] = ""
else:
file_retrieved = functions.main_functions.retrieve_matching_files(options.miRNA_db, file_req, options.debug, starts=False)
if functions.files_functions.is_non_zero_file(file_retrieved[0]):
file_retrieved = HCGB_main.retrieve_matching_files(options.miRNA_db, file_req, options.debug, starts=False)
if HCGB_main.is_non_zero_file(file_retrieved[0]):
miRBase_files_dict[file_req] = file_retrieved[0]
else:
miRBase_files_dict[file_req] = ""
Expand All @@ -98,7 +89,7 @@ def miRNA_db(options):
print (colored("\t** Download it form miRBase", 'green'))
file_name = options.species + ".gff3"
ftp_site1 = "https://www.mirbase.org/ftp/CURRENT/genomes/"
options.miRNA_gff = functions.main_functions.urllib_request(options.miRNA_db, ftp_site1, file_name, Debug)
options.miRNA_gff = HCGB_main.urllib_request(options.miRNA_db, ftp_site1, file_name, Debug)

else:
if (options.miRNA_gff):
Expand All @@ -116,7 +107,7 @@ def miRNA_db(options):
if Debug:
print (colored("\t** ATTENTION: No hairpin fasta file provided", 'yellow'))
print (colored("\t** Download it form miRBase", 'green'))
options.hairpinFasta = functions.main_functions.urllib_request(options.miRNA_db, ftp_site, "hairpin.fa.gz", Debug)
options.hairpinFasta = HCGB_main.urllib_request(options.miRNA_db, ftp_site, "hairpin.fa.gz", Debug)

else:
if (options.hairpinFasta):
Expand All @@ -135,7 +126,7 @@ def miRNA_db(options):
if Debug:
print (colored("\t** ATTENTION: No mature miRNA fasta file provided", 'yellow'))
print (colored("\t** Download it form miRBase", 'green'))
options.matureFasta = functions.main_functions.urllib_request(options.miRNA_db, ftp_site, "mature.fa.gz", Debug)
options.matureFasta = HCGB_main.urllib_request(options.miRNA_db, ftp_site, "mature.fa.gz", Debug)

else:
if (options.matureFasta):
Expand All @@ -153,7 +144,7 @@ def miRNA_db(options):
if Debug:
print (colored("\t** ATTENTION: No miRBase_str file provided", 'yellow'))
print (colored("\t** Download it form miRBase", 'green'))
options.miRBase_str = functions.main_functions.urllib_request(options.miRNA_db, ftp_site, "miRNA.str.gz", Debug)
options.miRBase_str = HCGB_main.urllib_request(options.miRNA_db, ftp_site, "miRNA.str.gz", Debug)
## extract

else:
Expand All @@ -178,7 +169,7 @@ def tRNA_db(database, debug):
# We can try to check if it correctly generated...
## TODO
print ("+ Create folder to store several databases: ", database)
functions.files_functions.create_folder(database)
HCGB_files.create_folder(database)

tRNA_db = os.path.join(database, "tRNA_db")
if os.path.isdir(tRNA_db):
Expand All @@ -188,42 +179,42 @@ def tRNA_db(database, debug):

## If missing, download them, if all files ok, return!
print ("+ Create folder to store tRNA information: ", tRNA_db)
functions.files_functions.create_folder(tRNA_db)
HCGB_files.create_folder(tRNA_db)



## folder and database generated
return (tRNA_db)

##############################################
def check_tRNA_db(path2test):
def check_tRNA_db(path2test, debug=False):
## Check for
# LookupTable.tRFs.MINTmap_v2.txt
# OtherAnnotations.MINTmap_v2.txt
# tRNAspace.Spliced.Sequences.With49ntFlank.MINTmap_v2.fa
# tables.cfg

## First check if already provided files
list_files = functions.main_functions.get_fullpath_list(path2test, debug)
list_files = HCGB_main.get_fullpath_list(path2test, debug)

print()
print(list_files)
return True

##############################################
def piRNA_db(database, piRNA_db, debug):


print ("+ Create folder to store several databases: ", database)
functions.files_functions.create_folder(database)
HCGB_files.create_folder(database)

if not piRNA_db:
piRNA_db = os.path.join(database, "piRNA_db")

print ("+ Create folder to store piRNA information: ", piRNA_db, debug)
functions.files_functions.create_folder(piRNA_db)
HCGB_files.create_folder(piRNA_db)

## First check if already provided files
list_files = functions.main_functions.get_fullpath_list(piRNA_db, debug)
list_files = HCGB_main.get_fullpath_list(piRNA_db, debug)

## Check for files:

Expand Down Expand Up @@ -354,7 +345,7 @@ def piRNA_info(database_folder, species_name="hsa", Debug=False):

## check if previously trimmed and succeeded
if os.path.isfile(ncRNA_timestamp):
stamp = functions.time_functions.read_time_stamp(ncRNA_timestamp)
stamp = HCGB_time.read_time_stamp(ncRNA_timestamp)
print (colored("\tA previous command generated results on: %s [%s]" %(stamp, 'merged ncRNA'), 'yellow'))

else:
Expand Down
43 changes: 22 additions & 21 deletions XICRA_pip/XICRA/modules/help_XICRA.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@
Help messages for different scripts, modules
"""
from termcolor import colored
from HCGB import functions
import HCGB.functions.aesthetics_functions as HCGB_aes


###############
def help_fastq_format():
"""
Explanation of fastq format details.
"""

functions.aesthetics_functions.boxymcboxface("Name format for samples")
HCGB_aes.boxymcboxface("Name format for samples")


print ("Format for fastq files can be:\n")
Expand All @@ -29,23 +30,23 @@ def help_fastq_format():
print ("\nThe input file names should be structured considering the following aspects:")
print ("\n")

functions.aesthetics_functions.print_sepLine("*",20,"red")
HCGB_aes.print_sepLine("*",20,"red")
print ("Length limitation")
functions.aesthetics_functions.print_sepLine("*",20,"red")
HCGB_aes.print_sepLine("*",20,"red")
print ("There is a limitation for the sample ID ('name') of 25 characters.")
print (colored("** XICRA provides an option to rename samples if necessary: module prep option --rename **", 'yellow'))
print ("\n")

functions.aesthetics_functions.print_sepLine("*",15,"red")
HCGB_aes.print_sepLine("*",15,"red")
print ("Extensions:")
functions.aesthetics_functions.print_sepLine("*",15,"red")
HCGB_aes.print_sepLine("*",15,"red")
print("The suported extensions are:\n")
print ("- name_L00x_R2.fastq\tname_L00x_R2.fq\n- name_L00x_R2.fastq.gz\tname_L00x_R2.fq.gz")
print ("\n")

functions.aesthetics_functions.print_sepLine("*",20,"red")
HCGB_aes.print_sepLine("*",20,"red")
print ("Single-end files")
functions.aesthetics_functions.print_sepLine("*",20,"red")
HCGB_aes.print_sepLine("*",20,"red")
print("It is possible to provide NGS single-end files although some steps of the process could not be accomplished")
print("using single-end files.\n")
print ("- name.fastq.gz")
Expand All @@ -54,27 +55,27 @@ def help_fastq_format():
print (colored('** Use option --single-end in the different XICRA modules. **', 'yellow'))
print ("\n")

functions.aesthetics_functions.print_sepLine("*",20,"red")
HCGB_aes.print_sepLine("*",20,"red")
print ("Paired-end files")
functions.aesthetics_functions.print_sepLine("*",20,"red")
HCGB_aes.print_sepLine("*",20,"red")
print ("Paired-end files are full supported. The format for these files are:\n")
print ("- name_1.fastq.gz, name_2.fastq.gz")
print ("- name_R1.fastq.gz, name_R2.fastq.gz")
print (colored('** No parameter is needed in to specify this kind of files. **', 'yellow'))
print ("\n")

functions.aesthetics_functions.print_sepLine("*",20,"red")
HCGB_aes.print_sepLine("*",20,"red")
print ("Lane information")
functions.aesthetics_functions.print_sepLine("*",20,"red")
HCGB_aes.print_sepLine("*",20,"red")
print("Files might contain lane information (L00x and/or 00x). XICRA")
print("supports these names as long as follow these examples:")
print("- name_L00x_R1.fastq.gz, name_L00x_R2.fastq.gz")
print("- name_L00x_1.fastq.gz, name_L00x_2.fastq.gz")
print ("\n")

functions.aesthetics_functions.print_sepLine("*",20,"red")
HCGB_aes.print_sepLine("*",20,"red")
print ("Name extensions")
functions.aesthetics_functions.print_sepLine("*",20,"red")
HCGB_aes.print_sepLine("*",20,"red")
print("It can also be the case that the reads of a sample are divided in different files.")
print("In those cases, the files should contain a name final extension: ")
print("- name1_L001_R1_001.fastq.gz, name1_L001_R2_001.fastq.gz")
Expand All @@ -83,26 +84,26 @@ def help_fastq_format():
print("- name1_L002_R1_002.fastq.gz, name1_L002_R2_002.fastq.gz")
print ("\n")

functions.aesthetics_functions.print_sepLine("*",20,"red")
HCGB_aes.print_sepLine("*",20,"red")
print ("Extra information")
functions.aesthetics_functions.print_sepLine("*",20,"red")
HCGB_aes.print_sepLine("*",20,"red")
print("In some cases, files might contain other extra information. In the following example,")
print("XYZ is the extra information:")
print("- name1_L001_XYZ_R1_001.fastq.gz, name1_L001_XYZ_R2_001.fastq.gz")
print("- name1_L001_XYZ_R1_002.fastq.gz, name1_L001_XYZ_R2_002.fastq.gz")
print ("\n")

functions.aesthetics_functions.boxymcboxface("Sample identification")
HCGB_aes.boxymcboxface("Sample identification")

print ("XICRA will store the names of all the input files. After that, it will identify the samples.")
print ("It can be the case that more than one file belong to the same sample. In order to pass this information")
print ("to XICRA, a combination of the following parameters may be needed depending on the characteristics of the")
print ("input file names:")
print ("\n")

functions.aesthetics_functions.print_sepLine("*",55,"red")
HCGB_aes.print_sepLine("*",55,"red")
print ("Option --include_lane:")
functions.aesthetics_functions.print_sepLine("*",55,"red")
HCGB_aes.print_sepLine("*",55,"red")
print ("If you want to include lane tags (L00X, 00X) into each each sample name (differentiate samples considering the lane):")
print (colored('** Use option --include_lane within each module and the lane tag will also be used to identify samples. **\n', 'yellow'))
print("However, if you want to consider as a single sample the different lanes, you need to merge the")
Expand All @@ -127,9 +128,9 @@ def help_fastq_format():
print("\t identify one sample, merging all the corresponding files:")
print("\t - Sample 1: sample1_R1, sample1_R2\n")

functions.aesthetics_functions.print_sepLine("*",55,"red")
HCGB_aes.print_sepLine("*",55,"red")
print ("Option --include_all:")
functions.aesthetics_functions.print_sepLine("*",55,"red")
HCGB_aes.print_sepLine("*",55,"red")
print("In some cases, files might contain other extra information and it is necessary to use all")
print("file name to identify samples:")
print (colored('** If that is the case use --include_all in al modules. **\n', 'yellow'))
Expand Down
Loading

0 comments on commit dedd654

Please sign in to comment.