From 591bc5f740eb4912ee1675433cb9e059b9b5b467 Mon Sep 17 00:00:00 2001 From: "Jose F. Sanchez" Date: Tue, 20 Sep 2016 13:16:55 +0200 Subject: [PATCH] Fixed a bug when using -NPG option in the Marker phase --- files/installerRock-builder/DOMINO_macOSX.xml | 2 +- src/perl/DM_Assembly_v1.0.0.pl | 29 ++++++++++++------- src/perl/DM_Clean_v1.0.0.pl | 24 +++++++++------ src/perl/DM_MarkerScan_v1.0.0.pl | 17 +++++++---- src/perl/DOMINO.pm | 7 +++-- 5 files changed, 50 insertions(+), 29 deletions(-) diff --git a/files/installerRock-builder/DOMINO_macOSX.xml b/files/installerRock-builder/DOMINO_macOSX.xml index b0dbd3a..6fb5b21 100644 --- a/files/installerRock-builder/DOMINO_macOSX.xml +++ b/files/installerRock-builder/DOMINO_macOSX.xml @@ -1,7 +1,7 @@ DOMINO DOMINO - 1.0 + 1.0.0 /Users/jfsh/GIT_REPOS/domino_github/README.md /Users/jfsh/GIT_REPOS/domino_github/LICENSE /Users/jfsh/GIT_REPOS/developers/DOMINO_installer_folder/DOMINO_icon-small.png diff --git a/src/perl/DM_Assembly_v1.0.0.pl b/src/perl/DM_Assembly_v1.0.0.pl index 6ea976f..bde7576 100644 --- a/src/perl/DM_Assembly_v1.0.0.pl +++ b/src/perl/DM_Assembly_v1.0.0.pl @@ -610,7 +610,7 @@ =head1 CITATION } else { for (my $i = 0; $i < scalar @file_abs_path_user; $i++) { &check_file($file_abs_path[$i]); - system("ln -s $file_abs_path[$i]"); + &debugger_print("ln -s $file_abs_path[$i]"); system("ln -s $file_abs_path[$i]"); }}} } else { ## Go to DOMINO_clean_data and get the files @@ -634,7 +634,7 @@ =head1 CITATION if ($$files_ref[$i] =~ /.*MIDtag\.fastq/) { next; } if ($$files_ref[$i] =~ /.*id\-.*fastq/) { my $file_path = $clean_folder."/".$$files_ref[$i]; - &check_file($file_path); system("ln -s $file_path"); + &check_file($file_path); &debugger_print("ln -s $file_path"); system("ln -s $file_path"); }} } print "\n"; &time_log(); @@ -685,6 +685,7 @@ =head1 CITATION ## we would split in a reasonable amount of processes to proceed in parallel with the assembly # Get memory + &debugger_print("cat /proc/meminfo > memory_server.txt"); system("cat /proc/meminfo > memory_server.txt"); my $memory_server_file = $assembly_directory."/memory_server.txt"; my $total_available; @@ -767,10 +768,11 @@ =head1 CITATION $spades_path .= "-s ".$domino_files{$keys}{'original'}[0]; } $spades_path .= " -t $noOfProcesses_SPAdes"; - print "Sending command:\n".$spades_path."\n"; unless ($debugger) { $spades_path .= " > /dev/null"; ## discarding SPAdes output } + &debugger_print("Sending command: $spades_path\n"); + print "Sending SPAdes command...\n"; my $system_call = system($spades_path); if ($system_call != 0) { &printError("Something happened when calling SPAdes for assembly reads..."); DOMINO::dieNicely(); @@ -848,14 +850,15 @@ =head1 CITATION my $abs_path_manifest_file = $assembly_directory."/".$MIRA_manifest_file; push (@{ $domino_files{$keys}{'manifest'}}, $abs_path_manifest_file); - - print $abs_path_manifest_file."\n- Calling MIRA now for $keys assembly...\n- It might take a while...\n"; + &debugger_print("MIRA manifest: $abs_path_manifest_file\n"); + print "- Calling MIRA now for $keys assembly...\n- It might take a while...\n"; my $mira_exe = $MIRA_exec." -t $noOfProcesses ".$abs_path_manifest_file; if ($debugger) { $mira_exe .= " 2> $error_log"; ## show on screen or maybe print to a file } else { $mira_exe .= " > /dev/null 2> $error_log"; ## discarding MIRA output } - print $mira_exe."\n\n"; my $system_call = system($mira_exe); + print "- Sending MIRA command for $keys...\n"; &debugger_print("MIRA command: $mira_exe\n\n"); + my $system_call = system($mira_exe); if ($system_call != 0) { &printError("Something happened when calling MIRA for assembly reads..."); DOMINO::dieNicely(); } print "\n"; &time_log(); push(@{ $domino_files{$keys}{'contigs_fasta'}}, $domino_files{$keys}{'DIR'}[0]."/".$keys."_d_results/".$keys."_out.unpadded.fasta"); @@ -932,9 +935,13 @@ =head1 CITATION } ## Use BLAST for clustering sequences - print "\n\n+ Generate a BLAST database for $contigs2cluster...\n"; my $db_generated = DOMINO::makeblastdb($contigs2cluster, $BLAST, $error_log); + print "\n\n+ Generate a BLAST database for $contigs2cluster...\n"; + my ($db_generated, $db_generated_message) = DOMINO::makeblastdb($contigs2cluster, $BLAST, $error_log); + &debugger_print($db_generated_message); my $blast_search = "blast_search.txt"; - print "+ BLAST search now...\n"; my $blastn = DOMINO::blastn($contigs2cluster, $db_generated, $blast_search, $BLAST); + print "+ BLAST search now...\n"; + my ($blastn, $blastn_message) = DOMINO::blastn($contigs2cluster, $db_generated, $blast_search, $BLAST); + &debugger_print($blastn); if ($blastn != 0) { &printError("BLASTN failed...\n"); exit(); } my $contig_length_Ref = DOMINO::readFASTA_hash($contigs2cluster); my $perc_aln_desired = 0.85; my $iden_desired = 85; @@ -1021,8 +1028,8 @@ =head1 CITATION ## Generate directories my $MID_directory = $CAP3_directory."/".$taxa; unless (-d $MID_directory) { mkdir $MID_directory, 0755; } - system("ln -s $contigsMIRA_Fasta_file $MID_directory/"); - system("ln -s $contigsMIRA_qual_file $MID_directory/"); + &debugger_print("ln -s $contigsMIRA_Fasta_file $MID_directory/"); system("ln -s $contigsMIRA_Fasta_file $MID_directory/"); + &debugger_print("ln -s $contigsMIRA_qual_file $MID_directory/"); system("ln -s $contigsMIRA_qual_file $MID_directory/"); push(@{$domino_files{$taxa}{'CAP3_dir'}}, $MID_directory); chdir $MID_directory; @@ -1035,7 +1042,7 @@ =head1 CITATION my @tmp_array = split("/", $contigsMIRA_Fasta_file); my $fasta_name_contigsMIRA = $tmp_array[-1]; my $command_CAP3 = $CAP3_exec." ".$fasta_name_contigsMIRA." -o ".$overlap_CAP3." -p ".$similar_CAP3." 2> $error_log"; - print $command_CAP3."\n"; my $system_CAP3 = system($command_CAP3); + &debugger_print("CAP3 command: $command_CAP3\n"); my $system_CAP3 = system($command_CAP3); if ($system_CAP3 != 0) { &printError("Some error happened when calling CAP3 for assembly reads..."); DOMINO::dieNicely(); } ########################################### diff --git a/src/perl/DM_Clean_v1.0.0.pl b/src/perl/DM_Clean_v1.0.0.pl index 4997d60..5cf9d3c 100644 --- a/src/perl/DM_Clean_v1.0.0.pl +++ b/src/perl/DM_Clean_v1.0.0.pl @@ -1059,7 +1059,7 @@ =head1 CITATION # Setting the minimum cutoff for PHRED quality, default 20 ## Sending command - print "NGS QC Toolkit command: ".$NGS_QC_call."\n"; + &debugger_print("NGS QC Toolkit command: $NGS_QC_call\n"); my $NGSQC_result = system($NGS_QC_call); if ($NGSQC_result != 0) { &printError("Cleaning step failed when calling NGSQC for file $dust_clean_reads..."); exit(); } print "\n"; &time_log(); @@ -1102,18 +1102,22 @@ =head1 CITATION &extracting_fastq($domino_files_threads_QC{$keys}{"NGS_QC"}[$i], $name[0], "YES"); push (@{ $domino_files_threads_QC{$keys}{"FASTA4BLAST"} }, $name[0].".filtered.fasta"); push (@{ $domino_files_threads_QC{$keys}{"QUAL4BLAST"} }, $name[0].".filtered.qual"); - push (@reads_db, DOMINO::makeblastdb($domino_files_threads_QC{$keys}{"FASTA4BLAST"}[$i], $BLAST, $error_log)); + my ($blast_DB, $blast_DB_message) = DOMINO::makeblastdb($domino_files_threads_QC{$keys}{"FASTA4BLAST"}[$i], $BLAST, $error_log); + &debugger_print($blast_DB_message); push (@reads_db, $blast_DB); } } elsif ($file_type eq "454_fastq" || $file_type eq "454_multiple_fastq" || $file_type eq "454_sff" ) { push (@{ $domino_files_threads_QC{$keys}{"FASTA4BLAST"} }, $domino_files_threads_QC{$keys}{"NGS_QC"}[0]); push (@{ $domino_files_threads_QC{$keys}{"QUAL4BLAST"} }, $domino_files_threads_QC{$keys}{"NGS_QC"}[1]); - push (@reads_db, DOMINO::makeblastdb($domino_files_threads_QC{$keys}{"FASTA4BLAST"}[0], $BLAST, $error_log)); + my ($blast_DB, $blast_DB_message) = DOMINO::makeblastdb($domino_files_threads_QC{$keys}{"FASTA4BLAST"}[0], $BLAST, $error_log); + &debugger_print($blast_DB_message); push (@reads_db, $blast_DB); + } else { my @name = split("\.fastq_filtered", $domino_files_threads_QC{$keys}{"NGS_QC"}[0]); &extracting_fastq($domino_files_threads_QC{$keys}{"NGS_QC"}[0], $name[0], "YES"); push (@{ $domino_files_threads_QC{$keys}{"FASTA4BLAST"} }, $name[0].".filtered.fasta"); push (@{ $domino_files_threads_QC{$keys}{"QUAL4BLAST"} }, $name[0].".filtered.qual"); - push (@reads_db, DOMINO::makeblastdb($domino_files_threads_QC{$keys}{"FASTA4BLAST"}[0], $BLAST, $error_log)); + my ($blast_DB, $blast_DB_message) = DOMINO::makeblastdb($domino_files_threads_QC{$keys}{"FASTA4BLAST"}[0], $BLAST, $error_log); + &debugger_print($blast_DB_message); push (@reads_db, $blast_DB); } print "\n"; &time_log(); print "\n"; &debugger_print("domino_files_threads_QC"); &debugger_print("Ref", \%domino_files_threads_QC); @@ -1122,7 +1126,8 @@ =head1 CITATION print "\n"; DOMINO::printHeader(" Database Search: BLAST ","%"); my $out_file = $taxa_dir."/blast_search.txt"; my $db; for (my $j=0; $j < scalar @reads_db; $j++) { $db .= $reads_db[$j]." "; } chop $db; - my $blastn = DOMINO::blastn($merge_fasta_database, $db, $out_file, $BLAST); + my ($blastn, $blastn_message) = DOMINO::blastn($merge_fasta_database, $db, $out_file, $BLAST); + &debugger_print($blastn_message); if ($blastn != 0) { &printError("BLASTN failed...\n"); exit(); } print "\n"; &time_log(); print "\n"; print "+ Parsing the BLAST results for $keys...\n"; @@ -1631,6 +1636,7 @@ sub mothur_sffinfo { ## The '#' is necessary to be in the first place when calling mothur with a given set of commands my $mothur_call = $mothur_path." '#set.dir(output=$directory); sffinfo(sff='".$file."'); trim.seqs(fasta="."$file_name".".fasta, qfile=".$file_name.".qual, oligos=ROCHE.oligos, bdiffs=$bdiffs, processors=$noOfProcesses)'"; + &debugger_print("MOTHUR command (sff info): $mothur_call\n"); my $mothur_result = system($mothur_call); if ($mothur_result != 0) { &printError("MOTHUR failed when trying to proccess the file..."); exit(); } else { print "Done...OK\n\n"; } } @@ -1643,8 +1649,8 @@ sub mothur_trim_fastq { ## The '#' is necessary to be in the first place when calling mothur with a given set of commands my $mothur_call = $mothur_path." '#set.dir(output=".$directory."); trim.seqs(fasta=".$file_name.".fasta, qfile=".$file_name.".qual, oligos=ROCHE.oligos, bdiffs=".$bdiffs.", processors=".$noOfProcesses.")'"; - print "+ Triming the reads according to MID tag\n$mothur_path\n"; - + &debugger_print("MOTHUR command (trim fastq): $mothur_call\n"); + print "+ Triming the reads according to MID tag\n"; my $mothur_result = system($mothur_call); if ($mothur_result != 0) { &printError("MOTHUR failed when trying to proccess the file..."); exit(); } else { print "Done...OK\n\n"; } } @@ -1676,8 +1682,8 @@ sub prinseq_dust { } $prinseq_dust_command .= " -out_good ".$out_good." -out_bad ".$out_bad." -lc_method dust -lc_threshold ".$thr; $prinseq_dust_command .= " -ns_max_n 1 -noniupac 2> $error_log"; - print "PRINSEQ command:\n".$prinseq_dust_command."\n"; - my $dust_result = system ($prinseq_dust_command); + &debugger_print("PRINSEQ command:\n$prinseq_dust_command\n"); + my $dust_result = system($prinseq_dust_command); if ($dust_result != 0) { &printError("DUST cleaning step failed when trying to proccess the file... DOMINO would not stop in this step..."); } return ($out_good, $out_bad); } diff --git a/src/perl/DM_MarkerScan_v1.0.0.pl b/src/perl/DM_MarkerScan_v1.0.0.pl index 0443aa9..c062bfd 100644 --- a/src/perl/DM_MarkerScan_v1.0.0.pl +++ b/src/perl/DM_MarkerScan_v1.0.0.pl @@ -926,7 +926,12 @@ =head1 CITATION ); ## DOMINO would check for the latest mapping in order to find if parameters are the same. my $path_returned = DOMINO::get_earliest("mapping", $folder_abs_path); - if ($path_returned eq 'mapping') { + + &debugger_print("DOMINO::get_earliest subroutine: "); &debugger_print($path_returned); + + if ($path_returned eq 'NO') { + undef $avoid_mapping; + } elsif ($path_returned eq 'mapping') { undef $avoid_mapping; DOMINO::printDetails("+ Generation of new profile of variation would be done as it has been previously done with different parameters or it was incomplete...OK\n", $mapping_parameters, $param_Detail_file_markers); } else { @@ -1284,7 +1289,7 @@ =head1 CITATION my $reference = "reference_".$reference_identifier; print "- Reference: $contigs_fasta...\n"; my $bowtie_index_call = $bowtie_path."bowtie2-build --threads $num_proc_user -f ".$contigs_fasta." ".$reference; - &debugger_print("BOWTIE command: ".$bowtie_index_call."\n"); + &debugger_print("BOWTIE2 command: ".$bowtie_index_call."\n"); my $index_result = system ($bowtie_index_call); if ($index_result != 0) { &printError("Exiting the script. Some error happened when calling bowtie for indexing the file...\n"); DOMINO::dieNicely(); @@ -1347,7 +1352,7 @@ =head1 CITATION } else { $botwie_system .= " -x ".$reference." -q -U ".$mapping_file." -S ".$sam_name." ".$R_group_id.$R_group_name.$threads.$mismatches." --no-unal".$read_gap_open.$ref_gap_open.$mismatch_penalty; }} - print "BOWTIE command: ".$botwie_system."\n"; &debugger_print("BOWTIE command: ".$botwie_system."\n"); + &debugger_print("BOWTIE2 command: ".$botwie_system."\n"); my $system_bowtie_call = system ($botwie_system); if ($system_bowtie_call != 0) { &printError("Exiting the script. Some error happened when calling bowtie for mapping the file $mapping_file...\n"); DOMINO::dieNicely(); @@ -2166,14 +2171,16 @@ =head1 CITATION ## Use BLAST for clustering sequences print "+ Generate a BLAST database...\n"; -my $blast_DB = DOMINO::makeblastdb($all_coordinates_file, $BLAST, $mapping_markers_errors_details); +my ($blast_DB, $blast_DB_message) = DOMINO::makeblastdb($all_coordinates_file, $BLAST, $mapping_markers_errors_details); +&debugger_print($blast_DB_message); if ($blast_DB eq "1") { &printError("Early termination of the DOMINO Marker Scan..."); my $msg= "\n\nPlease note that DOMINO could not find any markers for the parameters provided. Please Re-Run DOMINO using other parameters\n\n\n"; print $msg; DOMINO::printError_log($msg); &finish_time_stamp(); exit(); } my $blast_search = "blast_search.txt"; print "+ BLAST search now...\n"; -DOMINO::blastn($all_coordinates_file, $blast_DB, $blast_search, $BLAST); +my ($blastn, $blastn_message) = DOMINO::blastn($all_coordinates_file, $blast_DB, $blast_search, $BLAST); +&debugger_print($blastn_message); ## Filter BLAST results print "+ Filtering BLAST search now...\n"; diff --git a/src/perl/DOMINO.pm b/src/perl/DOMINO.pm index 679f7be..fd803a2 100755 --- a/src/perl/DOMINO.pm +++ b/src/perl/DOMINO.pm @@ -9,8 +9,9 @@ package DOMINO; sub blastn { my $file = $_[0]; my $db = $_[1]; my $results = $_[2]; my $BLAST = $_[3]; my $filter = $BLAST."blastn -query ".$file." -evalue 1e-10 -db '".$db."' -out $results -outfmt 6"; - print "BLASTN command: $filter\n"; my $blastn = system($filter); - return $blastn; + my $message = "BLASTN command: $filter\n"; + my $blastn = system($filter); + return ($blastn, $message); } sub check_ID_length { @@ -179,7 +180,7 @@ sub makeblastdb { $make_blast_db .= " -out ".$db." -logfile ".$db.".log 2> $error_log"; my $makeblastresult = system($make_blast_db); if ($makeblastresult != 0) { print "Generating the database failed when trying to proccess the file... DOMINO would not stop in this step...\n"; return "1"; } - return $db; + return ($db, $make_blast_db); } sub mothur_remove_seqs {