Skip to content

Commit

Permalink
0.40-release
Browse files Browse the repository at this point in the history
  • Loading branch information
telatin committed Jan 19, 2021
1 parent 1218db7 commit df5069d
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 6 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ dadaist2 [options] -i INPUT_DIR -o OUTPUT_DIR
Other options:
* `-1`, `--for-tag` STRING, string identifying a file as being _forward_ (default: \_R1)
* `-2`, `--rev-tag` STRING, string identifying a file as being _reverse_ (default: \_R2)
* `-m`, `--metadata` FILE, metadata file in TSV format with the sample ID as first column. If not supplied, it will be autogenerated using _makeSampleSheet_.
* `-d`, `--database` FILE, database in gzipped FASTA format for taxonomy assignment (skipped if not provided)
* `-q`, `--min-qual` FLOAT, minimum average quality at a position to set truncation start in DADA2 (default: 28)
* `-r`, `--save-rds` will save a R data structure of the feature table
Expand All @@ -35,6 +36,8 @@ Other options:

A directory containing the FASTQ files, which are usually gzipped. Paired-end reads are expected, but this will change in a future release. See the `data` directory for an example.

An optional _metadata file_ can be used for secondary analyses; if not supplied, one will be autogenerated.

## Output

The output directory will contain:
Expand Down
49 changes: 43 additions & 6 deletions bin/dadaist2
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ my $opt_threads = 2;
# Taxonomy database: the literal value "skip" means that no database was provided
my $opt_tax_db = "skip";
# Flag for -r/--save-rds (save an R data structure of the feature table)
my $opt_save_rds = 0;
# NOTE(review): presumably the string form of the flag above — confirm where it is used
my $save_rds = "no";
# Field separator used by check_metadata() to split metadata lines (tab: TSV)
my $opt_metadata_separator = "\t";

# FASTP Preprocess
# To be autocalculated:
Expand Down Expand Up @@ -151,6 +152,19 @@ if ($opt_tax_db ne 'skip') {
}
}

# Metadata: use the file supplied with -m/--metadata or, when none is given,
# autogenerate one with makeSampleSheet. In both cases the table ends up as
# 'metadata.tsv' in the output directory and its sample IDs are loaded into
# $meta_samples (hash reference returned by check_metadata).
my $meta_samples;
if ($opt_metadata) {
    # Load the user-supplied table first: check_metadata exits if it is unreadable
    $meta_samples = check_metadata($opt_metadata);
    # Keep a copy of the metadata next to the other output files
    copy($opt_metadata,
        File::Spec->catfile($opt_output_directory,'metadata.tsv')) || die " ERROR:\n Unable to copy metadata file to $opt_output_directory.\n";

} else {
    # No metadata supplied: write a sample sheet straight into the output directory
    $opt_metadata = File::Spec->catfile($opt_output_directory, 'metadata.tsv');
    my $meta_cmd = qq($RealBin/makeSampleSheet -i "$opt_input_directory" -o "$opt_metadata");
    # NOTE(review): the meaning of 'candie' is defined by the runner object ($S),
    # which is not shown here — confirm how a failed makeSampleSheet is handled
    $S->run($meta_cmd, {candie => 1});
    $meta_samples = check_metadata($opt_metadata);
}
$temp_dir = prepare_temporary_directories($temp_dir);

$S->verbose("Threads: $opt_threads");
Expand Down Expand Up @@ -434,6 +448,7 @@ if ( $msa_exec->{'exit'} != 0 or $tree_exec->{'exit'} != 0) {
}


# Delete, or not, temp dir
if ($opt_debug) {
$S->verbose("Temporary directory _not_ deleted: $temp_dir");
} else {
Expand All @@ -442,16 +457,13 @@ if ($opt_debug) {

}

my $output_message = "DADA2 finished, output files saved:\n";
# Print relevant output files produced
my $output_message = "Dadaist finished, output files saved:\n";
# Append one line per output file. Keys are sorted because hash order is not
# stable in Perl: without it the report would list files in a different
# order at each run.
for my $f (sort keys %output_files) {
    $output_message .= " * $f: " . $output_files{$f} . "\n";
}
$S->verbose($output_message);

#copy(File::Spec->catfile($dada2_temp, 'stats.tsv'),
# File::Spec->catfile($opt_output_directory, 'dada2_stats.tsv')) || die " ERROR:\n Unable to copy stats.tsv file from $dada2_temp to $opt_output_directory.\n";

#dada2.rds dada2.tsv stats.tsv


sub version {
Expand All @@ -462,7 +474,9 @@ sub version {
sub usage {
say STDERR<<END;
USAGE:
dadaist -i INPUT_DIR -o OUTPUT_DIR [-t TEMP_DIR]
dadaist2 -i INPUT_DIR -o OUTPUT_DIR [-t TEMP_DIR]
dadaist2 --help for full manual
END

if ($_[0]) {
Expand All @@ -471,6 +485,24 @@ END
}
}

sub check_metadata {
    # Read a metadata table and collect the sample IDs found in its first column.
    #
    # Parameters:
    #   $file - path of the metadata file; fields are separated by
    #           $opt_metadata_separator
    # Returns:
    #   reference to a hash { sample_id => number of lines carrying that ID }
    # Exits with status 1 (after reporting via $S->verbose) if the file
    # cannot be opened.
    #
    # Lines starting with '#' are comments; blank lines and lines with an
    # empty first field are ignored.
    my $file = shift @_;
    my $fh;
    if (not open ($fh, '<', $file)) {
        $S->verbose("Unable to load metadata from $file");
        exit 1;
    }
    my %samples = ();
    while (my $line = readline($fh)) {
        # Strip the line terminator (LF or CRLF): otherwise a line with a
        # single column yields a sample ID ending with a newline
        $line =~ s/\r?\n\z//;
        next if ($line =~/^#/);
        next if ($line !~ /\S/);
        # \Q..\E: the separator is a literal string, not a pattern.
        # Only the first field is needed, hence the limit of 2.
        my ($sample_id) = split /\Q$opt_metadata_separator\E/, $line, 2;
        next if (not defined $sample_id or $sample_id eq '');
        $samples{ $sample_id }++;
    }
    close $fh;
    return \%samples;
}

sub get_qualified_positions {
# Scan a list of qualities and return the boundaries of qualified quality
my $len = @_;
Expand Down Expand Up @@ -624,6 +656,11 @@ Directory containing the paired end files in FASTQ format, gzipped or not.
Output directory (will be created).
=item I<-m>, I<--metadata> FILE
Metadata file in TSV format; the first column must match the sample IDs. If not supplied,
a template will be autogenerated using C<makeSampleSheet>.
=item I<-d>, I<--database> DATABASE
Reference database in gzipped FASTA format, specify 'skip' not to assign
Expand Down

0 comments on commit df5069d

Please sign in to comment.