diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7251aa4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.6 + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + libopenblas-dev=0.3.13+ds-3 \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +# Copy shared libraries out of the pinned biocontainers images (presumably runtime dependencies of the binaries copied below - confirm) +COPY --from=quay.io/biocontainers/htslib:1.14--h9093b5e_0 /usr/local/lib/libhts.so.3 /usr/local/lib/libtinfow.so.6 /usr/local/lib/ +COPY --from=quay.io/biocontainers/bcftools:1.14--h88f3f91_0 /usr/local/lib/libgsl.so.25 /usr/local/lib/libcblas.so.3 /usr/local/lib/ + +# Copy prebuilt command-line tools from pinned biocontainers image tags into /usr/local/bin +COPY --from=quay.io/biocontainers/htslib:1.14--h9093b5e_0 /usr/local/bin/bgzip /usr/local/bin/htsfile /usr/local/bin/tabix /usr/local/bin/ +COPY --from=quay.io/biocontainers/spoa:4.0.7--h9a82719_1 /usr/local/bin/spoa /usr/local/bin/ +COPY --from=quay.io/biocontainers/racon:1.4.20--h9a82719_1 /usr/local/bin/racon /usr/local/bin/ +COPY --from=quay.io/biocontainers/minimap2:2.23--h5bf99c6_0 /usr/local/bin/minimap2 /usr/local/bin/ +COPY --from=quay.io/biocontainers/samtools:1.14--hb421002_0 /usr/local/bin/samtools /usr/local/bin/ +COPY --from=quay.io/biocontainers/bcftools:1.14--h88f3f91_0 /usr/local/bin/bcftools /usr/local/bin/ + +RUN pip install --no-cache-dir medaka==1.5.0 NGSpeciesID + +ENTRYPOINT [ "NGSpeciesID" ] diff --git a/modules/cluster.py b/modules/cluster.py index 14feb19..9075d1e 100644 --- a/modules/cluster.py +++ b/modules/cluster.py @@ -221,7 +221,7 @@ def reads_to_clusters(clusters, representatives, sorted_reads, p_emp_probs, mini ## For multiprocessing only prev_b_indices = [ prev_batch_index for (read_cl_id, prev_batch_index, acc, seq, qual, score) in sorted_reads ] - lowest_batch_index = max(1, min(prev_b_indices)) + lowest_batch_index = max(1, min(prev_b_indices or [1])) skip_count = prev_b_indices.count(lowest_batch_index) print("Saved: {0} iterations.".format(skip_count) ) ################################### @@ -369,7 +369,7 @@ def reads_to_clusters(clusters, representatives, 
sorted_reads, p_emp_probs, mini print("Total number of reads iterated through:{0}".format(len(sorted_reads))) print("Passed mapping criteria:{0}".format(mapped_passed_criteria)) print("Passed alignment criteria in this process:{0}".format(aln_passed_criteria)) - print("Total calls to alignment mudule in this process:{0}".format(aln_called)) + print("Total calls to alignment module in this process:{0}".format(aln_called)) return { new_batch_index : (clusters, representatives, minimizer_database, new_batch_index)} diff --git a/modules/consensus.py b/modules/consensus.py index 0e56bcf..4647aa0 100644 --- a/modules/consensus.py +++ b/modules/consensus.py @@ -271,6 +271,7 @@ def form_draft_consensus(clusters, representatives, sorted_reads_fastq_file, wor print(f"{singletons} singletons were discarded") print( f"{len(discarded_clusters)} clusters were discarded due to not passing the abundance_cutoff: " - f"a total of {sum(discarded_clusters)} reads were discarded" + f"a total of {sum(discarded_clusters)} reads were discarded. " + f"Highest abundance among them: {max(discarded_clusters or [0])} reads." ) return centers