From 913c93c3ee268b1dc27ef112413e66886df842e4 Mon Sep 17 00:00:00 2001 From: Coppini Date: Tue, 7 Dec 2021 17:41:54 -0300 Subject: [PATCH 1/8] feat: print abundance of the discarded cluster with the most reads --- modules/consensus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/consensus.py b/modules/consensus.py index 0e56bcf..b3ad08e 100644 --- a/modules/consensus.py +++ b/modules/consensus.py @@ -271,6 +271,6 @@ def form_draft_consensus(clusters, representatives, sorted_reads_fastq_file, wor print(f"{singletons} singletons were discarded") print( f"{len(discarded_clusters)} clusters were discarded due to not passing the abundance_cutoff: " - f"a total of {sum(discarded_clusters)} reads were discarded" + f"a total of {sum(discarded_clusters)} reads were discarded. Highest abundance among them: {max(discarded_clusters)} reads." ) return centers From d3e68c07f0cdde21fb84cf79ffd06c534c67ae2a Mon Sep 17 00:00:00 2001 From: Coppini Date: Wed, 22 Dec 2021 20:06:17 -0300 Subject: [PATCH 2/8] feat: adds Dockerfile with working NGSpeciesID --- Dockerfile | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8e641ac --- /dev/null +++ b/Dockerfile @@ -0,0 +1,29 @@ +FROM quay.io/biocontainers/htslib:1.14--h9093b5e_0 AS htslib +FROM quay.io/biocontainers/spoa:4.0.7--h9a82719_1 AS spoa +FROM quay.io/biocontainers/racon:1.4.20--h9a82719_1 AS racon +FROM quay.io/biocontainers/minimap2:2.23--h5bf99c6_0 AS minimap2 +FROM quay.io/biocontainers/samtools:1.14--hb421002_0 AS samtools +FROM quay.io/biocontainers/bcftools:1.14--h88f3f91_0 AS bcftools + +FROM python:3.6 + +RUN apt-get update \ + && apt-get install -y \ + libopenblas-dev \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN pip install medaka NGSpeciesID + +# Copy libraries +COPY --from=htslib /usr/local/lib/libhts.so.3 /usr/local/lib/libtinfow.so.6 /usr/local/lib/ +COPY --from=bcftools /usr/local/lib/libgsl.so.25 /usr/local/lib/libcblas.so.3 /usr/local/lib/ + +# Copy binaries +COPY --from=htslib /usr/local/bin/bgzip /usr/local/bin/htsfile /usr/local/bin/tabix /usr/local/bin/ +COPY --from=spoa /usr/local/bin/spoa /usr/local/bin/ +COPY --from=racon /usr/local/bin/racon /usr/local/bin/ +COPY --from=minimap2 /usr/local/bin/minimap2 /usr/local/bin/ +COPY --from=samtools /usr/local/bin/samtools /usr/local/bin/ +COPY --from=bcftools /usr/local/bin/bcftools /usr/local/bin/ + +ENTRYPOINT [ "NGSpeciesID" ] \ No newline at end of file From 92d7b50371933fa7cc111a7c0b178244c8d1b119 Mon Sep 17 00:00:00 2001 From: Coppini Date: Wed, 22 Dec 2021 20:28:01 -0300 Subject: [PATCH 3/8] feat: copies directly from quay.io/biocontainers image --- Dockerfile | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8e641ac..8272911 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,29 +1,22 @@ -FROM quay.io/biocontainers/htslib:1.14--h9093b5e_0 AS htslib -FROM quay.io/biocontainers/spoa:4.0.7--h9a82719_1 AS spoa -FROM quay.io/biocontainers/racon:1.4.20--h9a82719_1 AS racon -FROM quay.io/biocontainers/minimap2:2.23--h5bf99c6_0 AS minimap2 -FROM quay.io/biocontainers/samtools:1.14--hb421002_0 AS samtools -FROM quay.io/biocontainers/bcftools:1.14--h88f3f91_0 AS bcftools - FROM python:3.6 RUN apt-get update \ && apt-get install -y \ - libopenblas-dev \ + libopenblas-dev=0.3.13+ds-3 \ && apt-get clean && rm -rf /var/lib/apt/lists/* -RUN pip install medaka NGSpeciesID - # Copy libraries -COPY --from=htslib /usr/local/lib/libhts.so.3 /usr/local/lib/libtinfow.so.6 /usr/local/lib/ -COPY --from=bcftools /usr/local/lib/libgsl.so.25 /usr/local/lib/libcblas.so.3 /usr/local/lib/ +COPY --from=quay.io/biocontainers/htslib:1.14--h9093b5e_0 /usr/local/lib/libhts.so.3 /usr/local/lib/libtinfow.so.6 /usr/local/lib/ +COPY --from=quay.io/biocontainers/bcftools:1.14--h88f3f91_0 /usr/local/lib/libgsl.so.25 /usr/local/lib/libcblas.so.3 /usr/local/lib/ # Copy binaries -COPY --from=htslib /usr/local/bin/bgzip /usr/local/bin/htsfile /usr/local/bin/tabix /usr/local/bin/ -COPY --from=spoa /usr/local/bin/spoa /usr/local/bin/ -COPY --from=racon /usr/local/bin/racon /usr/local/bin/ -COPY --from=minimap2 /usr/local/bin/minimap2 /usr/local/bin/ -COPY --from=samtools /usr/local/bin/samtools /usr/local/bin/ -COPY --from=bcftools /usr/local/bin/bcftools /usr/local/bin/ +COPY --from=quay.io/biocontainers/htslib:1.14--h9093b5e_0 /usr/local/bin/bgzip /usr/local/bin/htsfile /usr/local/bin/tabix /usr/local/bin/ +COPY --from=quay.io/biocontainers/spoa:4.0.7--h9a82719_1 /usr/local/bin/spoa /usr/local/bin/ +COPY --from=quay.io/biocontainers/racon:1.4.20--h9a82719_1 /usr/local/bin/racon /usr/local/bin/ +COPY --from=quay.io/biocontainers/minimap2:2.23--h5bf99c6_0 /usr/local/bin/minimap2 /usr/local/bin/ +COPY --from=quay.io/biocontainers/samtools:1.14--hb421002_0 /usr/local/bin/samtools /usr/local/bin/ +COPY --from=quay.io/biocontainers/bcftools:1.14--h88f3f91_0 /usr/local/bin/bcftools /usr/local/bin/ + +RUN pip install medaka==1.5.0 NGSpeciesID ENTRYPOINT [ "NGSpeciesID" ] \ No newline at end of file From fcef49ebe19e432af48fc2f88e8e7c9f2932f7f8 Mon Sep 17 00:00:00 2001 From: Coppini Date: Wed, 22 Dec 2021 20:37:07 -0300 Subject: [PATCH 4/8] style: EOF newline --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 8272911..7251aa4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,4 +19,4 @@ COPY --from=quay.io/biocontainers/bcftools:1.14--h88f3f91_0 /usr/local/bin/bcfto RUN pip install medaka==1.5.0 NGSpeciesID -ENTRYPOINT [ "NGSpeciesID" ] \ No newline at end of file +ENTRYPOINT [ "NGSpeciesID" ] From 3d827bad6fd7b6abb32043e4cf219b15b16c40de Mon Sep 17 00:00:00 2001 From: Coppini Date: Thu, 23 Dec 2021 10:56:00 -0300 Subject: [PATCH 5/8] fix: does not print if no clusters were discarded --- modules/consensus.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/consensus.py b/modules/consensus.py index b3ad08e..b3d9bdc 100644 --- a/modules/consensus.py +++ b/modules/consensus.py @@ -269,8 +269,10 @@ def form_draft_consensus(clusters, representatives, sorted_reads_fastq_file, wor elif nr_reads_in_cluster > 1: discarded_clusters.append(nr_reads_in_cluster) print(f"{singletons} singletons were discarded") - print( - f"{len(discarded_clusters)} clusters were discarded due to not passing the abundance_cutoff: " - f"a total of {sum(discarded_clusters)} reads were discarded. Highest abundance among them: {max(discarded_clusters)} reads." - ) + if discarded_clusters: + print( + f"{len(discarded_clusters)} clusters were discarded due to not passing the abundance_cutoff: " + f"a total of {sum(discarded_clusters)} reads were discarded. " + f"Highest abundance among them: {max(discarded_clusters)} reads." + ) return centers From d2f4bc7a786ebf65d9c338561b5d0f5161fa168c Mon Sep 17 00:00:00 2001 From: Coppini Date: Thu, 23 Dec 2021 10:57:10 -0300 Subject: [PATCH 6/8] fix: print 0 if no clusters were discarded --- modules/consensus.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/modules/consensus.py b/modules/consensus.py index b3d9bdc..256c8c2 100644 --- a/modules/consensus.py +++ b/modules/consensus.py @@ -269,10 +269,9 @@ def form_draft_consensus(clusters, representatives, sorted_reads_fastq_file, wor elif nr_reads_in_cluster > 1: discarded_clusters.append(nr_reads_in_cluster) print(f"{singletons} singletons were discarded") - if discarded_clusters: - print( - f"{len(discarded_clusters)} clusters were discarded due to not passing the abundance_cutoff: " - f"a total of {sum(discarded_clusters)} reads were discarded. " - f"Highest abundance among them: {max(discarded_clusters)} reads." - ) + print( + f"{len(discarded_clusters)} clusters were discarded due to not passing the abundance_cutoff: " + f"a total of {sum(discarded_clusters)} reads were discarded. " + f"Highest abundance among them: {max(discarded_clusters + [0])} reads." + ) return centers From 7e2c1530f9b8ecd0a8c895355b1a517f1a87ce57 Mon Sep 17 00:00:00 2001 From: Coppini Date: Thu, 23 Dec 2021 17:44:11 -0300 Subject: [PATCH 7/8] fix: min(list) fails when list is empty --- modules/cluster.py | 2 +- modules/consensus.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/cluster.py b/modules/cluster.py index 14feb19..918389d 100644 --- a/modules/cluster.py +++ b/modules/cluster.py @@ -221,7 +221,7 @@ def reads_to_clusters(clusters, representatives, sorted_reads, p_emp_probs, mini ## For multiprocessing only prev_b_indices = [ prev_batch_index for (read_cl_id, prev_batch_index, acc, seq, qual, score) in sorted_reads ] - lowest_batch_index = max(1, min(prev_b_indices)) + lowest_batch_index = max(1, min(prev_b_indices or [1])) skip_count = prev_b_indices.count(lowest_batch_index) print("Saved: {0} iterations.".format(skip_count) ) ################################### diff --git a/modules/consensus.py b/modules/consensus.py index 256c8c2..4647aa0 100644 --- a/modules/consensus.py +++ b/modules/consensus.py @@ -272,6 +272,6 @@ def form_draft_consensus(clusters, representatives, sorted_reads_fastq_file, wor print( f"{len(discarded_clusters)} clusters were discarded due to not passing the abundance_cutoff: " f"a total of {sum(discarded_clusters)} reads were discarded. " - f"Highest abundance among them: {max(discarded_clusters + [0])} reads." + f"Highest abundance among them: {max(discarded_clusters or [0])} reads." ) return centers From 995ecd904a70b019e6e6b9402002d0c11013b166 Mon Sep 17 00:00:00 2001 From: Coppini Date: Thu, 23 Dec 2021 18:32:51 -0300 Subject: [PATCH 8/8] fix: spelling --- modules/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cluster.py b/modules/cluster.py index 918389d..9075d1e 100644 --- a/modules/cluster.py +++ b/modules/cluster.py @@ -369,7 +369,7 @@ def reads_to_clusters(clusters, representatives, sorted_reads, p_emp_probs, mini print("Total number of reads iterated through:{0}".format(len(sorted_reads))) print("Passed mapping criteria:{0}".format(mapped_passed_criteria)) print("Passed alignment criteria in this process:{0}".format(aln_passed_criteria)) - print("Total calls to alignment mudule in this process:{0}".format(aln_called)) + print("Total calls to alignment module in this process:{0}".format(aln_called)) return { new_batch_index : (clusters, representatives, minimizer_database, new_batch_index)}