From dec0a4eb3c1913eba0e4dd9a0aec2a6a46a39c38 Mon Sep 17 00:00:00 2001 From: alexsweeten Date: Tue, 17 Sep 2024 10:59:40 -0400 Subject: [PATCH 1/6] Bugfix axis swap in comparative static plots --- src/moddotplot/moddotplot.py | 14 +++++++++----- src/moddotplot/static_plots.py | 8 ++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/moddotplot/moddotplot.py b/src/moddotplot/moddotplot.py index 505cf56..f37c447 100644 --- a/src/moddotplot/moddotplot.py +++ b/src/moddotplot/moddotplot.py @@ -950,7 +950,7 @@ def main(): smaller_length = len(smaller_seq) larger_seq_name = sequences[i][0] smaller_seq_name = sequences[j][0] - # Houston we have a problem + if larger_length < smaller_length: smaller_seq = sequences[i][1] larger_seq = sequences[j][1] @@ -1000,15 +1000,17 @@ def main(): seq_list[j], False, ) + if args.grid: + grid_vals.append(bed) if not args.no_bed: # Log saving bed file if not args.output_dir: - bedfile_output = sequences[i][0] + ".bed" + bedfile_output = smaller_seq_name + "_" + larger_seq_name + "_COMPARE.bed" else: bedfile_output = os.path.join( args.output_dir, - sequences[i][0] + "_" + sequences[j][0] + ".bed", + smaller_seq_name + "_" + larger_seq_name + "_COMPARE.bed", ) with open(bedfile_output, "w") as bedfile: for row in bed: @@ -1019,8 +1021,8 @@ def main(): create_plots( sdf=[bed], directory=args.output_dir if args.output_dir else ".", - name_x=sequences[i][0], - name_y=sequences[j][0], + name_x=smaller_seq_name, + name_y=larger_seq_name, palette=args.palette, palette_orientation=args.palette_orientation, no_hist=args.no_hist, @@ -1035,6 +1037,8 @@ def main(): axes_labels=args.axes_ticks, ) + '''if args.grid: + print(grid_vals)''' if __name__ == "__main__": main() diff --git a/src/moddotplot/static_plots.py b/src/moddotplot/static_plots.py index 43eeaec..ed1879d 100644 --- a/src/moddotplot/static_plots.py +++ b/src/moddotplot/static_plots.py @@ -574,7 +574,7 @@ def create_plots( height=9, dpi=dpi, format="pdf", - filename=f"{plot_filename}.pdf", + filename=f"{plot_filename}_COMPARE.pdf", verbose=False, ) ggsave( @@ -583,11 +583,11 @@ def create_plots( height=9, dpi=dpi, format="png", - filename=f"{plot_filename}.png", + filename=f"{plot_filename}_COMPARE.png", verbose=False, ) if no_hist: - print(f"{plot_filename}.pdf and {plot_filename}.png saved sucessfully. \n") + print(f"{plot_filename}_COMPARE.pdf and {plot_filename}_COMPARE.png saved sucessfully. \n") else: ggsave( histy, @@ -608,7 +608,7 @@ def create_plots( verbose=False, ) print( - f"{plot_filename}.pdf, {plot_filename}.png, {plot_filename}_HIST.pdf and {plot_filename}_HIST.png saved sucessfully. \n" + f"{plot_filename}_COMPARE.pdf, {plot_filename}_COMPARE.png, {plot_filename}_HIST.pdf and {plot_filename}_HIST.png saved sucessfully. \n" ) # Self-identity plots: Output _TRI, _FULL, and _HIST else: From f0180f1abc8e91fe4d2b5f7b9910b331ad3d23bc Mon Sep 17 00:00:00 2001 From: alexsweeten Date: Tue, 17 Sep 2024 11:44:31 -0400 Subject: [PATCH 2/6] bump version --- pyproject.toml | 2 +- src/moddotplot/const.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e6892fb..a003888 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "ModDotPlot" -version = "0.8.5" +version = "0.8.6" requires-python = ">= 3.7" dependencies = [ "pysam", diff --git a/src/moddotplot/const.py b/src/moddotplot/const.py index 7c971f2..e5c5fbb 100644 --- a/src/moddotplot/const.py +++ b/src/moddotplot/const.py @@ -1,4 +1,4 @@ -VERSION = "0.8.5" +VERSION = "0.8.6" COLS = [ "#query_name", "query_start", From 07c1d86d087c88c1a66c11c0ef808a0bba6cbf61 Mon Sep 17 00:00:00 2001 From: Alex Sweeten Date: Tue, 17 Sep 2024 11:45:22 -0400 Subject: [PATCH 3/6] Update CITATION.cff --- CITATION.cff | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 0724d6a..786cba5 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,14 +1,15 @@ - -@article{Sweeten2024.04.15.589623, - abstract = {Motivation A common method for analyzing genomic repeats is to produce a sequence similarity matrix visualized via a dot plot. Innovative approaches such as StainedGlass have improved upon this classic visualization by rendering dot plots as a heatmap of sequence identity, enabling researchers to better visualize multi-megabase tandem repeat arrays within centromeres and other heterochromatic regions of the genome. However, computing the similarity estimates for heatmaps requires high computational overhead and can suffer from decreasing accuracy. Results In this work we introduce ModDotPlot, an interactive and alignment-free dot plot viewer. By approximating average nucleotide identity via a k-mer-based containment index, ModDotPlot produces accurate plots orders of magnitude faster than StainedGlass. We accomplish this through the use of a hierarchical modimizer scheme that can visualize the full 128 Mbp genome of Arabidopsis thaliana in under 5 minutes on a laptop. ModDotPlot is implemented in Python with a graphical user interface supporting real-time interactive navigation of entire chromosomes. Availability and Implementation ModDotPlot is available at https://github.com/marbl/ModDotPlot.Competing Interest StatementThe authors have declared no competing interest.}, - author = {Alexander P. Sweeten and Michael C. Schatz and Adam M. Phillippy}, - doi = {10.1101/2024.04.15.589623}, - elocation-id = {2024.04.15.589623}, - eprint = {https://www.biorxiv.org/content/early/2024/04/19/2024.04.15.589623.full.pdf}, - journal = {bioRxiv}, - publisher = {Cold Spring Harbor Laboratory}, - title = {ModDotPlot - Rapid and interactive visualization of complex repeats}, - url = {https://www.biorxiv.org/content/early/2024/04/19/2024.04.15.589623}, - year = {2024}, - bdsk-url-1 = {https://www.biorxiv.org/content/early/2024/04/19/2024.04.15.589623}, - bdsk-url-2 = {https://doi.org/10.1101/2024.04.15.589623}} +@article{10.1093/bioinformatics/btae493, + author = {Sweeten, Alexander P and Schatz, Michael C and Phillippy, Adam M}, + title = "{ModDotPlot—rapid and interactive visualization of tandem repeats}", + journal = {Bioinformatics}, + volume = {40}, + number = {8}, + pages = {btae493}, + year = {2024}, + month = {08}, + abstract = "{A common method for analyzing genomic repeats is to produce a sequence similarity matrix visualized via a dot plot. Innovative approaches such as StainedGlass have improved upon this classic visualization by rendering dot plots as a heatmap of sequence identity, enabling researchers to better visualize multi-megabase tandem repeat arrays within centromeres and other heterochromatic regions of the genome. However, computing the similarity estimates for heatmaps requires high computational overhead and can suffer from decreasing accuracy.In this work, we introduce ModDotPlot, an interactive and alignment-free dot plot viewer. By approximating average nucleotide identity via a k-mer-based containment index, ModDotPlot produces accurate plots orders of magnitude faster than StainedGlass. We accomplish this through the use of a hierarchical modimizer scheme that can visualize the full 128 Mb genome of Arabidopsis thaliana in under 5 min on a laptop. ModDotPlot is bundled with a graphical user interface supporting real-time interactive navigation of entire chromosomes.ModDotPlot is available at https://github.com/marbl/ModDotPlot.}", + issn = {1367-4811}, + doi = {10.1093/bioinformatics/btae493}, + url = {https://doi.org/10.1093/bioinformatics/btae493}, + eprint = {https://academic.oup.com/bioinformatics/article-pdf/40/8/btae493/58809824/btae493.pdf}, +} From 55fc84fd8b9d3647b001e511e399789918c939c2 Mon Sep 17 00:00:00 2001 From: Alex Sweeten Date: Tue, 17 Sep 2024 11:46:27 -0400 Subject: [PATCH 4/6] Update CITATION.cff --- CITATION.cff | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 786cba5..169d580 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,15 +1,11 @@ -@article{10.1093/bioinformatics/btae493, - author = {Sweeten, Alexander P and Schatz, Michael C and Phillippy, Adam M}, - title = "{ModDotPlot—rapid and interactive visualization of tandem repeats}", - journal = {Bioinformatics}, - volume = {40}, - number = {8}, - pages = {btae493}, - year = {2024}, - month = {08}, - abstract = "{A common method for analyzing genomic repeats is to produce a sequence similarity matrix visualized via a dot plot. Innovative approaches such as StainedGlass have improved upon this classic visualization by rendering dot plots as a heatmap of sequence identity, enabling researchers to better visualize multi-megabase tandem repeat arrays within centromeres and other heterochromatic regions of the genome. However, computing the similarity estimates for heatmaps requires high computational overhead and can suffer from decreasing accuracy.In this work, we introduce ModDotPlot, an interactive and alignment-free dot plot viewer. By approximating average nucleotide identity via a k-mer-based containment index, ModDotPlot produces accurate plots orders of magnitude faster than StainedGlass. We accomplish this through the use of a hierarchical modimizer scheme that can visualize the full 128 Mb genome of Arabidopsis thaliana in under 5 min on a laptop. ModDotPlot is bundled with a graphical user interface supporting real-time interactive navigation of entire chromosomes.ModDotPlot is available at https://github.com/marbl/ModDotPlot.}", - issn = {1367-4811}, - doi = {10.1093/bioinformatics/btae493}, - url = {https://doi.org/10.1093/bioinformatics/btae493}, - eprint = {https://academic.oup.com/bioinformatics/article-pdf/40/8/btae493/58809824/btae493.pdf}, +@article{Lisa_My_awesome_research_2021, + author = {Lisa, Mona and Bot, Hew}, + doi = {10.0000/00000}, + journal = {Journal Title}, + month = {9}, + number = {1}, + pages = {1--10}, + title = {{My awesome research software}}, + volume = {1}, + year = {2021} } From e6e1234846c36ec05d40ac2a9a00ec52502cd442 Mon Sep 17 00:00:00 2001 From: Alex Sweeten Date: Tue, 17 Sep 2024 11:47:21 -0400 Subject: [PATCH 5/6] Update CITATION.cff --- CITATION.cff | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 169d580..786cba5 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,11 +1,15 @@ -@article{Lisa_My_awesome_research_2021, - author = {Lisa, Mona and Bot, Hew}, - doi = {10.0000/00000}, - journal = {Journal Title}, - month = {9}, - number = {1}, - pages = {1--10}, - title = {{My awesome research software}}, - volume = {1}, - year = {2021} +@article{10.1093/bioinformatics/btae493, + author = {Sweeten, Alexander P and Schatz, Michael C and Phillippy, Adam M}, + title = "{ModDotPlot—rapid and interactive visualization of tandem repeats}", + journal = {Bioinformatics}, + volume = {40}, + number = {8}, + pages = {btae493}, + year = {2024}, + month = {08}, + abstract = "{A common method for analyzing genomic repeats is to produce a sequence similarity matrix visualized via a dot plot. Innovative approaches such as StainedGlass have improved upon this classic visualization by rendering dot plots as a heatmap of sequence identity, enabling researchers to better visualize multi-megabase tandem repeat arrays within centromeres and other heterochromatic regions of the genome. However, computing the similarity estimates for heatmaps requires high computational overhead and can suffer from decreasing accuracy.In this work, we introduce ModDotPlot, an interactive and alignment-free dot plot viewer. By approximating average nucleotide identity via a k-mer-based containment index, ModDotPlot produces accurate plots orders of magnitude faster than StainedGlass. We accomplish this through the use of a hierarchical modimizer scheme that can visualize the full 128 Mb genome of Arabidopsis thaliana in under 5 min on a laptop. ModDotPlot is bundled with a graphical user interface supporting real-time interactive navigation of entire chromosomes.ModDotPlot is available at https://github.com/marbl/ModDotPlot.}", + issn = {1367-4811}, + doi = {10.1093/bioinformatics/btae493}, + url = {https://doi.org/10.1093/bioinformatics/btae493}, + eprint = {https://academic.oup.com/bioinformatics/article-pdf/40/8/btae493/58809824/btae493.pdf}, } From e164de1afa801bdc08838ebb045c0a859668c459 Mon Sep 17 00:00:00 2001 From: alexsweeten Date: Tue, 17 Sep 2024 11:48:24 -0400 Subject: [PATCH 6/6] blakc linting --- src/moddotplot/moddotplot.py | 17 +++++++++++++---- src/moddotplot/static_plots.py | 4 +++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/moddotplot/moddotplot.py b/src/moddotplot/moddotplot.py index f37c447..368573f 100644 --- a/src/moddotplot/moddotplot.py +++ b/src/moddotplot/moddotplot.py @@ -1006,11 +1006,19 @@ def main(): if not args.no_bed: # Log saving bed file if not args.output_dir: - bedfile_output = smaller_seq_name + "_" + larger_seq_name + "_COMPARE.bed" + bedfile_output = ( + smaller_seq_name + + "_" + + larger_seq_name + + "_COMPARE.bed" + ) else: bedfile_output = os.path.join( args.output_dir, - smaller_seq_name + "_" + larger_seq_name + "_COMPARE.bed", + smaller_seq_name + + "_" + + larger_seq_name + + "_COMPARE.bed", ) with open(bedfile_output, "w") as bedfile: for row in bed: @@ -1037,8 +1045,9 @@ def main(): axes_labels=args.axes_ticks, ) - '''if args.grid: - print(grid_vals)''' + """if args.grid: + print(grid_vals)""" + if __name__ == "__main__": main() diff --git a/src/moddotplot/static_plots.py b/src/moddotplot/static_plots.py index ed1879d..3724c3a 100644 --- a/src/moddotplot/static_plots.py +++ b/src/moddotplot/static_plots.py @@ -587,7 +587,9 @@ def create_plots( verbose=False, ) if no_hist: - print(f"{plot_filename}_COMPARE.pdf and {plot_filename}_COMPARE.png saved sucessfully. \n") + print( + f"{plot_filename}_COMPARE.pdf and {plot_filename}_COMPARE.png saved sucessfully. \n" + ) else: ggsave( histy,