Skip to content

Commit

Permalink
Merge pull request #34 from marbl/develop
Browse files Browse the repository at this point in the history
v0.8.6 release
  • Loading branch information
alexsweeten authored Sep 17, 2024
2 parents 45e5cbe + 002ffdf commit ce673f3
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 25 deletions.
29 changes: 15 additions & 14 deletions CITATION.cff
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@

@article{Sweeten2024.04.15.589623,
abstract = {Motivation: A common method for analyzing genomic repeats is to produce a sequence similarity matrix visualized via a dot plot. Innovative approaches such as StainedGlass have improved upon this classic visualization by rendering dot plots as a heatmap of sequence identity, enabling researchers to better visualize multi-megabase tandem repeat arrays within centromeres and other heterochromatic regions of the genome. However, computing the similarity estimates for heatmaps requires high computational overhead and can suffer from decreasing accuracy. Results: In this work we introduce ModDotPlot, an interactive and alignment-free dot plot viewer. By approximating average nucleotide identity via a k-mer-based containment index, ModDotPlot produces accurate plots orders of magnitude faster than StainedGlass. We accomplish this through the use of a hierarchical modimizer scheme that can visualize the full 128 Mbp genome of Arabidopsis thaliana in under 5 minutes on a laptop. ModDotPlot is implemented in Python with a graphical user interface supporting real-time interactive navigation of entire chromosomes. Availability and Implementation: ModDotPlot is available at https://github.com/marbl/ModDotPlot. Competing Interest Statement: The authors have declared no competing interest.},
author = {Alexander P. Sweeten and Michael C. Schatz and Adam M. Phillippy},
doi = {10.1101/2024.04.15.589623},
elocation-id = {2024.04.15.589623},
eprint = {https://www.biorxiv.org/content/early/2024/04/19/2024.04.15.589623.full.pdf},
journal = {bioRxiv},
publisher = {Cold Spring Harbor Laboratory},
title = {ModDotPlot - Rapid and interactive visualization of complex repeats},
url = {https://www.biorxiv.org/content/early/2024/04/19/2024.04.15.589623},
year = {2024},
bdsk-url-1 = {https://www.biorxiv.org/content/early/2024/04/19/2024.04.15.589623},
bdsk-url-2 = {https://doi.org/10.1101/2024.04.15.589623}}
@article{10.1093/bioinformatics/btae493,
author = {Sweeten, Alexander P and Schatz, Michael C and Phillippy, Adam M},
title = "{ModDotPlot—rapid and interactive visualization of tandem repeats}",
journal = {Bioinformatics},
volume = {40},
number = {8},
pages = {btae493},
year = {2024},
month = {08},
abstract = "{A common method for analyzing genomic repeats is to produce a sequence similarity matrix visualized via a dot plot. Innovative approaches such as StainedGlass have improved upon this classic visualization by rendering dot plots as a heatmap of sequence identity, enabling researchers to better visualize multi-megabase tandem repeat arrays within centromeres and other heterochromatic regions of the genome. However, computing the similarity estimates for heatmaps requires high computational overhead and can suffer from decreasing accuracy. In this work, we introduce ModDotPlot, an interactive and alignment-free dot plot viewer. By approximating average nucleotide identity via a k-mer-based containment index, ModDotPlot produces accurate plots orders of magnitude faster than StainedGlass. We accomplish this through the use of a hierarchical modimizer scheme that can visualize the full 128 Mb genome of Arabidopsis thaliana in under 5 min on a laptop. ModDotPlot is bundled with a graphical user interface supporting real-time interactive navigation of entire chromosomes. ModDotPlot is available at https://github.com/marbl/ModDotPlot.}",
issn = {1367-4811},
doi = {10.1093/bioinformatics/btae493},
url = {https://doi.org/10.1093/bioinformatics/btae493},
eprint = {https://academic.oup.com/bioinformatics/article-pdf/40/8/btae493/58809824/btae493.pdf},
}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "ModDotPlot"
version = "0.8.5"
version = "0.8.6"
requires-python = ">= 3.7"
dependencies = [
"pysam",
Expand Down
2 changes: 1 addition & 1 deletion src/moddotplot/const.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
VERSION = "0.8.5"
VERSION = "0.8.6"
COLS = [
"#query_name",
"query_start",
Expand Down
23 changes: 18 additions & 5 deletions src/moddotplot/moddotplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,7 @@ def main():
smaller_length = len(smaller_seq)
larger_seq_name = sequences[i][0]
smaller_seq_name = sequences[j][0]
# Houston we have a problem

if larger_length < smaller_length:
smaller_seq = sequences[i][1]
larger_seq = sequences[j][1]
Expand Down Expand Up @@ -1000,15 +1000,25 @@ def main():
seq_list[j],
False,
)
if args.grid:
grid_vals.append(bed)

if not args.no_bed:
# Log saving bed file
if not args.output_dir:
bedfile_output = sequences[i][0] + ".bed"
bedfile_output = (
smaller_seq_name
+ "_"
+ larger_seq_name
+ "_COMPARE.bed"
)
else:
bedfile_output = os.path.join(
args.output_dir,
sequences[i][0] + "_" + sequences[j][0] + ".bed",
smaller_seq_name
+ "_"
+ larger_seq_name
+ "_COMPARE.bed",
)
with open(bedfile_output, "w") as bedfile:
for row in bed:
Expand All @@ -1019,8 +1029,8 @@ def main():
create_plots(
sdf=[bed],
directory=args.output_dir if args.output_dir else ".",
name_x=sequences[i][0],
name_y=sequences[j][0],
name_x=smaller_seq_name,
name_y=larger_seq_name,
palette=args.palette,
palette_orientation=args.palette_orientation,
no_hist=args.no_hist,
Expand All @@ -1035,6 +1045,9 @@ def main():
axes_labels=args.axes_ticks,
)

"""if args.grid:
print(grid_vals)"""


if __name__ == "__main__":
main()
10 changes: 6 additions & 4 deletions src/moddotplot/static_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ def create_plots(
height=9,
dpi=dpi,
format="pdf",
filename=f"{plot_filename}.pdf",
filename=f"{plot_filename}_COMPARE.pdf",
verbose=False,
)
ggsave(
Expand All @@ -583,11 +583,13 @@ def create_plots(
height=9,
dpi=dpi,
format="png",
filename=f"{plot_filename}.png",
filename=f"{plot_filename}_COMPARE.png",
verbose=False,
)
if no_hist:
print(f"{plot_filename}.pdf and {plot_filename}.png saved sucessfully. \n")
print(
f"{plot_filename}_COMPARE.pdf and {plot_filename}_COMPARE.png saved sucessfully. \n"
)
else:
ggsave(
histy,
Expand All @@ -608,7 +610,7 @@ def create_plots(
verbose=False,
)
print(
f"{plot_filename}.pdf, {plot_filename}.png, {plot_filename}_HIST.pdf and {plot_filename}_HIST.png saved sucessfully. \n"
f"{plot_filename}_COMPARE.pdf, {plot_filename}_COMPARE.png, {plot_filename}_HIST.pdf and {plot_filename}_HIST.png saved sucessfully. \n"
)
# Self-identity plots: Output _TRI, _FULL, and _HIST
else:
Expand Down

0 comments on commit ce673f3

Please sign in to comment.