Skip to content

Commit

Permalink
more options in dot plots
Browse files Browse the repository at this point in the history
  • Loading branch information
heche-psb authored and heche-psb committed Jun 18, 2023
1 parent a52b718 commit 6f670f7
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 27 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,10 @@ wgd syn families gffs (option)
-ac, --ancestor, the assumed ancestor species, default None, a option that is still in development
-mg, --minseglen, the minimum length of segments to include in ratio if <= 1, default 100000
-kr, --keepredun, flag option, whether to keep redundant multiplicons, if the flag was set, the redundant multiplicons will be kept
-mgn, --mingenenum, the minimum number of genes for a segment to be considered, default 30
-ds, --dotsize, the dot size in dot plot, default 1
-aa, --apalpha, the opacity of anchor dots, default 1
-ha, --hoalpha, the opacity of homolog dots, default 0.1
```

The program `wgd viz` can realize the visualization of *K*<sub>S</sub> age distribution and synteny.
Expand Down Expand Up @@ -272,6 +276,11 @@ wgd viz (option)
-pag, --plotapgmm, flag option, whether to plot mixture modeling of anchor Ks in the mixed Ks distribution, if the flag was set, the mixture modeling of anchor Ks will be plotted
-pem, --plotelmm, flag option, whether to plot elmm mixture modeling of paranome Ks in the mixed Ks distribution, if the flag was set, the elmm mixture modeling of paranome Ks will be plotted
-c, --components, the range of the number of components to fit in anchor Ks mixture modeling, default (1,4)
-mgn, --mingenenum, the minimum number of genes for a segment to be considered, default 30
-psy, --plotsyn, flag option, whether to initiate the synteny plot, if the flag was set, the synteny plot will be produced
-ds, --dotsize, the dot size in dot plot, default 1
-aa, --apalpha, the opacity of anchor dots, default 1
-ha, --hoalpha, the opacity of homolog dots, default 0.1
```

## Usage
Expand Down Expand Up @@ -436,6 +445,8 @@ The dot plots without *K*<sub>S</sub> annotation will also be automately produce

![](data/Aquilegia_coerulea-vs-Aquilegia_coerulea.dot.png)

Note that the opacity of anchor dots and all homolog dots can be set by the option `apalpha` and `hoalpha` separately. If one just wants to see the anchor dots, setting the `hoalpha` as 0 (or other minuscule values) will do. If one wants to see the distribution of whole dots better, setting the `hoalpha` higher (and `apalpha` lower) will do. The `dotsize` option can be called to adjust the size of dots.

A further associated Syndepth plot shows that there are more than 80 duplicated segments, which dominates the whole collinear ratio category.

![](data/Syndepth.svg)
Expand Down
26 changes: 16 additions & 10 deletions cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,15 +521,18 @@ def _ksd(families, sequences, outdir, tmpdir, nthreads, to_stop, cds, pairwise,
@click.option('--plotapgmm', '-pag', is_flag=True, help='plot mixture modeling of anchor pairs')
@click.option('--plotelmm', '-pem', is_flag=True, help='plot elmm mixture modeling')
@click.option('--components', '-n', nargs=2, default=(1, 4), show_default=True, help="range of number of components to fit")
@click.option('--mingenenum', '-mgn', default=30, type=int, show_default=True, help="min number of genes on segments to be considered")
@click.option('--mingenenum', '-mgn', default=30, type=int, show_default=True, help="minimum number of genes on segments to be considered")
@click.option('--plotsyn', '-psy', is_flag=True, help='plot synteny')
@click.option('--dotsize', '-ds', type=float, default=1, show_default=True, help='size of dots')
@click.option('--apalpha', '-aa', type=float, default=1, show_default=True, help='opacity of anchor dots')
@click.option('--hoalpha', '-ha', type=float, default=0.1, show_default=True, help='opacity of homolog dots')
def viz(**kwargs):
"""
Visualization of Ks distribution or synteny
"""
_viz(**kwargs)

def _viz(datafile,spair,outdir,gsmap,plotkde,reweight,em_iterations,em_initializations,prominence_cutoff,segments,minlen,maxsize,anchorpoints,multiplicon,genetable,rel_height,speciestree,onlyrootout,minseglen,keepredun,extraparanomeks,plotapgmm,plotelmm,components,mingenenum,plotsyn):
def _viz(datafile,spair,outdir,gsmap,plotkde,reweight,em_iterations,em_initializations,prominence_cutoff,segments,minlen,maxsize,anchorpoints,multiplicon,genetable,rel_height,speciestree,onlyrootout,minseglen,keepredun,extraparanomeks,plotapgmm,plotelmm,components,mingenenum,plotsyn,dotsize,apalpha,hoalpha):
from wgd.viz import elmm_plot, apply_filters, multi_sp_plot, default_plot,all_dotplots,filter_by_minlength,dotplotunitgene,dotplotingene,filter_mingenumber
from wgd.core import _mkdir
from wgd.syn import get_anchors,get_multi,get_segments_profile,get_chrom_gene,get_mp_geneorder,transformunit
Expand All @@ -552,13 +555,13 @@ def _viz(datafile,spair,outdir,gsmap,plotkde,reweight,em_iterations,em_initializ
segs,table,df_multi,removed_scfa = filter_by_minlength(table,segs,minlen,df_multi,keepredun,outdir,minseglen)
segs_gene_unit, gene_order_dict_allsp = transformunit(segs,ordered_genes_perchrom_allsp,outdir)
segs = filter_mingenumber(segs_gene_unit,mingenenum)
dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_orders,anchor=df_anchor,ksdf=df,maxsize=maxsize)
dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_orders,anchor=df_anchor,ksdf=df,maxsize=maxsize,dotsize=dotsize, apalpha=apalpha, hoalpha=hoalpha)
#dotplotunitgene(ordered_genes_perchrom_allsp,segs_gene_unit,removed_scfa,outdir,mingenenum,table_orig,ordered_mp,ksdf=df)
figs = all_dotplots(table, segs, df_multi, minseglen, anchors=df_anchor, maxsize=maxsize, minlen=minlen, outdir=outdir, Ks = df)
figs = all_dotplots(table, segs, df_multi, minseglen, anchors=df_anchor, maxsize=maxsize, minlen=minlen, outdir=outdir, Ks = df, dotsize=dotsize, apalpha=apalpha, hoalpha=hoalpha)
for k, v in figs.items():
v.savefig(os.path.join(outdir, "{}.dot.svg".format(k)))
v.savefig(os.path.join(outdir, "{}.dot.pdf".format(k)))
v.savefig(os.path.join(outdir, "{}.dot.png".format(k)),dpi=1200)
v.savefig(os.path.join(outdir, "{}.dot.png".format(k)),dpi=500)
logging.info('Done')
exit()
ksdb_df = pd.read_csv(datafile,header=0,index_col=0,sep='\t')
Expand Down Expand Up @@ -599,15 +602,18 @@ def _viz(datafile,spair,outdir,gsmap,plotkde,reweight,em_iterations,em_initializ
@click.option('--ancestor', '-ac', default=None,show_default=True,help='assumed ancestor species')
@click.option('--minseglen', '-mg', default=10000, show_default=True, help="min length of segments in ratio if <= 1")
@click.option('--keepredun', '-kr', is_flag=True, help='keep redundant multiplicons')
@click.option('--mingenenum', '-mgn', default=30, type=int, show_default=True, help="min number of genes on segments to be considered")
@click.option('--mingenenum', '-mgn', default=30, type=int, show_default=True, help="minimum number of genes on segments to be considered")
@click.option('--dotsize', '-ds', type=float, default=1, show_default=True, help='size of dots')
@click.option('--apalpha', '-aa', type=float, default=1, show_default=True, help='opacity of anchor dots')
@click.option('--hoalpha', '-ha', type=float, default=0.1, show_default=True, help='opacity of homolog dots')
def syn(**kwargs):
"""
Co-linearity and anchor inference using I-ADHoRe.
"""
_syn(**kwargs)

def _syn(families, gff_files, ks_distribution, outdir, feature, attribute,
minlen, maxsize, ks_range, iadhore_options, ancestor, minseglen, keepredun, mingenenum):
minlen, maxsize, ks_range, iadhore_options, ancestor, minseglen, keepredun, mingenenum, dotsize, apalpha, hoalpha):
"""
Co-linearity and anchor inference using I-ADHoRe.
"""
Expand Down Expand Up @@ -664,15 +670,15 @@ def _syn(families, gff_files, ks_distribution, outdir, feature, attribute,
ksdb_df = pd.read_csv(ks_distribution,header=0,index_col=0,sep='\t')
ksdb_df = formatv2(ksdb_df)
df_ks = apply_filters(ksdb_df, [("dS", 0., 5.)])
dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_orders,anchor=anchors,ksdf=df_ks,maxsize=maxsize)
dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_orders,anchor=anchors,ksdf=df_ks,maxsize=maxsize,dotsize=dotsize,apalpha=apalpha, hoalpha=hoalpha)
#dotplotunitgene(ordered_genes_perchrom_allsp,segs_gene_unit,removed_scfa,outdir,mingenenum,table_orig,ordered_mp,ksdf=df_ks)
# dotplot
#logging.info("Generating dot plots")
figs = all_dotplots(table, segs, multi, minseglen, anchors=anchors, maxsize=maxsize, minlen=minlen, outdir=outdir, ancestor=ancestor, Ks = df_ks)
figs = all_dotplots(table, segs, multi, minseglen, anchors=anchors, maxsize=maxsize, minlen=minlen, outdir=outdir, ancestor=ancestor, Ks = df_ks, dotsize=dotsize, apalpha=apalpha, hoalpha=hoalpha)
for k, v in figs.items():
v.savefig(os.path.join(outdir, "{}.dot.svg".format(k)))
v.savefig(os.path.join(outdir, "{}.dot.pdf".format(k)))
v.savefig(os.path.join(outdir, "{}.dot.png".format(k)))
v.savefig(os.path.join(outdir, "{}.dot.png".format(k)),dpi=500)
plt.close()

# anchor Ks distributions
Expand Down
Binary file modified data/Aquilegia_coerulea-vs-Aquilegia_coerulea.dot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified data/Aquilegia_coerulea-vs-Aquilegia_coerulea.dot_unit_gene.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified data/Aquilegia_coerulea-vs-Aquilegia_coerulea_Ks.dot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

setup(
name='wgd',
version='2.0.18',
version='2.0.19',
packages=['wgd'],
url='http://github.com/heche-psb/wgd',
license='GPL',
Expand Down
32 changes: 16 additions & 16 deletions wgd/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -1784,7 +1784,7 @@ def getpairks(pair,ksdf):
Ks_dict = {pair:ks for pair,ks in zip(ksdf.index,ksdf['dS'])}
return Ks_dict.get(pair,None)

def plotdp_ig(ax,dfx,dfy,spx,spy,table,gene_orders,anchor=None,ksdf=None,maxsize=200,showks=False):
def plotdp_ig(ax,dfx,dfy,spx,spy,table,gene_orders,anchor=None,ksdf=None,maxsize=200,showks=False,dotsize=0.8,apalpha=1, hoalpha=0.1):
dfx,dfy = dfx.set_index('Coordinates'),dfy.set_index('Coordinates')
leng_info_x,leng_info_y = {},{}
gene_list = {gene:li for gene,li in zip(table.index,table['scaffold'])}
Expand Down Expand Up @@ -1842,11 +1842,11 @@ def plotdp_ig(ax,dfx,dfy,spx,spy,table,gene_orders,anchor=None,ksdf=None,maxsize
s_m = ScalarMappable(cmap=c_m, norm=norm)
s_m.set_array([])
if not showks:
ax.scatter(xs, ys, s=0.4, color = 'k', alpha=0.01)
ax.scatter(xs_ap, ys_ap, s=0.4, color = 'r', alpha=1)
ax.scatter(xs, ys, s=dotsize, color = 'k', alpha=hoalpha)
ax.scatter(xs_ap, ys_ap, s=dotsize, color = 'r', alpha=apalpha)
else:
ax.scatter(xs, ys, s=0.4, color=[c_m(norm(c)) for c in co], alpha=0.01)
ax.scatter(xs_ap, ys_ap, s=0.4, color=[c_m(norm(c)) for c in co_ap], alpha=1)
ax.scatter(xs, ys, s=dotsize, color=[c_m(norm(c)) for c in co], alpha=hoalpha)
ax.scatter(xs_ap, ys_ap, s=dotsize, color=[c_m(norm(c)) for c in co_ap], alpha=apalpha)
#ax.scatter(xs_ap, ys_ap, s=0.4, alpha=0.5)
xlim,ylim = xtick[-1],ytick[-1]
ax.set_xlim(0, xlim)
Expand Down Expand Up @@ -2067,15 +2067,15 @@ def dotplotunitgene(ordered_genes_perchrom_allsp,segs,removed_scfa,outdir,mingen
fname = os.path.join(outdir, "{}.line_unit_gene.svg".format(prefix))
fig.savefig(fname)

def plotdotplotingene(spx,spy,table,removed_scfa,ordered_genes_perchrom_allsp,gene_orders,anchor=None,ksdf=None,maxsize=200,showks=False):
def plotdotplotingene(spx,spy,table,removed_scfa,ordered_genes_perchrom_allsp,gene_orders,anchor=None,ksdf=None,maxsize=200,showks=False,dotsize=0.8, apalpha=1, hoalpha=0.1):
fig, ax = plt.subplots(1, 1, figsize=(10,10))
dfx = ordered_genes_perchrom_allsp[spx].copy().drop(removed_scfa[spx],axis=1)
dfy = ordered_genes_perchrom_allsp[spy].copy().drop(removed_scfa[spy],axis=1)
ax = plotdp_ig(ax,dfx,dfy,spx,spy,table,gene_orders,anchor=anchor,ksdf=ksdf,maxsize=maxsize,showks=showks)
ax = plotdp_ig(ax,dfx,dfy,spx,spy,table,gene_orders,anchor=anchor,ksdf=ksdf,maxsize=maxsize,showks=showks,dotsize=dotsize, apalpha=apalpha, hoalpha=hoalpha)
fig.tight_layout()
return fig, ax

def dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_orders,anchor=None,ksdf=None,maxsize=200):
def dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_orders,anchor=None,ksdf=None,maxsize=200,dotsize=0.8, apalpha=1, hoalpha=0.1):
sp_list = list(ordered_genes_perchrom_allsp.keys())
gene_list = {gene:li for gene,li in zip(table.index,table['scaffold'])}
gene_genome = {gene:sp for gene,sp in zip(table.index,table['species'])}
Expand All @@ -2085,22 +2085,22 @@ def dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_or
for j in range(i,len(sp_list)):
spx,spy = sp_list[i],sp_list[j]
logging.info("{0} vs. {1}".format(spx,spy))
fig, ax = plotdotplotingene(spx,spy,table,removed_scfa,ordered_genes_perchrom_allsp,gene_orders,anchor=anchor,ksdf=ksdf)
fig, ax = plotdotplotingene(spx,spy,table,removed_scfa,ordered_genes_perchrom_allsp,gene_orders,anchor=anchor,ksdf=ksdf,dotsize=dotsize,apalpha=apalpha, hoalpha=hoalpha)
figs[spx + "-vs-" + spy] = fig
if not (ksdf is None):
figks, ax = plotdotplotingene(spx,spy,table,removed_scfa,ordered_genes_perchrom_allsp,gene_orders,anchor=anchor,ksdf=ksdf,showks=True)
figks, ax = plotdotplotingene(spx,spy,table,removed_scfa,ordered_genes_perchrom_allsp,gene_orders,anchor=anchor,ksdf=ksdf,showks=True,dotsize=dotsize, apalpha=apalpha, hoalpha=hoalpha)
figs[spx + "-vs-" + spy + "_Ks"] = figks
for prefix, fig in figs.items():
fname = os.path.join(outdir, "{}.dot_unit_gene.svg".format(prefix))
fig.savefig(fname)
fname = os.path.join(outdir, "{}.dot_unit_gene.png".format(prefix))
fig.savefig(fname,dpi=1200)
fig.savefig(fname,dpi=500)
fname = os.path.join(outdir, "{}.dot_unit_gene.pdf".format(prefix))
fig.savefig(fname)
plt.close()

# dot plot stuff
def all_dotplots(df, segs, multi, minseglen, anchors=None, ancestor=None, Ks=None, **kwargs):
def all_dotplots(df, segs, multi, minseglen, anchors=None, ancestor=None, Ks=None, dotsize = 0.8, apalpha=1, hoalpha=0.1, **kwargs):
"""
Generate dot plots for all pairs of species in `df`, coloring anchor pairs.
"""
Expand Down Expand Up @@ -2145,7 +2145,7 @@ def all_dotplots(df, segs, multi, minseglen, anchors=None, ancestor=None, Ks=Non
continue
#for x,y in zip(df.x,df.y): ax.scatter(x, y, s=1, color="k", alpha=0.1)
#print((len(list(itertools.chain(df.x))),len(list(itertools.chain(df.y)))))
ax.scatter(list(itertools.chain(df.x)), list(itertools.chain(df.y)), s=0.4, color="k", alpha=0.01)
ax.scatter(list(itertools.chain(df.x)), list(itertools.chain(df.y)), s=dotsize, color="k", alpha=hoalpha)
if not (Ks is None):
for i,x,y in zip(df.index,df['x'],df['y']):
ksage = ks_dict.get(i,None)
Expand All @@ -2159,7 +2159,7 @@ def all_dotplots(df, segs, multi, minseglen, anchors=None, ancestor=None, Ks=Non
if not (anchors is None):
andf = df.join(anchors, how="inner")
#for x,y in zip(andf.x,andf.y): ax.scatter(x, y, s=1, color="r", alpha=0.9)
ax.scatter(list(itertools.chain(andf.x)), list(itertools.chain(andf.y)), s=0.4, color="r", alpha=1)
ax.scatter(list(itertools.chain(andf.x)), list(itertools.chain(andf.y)), s=dotsize, color="r", alpha=apalpha)
if not (Ks is None):
for i,x,y in zip(andf.index,andf['x'],andf['y']):
ksage = ks_dict.get(i,None)
Expand Down Expand Up @@ -2197,8 +2197,8 @@ def all_dotplots(df, segs, multi, minseglen, anchors=None, ancestor=None, Ks=Non
c_m = matplotlib.cm.rainbow
s_m = ScalarMappable(cmap=c_m, norm=norm)
s_m.set_array([])
axks.scatter(xxs, yys, s=0.4, color=[c_m(norm(c)) for c in ksages], alpha=0.01)
axks.scatter(xxs_ap, yys_ap, s=0.4, color=[c_m(norm(c)) for c in ksages_ap], alpha=1)
axks.scatter(xxs, yys, s=dotsize, color=[c_m(norm(c)) for c in ksages], alpha=hoalpha)
axks.scatter(xxs_ap, yys_ap, s=dotsize, color=[c_m(norm(c)) for c in ksages_ap], alpha=apalpha)
axks.set_xlim(0, xlim)
axks.set_ylim(0, ylim)
axks.vlines(xs+[xmax], ymin=0, ymax=ylim, alpha=0.8, color="k")
Expand Down

0 comments on commit 6f670f7

Please sign in to comment.