diff --git a/soi/OrthoFinder.py b/soi/OrthoFinder.py index 302e3da..709c812 100644 --- a/soi/OrthoFinder.py +++ b/soi/OrthoFinder.py @@ -1851,7 +1851,7 @@ def get_paralogs(OFdir, outHomo, min_support=0.5): print('\t'.join(line), file=outHomo) def gene_format_p(gene): sp, g = gene.split('|') - sp = sp[:len(sp)/2] + sp = sp[:int(len(sp)/2)] g = sp + '|' + g return (sp, g) def gene_format_common(gene): diff --git a/soi/colors.py b/soi/colors.py new file mode 100644 index 0000000..5133e9d --- /dev/null +++ b/soi/colors.py @@ -0,0 +1,56 @@ +#from reportlab.lib.units import cm +#from reportlab.lib import colors +import numpy as np +from matplotlib import cm +import matplotlib + +COLORS_HEX = ['#f9c00c', '#00b9f1', '#7200da', '#f9320c', '#00b8a9', "#F4A460", '#009999', '#00C02E', + '#980000','#00ffff','#0000ff','#ff0000','#4a86e8','#ff9900','#ffff00', + '#00ff00','#9900ff','#ff00ff','#20124d','#274e13','#000000','#cccccc', + '#7f6000','#a64d79','#6aa84f','#fff2cc','#47a952','#3ea6b6','#a5b805','#8f9276','#ca8d7c'] +#white = colors.white +white = '#FFFFFF' + +class HexColors: + def __init__(self, colors_hex=None): + if colors_hex is None: + colors_hex = COLORS_HEX + elif isinstance(colors_hex, str): + colors_hex = colors_hex.split(',') + self.colors_hex = colors_hex + @property + def colors_rgb(self): + colors_lst = [colors.HexColor(v) for v in self.colors_hex] + return [','.join(map(str, v.bitmap_rgb())) for v in colors_lst] + @property + def colors_r(self): + return 'c({})'.format(','.join(map(repr, self.colors_hex))) + def __str__(self): + return str(self.colors_hex) + def __repr__(self): + return str(self.colors_hex) + +class Colors: + def __init__(self, n): + self.n = n + self.colors = self.get_colors() + + def get_colors(self): + if self.n <=10: + colors = cm.get_cmap('tab10') + elif self.n <= 20: + colors = cm.get_cmap('tab20') + else: + colors = cm.get_cmap('viridis', self.n) + colors = colors(np.linspace(0, 1, self.n)) + return colors + def to_hex(self): + colors_hex = [matplotlib.colors.to_hex(c) for c in self.colors] + return colors_hex + def to_rgb(self): + colors_hex = self.to_hex() + colors_lst = [colors.HexColor(v) for v in colors_hex] + colors_rgb = [','.join(map(str, v.bitmap_rgb())) for v in colors_lst] + return colors_rgb + + diff --git a/soi/evaluator.py b/soi/evaluator.py new file mode 100644 index 0000000..38de446 --- /dev/null +++ b/soi/evaluator.py @@ -0,0 +1,155 @@ +import sys +import numpy as np +import matplotlib.pyplot as plt +import matplotlib as mpl +from matplotlib.gridspec import GridSpec +mpl.use("Agg") +mpl.rcParams['pdf.fonttype'] = 42 + +from lazy_property import LazyWritableProperty as lazyproperty + +from .mcscan import Collinearity, Gff, XCollinearity +from .colors import Colors + +def eval(collinearities, gff, ref, pre=None): + d_rcs = {} + for rc in XCollinearity(collinearities, gff=gff): + rc.fr = rc.fractionation_rate(ref=ref) + if rc.fr is None: + continue + sp1, sp2 = rc.species1, rc.species2 + if sp1 == sp2: + continue + if sp2 == ref: + sp1, sp2 = sp2, sp1 + xrc = SynRec(rc) + try: d_rcs[sp2] += [xrc] + except KeyError: d_rcs[sp2] = [xrc] + + for sp, rcs in d_rcs.items(): + d_rcs[sp] = SynRecs(rcs) + outfig = 'test.png' + plot_eval(d_rcs, outfig) +def main(): + collinearities, gff, ref = sys.argv[1:4] + eval(collinearities, gff, ref) +def plot_eval(d_rcs, outfig, legend_fontsize=9): + fig = plt.figure(figsize=(10,10)) + gs = GridSpec(3, 7) + + ax0 = fig.add_subplot(gs[:, 6]) # legend + ax1 = fig.add_subplot(gs[0, 0:3]) # accumulation N + ax2 = fig.add_subplot(gs[1, 0:3]) # decay N + ax3 = fig.add_subplot(gs[2, 0:3]) # histo of fr + ax4 = fig.add_subplot(gs[0, 3:6]) # dots + ax5 = fig.add_subplot(gs[1, 3:6]) # dots of 50 + ax6 = fig.add_subplot(gs[2, 3:6]) # bar + + n = len(d_rcs) + colors = Colors(n).colors + for i, (sp, rcs) in enumerate(sorted(d_rcs.items())): + _sp = convert_sp(sp) + i50, sn50, fn50 = rcs.xn50 + fm, sm = np.median(rcs.extended_frs), np.median(rcs.extended_ns) + ns = rcs.ns + nb = len(ns) + kargs = dict(color=colors[i], alpha=0.9, label=_sp) + ax0.plot(-2, -2, linestyle='-', marker='o', **kargs) +# ax0.barh(0, 0, height=0, left=0, align='center', label=_sp) + + ax1.plot(range(1, nb+1), np.cumsum(ns), drawstyle="steps-post", **kargs) + + ax2.plot(range(1, nb+1), ns, drawstyle="steps-post", **kargs) +# ax2.scatter(i50, sn50, label=_sp) + + ax3.scatter(len(rcs.ns), sum(rcs.ns), **kargs) + ax4.hist(rcs.extended_frs, bins=40, histtype='step', **kargs) +# ax4.scatter(rcs.frs, rcs.ns, alpha=0.6, label=_sp) +# ax5.scatter(fn50, sn50, alpha=0.8, label=_sp) + ax5.scatter(fm, sn50, **kargs) + +# ax5.hist(rcs.extended_ns, bins=30, histtype='step', label=_sp) +# ax5.scatter(np.mean(rcs.extended_frs), np.mean(rcs.extended_ns), label=_sp) + ax6.scatter(fm, sm, **kargs) +# print(sn50, sm) +# set_legends([ax1, ax2, ax3,ax4,ax5,ax6], fontsize=legend_fontsize) +# ax0.legend(loc='upper left',fancybox=False, frameon=False, ncols=ncols) + for ax, xlab, ylab in ( + [ax1, '# of blocks', 'Cumulative number of genes'], + [ax2, '# of blocks', 'Number of genes'], + [ax3, 'Total number of blocks', 'Total number of genes'], + [ax4, 'Fractionation rate', 'Number of genes'], + [ax5, 'Fractionation rate', 'Block size'], + [ax6, 'Fractionation rate', 'Block size'], + ): + set_labels(ax, xlab, ylab) + + #legend + ncols = n // 30 + 1 + ax0.set_xlim(0,1) + ax0.set_ylim(0,1) + ax0.legend(loc=(-1.5, 0),fancybox=False, frameon=False, ncols=ncols) + ax0.xaxis.set_tick_params(length=0) + ax0.spines['right'].set_color('none') + ax0.spines['top'].set_color('none') + ax0.spines['left'].set_color('none') + ax0.spines['bottom'].set_color('none') + ax0.xaxis.set_ticks([]) + ax0.yaxis.set_ticks([]) +# ax0.set_title('Species', loc='left') + + plt.subplots_adjust(hspace=0.25, wspace=1.8) + plt.savefig(outfig, bbox_inches='tight') +def set_labels(ax, xlab, ylab, **kargs): + ax.set_xlabel(xlab, **kargs) + ax.set_ylabel(ylab, **kargs) + +def set_legends(axs, **kargs): + for ax in axs: + ax.legend(**kargs) +def convert_sp(sp): + return '$' + sp.replace('_', '~') + '$' +class SynRec: + def __init__(self, rc): + self.N = rc.N + self.fr = rc.fr +class SynRecs: + def __init__(self, rcs): + self.rcs = rcs + @lazyproperty + def matrix(self): + data = [] + for rc in self.rcs: + data += [[rc.N, rc.fr]] + return np.array(sorted(data, key=lambda x:-x[0])) + @lazyproperty + def ns(self): + return self.matrix[:, 0] + @lazyproperty + def frs(self): + return self.matrix[:, 1] + def get_nx0(self, cutoff=50): + self.size = sum(self.ns) + accum = 0 + for i, x in enumerate(self.ns): + accum += x + if 1e2 * accum / self.size >= cutoff: + return x, i+1 + @lazyproperty + def xn50(self): + sn50,i50 = self.get_nx0() + fn50 = list(sorted(self.frs, reverse=1))[i50 -1] + return i50, sn50, fn50 + @lazyproperty + def extended_frs(self): + frs = [] + for n, fr in self.matrix: + frs += [fr]* int(n) + return frs + @lazyproperty + def extended_ns(self): + nrs = [] + for n in self.ns: + nrs += [n] * int(n) + return nrs + diff --git a/soi/mcscan.py b/soi/mcscan.py index 0e11436..9af2355 100644 --- a/soi/mcscan.py +++ b/soi/mcscan.py @@ -623,7 +623,7 @@ def fractionation_rate(self, ref=None, both=False): l1, l2 = ie1 - is1 + 1, ie2-is2+1 if both: return 2.0*self.N / (l1+l2) - return 1.0*rc.N / l1 + return 1.0*self.N / l1 @property def good_ks(self): return [ks for ks in self.ks if ks is not None]