Skip to content

Commit

Permalink
fic bug
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangrengang committed Oct 22, 2024
1 parent aed2cfe commit 438c1d7
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 27 deletions.
4 changes: 2 additions & 2 deletions soi/RunCmdsMP.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def run_tasks(cmd_list, tc_tasks=None, mode='grid', grid_opts='', cpu=1, mem='1g
# if completed?
return len(uncmp)
def avail_cpu(cpu):
cpu_count = multiprocessing.cpu_count()
cpu_count = len(os.sched_getaffinity(0)) #multiprocessing.cpu_count()
return max(1, int(1.0*cpu_count/cpu))

d_mem = {'':1e1, 'k':1e3, 'm':1e6, 'g':1e9, 't':1e12}
Expand Down Expand Up @@ -356,7 +356,7 @@ def pool_func(func, iterable, processors=8, method=None, ordered=True, imap=Fals
def pool_run(cmd_list, processors=8, log=True, logger=None, **kargs):
try: processors = int(processors)
except (TypeError,ValueError):
processors = multiprocessing.cpu_count()
processors = len(os.sched_getaffinity(0)) #multiprocessing.cpu_count()
iterable = ((cmd, log, logger) for cmd in cmd_list)
return [ returned for returned in pool_func(_run_cmd, iterable, processors=processors, **kargs) ]

Expand Down
2 changes: 1 addition & 1 deletion soi/dot_plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def main(args):
same_sp = True if chrs1 == chrs2 else False
blocks, lines1, lines2, ortholog_graph,chrs1, chrs2, d_offset1, d_offset2 = parse_collinearity(
collinearity, gff, chrs1, chrs2, kaks, args.homology,
source = args.source, use_frac=args.use_frac,
source = args.source, #use_frac=args.use_frac,
ofdir=args.ofdir, of_ratio=args.of_ratio, of_color=args.of_color,
hide_blocks = args.hide_blocks, use_median=args.use_median,
lower_ks=args.lower_ks, upper_ks=args.upper_ks,
Expand Down
75 changes: 57 additions & 18 deletions soi/evaluator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
import numpy as np
import collections
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.gridspec import GridSpec
Expand All @@ -11,31 +12,39 @@
from .mcscan import Collinearity, Gff, XCollinearity
from .colors import Colors

def eval(collinearities, gff, ref, pre=None):
def eval(collinearities, orthologs, gff, ref, pre=None):
d_rcs = {}
for rc in XCollinearity(collinearities, gff=gff):
d_refgenes = {}
for rc in XCollinearity(collinearities, orthologs=orthologs, gff=gff):
rc.fr = rc.fractionation_rate(ref=ref)
if rc.fr is None:
continue
sp1, sp2 = rc.species1, rc.species2
genes1, genes2 = rc.genes
if sp1 == sp2:
continue
if sp2 == ref:
sp1, sp2 = sp2, sp1
genes1, genes2 = genes2, genes1
xrc = SynRec(rc)
try: d_rcs[sp2] += [xrc]
except KeyError: d_rcs[sp2] = [xrc]

try: d_refgenes[sp2] += genes1
except KeyError: d_refgenes[sp2] = genes1

for sp, rcs in d_rcs.items():
d_rcs[sp] = SynRecs(rcs)
rcs = SynRecs(rcs)
counts = collections.Counter(d_refgenes[sp])
rcs.refcounts = np.array(sorted(collections.Counter(counts.values()).items()))
d_rcs[sp] = rcs
outfig = 'test.png'
plot_eval(d_rcs, outfig)
def main():
collinearities, gff, ref = sys.argv[1:4]
eval(collinearities, gff, ref)
collinearities, orthologs, gff, ref = sys.argv[1:5]
eval(collinearities, orthologs, gff, ref)
def plot_eval(d_rcs, outfig, legend_fontsize=9):
fig = plt.figure(figsize=(10,10))
gs = GridSpec(3, 7)
fig = plt.figure(figsize=(10,12))
gs = GridSpec(4, 7)

ax0 = fig.add_subplot(gs[:, 6]) # legend
ax1 = fig.add_subplot(gs[0, 0:3]) # accumulation N
Expand All @@ -44,16 +53,20 @@ def plot_eval(d_rcs, outfig, legend_fontsize=9):
ax4 = fig.add_subplot(gs[0, 3:6]) # dots
ax5 = fig.add_subplot(gs[1, 3:6]) # dots of 50
ax6 = fig.add_subplot(gs[2, 3:6]) # bar
ax7 = fig.add_subplot(gs[3, 0:3])
ax8 = fig.add_subplot(gs[3, 3:6])

n = len(d_rcs)
colors = Colors(n).colors
for i, (sp, rcs) in enumerate(sorted(d_rcs.items())):
_sp = convert_sp(sp)
i50, sn50, fn50 = rcs.xn50
fm, sm = np.median(rcs.extended_frs), np.median(rcs.extended_ns)
om = np.median(rcs.extended_ois)
ns = rcs.ns
nb = len(ns)
kargs = dict(color=colors[i], alpha=0.9, label=_sp)
nb = len(ns) # np.sum(rcs.refcounts[:, 1]) #len(ns)
bm = sum(rcs.refcounts[:, 1]) # sum(rcs.ns)
kargs = dict(color=colors[i], alpha=1, label=_sp)
ax0.plot(-2, -2, linestyle='-', marker='o', **kargs)
# ax0.barh(0, 0, height=0, left=0, align='center', label=_sp)

Expand All @@ -62,15 +75,20 @@ def plot_eval(d_rcs, outfig, legend_fontsize=9):
ax2.plot(range(1, nb+1), ns, drawstyle="steps-post", **kargs)
# ax2.scatter(i50, sn50, label=_sp)

ax3.scatter(len(rcs.ns), sum(rcs.ns), **kargs)
ax4.hist(rcs.extended_frs, bins=40, histtype='step', **kargs)
ax3.scatter(len(rcs.ns), bm, **kargs)
# ax4.hist(rcs.extended_frs, bins=40, histtype='step', **kargs)
hist_plot(rcs.extended_frs, ax4, bins=40, **kargs)
# ax4.scatter(rcs.frs, rcs.ns, alpha=0.6, label=_sp)
# ax5.scatter(fn50, sn50, alpha=0.8, label=_sp)
ax5.scatter(fm, sn50, **kargs)
ax5.scatter(fm, sm, **kargs)

# ax5.hist(rcs.extended_ns, bins=30, histtype='step', label=_sp)
# ax5.hist(rcs.extended_ns, bins=30, histtype='step', label=_sp)
# ax5.scatter(np.mean(rcs.extended_frs), np.mean(rcs.extended_ns), label=_sp)
ax6.scatter(fm, sm, **kargs)
ax6.scatter(fm, om, **kargs)
# ax7.hist(rcs.extended_ois, bins=40, histtype='step', **kargs)
hist_plot(rcs.extended_ois, ax7, bins=40, **kargs)

ax8.plot(rcs.refcounts[:, 0], rcs.refcounts[:, 1], **kargs)
# print(sn50, sm)
# set_legends([ax1, ax2, ax3,ax4,ax5,ax6], fontsize=legend_fontsize)
# ax0.legend(loc='upper left',fancybox=False, frameon=False, ncols=ncols)
Expand All @@ -80,7 +98,7 @@ def plot_eval(d_rcs, outfig, legend_fontsize=9):
[ax3, 'Total number of blocks', 'Total number of genes'],
[ax4, 'Fractionation rate', 'Number of genes'],
[ax5, 'Fractionation rate', 'Block size'],
[ax6, 'Fractionation rate', 'Block size'],
[ax6, 'Fractionation rate', 'Orthology index'],
):
set_labels(ax, xlab, ylab)

Expand All @@ -100,6 +118,17 @@ def plot_eval(d_rcs, outfig, legend_fontsize=9):

plt.subplots_adjust(hspace=0.25, wspace=1.8)
plt.savefig(outfig, bbox_inches='tight')

def hist_plot(data, ax, bins=10, alpha=1, **kargs):
n,bins,patches = ax.hist(data, bins=bins, alpha=0, **kargs)
Xs, Ys = [], []
for i in range(len(bins)-1):
X = (bins[i] + bins[i+1])/2
Xs.append((bins[i] + bins[i+1])/2)
Ys.append(n[i])

ax.plot(Xs, Ys, alpha=alpha, **kargs)

def set_labels(ax, xlab, ylab, **kargs):
ax.set_xlabel(xlab, **kargs)
ax.set_ylabel(ylab, **kargs)
Expand All @@ -113,21 +142,25 @@ class SynRec:
def __init__(self, rc):
self.N = rc.N
self.fr = rc.fr
self.oi = rc.oi
class SynRecs:
def __init__(self, rcs):
self.rcs = rcs
@lazyproperty
def matrix(self):
data = []
for rc in self.rcs:
data += [[rc.N, rc.fr]]
data += [[rc.N, rc.fr, rc.oi]]
return np.array(sorted(data, key=lambda x:-x[0]))
@lazyproperty
def ns(self):
return self.matrix[:, 0]
@lazyproperty
def frs(self):
return self.matrix[:, 1]
@lazyproperty
def ois(self):
return self.matrix[:, 2]
def get_nx0(self, cutoff=50):
self.size = sum(self.ns)
accum = 0
Expand All @@ -143,7 +176,7 @@ def xn50(self):
@lazyproperty
def extended_frs(self):
frs = []
for n, fr in self.matrix:
for n, fr, oi in self.matrix:
frs += [fr]* int(n)
return frs
@lazyproperty
Expand All @@ -152,4 +185,10 @@ def extended_ns(self):
for n in self.ns:
nrs += [n] * int(n)
return nrs
@lazyproperty
def extended_ois(self):
ois = []
for n, fr, oi in self.matrix:
ois += [oi]* int(n)
return ois

21 changes: 15 additions & 6 deletions soi/mcscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,8 @@ def identify_orthologous_blocks(collinearities=None, orthologs=None, fout=sys.st
d_sp_count[sp_pair]= Count()
d_sp_count[sp_pair].pre_nb += 1
d_sp_count[sp_pair].pre_ng += rc.N
pre_total_oi += rc.oi * rc.N
ois = rc.oi * rc.N
pre_total_oi += ois
remove = False
if rc.N < min_n:
rn += 1
Expand All @@ -351,15 +352,17 @@ def identify_orthologous_blocks(collinearities=None, orthologs=None, fout=sys.st
ro += 1
remove = True
if remove:
d_sp_count[sp_pair].removed_oi += ois
removed_pairs += rc.syn_pairs
continue
post_nb += 1 # syntenic blocks
post_ng += rc.N # syntenic genes
post_nso += rc.on # syntenic orthologs
total_oi += rc.oi * rc.N
total_oi += ois
d_sp_count[sp_pair].post_nb += 1
d_sp_count[sp_pair].post_ng += rc.N
d_sp_count[sp_pair].post_nso += rc.on
d_sp_count[sp_pair].retained_oi += ois
if not output_orthology:
rc.write(fout)
if homo_class:
Expand All @@ -381,27 +384,33 @@ def identify_orthologous_blocks(collinearities=None, orthologs=None, fout=sys.st
pre_nb, pre_ng, post_nb, 1.0*post_nb/pre_nb, post_ng, 1.0*post_ng/pre_ng))
logger.info('Orthology: Pre-filter: {} pairs; Post-filter: {} ({:.1%}) syntenic pairs; {} pairs within removed blocks.'.format(
rc.ton, post_nso, 1.0*post_nso/rc.ton, rc.ton-post_nao))
if post_ng > 0:
logger.info('OrthoIndex: Pre-filter: {:.3f}; Post-filter: {:.3f}; {:.3f} for removed blocks.'.format(
pre_total_oi/pre_ng, total_oi/post_ng, (pre_total_oi-total_oi)/(pre_ng-post_ng)))
# if post_ng > 0:
logger.info('OrthoIndex: Pre-filter: {:.3f}; Post-filter: {:.3f}; {:.3f} for removed blocks.'.format(
pre_total_oi/pre_ng, divide(total_oi, post_ng), divide((pre_total_oi-total_oi), (pre_ng-post_ng))))
if homo_class is not None:
out_class.close()
if out_stats is None:
return
logger.info('Output stats..')
line = ['Species1', 'Species2', 'Pre-filter number of orthologous gene pairs', 'Post-filter number of orthologous gene pairs',
'Pre-filter number of syntenic blocks', 'Post-filter number of syntenic blocks',
'Pre-filter number of syntenic gene pairs', 'Post-filter number of syntenic gene pairs',]
'Pre-filter number of syntenic gene pairs', 'Post-filter number of syntenic gene pairs',
'Mean OI of removed gene pairs', 'Mean OI of retained gene pairs']
print('\t'.join(line), file=out_stats)
for sp_pair, self in d_sp_count.items():
line = [sp_pair[0], sp_pair[1], self.pre_no, self.post_nso, self.pre_nb, self.post_nb, self.pre_ng, self.post_ng]
line += [divide(self.removed_oi, (self.pre_ng-self.post_ng)), divide(self.retained_oi, self.post_ng)]
print('\t'.join(map(str, line)), file=out_stats)
out_stats.close()

def divide(x, y):
try: return x/y
except ZeroDivisionError: return 0
class Count:
def __init__(self):
self.pre_nb, self.pre_ng, self.post_nb, self.post_ng = 0,0,0,0
self.pre_no, self.post_nso = 0,0
self.retained_oi, self.removed_oi = 0,0
class Collinearity():
'''
blocks = Collinearity(blockfile)
Expand Down

0 comments on commit 438c1d7

Please sign in to comment.