fic bug

zhangrengang · Oct 22, 2024 · 438c1d7 · 438c1d7
1 parent aed2cfe
commit 438c1d7
Show file tree

Hide file tree

Showing 4 changed files with 75 additions and 27 deletions.
diff --git a/soi/RunCmdsMP.py b/soi/RunCmdsMP.py
@@ -217,7 +217,7 @@ def run_tasks(cmd_list, tc_tasks=None, mode='grid', grid_opts='', cpu=1, mem='1g
 	# if completed?
 	return len(uncmp)
 def avail_cpu(cpu):
-	cpu_count = multiprocessing.cpu_count()
+	cpu_count = len(os.sched_getaffinity(0)) #multiprocessing.cpu_count()
 	return max(1, int(1.0*cpu_count/cpu))
 
 d_mem = {'':1e1, 'k':1e3, 'm':1e6, 'g':1e9, 't':1e12}
@@ -356,7 +356,7 @@ def pool_func(func, iterable, processors=8, method=None, ordered=True, imap=Fals
 def pool_run(cmd_list, processors=8, log=True, logger=None, **kargs):
 	try: processors = int(processors)
 	except (TypeError,ValueError):
-		processors = multiprocessing.cpu_count()
+		processors = len(os.sched_getaffinity(0)) #multiprocessing.cpu_count()
 	iterable = ((cmd, log, logger) for cmd in cmd_list)
 	return [ returned for returned in pool_func(_run_cmd, iterable, processors=processors, **kargs) ]
 

diff --git a/soi/dot_plotter.py b/soi/dot_plotter.py
@@ -149,7 +149,7 @@ def main(args):
 	same_sp = True if chrs1 == chrs2 else False
 	blocks, lines1, lines2, ortholog_graph,chrs1, chrs2, d_offset1, d_offset2 = parse_collinearity(
 		collinearity, gff, chrs1, chrs2, kaks, args.homology, 
-		source = args.source, use_frac=args.use_frac,
+		source = args.source, #use_frac=args.use_frac,
 		ofdir=args.ofdir, of_ratio=args.of_ratio, of_color=args.of_color,
 		hide_blocks = args.hide_blocks, use_median=args.use_median, 
 		lower_ks=args.lower_ks, upper_ks=args.upper_ks,

diff --git a/soi/evaluator.py b/soi/evaluator.py
@@ -1,5 +1,6 @@
 import sys
 import numpy as np
+import collections
 import matplotlib.pyplot as plt
 import matplotlib as mpl
 from matplotlib.gridspec import GridSpec
@@ -11,31 +12,39 @@
 from .mcscan import Collinearity, Gff, XCollinearity
 from .colors import Colors
 
-def eval(collinearities, gff, ref, pre=None):
+def eval(collinearities, orthologs, gff, ref, pre=None):
 	d_rcs = {}
-	for rc in XCollinearity(collinearities, gff=gff):
+	d_refgenes = {}
+	for rc in XCollinearity(collinearities, orthologs=orthologs, gff=gff):
 		rc.fr = rc.fractionation_rate(ref=ref)
 		if rc.fr is None:
 			continue
 		sp1, sp2 = rc.species1, rc.species2
+		genes1, genes2 = rc.genes
 		if sp1 == sp2:
 			continue
 		if sp2 == ref:
 			sp1, sp2 = sp2, sp1
+			genes1, genes2 = genes2, genes1
 		xrc = SynRec(rc)
 		try: d_rcs[sp2] += [xrc]
 		except KeyError: d_rcs[sp2] = [xrc]
-
+		try: d_refgenes[sp2] += genes1
+		except KeyError: d_refgenes[sp2] = genes1
+
 	for sp, rcs in d_rcs.items():
-		d_rcs[sp] = SynRecs(rcs)
+		rcs = SynRecs(rcs)
+		counts = collections.Counter(d_refgenes[sp])
+		rcs.refcounts = np.array(sorted(collections.Counter(counts.values()).items()))
+		d_rcs[sp] = rcs
 	outfig = 'test.png'
 	plot_eval(d_rcs, outfig)
 def main():
-	collinearities, gff, ref = sys.argv[1:4]
-	eval(collinearities, gff, ref)
+	collinearities, orthologs, gff, ref = sys.argv[1:5]
+	eval(collinearities, orthologs, gff, ref)
 def plot_eval(d_rcs, outfig, legend_fontsize=9):
-	fig = plt.figure(figsize=(10,10))
-	gs = GridSpec(3, 7)
+	fig = plt.figure(figsize=(10,12))
+	gs = GridSpec(4, 7)
 
 	ax0 = fig.add_subplot(gs[:, 6]) # legend
 	ax1 = fig.add_subplot(gs[0, 0:3]) # accumulation N
@@ -44,16 +53,20 @@ def plot_eval(d_rcs, outfig, legend_fontsize=9):
 	ax4 = fig.add_subplot(gs[0, 3:6]) # dots
 	ax5 = fig.add_subplot(gs[1, 3:6]) # dots of 50
 	ax6 = fig.add_subplot(gs[2, 3:6]) # bar
+	ax7 = fig.add_subplot(gs[3, 0:3])
+	ax8 = fig.add_subplot(gs[3, 3:6])
 
 	n = len(d_rcs)
 	colors = Colors(n).colors
 	for i, (sp, rcs) in enumerate(sorted(d_rcs.items())):
 		_sp = convert_sp(sp)
 		i50, sn50, fn50 = rcs.xn50
 		fm, sm = np.median(rcs.extended_frs), np.median(rcs.extended_ns)
+		om = np.median(rcs.extended_ois)
 		ns = rcs.ns
-		nb = len(ns)
-		kargs = dict(color=colors[i], alpha=0.9, label=_sp)
+		nb = len(ns) # np.sum(rcs.refcounts[:, 1]) #len(ns)
+		bm = sum(rcs.refcounts[:, 1]) # sum(rcs.ns)
+		kargs = dict(color=colors[i], alpha=1, label=_sp)
 		ax0.plot(-2, -2, linestyle='-', marker='o',  **kargs)
 #		ax0.barh(0, 0, height=0, left=0, align='center', label=_sp)
 
@@ -62,15 +75,20 @@ def plot_eval(d_rcs, outfig, legend_fontsize=9):
 		ax2.plot(range(1, nb+1), ns, drawstyle="steps-post", **kargs)
 #		ax2.scatter(i50, sn50, label=_sp)
 
-		ax3.scatter(len(rcs.ns), sum(rcs.ns), **kargs)
-		ax4.hist(rcs.extended_frs, bins=40, histtype='step', **kargs)
+		ax3.scatter(len(rcs.ns), bm, **kargs)
+#		ax4.hist(rcs.extended_frs, bins=40, histtype='step', **kargs)
+		hist_plot(rcs.extended_frs, ax4, bins=40, **kargs)
 #		ax4.scatter(rcs.frs, rcs.ns,  alpha=0.6, label=_sp)
 #		ax5.scatter(fn50, sn50, alpha=0.8, label=_sp)
-		ax5.scatter(fm, sn50, **kargs)
+		ax5.scatter(fm, sm, **kargs)
 
-#       ax5.hist(rcs.extended_ns, bins=30, histtype='step', label=_sp)
+#	   ax5.hist(rcs.extended_ns, bins=30, histtype='step', label=_sp)
 #		ax5.scatter(np.mean(rcs.extended_frs), np.mean(rcs.extended_ns), label=_sp)
-		ax6.scatter(fm, sm, **kargs)
+		ax6.scatter(fm, om, **kargs)
+#		ax7.hist(rcs.extended_ois, bins=40, histtype='step', **kargs)
+		hist_plot(rcs.extended_ois, ax7, bins=40, **kargs)
+
+		ax8.plot(rcs.refcounts[:, 0], rcs.refcounts[:, 1], **kargs)
 #		print(sn50, sm)
 #	set_legends([ax1, ax2, ax3,ax4,ax5,ax6], fontsize=legend_fontsize)
 #	ax0.legend(loc='upper left',fancybox=False, frameon=False, ncols=ncols)
@@ -80,7 +98,7 @@ def plot_eval(d_rcs, outfig, legend_fontsize=9):
 		[ax3, 'Total number of blocks', 'Total number of genes'],
 		[ax4, 'Fractionation rate', 'Number of genes'],
 		[ax5, 'Fractionation rate', 'Block size'],
-		[ax6, 'Fractionation rate', 'Block size'],
+		[ax6, 'Fractionation rate', 'Orthology index'],
 		):
 		set_labels(ax, xlab, ylab)
 
@@ -100,6 +118,17 @@ def plot_eval(d_rcs, outfig, legend_fontsize=9):
 
 	plt.subplots_adjust(hspace=0.25, wspace=1.8)
 	plt.savefig(outfig, bbox_inches='tight')
+
+def hist_plot(data, ax, bins=10, alpha=1, **kargs):
+	n,bins,patches = ax.hist(data, bins=bins, alpha=0, **kargs)
+	Xs, Ys = [], []
+	for i in range(len(bins)-1):
+		X = (bins[i] + bins[i+1])/2
+		Xs.append((bins[i] + bins[i+1])/2)
+		Ys.append(n[i])
+
+	ax.plot(Xs, Ys, alpha=alpha, **kargs)
+
 def set_labels(ax, xlab, ylab, **kargs):
 	ax.set_xlabel(xlab, **kargs)
 	ax.set_ylabel(ylab, **kargs)
@@ -113,21 +142,25 @@ class SynRec:
 	def __init__(self, rc):
 		self.N = rc.N
 		self.fr = rc.fr
+		self.oi = rc.oi
 class SynRecs:
 	def __init__(self, rcs):
 		self.rcs = rcs
 	@lazyproperty
 	def matrix(self):
 		data = []
 		for rc in self.rcs:
-			data += [[rc.N, rc.fr]]
+			data += [[rc.N, rc.fr, rc.oi]]
 		return np.array(sorted(data, key=lambda x:-x[0]))
 	@lazyproperty
 	def ns(self):
 		return self.matrix[:, 0]
 	@lazyproperty
 	def frs(self):
 		return self.matrix[:, 1]
+	@lazyproperty
+	def ois(self):
+		return self.matrix[:, 2]
 	def get_nx0(self, cutoff=50):
 		self.size = sum(self.ns)
 		accum = 0
@@ -143,7 +176,7 @@ def xn50(self):
 	@lazyproperty
 	def extended_frs(self):
 		frs = []
-		for n, fr in self.matrix:
+		for n, fr, oi in self.matrix:
 			frs += [fr]* int(n)
 		return frs
 	@lazyproperty
@@ -152,4 +185,10 @@ def extended_ns(self):
 		for n in self.ns:
 			nrs += [n] * int(n)
 		return nrs
+	@lazyproperty
+	def extended_ois(self):
+		ois = []
+		for n, fr, oi in self.matrix:
+			ois += [oi]* int(n)
+		return ois
 
diff --git a/soi/mcscan.py b/soi/mcscan.py
@@ -339,7 +339,8 @@ def identify_orthologous_blocks(collinearities=None, orthologs=None, fout=sys.st
 			d_sp_count[sp_pair]= Count()
 		d_sp_count[sp_pair].pre_nb += 1
 		d_sp_count[sp_pair].pre_ng += rc.N
-		pre_total_oi += rc.oi * rc.N
+		ois = rc.oi * rc.N
+		pre_total_oi += ois
 		remove = False
 		if rc.N < min_n:
 			rn += 1
@@ -351,15 +352,17 @@ def identify_orthologous_blocks(collinearities=None, orthologs=None, fout=sys.st
 			ro += 1
 			remove = True
 		if remove:
+			d_sp_count[sp_pair].removed_oi += ois
 			removed_pairs += rc.syn_pairs
 			continue
 		post_nb += 1	# syntenic blocks
 		post_ng += rc.N	# syntenic genes
 		post_nso += rc.on # syntenic orthologs
-		total_oi += rc.oi * rc.N
+		total_oi += ois
 		d_sp_count[sp_pair].post_nb += 1
 		d_sp_count[sp_pair].post_ng += rc.N
 		d_sp_count[sp_pair].post_nso += rc.on
+		d_sp_count[sp_pair].retained_oi += ois
 		if not output_orthology:
 			rc.write(fout)
 		if homo_class:
@@ -381,27 +384,33 @@ def identify_orthologous_blocks(collinearities=None, orthologs=None, fout=sys.st
 		pre_nb, pre_ng, post_nb, 1.0*post_nb/pre_nb, post_ng, 1.0*post_ng/pre_ng))
 	logger.info('Orthology: Pre-filter: {} pairs; Post-filter: {} ({:.1%}) syntenic pairs; {} pairs within removed blocks.'.format(
 		rc.ton, post_nso, 1.0*post_nso/rc.ton, rc.ton-post_nao))
-	if post_ng > 0:
-		logger.info('OrthoIndex: Pre-filter: {:.3f}; Post-filter: {:.3f}; {:.3f} for removed blocks.'.format(
-			pre_total_oi/pre_ng, total_oi/post_ng, (pre_total_oi-total_oi)/(pre_ng-post_ng)))
+#	if post_ng > 0:
+	logger.info('OrthoIndex: Pre-filter: {:.3f}; Post-filter: {:.3f}; {:.3f} for removed blocks.'.format(
+			pre_total_oi/pre_ng, divide(total_oi, post_ng), divide((pre_total_oi-total_oi), (pre_ng-post_ng))))
 	if homo_class is not None:
 		out_class.close()
 	if out_stats is  None:
 		return
 	logger.info('Output stats..')
 	line = ['Species1', 'Species2', 'Pre-filter number of orthologous gene pairs', 'Post-filter number of orthologous gene pairs',
 		'Pre-filter number of syntenic blocks', 'Post-filter number of syntenic blocks', 
-		'Pre-filter number of syntenic gene pairs', 'Post-filter number of syntenic gene pairs',]
+		'Pre-filter number of syntenic gene pairs', 'Post-filter number of syntenic gene pairs',
+		'Mean OI of removed gene pairs', 'Mean OI of retained gene pairs']
 	print('\t'.join(line), file=out_stats)
 	for sp_pair, self in d_sp_count.items():
 		line = [sp_pair[0], sp_pair[1], self.pre_no, self.post_nso, self.pre_nb, self.post_nb, self.pre_ng, self.post_ng]
+		line += [divide(self.removed_oi, (self.pre_ng-self.post_ng)), divide(self.retained_oi, self.post_ng)]
 		print('\t'.join(map(str, line)), file=out_stats)
 	out_stats.close()
 
+def divide(x, y):
+	try: return x/y
+	except ZeroDivisionError: return 0
 class Count:
 	def __init__(self):
 		self.pre_nb, self.pre_ng, self.post_nb, self.post_ng = 0,0,0,0
 		self.pre_no, self.post_nso = 0,0
+		self.retained_oi, self.removed_oi = 0,0
 class Collinearity():
 	'''
 	blocks = Collinearity(blockfile)