Skip to content

Commit

Permalink
compatibility to gene id without species id
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangrengang committed Dec 4, 2024
1 parent 900f2e4 commit b7161dd
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 18 deletions.
2 changes: 1 addition & 1 deletion SOI-tools.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Before run the pipeline:
1. install the lasest verion of [SOI](https://github.com/zhangrengang/orthoindex#installation) by `python3 setup.py install`,
and have a check: `which soi-syn`;
2. completed the [example pipeline](https://github.com/zhangrengang/evolution_example) to get the orthologous synteny file `collinearity.ortho`;
2. complete the [example pipeline](https://github.com/zhangrengang/evolution_example) to get the orthologous synteny file `collinearity.ortho`;
3. prepare file `species.config` to set the expected subgenome numbers for the targeted species (TAB seperated):
```
Vitis_vinifera 1
Expand Down
13 changes: 8 additions & 5 deletions soi/OrthoFinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -1142,15 +1142,17 @@ def get_homologs(self, sps=None, sp1=None, sp2=None, byName=True, **kargs):
Genes_1 = list(map(gene_format_o, Genes_1))
Genes_2 = list(map(gene_format_o, Genes_2))
for (sp1, g1), (sp2, g2) in itertools.product(Genes_1, Genes_2):
assert sp1 != sp2
if sp1 is not None and sp2 is not None:
assert sp1 != sp2
if (g2, g1) in ortho_pairs:
continue
ortho_pairs.add( (g1, g2) ) #orthologs
for Genes in [Genes_1, Genes_2]:
if len(Genes) < 2:
continue
for (sp1, g1), (sp2, g2) in itertools.combinations(Genes, 2):
assert sp1 == sp2
if sp1 is not None and sp2 is not None:
assert sp1 == sp2
if sps is not None and sp1 not in sps:
continue
if (g2, g1) in ortho_pairs:
Expand Down Expand Up @@ -1840,7 +1842,8 @@ def get_oglogs(OFdir, outHomo):
line = [g1, g2]
print('\t'.join(line), file=outHomo)
def gene_format_o(gene):
sp, g = gene.split('|')
try: sp, g = gene.split('|', 1)
except ValueError: sp, g = None, gene
return (sp, gene)
def get_paralogs(OFdir, outHomo, min_support=0.5):
result = OrthoFinder(OFdir)
Expand All @@ -1867,13 +1870,13 @@ def get_paralogs(OFdir, outHomo, min_support=0.5):
line = [g1, g2, info]
print('\t'.join(line), file=outHomo)
def gene_format_p(gene):
sp, g = gene.split('|')
sp, g = gene.split('|', 1)
sp = sp[:int(len(sp)/2)]
g = sp + '|' + g
return (sp, g)
def gene_format_common(gene):
if not '|' in gene:
sp, g = gene, gene
sp, g = None, gene
return (sp, g)
sp, g = gene.split('|')
sp1 = sp[:len(sp)/2]
Expand Down
7 changes: 6 additions & 1 deletion soi/WGDI.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ def plot_dotplot(self, ax=None, d_offset={}, gff=None, align='center', xy=1, ax
has_lab = False
texts = []
for sgement in self.lazy_lines(gene_axis, gff=gff):
# print(sgement, d_offset)
if sgement.chrom not in d_offset:
continue
offset = d_offset[sgement.chrom] + sgement.start
lab = sgement.label if label else None
# print(xy, len(sgement), width, offset, sgement.color, align, lab, gene_axis, self.is_order(), sgement.start, sgement.end)
Expand Down Expand Up @@ -205,9 +208,11 @@ def __init__(self, line):
except IndexError: self.label = None
def __len__(self):
return self.end - self.start + 1
@property
@lazyproperty
def id(self):
return '{}:{}-{}|{}'.format(self.chrom, self.start, self.end, self.subgenome)
def __repr__(self):
return self.id
def write(self, fout):
print('\t'.join(map(str, (self.chrom, self.start, self.end, self.color, self.subgenome))), file=fout)
def stats_besthits(alignment, pol_indice, dip_indice, labels, blasts):
Expand Down
2 changes: 1 addition & 1 deletion soi/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = '1.1'
version = '1.1.2'
13 changes: 7 additions & 6 deletions soi/dot_plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def dotplot_args(parser):
group_ks.add_argument('--lower-ks', metavar='Ks', type=float, default=None, help="lower limit of median Ks. [default=%(default)s]")
group_ks.add_argument('--upper-ks', metavar='Ks', type=float, default=None, help="upper limit of median Ks. [default=%(default)s]")
group_ks.add_argument('--output-hist', action='store_true', default=False, help="output the data for histogram plot. [default=%(default)s]")
group_ks.add_argument('--cbar', action='store_true', default=False, help="plot color bar when no histogram plot. [default=%(default)s]")
# group_ks.add_argument('--clip-ks', action='store_true', default=None, help="clip ks > max-ks. [default=%(default)s]")
# group_ks.add_argument('--hist-ylim', type=float, default=None, help="max y axis of Ks histgram. [default=%(default)s]")
# group_ks.add_argument('--yn00', action='store_true', default=False, help='turn to YN00[default=%(default)s]')
Expand Down Expand Up @@ -178,7 +179,7 @@ def main(args):
plot_blocks(blocks, outplots, ks = None, max_ks=None, ks_hist=None,
xlabels=chrs1, ylabels=chrs2,
xpositions=xpositions, ypositions=ypositions,
xlines=lines1, ylines=lines2,
xelines=lines1, yelines=lines2,
xlim=max(lines1), ylim=max(lines2))
ploidy_data = coord_path1, coord_path2, coord_graph1, coord_graph2 = parse_gff(gff, chrs1, chrs2)
outplots = [prefix + '.' + fmt for fmt in args.format]
Expand All @@ -190,7 +191,7 @@ def main(args):
xlabels=chrs1, ylabels=chrs2, same_sp=same_sp,
#hist_ylim=args.hist_ylim,
xpositions=xpositions, ypositions=ypositions,
xelines=lines1, yelines=lines2,
xelines=lines1, yelines=lines2, # chromosome ends
xclines=xclines, yclines=yclines, # such as centromere
xlim=max(lines1), ylim=max(lines2),
xoffset=d_offset1, yoffset=d_offset2, gff=gff,
Expand All @@ -207,7 +208,7 @@ def plot_blocks(blocks, outplots, ks=None, max_ks=None, ks_hist=False, ks_cmap=N
xlabels=None, ylabels=None, xpositions=None, ypositions=None, xelines=None, yelines=None, xlim=None, ylim=None,
figsize=18, fontsize=10, point_size=0.8, xclines=None, yclines=None, plot_bin=None, output_hist=False,
xoffset=None, yoffset=None, xbars=None, ybars=None, gff=None, gene_axis=None, xbarlab=True, ybarlab=True,
hist_ylim=None, xlabel=None, ylabel=None, remove_prefix=True, number_plots=True, same_sp=False,
hist_ylim=None, xlabel=None, ylabel=None, remove_prefix=True, number_plots=True, same_sp=False, cbar=False,
ploidy=False, ploidy_data=None, ortholog_graph=None, of_color=False, homology=False, **kargs
):
import matplotlib
Expand Down Expand Up @@ -366,11 +367,11 @@ def plot_blocks(blocks, outplots, ks=None, max_ks=None, ks_hist=False, ks_cmap=N
plot_label(ax, label, fontsize=lsize)

tlabel = 'OrthoIndex' if of_color else 'Ks'
if not ks is None and ks_hist is None and False: # color map only
if not ks is None and ks_hist is None and cbar: # color map only
ax = plt.subplot2grid((21,20),(20,0), colspan=5)
plt.axis('off')
cbar = plt.colorbar(ax=ax, orientation='horizontal', location='bottom', label=label, shrink=1, fraction=0.5)
# cbar.ax.set_xlabel('Ks', fontsize=14)
cbar = plt.colorbar(ax=ax, orientation='horizontal', location='bottom', label=tlabel, shrink=1, fraction=0.5)
#cbar.ax.set_xlabel(tlabel, fontsize=14)


# ax2
Expand Down
16 changes: 12 additions & 4 deletions soi/mcscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(self, info):
self.coord = (chr, start, end)
try: self.species, self.raw_gene = id.split('|', 1)
except ValueError:
logger.warn('Species id not found in `{}`, ignoring'.fomart(id))
# logger.warn('Species id not found in `{}`, ignoring'.format(id))
self.species, self.raw_gene = None, id
def __str__(self):
return self.id
Expand Down Expand Up @@ -1091,7 +1091,7 @@ def _parse(self):
self.Gene = g
self.id = gene
try: self.species, self.raw_gene = gene.split('|', 1)
except ValueError: pass
except ValueError: self.species, self.raw_gene = None, gene
def __hash__(self):
return hash(self.id)
def __str__(self):
Expand Down Expand Up @@ -2942,17 +2942,25 @@ def test():
def list_blocks(collinearity, outTsv, gff=None, kaks=None):
'''以共线性块为单位,输出信息'''
line = ["Alignment", "species1", "species2", "chr1", "chr2", "start1", "end1", "length1", "start2", "end2", "length2", "strand", "N_gene", "mean_Ks", 'median_Ks', 'score', 'e_value']
line = ["id", "species1", "species2", "chr1", "chr2", "start1", "end1", "length1", "start2", "end2", "length2", "strand", "length", "ks_average", 'ks_median', 'score', 'e_value']
line = ["id", "species1", "species2", "chr1", "chr2", "strand",
"start1", "end1", "istart1", "iend1",
"start2", "end2", "istart2", "iend2",
"length1", "length2", "N_gene", "ks_average", 'ks_median', 'score', 'e_value']
print('\t'.join(line), file=outTsv)
for rc in Collinearity(collinearity,gff=gff,kaks=kaks):
sp1, sp2 = rc.species
chr1, chr2 = rc.chrs
Alignment, score, e_value, N, strand = rc.Alignment, rc.score, rc.e_value, rc.N, rc.strand
start1, end1, length1 = rc.start1, rc.end1, rc.length1
start2, end2, length2 = rc.start2, rc.end2, rc.length2
istart1, iend1 = rc.istart1, rc.iend1
istart2, iend2 = rc.istart2, rc.iend2
mean_ks = rc.mean_ks
median_ks = rc.median_ks
line = [Alignment, sp1, sp2, chr1, chr2, start1, end1, length1, start2, end2, length2, strand, N, mean_ks, median_ks, score, e_value]
line = [Alignment, sp1, sp2, chr1, chr2, strand,
start1, end1, istart1, iend1,
start2, end2, istart2, iend2,
length1, length2, N, mean_ks, median_ks, score, e_value]
line = list(map(str, line))
print('\t'.join(line), file=outTsv)
def gene_class(collinearity, inTsv, outTsv, byAlignment=True):
Expand Down

0 comments on commit b7161dd

Please sign in to comment.