diff --git a/SOI-tools.md b/SOI-tools.md index fa37f24..3312ba8 100644 --- a/SOI-tools.md +++ b/SOI-tools.md @@ -3,7 +3,7 @@ Before run the pipeline: 1. install the lasest verion of [SOI](https://github.com/zhangrengang/orthoindex#installation) by `python3 setup.py install`, and have a check: `which soi-syn`; -2. completed the [example pipeline](https://github.com/zhangrengang/evolution_example) to get the orthologous synteny file `collinearity.ortho`; +2. complete the [example pipeline](https://github.com/zhangrengang/evolution_example) to get the orthologous synteny file `collinearity.ortho`; 3. prepare file `species.config` to set the expected subgenome numbers for the targeted species (TAB seperated): ``` Vitis_vinifera 1 diff --git a/soi/OrthoFinder.py b/soi/OrthoFinder.py index 530cba7..624784d 100644 --- a/soi/OrthoFinder.py +++ b/soi/OrthoFinder.py @@ -1142,7 +1142,8 @@ def get_homologs(self, sps=None, sp1=None, sp2=None, byName=True, **kargs): Genes_1 = list(map(gene_format_o, Genes_1)) Genes_2 = list(map(gene_format_o, Genes_2)) for (sp1, g1), (sp2, g2) in itertools.product(Genes_1, Genes_2): - assert sp1 != sp2 + if sp1 is not None and sp2 is not None: + assert sp1 != sp2 if (g2, g1) in ortho_pairs: continue ortho_pairs.add( (g1, g2) ) #orthologs @@ -1150,7 +1151,8 @@ def get_homologs(self, sps=None, sp1=None, sp2=None, byName=True, **kargs): if len(Genes) < 2: continue for (sp1, g1), (sp2, g2) in itertools.combinations(Genes, 2): - assert sp1 == sp2 + if sp1 is not None and sp2 is not None: + assert sp1 == sp2 if sps is not None and sp1 not in sps: continue if (g2, g1) in ortho_pairs: @@ -1840,7 +1842,8 @@ def get_oglogs(OFdir, outHomo): line = [g1, g2] print('\t'.join(line), file=outHomo) def gene_format_o(gene): - sp, g = gene.split('|') + try: sp, g = gene.split('|', 1) + except ValueError: sp, g = None, gene return (sp, gene) def get_paralogs(OFdir, outHomo, min_support=0.5): result = OrthoFinder(OFdir) @@ -1867,13 +1870,13 @@ def get_paralogs(OFdir, outHomo, min_support=0.5): line = [g1, g2, info] print('\t'.join(line), file=outHomo) def gene_format_p(gene): - sp, g = gene.split('|') + sp, g = gene.split('|', 1) sp = sp[:int(len(sp)/2)] g = sp + '|' + g return (sp, g) def gene_format_common(gene): if not '|' in gene: - sp, g = gene, gene + sp, g = None, gene return (sp, g) sp, g = gene.split('|') sp1 = sp[:len(sp)/2] diff --git a/soi/WGDI.py b/soi/WGDI.py index 1c238c8..9069749 100644 --- a/soi/WGDI.py +++ b/soi/WGDI.py @@ -137,6 +137,9 @@ def plot_dotplot(self, ax=None, d_offset={}, gff=None, align='center', xy=1, ax has_lab = False texts = [] for sgement in self.lazy_lines(gene_axis, gff=gff): + # print(sgement, d_offset) + if sgement.chrom not in d_offset: + continue offset = d_offset[sgement.chrom] + sgement.start lab = sgement.label if label else None # print(xy, len(sgement), width, offset, sgement.color, align, lab, gene_axis, self.is_order(), sgement.start, sgement.end) @@ -205,9 +208,11 @@ def __init__(self, line): except IndexError: self.label = None def __len__(self): return self.end - self.start + 1 - @property + @lazyproperty def id(self): return '{}:{}-{}|{}'.format(self.chrom, self.start, self.end, self.subgenome) + def __repr__(self): + return self.id def write(self, fout): print('\t'.join(map(str, (self.chrom, self.start, self.end, self.color, self.subgenome))), file=fout) def stats_besthits(alignment, pol_indice, dip_indice, labels, blasts): diff --git a/soi/__version__.py b/soi/__version__.py index e6956dc..d6588af 100644 --- a/soi/__version__.py +++ b/soi/__version__.py @@ -1 +1 @@ -version = '1.1' +version = '1.1.2' diff --git a/soi/dot_plotter.py b/soi/dot_plotter.py index adbf6ef..95696a9 100755 --- a/soi/dot_plotter.py +++ b/soi/dot_plotter.py @@ -75,6 +75,7 @@ def dotplot_args(parser): group_ks.add_argument('--lower-ks', metavar='Ks', type=float, default=None, help="lower limit of median Ks. [default=%(default)s]") group_ks.add_argument('--upper-ks', metavar='Ks', type=float, default=None, help="upper limit of median Ks. [default=%(default)s]") group_ks.add_argument('--output-hist', action='store_true', default=False, help="output the data for histogram plot. [default=%(default)s]") + group_ks.add_argument('--cbar', action='store_true', default=False, help="plot color bar when no histogram plot. [default=%(default)s]") # group_ks.add_argument('--clip-ks', action='store_true', default=None, help="clip ks > max-ks. [default=%(default)s]") # group_ks.add_argument('--hist-ylim', type=float, default=None, help="max y axis of Ks histgram. [default=%(default)s]") # group_ks.add_argument('--yn00', action='store_true', default=False, help='turn to YN00[default=%(default)s]') @@ -178,7 +179,7 @@ def main(args): plot_blocks(blocks, outplots, ks = None, max_ks=None, ks_hist=None, xlabels=chrs1, ylabels=chrs2, xpositions=xpositions, ypositions=ypositions, - xlines=lines1, ylines=lines2, + xelines=lines1, yelines=lines2, xlim=max(lines1), ylim=max(lines2)) ploidy_data = coord_path1, coord_path2, coord_graph1, coord_graph2 = parse_gff(gff, chrs1, chrs2) outplots = [prefix + '.' + fmt for fmt in args.format] @@ -190,7 +191,7 @@ def main(args): xlabels=chrs1, ylabels=chrs2, same_sp=same_sp, #hist_ylim=args.hist_ylim, xpositions=xpositions, ypositions=ypositions, - xelines=lines1, yelines=lines2, + xelines=lines1, yelines=lines2, # chromosome ends xclines=xclines, yclines=yclines, # such as centromere xlim=max(lines1), ylim=max(lines2), xoffset=d_offset1, yoffset=d_offset2, gff=gff, @@ -207,7 +208,7 @@ def plot_blocks(blocks, outplots, ks=None, max_ks=None, ks_hist=False, ks_cmap=N xlabels=None, ylabels=None, xpositions=None, ypositions=None, xelines=None, yelines=None, xlim=None, ylim=None, figsize=18, fontsize=10, point_size=0.8, xclines=None, yclines=None, plot_bin=None, output_hist=False, xoffset=None, yoffset=None, xbars=None, ybars=None, gff=None, gene_axis=None, xbarlab=True, ybarlab=True, - hist_ylim=None, xlabel=None, ylabel=None, remove_prefix=True, number_plots=True, same_sp=False, + hist_ylim=None, xlabel=None, ylabel=None, remove_prefix=True, number_plots=True, same_sp=False, cbar=False, ploidy=False, ploidy_data=None, ortholog_graph=None, of_color=False, homology=False, **kargs ): import matplotlib @@ -366,11 +367,11 @@ def plot_blocks(blocks, outplots, ks=None, max_ks=None, ks_hist=False, ks_cmap=N plot_label(ax, label, fontsize=lsize) tlabel = 'OrthoIndex' if of_color else 'Ks' - if not ks is None and ks_hist is None and False: # color map only + if not ks is None and ks_hist is None and cbar: # color map only ax = plt.subplot2grid((21,20),(20,0), colspan=5) plt.axis('off') - cbar = plt.colorbar(ax=ax, orientation='horizontal', location='bottom', label=label, shrink=1, fraction=0.5) - # cbar.ax.set_xlabel('Ks', fontsize=14) + cbar = plt.colorbar(ax=ax, orientation='horizontal', location='bottom', label=tlabel, shrink=1, fraction=0.5) + #cbar.ax.set_xlabel(tlabel, fontsize=14) # ax2 diff --git a/soi/mcscan.py b/soi/mcscan.py index e251684..bcc9c6a 100644 --- a/soi/mcscan.py +++ b/soi/mcscan.py @@ -35,7 +35,7 @@ def __init__(self, info): self.coord = (chr, start, end) try: self.species, self.raw_gene = id.split('|', 1) except ValueError: - logger.warn('Species id not found in `{}`, ignoring'.fomart(id)) +# logger.warn('Species id not found in `{}`, ignoring'.format(id)) self.species, self.raw_gene = None, id def __str__(self): return self.id @@ -1091,7 +1091,7 @@ def _parse(self): self.Gene = g self.id = gene try: self.species, self.raw_gene = gene.split('|', 1) - except ValueError: pass + except ValueError: self.species, self.raw_gene = None, gene def __hash__(self): return hash(self.id) def __str__(self): @@ -2942,7 +2942,10 @@ def test(): def list_blocks(collinearity, outTsv, gff=None, kaks=None): '''以共线性块为单位,输出信息''' line = ["Alignment", "species1", "species2", "chr1", "chr2", "start1", "end1", "length1", "start2", "end2", "length2", "strand", "N_gene", "mean_Ks", 'median_Ks', 'score', 'e_value'] - line = ["id", "species1", "species2", "chr1", "chr2", "start1", "end1", "length1", "start2", "end2", "length2", "strand", "length", "ks_average", 'ks_median', 'score', 'e_value'] + line = ["id", "species1", "species2", "chr1", "chr2", "strand", + "start1", "end1", "istart1", "iend1", + "start2", "end2", "istart2", "iend2", + "length1", "length2", "N_gene", "ks_average", 'ks_median', 'score', 'e_value'] print('\t'.join(line), file=outTsv) for rc in Collinearity(collinearity,gff=gff,kaks=kaks): sp1, sp2 = rc.species @@ -2950,9 +2953,14 @@ def list_blocks(collinearity, outTsv, gff=None, kaks=None): Alignment, score, e_value, N, strand = rc.Alignment, rc.score, rc.e_value, rc.N, rc.strand start1, end1, length1 = rc.start1, rc.end1, rc.length1 start2, end2, length2 = rc.start2, rc.end2, rc.length2 + istart1, iend1 = rc.istart1, rc.iend1 + istart2, iend2 = rc.istart2, rc.iend2 mean_ks = rc.mean_ks median_ks = rc.median_ks - line = [Alignment, sp1, sp2, chr1, chr2, start1, end1, length1, start2, end2, length2, strand, N, mean_ks, median_ks, score, e_value] + line = [Alignment, sp1, sp2, chr1, chr2, strand, + start1, end1, istart1, iend1, + start2, end2, istart2, iend2, + length1, length2, N, mean_ks, median_ks, score, e_value] line = list(map(str, line)) print('\t'.join(line), file=outTsv) def gene_class(collinearity, inTsv, outTsv, byAlignment=True):