From 9690d1ae0e9ea7c9a63336b459f3c931381f7e68 Mon Sep 17 00:00:00 2001 From: zhangrengang Date: Wed, 25 Sep 2024 21:17:15 +0800 Subject: [PATCH] add SOI-tools --- SOI-tools.md | 25 +++++++++++++++++++++++++ setup.py | 1 + soi/RunCmdsMP.py | 6 ++++-- soi/mcscan.py | 2 +- soi/options.py | 3 ++- 5 files changed, 33 insertions(+), 4 deletions(-) create mode 100644 SOI-tools.md diff --git a/SOI-tools.md b/SOI-tools.md new file mode 100644 index 0000000..c2f9851 --- /dev/null +++ b/SOI-tools.md @@ -0,0 +1,25 @@ + +#### Macro-synteny phylogeny #### +Before running the pipeline: +1. install the latest version of [SOI](https://github.com/zhangrengang/orthoindex#installation) (not the release version); +2. complete the [example pipeline](https://github.com/zhangrengang/evolution_example) to get the orthologous synteny file `collinearity.ortho`; +3. prepare the file `species.config` to set the expected subgenome numbers for targeted species (TAB-separated): +``` +Vitis_vinifera 1 +Aralia_elata 2 +Centella_asiatica 2 +``` +Then run the pipeline: +``` +cd phylogenomics +soi-syn anchor_trees collinearity.ortho species.config ../pep.faa ../all_species_gene.gff output_dir +``` +After the pipeline has completed, you can find tree files in the `output_dir`: +``` +OG*treefile # gene tree file for each anchor gene +chr*treefile # macro-synteny tree file by concatenating the anchor genes from the same chromosome set +CHR*treefile # macro-synteny tree file, but allowing missing genes +``` +For example, in `CHR_Ae11-Ae15-Ca2-Ca7-Vv1_143_283.concat.treefile`, `Ae11-Ae15-Ca2-Ca7-Vv1` is the chromosome set, +`143` is the number of anchor genes, and `283` is the number of all syntenic genes when missing genes are allowed. 
+ diff --git a/setup.py b/setup.py index 120b7a9..50217b4 100644 --- a/setup.py +++ b/setup.py @@ -23,6 +23,7 @@ scripts=[], entry_points={ 'console_scripts': ['soi = soi.options:main', + 'soi-syn = soi.mcscan:main', ], }, ) diff --git a/soi/RunCmdsMP.py b/soi/RunCmdsMP.py index f833dd5..e693484 100644 --- a/soi/RunCmdsMP.py +++ b/soi/RunCmdsMP.py @@ -26,9 +26,11 @@ from tempfile import NamedTemporaryFile except (RuntimeError,ImportError,AttributeError,OSError) as e: if "DRMAA_LIBRARY_PATH" in format(e): - logger.warning('Grid computing is not available because DRMAA not configured properly: {}'.format(e)) +# logger.warning('Grid computing is not available because DRMAA not configured properly: {}'.format(e)) + pass else: - logger.warning('Grid computing is not available because DRMAA not installed: {}'.format(e)) +# logger.warning('Grid computing is not available because DRMAA not installed: {}'.format(e)) + pass logger.info('No DRMAA (see https://github.com/pygridtools/drmaa-python), Switching to local mode.') GRID = False diff --git a/soi/mcscan.py b/soi/mcscan.py index bc00509..b3711ac 100644 --- a/soi/mcscan.py +++ b/soi/mcscan.py @@ -2226,7 +2226,7 @@ def chrom_trees(self, min_genes=2): continue xxchroms += [chroms] i += 1 - prefix = '-'.join(chroms) + '_' + str(len(alnfiles)) + prefix = 'chr_' + '-'.join(chroms) + '_' + str(len(alnfiles)) cmds = self.concat_tree(alnfiles, prefix, idmap=self.d_chroms, astral=True) # d_gene_count3[concat_alnfile] = len(alnfiles) diff --git a/soi/options.py b/soi/options.py index 2f350db..a9be961 100644 --- a/soi/options.py +++ b/soi/options.py @@ -216,7 +216,8 @@ def main(): key = sys.argv[1] func = FUNC[key] func(**args.__dict__) - + print() + if __name__ == '__main__': main()