# CI #1086 - workflow file for this run.
# Note: this file contains bidirectional Unicode text that may be interpreted or
# compiled differently than what appears below. To review, open the file in an
# editor that reveals hidden Unicode characters.
name: CI

on:
  pull_request:
    # NOTE: '*' does not match branch names that contain a '/'.
    branches:
      - '*'
    paths:
      # The CI is triggered when any of these files or directories changes.
      # The only case where it is not triggered is when only docs/ is modified.
      - 'tests/**'
      - 'testingDataset/**'
      - '.github/**'
      - 'ppanggolin/**'
      - 'MANIFEST.in'
      - 'VERSION'
      - 'ppanggolin_env.yaml'
      - 'pyproject.toml'
      - 'setup.py'

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:
env:
  # Default CPU count handed to the ppanggolin commands (--cpu $NUM_CPUS).
  # The "Get core number" steps export the runner's real core count under the
  # same name through $GITHUB_ENV.
  NUM_CPUS: '1'
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # Installs PPanGGOLiN in a Python virtual environment (ppgg/) with its
  # external tools coming from Homebrew, then runs the unit tests and a series
  # of functional tests inside testingDataset/.
  #
  # Conventions shared by the run steps below:
  #   * Every `run:` step starts a fresh shell, so each step that needs
  #     ppanggolin or pytest activates the virtual environment itself.
  #   * Steps that declare `shell: bash -l {0}` do not get bash's fail-fast
  #     options from the runner, so their scripts start with
  #     `set -eo pipefail`; otherwise a failing command followed by `cd -`
  #     would leave the step green.
  test:
    name: test PPanGGOLiN on ${{ matrix.os }} with python ${{ matrix.python-version }}
    # The type of runner that the job will run on
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: ['macos-14']
        python-version: ['3.10']  # , '3.12']
    steps:
      # Get number of cpu available on the current runner.
      # The conditions test runner.os so that any Linux / macOS runner label
      # added to the matrix is covered.
      - name: Get core number on linux
        if: runner.os == 'Linux'
        run: |
          nb_cpu_linux=$(nproc)
          echo "Number of cores avalaible on the current linux runner $nb_cpu_linux"
          echo "NUM_CPUS=$nb_cpu_linux" >> "$GITHUB_ENV"

      - name: Get core number on macos
        if: runner.os == 'macOS'
        run: |
          nb_cpu_macos=$(sysctl -n hw.ncpu)
          echo "Number of cores avalaible on the current macos runner $nb_cpu_macos"
          echo "NUM_CPUS=$nb_cpu_macos" >> "$GITHUB_ENV"

      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v4

      # # Install requirements with miniconda
      # - uses: conda-incubator/setup-miniconda@v3
      #   with:
      #     python-version: ${{ matrix.python-version }}
      #     channels: conda-forge,bioconda,defaults
      #     environment-file: ppanggolin_env.yaml
      #     activate-environment: ppanggolin

      # Install the interpreter announced in the job name; without this step
      # matrix.python-version is never used and the venv is built from
      # whatever python3 the runner image ships.
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      # Only create the virtual environment here: activating it in this step
      # would not carry over to the following steps.
      - name: Set up Python environment
        run: python3 -m venv ppgg

      - name: Install dependencies with Homebrew
        run: |
          brew update
          brew upgrade
          brew install mmseqs2
          brew install mafft
          brew install python-setuptools
          # brew install infernal
          # brew install aragorn

      - name: Install ppanggolin
        run: |
          # Activate the Python environment created in the workspace root
          source ppgg/bin/activate
          pip install setuptools
          # Quoted so that the shell never treats the brackets as a glob pattern
          pip install '.[python_deps]'

      # - name: Install ppanggolin
      #   shell: bash -l {0}
      #   run: |
      #     pip install .[test]
      #     mmseqs version

      # Check that it is installed and displays help without error
      - name: Check that PPanGGOLiN is installed
        shell: bash -l {0}
        run: |
          set -eo pipefail
          source ppgg/bin/activate
          ppanggolin --version
          ppanggolin --help

      # Check that unit tests are all passing
      - name: Unit tests
        shell: bash -l {0}
        run: |
          set -eo pipefail
          source ppgg/bin/activate
          # NOTE(review): pytest is installed here in case the python_deps
          # extra does not provide it; drop this line if it does.
          pip install pytest
          pytest

      - name: gbff parsing and MSA computing
        shell: bash -l {0}
        run: |
          set -eo pipefail
          source ppgg/bin/activate
          cd testingDataset
          mkdir -p info_to_test
          ppanggolin workflow --cpu $NUM_CPUS --anno genomes.gbff.list --output myannopang
          ppanggolin msa --pangenome myannopang/pangenome.h5 --source dna --partition core -o myannopang/ -f --use_gene_id --phylo --single_copy --cpu $NUM_CPUS
          ppanggolin info --pangenome myannopang/pangenome.h5 > info_to_test/myannopang_info.yaml
          cat info_to_test/myannopang_info.yaml
          # Compare gene_families.tsv with the reference checksum, then record the observed one
          echo "$(grep 'myannopang/gene_families.tsv' expected_info_files/checksum.txt | cut -d' ' -f1) myannopang/gene_families.tsv" | shasum -a 256 -c - || { echo 'Checksum verification failed.' >&2; exit 1; }
          shasum -a 256 myannopang/gene_families.tsv >> info_to_test/checksum.txt
          cd -

      - name: clusters reading from external file
        shell: bash -l {0}
        run: |
          set -eo pipefail
          source ppgg/bin/activate
          cd testingDataset
          ppanggolin panrgp --anno genomes.gbff.list --cluster clusters.tsv --output readclusterpang --cpu $NUM_CPUS
          ppanggolin annotate --anno genomes.gbff.list --output readclusters --cpu $NUM_CPUS
          # Duplicate the first column of clusters.tsv into a new clusters file
          awk 'BEGIN{FS=OFS="\t"} {$1 = $1 OFS $1} 1' clusters.tsv > clusters_with_reprez.tsv;
          ppanggolin cluster --clusters clusters_with_reprez.tsv -p readclusters/pangenome.h5 --cpu $NUM_CPUS
          ppanggolin msa --pangenome readclusterpang/pangenome.h5 --partition persistent --phylo -o readclusterpang/msa/ -f --cpu $NUM_CPUS
          echo "$(grep 'readclusterpang/gene_families.tsv' expected_info_files/checksum.txt | cut -d' ' -f1) readclusterpang/gene_families.tsv" | shasum -a 256 -c - || { echo 'Checksum verification failed.' >&2; exit 1; }
          shasum -a 256 readclusterpang/gene_families.tsv >> info_to_test/checksum.txt
          cd -

      - name: testing context command
        shell: bash -l {0}
        run: |
          set -eo pipefail
          source ppgg/bin/activate
          cd testingDataset
          ppanggolin context --pangenome myannopang/pangenome.h5 --sequences some_chlam_proteins.fasta --output test_context --fast --cpu $NUM_CPUS
          # test from gene family ids. Test here with one family of module 1. The context should find all families of module 1
          echo AP288_RS05055 > one_family_of_module_1.txt
          ppanggolin context --pangenome myannopang/pangenome.h5 --family one_family_of_module_1.txt --output test_context_from_id --cpu $NUM_CPUS
          cd -

      - name: testing config file
        shell: bash -l {0}
        run: |
          set -eo pipefail
          source ppgg/bin/activate
          cd testingDataset
          ppanggolin utils --default_config panrgp -o panrgp_default_config.yaml
          # Keep only the first two columns of the clusters file
          cut -f1,2 clusters.tsv > clusters_without_frag.tsv
          ppanggolin panrgp --anno genomes.gbff.list --cluster clusters_without_frag.tsv -o test_config --config panrgp_default_config.yaml --cpu $NUM_CPUS
          echo "$(grep 'test_config/gene_families.tsv' expected_info_files/checksum.txt | cut -d' ' -f1) test_config/gene_families.tsv" | shasum -a 256 -c - || { echo 'Checksum verification failed.' >&2; exit 1; }
          shasum -a 256 test_config/gene_families.tsv >> info_to_test/checksum.txt
          cd -

      - name: testing projection cmd
        shell: bash -l {0}
        run: |
          set -eo pipefail
          source ppgg/bin/activate
          cd testingDataset
          head genomes.fasta.list | sed 's/^/input_genome_/g' > genomes.fasta.head.list
          # ppanggolin projection --pangenome myannopang/pangenome.h5 -o projection_from_list_of_fasta --fasta genomes.fasta.head.list --gff --proksee --cpu $NUM_CPUS
          # projection of a plasmid with chevrons that have been added manually to test chevron handling in GFF
          ppanggolin projection --pangenome myannopang/pangenome.h5 --anno GBFF/plasmid_NZ_CP007132_with_manually_added_chevrons.gff.gz --cpu $NUM_CPUS -o projection_plasmid_with_chevron
          echo GFF_plasmid_No_seq$'\t'GBFF/plasmid_GCF_000093005.1_ASM9300v1.gff.gz >> genomes.gbff.h3_and_GFFplasmidNoSeq.list
          echo GFF_plasmid_No_seq$'\t'GBFF/plasmid_GCF_000093005.1_ASM9300v1.fna.gz >> genomes.fna.GFFplasmidNoSeq.list
          ppanggolin projection -p myannopang/pangenome.h5 --anno genomes.gbff.h3_and_GFFplasmidNoSeq.list --fasta genomes.fna.GFFplasmidNoSeq.list

      # Upload whatever was written to info_to_test/ even when an earlier step
      # failed, since that is when these files are most useful.
      - name: Archive diff files
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: comparison-results_${{ matrix.os }}_python${{ matrix.python-version }}
          path: testingDataset/info_to_test/*