Skip to content

CI

CI #1092

Workflow file for this run

name: CI

on:
  pull_request:
    # '**' matches every base branch, including names that contain a '/'
    # (for example release/2.0). A bare '*' stops at the first '/'.
    branches:
      - '**'
    # Trigger the CI only when at least one of these files or directories
    # changes. A pull request that only touches other paths (for example
    # docs/) does not trigger it.
    paths:
      - 'tests/**'
      - 'testingDataset/**'
      - '.github/**'
      - 'ppanggolin/**'
      - 'MANIFEST.in'
      - 'VERSION'
      - 'ppanggolin_env.yaml'
      - 'pyproject.toml'
      - 'setup.py'
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

env:
  # Default CPU count passed to ppanggolin through --cpu. The
  # "Get core number" steps overwrite it through $GITHUB_ENV.
  NUM_CPUS: 1
# Jobs of this workflow. A workflow run is made up of one or more jobs that
# can run sequentially or in parallel; here a single matrix job is defined.
jobs:
  test:
    name: test PPanGGOLiN on ${{ matrix.os }} with python ${{ matrix.python-version }}
    # The runner image comes from the current matrix entry
    runs-on: ${{ matrix.os }}
    strategy:
      # One job per (os, python-version) combination
      matrix:
        os:
          - 'macos-14'
        # Versions are quoted so that 3.10 is not read as the float 3.1
        python-version:
          - '3.10'
          - '3.12'
    steps:
# Export the number of CPUs available on the current runner as NUM_CPUS
- name: Get core number on linux
  # runner.os is 'Linux' on every Linux runner image, so the step keeps
  # working whichever Linux label is listed in the matrix.
  if: runner.os == 'Linux'
  run: |
    nb_cpu_linux=$(nproc)
    echo "Number of cores avalaible on the current linux runner $nb_cpu_linux"
    # Variables appended to $GITHUB_ENV are visible to the following steps
    echo "NUM_CPUS=$nb_cpu_linux" >> "$GITHUB_ENV"
- name: Get core number on macos
  # runner.os is 'macOS' on every macOS runner image, so the step keeps
  # working when the matrix moves to another macOS label.
  if: runner.os == 'macOS'
  run: |
    nb_cpu_macos=$(sysctl -n hw.ncpu)
    echo "Number of cores avalaible on the current macos runner $nb_cpu_macos"
    # Variables appended to $GITHUB_ENV are visible to the following steps
    echo "NUM_CPUS=$nb_cpu_macos" >> "$GITHUB_ENV"
# Fetch the repository into $GITHUB_WORKSPACE so that the following steps
# can access its content
- uses: actions/checkout@v4
# # Install requirements with miniconda
# - uses: conda-incubator/setup-miniconda@v3
# with:
# python-version: ${{ matrix.python-version }}
# channels: conda-forge,bioconda,defaults
# environment-file: ppanggolin_env.yaml
# activate-environment: ppanggolin
- name: Set up Python environment
  # Create the 'ppgg' virtual environment with the interpreter matching the
  # current matrix entry. The later steps source ppgg/bin/activate again
  # before they use the environment.
  run: |
    "python${{ matrix.python-version }}" -m venv ppgg
    source ppgg/bin/activate
- name: Install dependencies with Homebrew
  run: |
    # Refresh the formula index and upgrade what is already installed
    brew update
    brew upgrade
    # Install the Homebrew formulae used by this job
    brew install mmseqs2 mafft python-setuptools
    # brew install infernal
    # brew install aragorn
- name: Install ppanggolin
  run: |
    # Activate the Python environment again in the correct directory
    source ppgg/bin/activate
    python -V
    python -m pip install pytest
    # Quote the requirement: unquoted, the shell reads [python_deps] as a
    # glob character class and may expand it to a matching file name.
    python -m pip install '.[python_deps]'
# - name: Install ppanggolin
# shell: bash -l {0}
# run: |
# pip install .[test]
# mmseqs version
# Check that it is installed and displays help without error
- name: Check that PPanGGOLiN is installed
  # A custom shell template replaces GitHub's default bash options, so -e
  # must be passed explicitly; without it only the exit status of the last
  # command decides whether the step fails.
  shell: bash -el {0}
  run: |
    source ppgg/bin/activate
    ppanggolin --version
    ppanggolin --help
# Check that unit tests are all passing
- name: Unit tests
  # -e is passed explicitly because a custom shell template drops GitHub's
  # default fail-fast options: a failed activation must stop the step
  # instead of letting pytest run outside the virtual environment.
  shell: bash -el {0}
  run: |
    source ppgg/bin/activate
    pytest
- name: gbff parsing and MSA computing
  # -e is passed explicitly because a custom shell template drops GitHub's
  # default fail-fast options. Without it the step ends on 'cd -' and is
  # reported as successful even when a ppanggolin command failed.
  shell: bash -el {0}
  run: |
    source ppgg/bin/activate
    cd testingDataset
    # -p: do not abort under -e when the directory already exists
    mkdir -p info_to_test
    # Build the pangenome from the annotated genomes, then compute the MSA
    ppanggolin workflow --cpu "$NUM_CPUS" --anno genomes.gbff.list --output myannopang
    ppanggolin msa --pangenome myannopang/pangenome.h5 --source dna --partition core -o myannopang/ -f --use_gene_id --phylo --single_copy --cpu "$NUM_CPUS"
    # Dump the pangenome information so that it is archived with the results
    ppanggolin info --pangenome myannopang/pangenome.h5 > info_to_test/myannopang_info.yaml
    cat info_to_test/myannopang_info.yaml
    # Compare the gene families file with the checksum stored in the repository
    echo "$(grep 'myannopang/gene_families.tsv' expected_info_files/checksum.txt | cut -d' ' -f1) myannopang/gene_families.tsv" | shasum -a 256 -c - || { echo 'Checksum verification failed.' >&2; exit 1; }
    # Record the checksum that was actually obtained
    shasum -a 256 myannopang/gene_families.tsv >> info_to_test/checksum.txt
    cd -
- name: clusters reading from external file
  # -e is passed explicitly because a custom shell template drops GitHub's
  # default fail-fast options. Without it the step ends on 'cd -' and is
  # reported as successful even when a ppanggolin command failed.
  shell: bash -el {0}
  run: |
    source ppgg/bin/activate
    cd testingDataset
    ppanggolin panrgp --anno genomes.gbff.list --cluster clusters.tsv --output readclusterpang --cpu "$NUM_CPUS"
    ppanggolin annotate --anno genomes.gbff.list --output readclusters --cpu "$NUM_CPUS"
    # Duplicate the first column of the cluster file before the remaining columns
    awk 'BEGIN{FS=OFS="\t"} {$1 = $1 OFS $1} 1' clusters.tsv > clusters_with_reprez.tsv;
    ppanggolin cluster --clusters clusters_with_reprez.tsv -p readclusters/pangenome.h5 --cpu "$NUM_CPUS"
    ppanggolin msa --pangenome readclusterpang/pangenome.h5 --partition persistent --phylo -o readclusterpang/msa/ -f --cpu "$NUM_CPUS"
    # Compare the gene families file with the checksum stored in the repository
    echo "$(grep 'readclusterpang/gene_families.tsv' expected_info_files/checksum.txt | cut -d' ' -f1) readclusterpang/gene_families.tsv" | shasum -a 256 -c - || { echo 'Checksum verification failed.' >&2; exit 1; }
    # Record the checksum that was actually obtained
    shasum -a 256 readclusterpang/gene_families.tsv >> info_to_test/checksum.txt
    cd -
- name: testing context command
  # -e is passed explicitly because a custom shell template drops GitHub's
  # default fail-fast options. Without it the step ends on 'cd -', so
  # neither 'ppanggolin context' call could ever make the step fail.
  shell: bash -el {0}
  run: |
    source ppgg/bin/activate
    cd testingDataset
    ppanggolin context --pangenome myannopang/pangenome.h5 --sequences some_chlam_proteins.fasta --output test_context --fast --cpu "$NUM_CPUS"
    # test from gene family ids. Test here with one family of module 1. The context should find all families of module 1
    echo AP288_RS05055 > one_family_of_module_1.txt
    ppanggolin context --pangenome myannopang/pangenome.h5 --family one_family_of_module_1.txt --output test_context_from_id --cpu "$NUM_CPUS"
    cd -
- name: testing config file
  # -e is passed explicitly because a custom shell template drops GitHub's
  # default fail-fast options. Without it a failing ppanggolin command does
  # not stop the script.
  shell: bash -el {0}
  run: |
    source ppgg/bin/activate
    cd testingDataset
    # Write the default panrgp configuration, then run panrgp with it
    ppanggolin utils --default_config panrgp -o panrgp_default_config.yaml
    # Keep only the first two columns of the cluster file
    cut -f1,2 clusters.tsv > clusters_without_frag.tsv
    ppanggolin panrgp --anno genomes.gbff.list --cluster clusters_without_frag.tsv -o test_config --config panrgp_default_config.yaml --cpu "$NUM_CPUS"
    # Compare the gene families file with the checksum stored in the repository
    echo "$(grep 'test_config/gene_families.tsv' expected_info_files/checksum.txt | cut -d' ' -f1) test_config/gene_families.tsv" | shasum -a 256 -c - || { echo 'Checksum verification failed.' >&2; exit 1; }
    # Record the checksum that was actually obtained
    shasum -a 256 test_config/gene_families.tsv >> info_to_test/checksum.txt
    cd -
- name: testing projection cmd
  # -e is passed explicitly because a custom shell template drops GitHub's
  # default fail-fast options. Without it only the last projection decides
  # the step status and a failure of the first one goes unnoticed.
  shell: bash -el {0}
  run: |
    source ppgg/bin/activate
    cd testingDataset
    # Input list for the projection from fasta files (command disabled below)
    head genomes.fasta.list | sed 's/^/input_genome_/g' > genomes.fasta.head.list
    # ppanggolin projection --pangenome myannopang/pangenome.h5 -o projection_from_list_of_fasta --fasta genomes.fasta.head.list --gff --proksee --cpu $NUM_CPUS
    # projection of a plasmid with chevrons that have been added manually to test chevron handling in GFF
    ppanggolin projection --pangenome myannopang/pangenome.h5 --anno GBFF/plasmid_NZ_CP007132_with_manually_added_chevrons.gff.gz --cpu "$NUM_CPUS" -o projection_plasmid_with_chevron
    # Append a tab-separated entry (name, path) to the annotation list and to the fasta list
    echo GFF_plasmid_No_seq$'\t'GBFF/plasmid_GCF_000093005.1_ASM9300v1.gff.gz >> genomes.gbff.h3_and_GFFplasmidNoSeq.list
    echo GFF_plasmid_No_seq$'\t'GBFF/plasmid_GCF_000093005.1_ASM9300v1.fna.gz >> genomes.fna.GFFplasmidNoSeq.list
    # --cpu is passed here as in every other ppanggolin call of this workflow
    ppanggolin projection -p myannopang/pangenome.h5 --anno genomes.gbff.h3_and_GFFplasmidNoSeq.list --fasta genomes.fna.GFFplasmidNoSeq.list --cpu "$NUM_CPUS"
- name: Archive diff files
  # Upload the collected info/checksum files even when an earlier step
  # failed, since that is when they are needed to investigate the
  # difference. The step is still skipped when the run is cancelled.
  if: ${{ !cancelled() }}
  uses: actions/upload-artifact@v4
  with:
    # One artifact per matrix entry
    name: comparison-results_${{ matrix.os }}_python${{ matrix.python-version }}
    path: testingDataset/info_to_test/*