CI #1091

Workflow file for this run

	# CI workflow for PPanGGOLiN.
	# On the selected runner it creates a Python virtual environment (ppgg),
	# installs the external tools with Homebrew, installs the package with pip,
	# then runs the unit tests followed by functional command-line tests on the
	# data in testingDataset/. Files written to testingDataset/info_to_test/ are
	# uploaded as a workflow artifact at the end.
	name: CI

	on:
	  pull_request:
	    branches:
	      # NOTE(review): in GitHub filter patterns '*' does not match '/', so
	      # pull requests targeting a base branch such as release/1.0 would not
	      # trigger this workflow; use '**' if they should - confirm intent.
	      - '*'
	    paths:
	      # Trigger the CI only when one of these files or directories changes.
	      # Pull requests that only touch other paths (e.g. docs/) do not trigger it.
	      - 'tests/**'
	      - 'testingDataset/**'
	      - '.github/**'
	      - 'ppanggolin/**'
	      - 'MANIFEST.in'
	      - 'VERSION'
	      - 'ppanggolin_env.yaml'
	      - 'pyproject.toml'
	      - 'setup.py'
	  # Allows you to run this workflow manually from the Actions tab
	  workflow_dispatch:

	env:
	  # Default CPU count; overwritten through $GITHUB_ENV by the
	  # "Get core number" step that matches the runner OS.
	  NUM_CPUS: 1

	# A workflow run is made up of one or more jobs that can run sequentially or in parallel
	jobs:
	  test:
	    name: test PPanGGOLiN on ${{ matrix.os }} with python ${{ matrix.python-version }}
	    # The type of runner that the job will run on
	    runs-on: ${{ matrix.os }}
	    strategy:
	      matrix:
	        os: ['macos-14']
	        python-version: ['3.10'] # , '3.12']

	    steps:

	      # Get the number of CPUs available on the current runner
	      - name: Get core number on linux
	        if: matrix.os == 'ubuntu-latest'
	        run: |
	          nb_cpu_linux=$(nproc)
	          echo "Number of cores avalaible on the current linux runner $nb_cpu_linux"
	          echo "NUM_CPUS=$nb_cpu_linux" >> "$GITHUB_ENV"

	      - name: Get core number on macos
	        if: matrix.os == 'macos-14'
	        run: |
	          nb_cpu_macos=$(sysctl -n hw.ncpu)
	          echo "Number of cores avalaible on the current macos runner $nb_cpu_macos"
	          echo "NUM_CPUS=$nb_cpu_macos" >> "$GITHUB_ENV"

	      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
	      - uses: actions/checkout@v4
	      # # Install requirements with miniconda
	      # - uses: conda-incubator/setup-miniconda@v3
	      #   with:
	      #     python-version: ${{ matrix.python-version }}
	      #     channels: conda-forge,bioconda,defaults
	      #     environment-file: ppanggolin_env.yaml
	      #     activate-environment: ppanggolin
	      - name: Set up Python environment
	        run: |
	          # Only create the virtual environment here. Each `run` step starts
	          # a new shell, so an activation would not carry over; every later
	          # step sources ppgg/bin/activate itself.
	          python${{ matrix.python-version }} -m venv ppgg

	      - name: Install dependencies with Homebrew
	        run: |
	          brew update
	          brew upgrade
	          brew install mmseqs2
	          brew install mafft
	          brew install python-setuptools
	          # brew install infernal
	          # brew install aragorn

	      - name: Install ppanggolin
	        run: |
	          # Activate the Python environment again in the correct directory
	          source ppgg/bin/activate
	          python -V
	          python -m pip install pytest
	          # Quoted so the shell cannot glob-expand the extras brackets
	          python -m pip install '.[python_deps]'

	      # - name: Install ppanggolin
	      #   shell: bash -l {0}
	      #   run: |
	      #     pip install .[test]
	      #     mmseqs version

	      # Check that it is installed and displays help without error
	      - name: Check that PPanGGOLiN is installed
	        shell: bash -l {0}
	        run: |
	          source ppgg/bin/activate
	          ppanggolin --version
	          ppanggolin --help

	      # Check that unit tests are all passing
	      - name: Unit tests
	        shell: bash -l {0}
	        run: |
	          source ppgg/bin/activate
	          pytest

	      # Builds myannopang/ and creates info_to_test/, both reused by later steps
	      - name: gbff parsing and MSA computing
	        shell: bash -l {0}
	        run: |
	          source ppgg/bin/activate
	          cd testingDataset
	          mkdir -p info_to_test
	          ppanggolin workflow --cpu $NUM_CPUS --anno genomes.gbff.list --output myannopang
	          ppanggolin msa --pangenome myannopang/pangenome.h5 --source dna --partition core -o myannopang/ -f --use_gene_id --phylo --single_copy --cpu $NUM_CPUS
	          ppanggolin info --pangenome myannopang/pangenome.h5 > info_to_test/myannopang_info.yaml
	          cat info_to_test/myannopang_info.yaml
	          # Compare the produced file with the checksum recorded in expected_info_files/checksum.txt
	          echo "$(grep 'myannopang/gene_families.tsv' expected_info_files/checksum.txt | cut -d' ' -f1) myannopang/gene_families.tsv" | shasum -a 256 -c - || { echo 'Checksum verification failed.' >&2; exit 1; }
	          shasum -a 256 myannopang/gene_families.tsv >> info_to_test/checksum.txt
	          cd -
	      - name: clusters reading from external file
	        shell: bash -l {0}
	        run: |
	          source ppgg/bin/activate
	          cd testingDataset
	          ppanggolin panrgp --anno genomes.gbff.list --cluster clusters.tsv --output readclusterpang --cpu $NUM_CPUS
	          ppanggolin annotate --anno genomes.gbff.list --output readclusters --cpu $NUM_CPUS
	          # Duplicate the first column of clusters.tsv into a new second column
	          awk 'BEGIN{FS=OFS="\t"} {$1 = $1 OFS $1} 1' clusters.tsv > clusters_with_reprez.tsv;
	          ppanggolin cluster --clusters clusters_with_reprez.tsv -p readclusters/pangenome.h5 --cpu $NUM_CPUS
	          ppanggolin msa --pangenome readclusterpang/pangenome.h5 --partition persistent --phylo -o readclusterpang/msa/ -f --cpu $NUM_CPUS
	          echo "$(grep 'readclusterpang/gene_families.tsv' expected_info_files/checksum.txt | cut -d' ' -f1) readclusterpang/gene_families.tsv" | shasum -a 256 -c - || { echo 'Checksum verification failed.' >&2; exit 1; }
	          shasum -a 256 readclusterpang/gene_families.tsv >> info_to_test/checksum.txt
	          cd -
	      - name: testing context command
	        shell: bash -l {0}
	        run: |
	          source ppgg/bin/activate
	          cd testingDataset
	          ppanggolin context --pangenome myannopang/pangenome.h5 --sequences some_chlam_proteins.fasta --output test_context --fast --cpu $NUM_CPUS

	          # test from gene family ids. Test here with one family of module 1. The context should find all families of module 1
	          echo AP288_RS05055 > one_family_of_module_1.txt
	          ppanggolin context --pangenome myannopang/pangenome.h5 --family one_family_of_module_1.txt --output test_context_from_id --cpu $NUM_CPUS
	          cd -
	      - name: testing config file
	        shell: bash -l {0}
	        run: |
	          source ppgg/bin/activate
	          cd testingDataset
	          ppanggolin utils --default_config panrgp -o panrgp_default_config.yaml
	          # Keep only the first two columns of clusters.tsv
	          cut -f1,2 clusters.tsv > clusters_without_frag.tsv
	          ppanggolin panrgp --anno genomes.gbff.list --cluster clusters_without_frag.tsv -o test_config --config panrgp_default_config.yaml --cpu $NUM_CPUS
	          echo "$(grep 'test_config/gene_families.tsv' expected_info_files/checksum.txt | cut -d' ' -f1) test_config/gene_families.tsv" | shasum -a 256 -c - || { echo 'Checksum verification failed.' >&2; exit 1; }
	          shasum -a 256 test_config/gene_families.tsv >> info_to_test/checksum.txt
	          cd -
	      - name: testing projection cmd
	        shell: bash -l {0}
	        run: |
	          source ppgg/bin/activate
	          cd testingDataset
	          head genomes.fasta.list | sed 's/^/input_genome_/g' > genomes.fasta.head.list
	          # ppanggolin projection --pangenome myannopang/pangenome.h5 -o projection_from_list_of_fasta --fasta genomes.fasta.head.list --gff --proksee --cpu $NUM_CPUS

	          # projection of a plasmid with chevrons that have been added manually to test chevron handling in GFF
	          ppanggolin projection --pangenome myannopang/pangenome.h5 --anno GBFF/plasmid_NZ_CP007132_with_manually_added_chevrons.gff.gz --cpu $NUM_CPUS -o projection_plasmid_with_chevron

	          echo GFF_plasmid_No_seq$'\t'GBFF/plasmid_GCF_000093005.1_ASM9300v1.gff.gz >> genomes.gbff.h3_and_GFFplasmidNoSeq.list
	          echo GFF_plasmid_No_seq$'\t'GBFF/plasmid_GCF_000093005.1_ASM9300v1.fna.gz >> genomes.fna.GFFplasmidNoSeq.list
	          ppanggolin projection -p myannopang/pangenome.h5 --anno genomes.gbff.h3_and_GFFplasmidNoSeq.list --fasta genomes.fna.GFFplasmidNoSeq.list

	      # Upload the info and checksum files collected in info_to_test/ by the steps above
	      - name: Archive diff files
	        uses: actions/upload-artifact@v4
	        with:
	          name: comparison-results_${{ matrix.os }}_python${{ matrix.python-version }}
	          path: testingDataset/info_to_test/*

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

CI #1091

Workflow file

CI #1091

Jobs

Run details

Workflow file for this run