name: CI

on:
  pull_request:
    branches:
      - '*'
    paths:
      # If any of these files or directories change, trigger the CI.
      # The only case where the CI is not triggered is when only docs/ is modified.
      - 'tests/**'
      - 'testingDataset/**'
      - '.github/**'
      - 'ppanggolin/**'
      - 'MANIFEST.in'
      - 'VERSION'
      - 'ppanggolin_env.yaml'
      - 'pyproject.toml'
      - 'setup.py'
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:
env:
  NUM_CPUS: 1
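  # NB: 1 is only a fallback; the "Get core number" steps below overwrite NUM_CPUS
  # through $GITHUB_ENV with the core count of the runner actually in use.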
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  test:
    name: test PPanGGOLiN on ${{ matrix.os }} with python ${{ matrix.python-version }}
    # The type of runner that the job will run on
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: ['macos-14']
        python-version: ['3.10']
    steps:
      # Get the number of CPUs available on the current runner
      - name: Get core number on linux
        if: matrix.os == 'ubuntu-latest'
        run: |
          nb_cpu_linux=`nproc`
          echo "Number of cores available on the current linux runner: $nb_cpu_linux"
          echo "NUM_CPUS=$nb_cpu_linux" >> "$GITHUB_ENV"

      - name: Get core number on macos
        if: matrix.os == 'macos-14'
        run: |
          nb_cpu_macos=`sysctl -n hw.ncpu`
          echo "Number of cores available on the current macos runner: $nb_cpu_macos"
          echo "NUM_CPUS=$nb_cpu_macos" >> "$GITHUB_ENV"
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v4

      # # Install requirements with miniconda
      # - uses: conda-incubator/setup-miniconda@v3
      #   with:
      #     python-version: ${{ matrix.python-version }}
      #     channels: conda-forge,bioconda,defaults
      #     environment-file: ppanggolin_env.yaml
      #     activate-environment: ppanggolin

      - name: Set up Python environment
        run: |
          python${{ matrix.python-version }} -m venv ppgg
          source ppgg/bin/activate
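      # Each `run:` step starts a fresh shell, so the venv created above is not active
      # in later steps; they re-activate it with `source ppgg/bin/activate`.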
      - name: Install dependencies with Homebrew
        run: |
          brew update
          brew upgrade
          brew install mmseqs2
          brew install mafft
          # brew install python-setuptools
          # brew install infernal
          # brew install aragorn
      - name: Install ppanggolin
        run: |
          # Re-activate the Python environment in this step's shell
          source ppgg/bin/activate
          python -V
          python -m pip install pytest
          python -m pip install .[python_deps]

      # - name: Install ppanggolin
      #   shell: bash -l {0}
      #   run: |
      #     pip install .[test]
      #     mmseqs version
      # Check that PPanGGOLiN is installed and displays its help without error
      - name: Check that PPanGGOLiN is installed
        shell: bash -l {0}
        run: |
          source ppgg/bin/activate
          ppanggolin --version
          ppanggolin --help

      # Check that unit tests are all passing
      - name: Unit tests
        shell: bash -l {0}
        run: |
          source ppgg/bin/activate
          pytest
      - name: gbff parsing and MSA computing
        shell: bash -l {0}
        run: |
          source ppgg/bin/activate
          cd testingDataset
          mkdir info_to_test
          ppanggolin workflow --cpu $NUM_CPUS --anno genomes.gbff.list --output myannopang
          ppanggolin msa --pangenome myannopang/pangenome.h5 --source dna --partition core -o myannopang/ -f --use_gene_id --phylo --single_copy --cpu $NUM_CPUS
          ppanggolin info --pangenome myannopang/pangenome.h5 > info_to_test/myannopang_info.yaml
          cat info_to_test/myannopang_info.yaml
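          # Compare the freshly generated gene_families.tsv with the expected checksum
          # stored in expected_info_files/checksum.txt, and record the new checksum in
          # info_to_test/ so it ends up in the artifact uploaded at the end of the job.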
echo "$(grep 'myannopang/gene_families.tsv' expected_info_files/checksum.txt | cut -d' ' -f1) myannopang/gene_families.tsv" | shasum -a 256 -c - || { echo 'Checksum verification failed.' >&2; exit 1; }
shasum -a 256 myannopang/gene_families.tsv >> info_to_test/checksum.txt
cd -
      - name: clusters reading from external file
        shell: bash -l {0}
        run: |
          source ppgg/bin/activate
          cd testingDataset
          ppanggolin panrgp --anno genomes.gbff.list --cluster clusters.tsv --output readclusterpang --cpu $NUM_CPUS
          ppanggolin annotate --anno genomes.gbff.list --output readclusters --cpu $NUM_CPUS
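          # The awk one-liner duplicates the first column of clusters.tsv, presumably to
          # add an explicit representative column (hence "clusters_with_reprez.tsv")
          # before passing the file to `ppanggolin cluster --clusters`.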
          awk 'BEGIN{FS=OFS="\t"} {$1 = $1 OFS $1} 1' clusters.tsv > clusters_with_reprez.tsv
          ppanggolin cluster --clusters clusters_with_reprez.tsv -p readclusters/pangenome.h5 --cpu $NUM_CPUS
          ppanggolin msa --pangenome readclusterpang/pangenome.h5 --partition persistent --phylo -o readclusterpang/msa/ -f --cpu $NUM_CPUS
          echo "$(grep 'readclusterpang/gene_families.tsv' expected_info_files/checksum.txt | cut -d' ' -f1) readclusterpang/gene_families.tsv" | shasum -a 256 -c - || { echo 'Checksum verification failed.' >&2; exit 1; }
          shasum -a 256 readclusterpang/gene_families.tsv >> info_to_test/checksum.txt
          cd -
      - name: testing context command
        shell: bash -l {0}
        run: |
          source ppgg/bin/activate
          cd testingDataset
          ppanggolin context --pangenome myannopang/pangenome.h5 --sequences some_chlam_proteins.fasta --output test_context --fast --cpu $NUM_CPUS
          # Test from gene family IDs, here with one family of module 1; the context should find all families of module 1
          echo AP288_RS05055 > one_family_of_module_1.txt
          ppanggolin context --pangenome myannopang/pangenome.h5 --family one_family_of_module_1.txt --output test_context_from_id --cpu $NUM_CPUS
          cd -
      - name: testing config file
        shell: bash -l {0}
        run: |
          source ppgg/bin/activate
          cd testingDataset
          ppanggolin utils --default_config panrgp -o panrgp_default_config.yaml
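          # Keep only the first two columns of clusters.tsv, presumably dropping the
          # fragment column (hence "clusters_without_frag.tsv"), then rerun panrgp with
          # the generated default configuration file.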
          cut -f1,2 clusters.tsv > clusters_without_frag.tsv
          ppanggolin panrgp --anno genomes.gbff.list --cluster clusters_without_frag.tsv -o test_config --config panrgp_default_config.yaml --cpu $NUM_CPUS
          echo "$(grep 'test_config/gene_families.tsv' expected_info_files/checksum.txt | cut -d' ' -f1) test_config/gene_families.tsv" | shasum -a 256 -c - || { echo 'Checksum verification failed.' >&2; exit 1; }
          shasum -a 256 test_config/gene_families.tsv >> info_to_test/checksum.txt
          cd -
      - name: testing projection cmd
        shell: bash -l {0}
        run: |
          source ppgg/bin/activate
          cd testingDataset
          head genomes.fasta.list | sed 's/^/input_genome_/g' > genomes.fasta.head.list
          # ppanggolin projection --pangenome myannopang/pangenome.h5 -o projection_from_list_of_fasta --fasta genomes.fasta.head.list --gff --proksee --cpu $NUM_CPUS
          # Projection of a plasmid with chevrons that were added manually, to test chevron handling in GFF
          ppanggolin projection --pangenome myannopang/pangenome.h5 --anno GBFF/plasmid_NZ_CP007132_with_manually_added_chevrons.gff.gz --cpu $NUM_CPUS -o projection_plasmid_with_chevron
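          # Build an annotation list that includes a GFF without sequences plus a matching
          # fasta list: this appears to check that projection can take the sequences from
          # the fasta file when the GFF itself provides none.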
          echo GFF_plasmid_No_seq$'\t'GBFF/plasmid_GCF_000093005.1_ASM9300v1.gff.gz >> genomes.gbff.h3_and_GFFplasmidNoSeq.list
          echo GFF_plasmid_No_seq$'\t'GBFF/plasmid_GCF_000093005.1_ASM9300v1.fna.gz >> genomes.fna.GFFplasmidNoSeq.list
          ppanggolin projection -p myannopang/pangenome.h5 --anno genomes.gbff.h3_and_GFFplasmidNoSeq.list --fasta genomes.fna.GFFplasmidNoSeq.list
      - name: Archive diff files
        uses: actions/upload-artifact@v4
        with:
          name: comparison-results_${{ matrix.os }}_python${{ matrix.python-version }}
          path: testingDataset/info_to_test/*