# Process Samples Aspera #1127
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow header: display name, trigger, and workflow-wide environment.
name: Process Samples Aspera

# Triggered only by the cron schedule below (no push / manual triggers).
on:
  schedule:
    # Minute 0 of hours 0 through 16, on days-of-week 0 and 6.
    - cron: '0 0-16 * * 0,6'

env:
  # Passed as the argument to scripts/get_accession_list.py and
  # scripts/update_sample_status.py, and as --num_samples to nextflow.
  # Quoted because environment variables are always strings.
  BATCH_SIZE: '100'
# Two jobs, both on the self-hosted runner:
#   setup       - builds the accession list and reports whether one exists.
#   run_samples - runs only when setup reported 'true'; runs the nextflow
#                 pipeline, aggregates and uploads outputs, then commits
#                 data/all_metadata.csv.
jobs:
  setup:
    permissions:
      contents: write
      id-token: write
    runs-on: self-hosted
    outputs:
      # 'true' when data/accession_list.csv exists after the list script ran,
      # 'false' otherwise (see the 'Get accession list' step).
      run_rest_jobs: ${{ steps.get_accession_list.outputs.run_jobs }}
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup Python
        env:
          # Hand the secret to the shell as an environment variable instead of
          # splicing it into the script text, so it is never word-split or
          # interpreted by the shell.
          SUDO_PASSWORD: ${{ secrets.DPILZ_USR_PWD }}
        run: |
          printf '%s\n' "$SUDO_PASSWORD" | sudo -S dnf install python3 -y
          printf '%s\n' "$SUDO_PASSWORD" | sudo -S dnf install python3-pip -y
          pip3 install pandas numpy pyyaml ffq epiweeks git+https://github.com/outbreak-info/python-outbreak-info.git@new_docs

      - name: 'Get accession list'
        id: get_accession_list
        run: |
          python scripts/get_accession_list.py "$BATCH_SIZE"
          # Step outputs are written to $GITHUB_OUTPUT; the older
          # `::set-output` workflow command is deprecated.
          if [[ -f data/accession_list.csv ]]; then
            echo "run_jobs=true" >> "$GITHUB_OUTPUT"
          else
            echo "run_jobs=false" >> "$GITHUB_OUTPUT"
          fi

  run_samples:
    needs: [setup]
    # Skip the whole job when setup found no accession list.
    if: needs.setup.outputs.run_rest_jobs == 'true'
    permissions:
      contents: write
      id-token: write
    runs-on: self-hosted
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup nextflow
        uses: nf-core/setup-nextflow@v1

      - name: Setup Python
        env:
          # Same handling as in the setup job: keep the secret out of the
          # script text and pass it through the environment.
          SUDO_PASSWORD: ${{ secrets.DPILZ_USR_PWD }}
        run: |
          printf '%s\n' "$SUDO_PASSWORD" | sudo -S dnf install python3 -y
          printf '%s\n' "$SUDO_PASSWORD" | sudo -S dnf install python3-pip -y
          pip3 install pandas numpy pyyaml ffq epiweeks git+https://github.com/outbreak-info/python-outbreak-info.git@new_docs

      - name: 'Set up Cloud SDK'
        uses: 'google-github-actions/setup-gcloud@v2'
        with:
          version: '>= 363.0.0'

      # NOTE(review): the service_account value below is the e-mail
      # obfuscation placeholder left by the page this file was copied from;
      # restore the real service-account address before using this workflow.
      - id: 'auth'
        name: 'Authenticate with gcloud'
        uses: 'google-github-actions/auth@v2'
        with:
          workload_identity_provider: 'projects/12767718289/locations/global/workloadIdentityPools/github/providers/freyja-sra'
          service_account: '[email protected]'

      # Re-run the list script in this job's own checkout; the pipeline step
      # below reads data/accession_list.csv.
      - name: 'Get accession list'
        id: get_accession_list
        run: |
          python scripts/get_accession_list.py "$BATCH_SIZE"

      - name: Run pipeline on new samples
        run: |
          export NXF_ENABLE_VIRTUAL_THREADS=false
          # nextflow is started in the background and then waited on by PID;
          # `wait` returns the pipeline's exit status to the step.
          nextflow run main.nf \
            --accession_list data/accession_list.csv \
            --num_samples "$BATCH_SIZE" \
            -profile docker \
            -entry aspera &
          BG_PID=$!
          wait $BG_PID

      - name: Aggregate outputs
        run: |
          python scripts/aggregate_demix.py
          python scripts/aggregate_variants.py
          python scripts/aggregate_metadata.py

      # Copy the three aggregate JSON files from the bucket into
      # outputs/aggregate/ before the concatenate step runs.
      - id: 'download-aggregated-outputs'
        name: 'Download aggregated outputs'
        run: |
          gcloud storage cp gs://outbreak-ww-data/aggregate/aggregate_demix.json outputs/aggregate/aggregate_demix.json --billing-project=andersen-lab-primary
          gcloud storage cp gs://outbreak-ww-data/aggregate/aggregate_variants.json outputs/aggregate/aggregate_variants.json --billing-project=andersen-lab-primary
          gcloud storage cp gs://outbreak-ww-data/aggregate/aggregate_metadata.json outputs/aggregate/aggregate_metadata.json --billing-project=andersen-lab-primary

      - id: 'concatenate-outputs'
        name: 'Concatenate outputs'
        run: |
          python scripts/concat_agg_files.py

      - id: 'create-demix-by-week'
        name: 'Create demix by week'
        run: |
          python scripts/aggregate_demix_by_week.py

      - id: 'upload-outputs'
        name: 'Upload Outputs to Cloud Storage'
        uses: 'google-github-actions/upload-cloud-storage@v2'
        with:
          path: 'outputs/'
          destination: 'outbreak-ww-data/'
          parent: false
          project_id: 'andersen-lab-primary'

      - name: 'Update processed samples'
        run: |
          python scripts/update_sample_status.py "$BATCH_SIZE"

      # NOTE(review): the user.email value below is the e-mail obfuscation
      # placeholder left by the page this file was copied from; restore the
      # real address before using this workflow.
      # NOTE(review): `git push --force` overwrites the remote branch with the
      # local one; confirm this is intended rather than a plain push.
      - name: 'Commit and push changes'
        run: |
          git config --local user.name "$GITHUB_ACTOR"
          git config --local user.email "[email protected]"
          git remote set-url origin https://github.com/andersen-lab/Freyja-SRA
          git add data/all_metadata.csv
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the job; only commit and push when the file actually changed.
          if git diff --cached --quiet; then
            echo "No changes to data/all_metadata.csv; skipping commit and push"
          else
            git commit -m "Update processed samples"
            git push --force
          fi

      - name: 'Clean workspace'
        run: |
          rm -rf ${{ github.workspace }}/*