Process Samples Aspera #1122

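# Scheduled batch workflow: pulls a batch of new SRA accessions (downloaded via
# Aspera), runs the Freyja-SRA Nextflow pipeline on a self-hosted runner,
# aggregates the results, publishes them to Google Cloud Storage, and records
# the processed samples back in the repository.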
name: Process Samples Aspera
on:
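  # Hourly from 00:00 to 16:00 UTC on Sundays and Saturdays (day-of-week 0 and 6).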
  schedule:
    - cron: '0 0-16 * * 0,6'
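# Maximum number of new accessions pulled and processed per run; passed to the
# accession-list script and to the pipeline as --num_samples.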
env:
  BATCH_SIZE: 100
jobs:
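  # Lightweight gate job: builds the accession list and exposes run_rest_jobs
  # so the heavier run_samples job only starts when there is new data.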
  setup:
    permissions:
      contents: write
      id-token: write
    runs-on: self-hosted
    outputs:
      run_rest_jobs: ${{ steps.get_accession_list.outputs.run_jobs }}
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
      - name: Setup Python
        run: |
          echo ${{ secrets.DPILZ_USR_PWD }} | sudo -S dnf install python3 -y
          echo ${{ secrets.DPILZ_USR_PWD }} | sudo -S dnf install python3-pip -y
          pip3 install pandas numpy pyyaml ffq epiweeks git+https://github.com/outbreak-info/python-outbreak-info.git@new_docs
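      # scripts/get_accession_list.py is expected to write data/accession_list.csv
      # only when unprocessed accessions remain; the file's presence drives the
      # run_jobs output consumed by the job-level run_rest_jobs gate.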
      - name: 'Get accession list'
        id: get_accession_list
        run: |
          python scripts/get_accession_list.py $BATCH_SIZE
          if [[ -f data/accession_list.csv ]]; then
            echo "run_jobs=true" >> "$GITHUB_OUTPUT"
          else
            echo "run_jobs=false" >> "$GITHUB_OUTPUT"
          fi
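  # Main processing job: downloads and processes the batch, aggregates and
  # publishes the results, then marks the samples as processed. Skipped
  # entirely when setup found no new accessions.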
  run_samples:
    needs: [setup]
    if: needs.setup.outputs.run_rest_jobs == 'true'
    permissions:
      contents: write
      id-token: write
    runs-on: self-hosted
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
      - name: Setup nextflow
        uses: nf-core/setup-nextflow@v1
      - name: Setup Python
        run: |
          echo ${{ secrets.DPILZ_USR_PWD }} | sudo -S dnf install python3 -y
          echo ${{ secrets.DPILZ_USR_PWD }} | sudo -S dnf install python3-pip -y
          pip3 install pandas numpy pyyaml ffq epiweeks git+https://github.com/outbreak-info/python-outbreak-info.git@new_docs
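      # Keyless Google Cloud authentication via Workload Identity Federation;
      # relies on the id-token: write permission granted to this job.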
      - name: 'Set up Cloud SDK'
        uses: 'google-github-actions/setup-gcloud@v2'
        with:
          version: '>= 363.0.0'
      - id: 'auth'
        name: 'Authenticate with gcloud'
        uses: 'google-github-actions/auth@v2'
        with:
          workload_identity_provider: 'projects/12767718289/locations/global/workloadIdentityPools/github/providers/freyja-sra'
          service_account: '[email protected]'
      - name: 'Get accession list'
        id: get_accession_list
        run: |
          python scripts/get_accession_list.py $BATCH_SIZE
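      # Run the 'aspera' entry workflow of main.nf on the new batch. Backgrounding
      # Nextflow and then wait-ing on it keeps the step's exit status tied to the
      # pipeline process.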
      - name: Run pipeline on new samples
        run: |
          export NXF_ENABLE_VIRTUAL_THREADS=false
          nextflow run main.nf \
            --accession_list data/accession_list.csv \
            --num_samples $BATCH_SIZE \
            -profile docker \
            -entry aspera &
          BG_PID=$!
          wait $BG_PID
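      # Each aggregate_*.py script is assumed to collect this batch's per-sample
      # outputs into a single JSON under outputs/aggregate/ (the paths merged below).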
      - name: Aggregate outputs
        run: |
          python scripts/aggregate_demix.py
          python scripts/aggregate_variants.py
          python scripts/aggregate_metadata.py
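      # Fetch the previously published aggregates from the bucket so the new
      # batch can be merged into them.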
      - id: 'download-aggregated-outputs'
        name: 'Download aggregated outputs'
        run: |
          gcloud storage cp gs://outbreak-ww-data/aggregate/aggregate_demix.json outputs/aggregate/aggregate_demix.json --billing-project=andersen-lab-primary
          gcloud storage cp gs://outbreak-ww-data/aggregate/aggregate_variants.json outputs/aggregate/aggregate_variants.json --billing-project=andersen-lab-primary
          gcloud storage cp gs://outbreak-ww-data/aggregate/aggregate_metadata.json outputs/aggregate/aggregate_metadata.json --billing-project=andersen-lab-primary
      - id: 'concatenate-outputs'
        name: 'Concatenate outputs'
        run: |
          python scripts/concat_agg_files.py
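      # Build the week-level demix summary, presumably binned with the epiweeks
      # package installed above.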
      - id: 'create-demix-by-week'
        name: 'Create demix by week'
        run: |
          python scripts/aggregate_demix_by_week.py
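      # Upload the whole outputs/ tree to the outbreak-ww-data bucket;
      # parent: false keeps the local 'outputs/' directory name out of the object paths.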
      - id: 'upload-outputs'
        name: 'Upload Outputs to Cloud Storage'
        uses: 'google-github-actions/upload-cloud-storage@v2'
        with:
          path: 'outputs/'
          destination: 'outbreak-ww-data/'
          parent: false
          project_id: 'andersen-lab-primary'
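      # scripts/update_sample_status.py is expected to mark this batch as processed
      # in data/all_metadata.csv, which is then committed back to the repository.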
      - name: 'Update processed samples'
        run: |
          python scripts/update_sample_status.py $BATCH_SIZE
      - name: 'Commit and push changes'
        run: |
          git config --local user.name "$GITHUB_ACTOR"
          git config --local user.email "[email protected]"
          git remote set-url origin https://github.com/andersen-lab/Freyja-SRA
          git add data/all_metadata.csv
          git commit -m "Update processed samples"
          git push --force
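      # Self-hosted runners do not get a fresh workspace between runs, so clean up explicitly.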
      - name: 'Clean workspace'
        run: |
          rm -rf ${{ github.workspace }}/*