Skip to content

Commit

Permalink
Change SILO container to run for the short-read sequences in the meta…
Browse files Browse the repository at this point in the history
…data.s3Link column of get-released-data
  • Loading branch information
Taepper committed Dec 11, 2024
1 parent 5d556f0 commit 6ef81be
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 26 deletions.
36 changes: 33 additions & 3 deletions kubernetes/loculus/silo_import_job.sh
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,44 @@ download_data() {
echo
}

# Builds the SILO input file from the short-read files referenced in the
# released metadata.
#
# Input from https://backend-wise-seqs.loculus.org/test/get-released-data
#
# Reads the `.metadata.s3Link` value of every record in "$new_input_data_path",
# downloads each referenced object with `aws s3 cp`, and appends its content to
# "$silo_input_data_path" in the order the links appear. Records without an
# s3Link are skipped.
#
# Globals (read): new_input_data_path, silo_input_data_path,
#                 AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION
# Returns: 0 when every link was downloaded and appended, 1 otherwise. On
#          failure "$silo_input_data_path" may hold only the files appended so
#          far, so callers must not run preprocessing on it.
extract_short_read_files_from_s3() {
  local work_dir links_file download_file s3_link
  local status=0

  aws configure set aws_access_key_id "$AWS_ACCESS_KEY" || return 1
  aws configure set aws_secret_access_key "$AWS_SECRET_ACCESS_KEY" || return 1
  aws configure set region "$AWS_DEFAULT_REGION" || return 1

  # One private scratch directory holds every intermediate file, so a single
  # rm cleans up on all exit paths and nothing is left in the working directory.
  work_dir=$(mktemp -d) || return 1
  links_file="$work_dir/s3_links.txt"
  download_file="$work_dir/download.ndjson"

  # Extract S3 links from the metadata. `// empty` keeps records that lack an
  # s3Link from producing the literal string "null" as a download source.
  if ! jq -r '.metadata.s3Link // empty' "$new_input_data_path" > "$links_file"; then
    echo "Failed to extract S3 links from $new_input_data_path" >&2
    rm -rf -- "${work_dir:?}"
    return 1
  fi

  # Make sure the output exists even when there is nothing to download.
  touch "$silo_input_data_path"

  # Loop through each S3 link and append the content to the output file.
  # The `|| [ -n ... ]` clause keeps a final line without trailing newline.
  while IFS= read -r s3_link || [ -n "$s3_link" ]; do
    [ -n "$s3_link" ] || continue

    # Download the ndjson file from S3. stdin is detached so the command can
    # never consume the remaining links from the loop's input.
    if ! aws s3 cp "$s3_link" "$download_file" < /dev/null; then
      echo "Failed to download $s3_link" >&2
      status=1
      break
    fi

    # Append the content to the output file
    if ! cat "$download_file" >> "$silo_input_data_path"; then
      echo "Failed to append $s3_link to $silo_input_data_path" >&2
      status=1
      break
    fi
  done < "$links_file"

  rm -rf -- "${work_dir:?}"
  return "$status"
}

preprocessing() {
echo "Starting preprocessing"

rm -f "$silo_input_data_path"

# This is necessary because the silo preprocessing is configured to expect the input data
# at /preprocessing/input/data.ndjson.zst
cp "$new_input_data_path" "$silo_input_data_path"
# take data from $new_input_data_path, get all data from the S3 buckets (referenced in column s3Link)
# and put it into $silo_input_data_path
extract_short_read_files_from_s3

set +e
time /app/siloApi --preprocessing
Expand Down
65 changes: 43 additions & 22 deletions kubernetes/loculus/templates/_siloDatabaseConfig.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,48 @@

{{- /*
loculus.siloDatabaseConfig emits a `schema:` mapping.
It reads .nucleotideSequences, .organismName, .commonMetadata, .metadata and .silo
from the context it is included with.
NOTE(review): instanceName, opennessLevel and primaryKey each appear twice in this
block (templated values near the top, literals wise-sarsCoV2 / OPEN / read_id at the
bottom). Duplicate YAML keys are ambiguous; confirm which set is meant to remain.
*/}}
{{- define "loculus.siloDatabaseConfig" }}
schema:
{{- /* Segment names; falls back to the single-element list "main" when .nucleotideSequences is unset or empty. */}}
{{- $segments := .nucleotideSequences | default (list "main")}}
{{- /* True when more than one segment is configured. */}}
{{- $is_segmented := gt (len $segments) 1 }}
instanceName: {{ .organismName }}
opennessLevel: OPEN
metadata:
{{- /* Iterates over .commonMetadata followed by .metadata. */}}
{{- range (concat .commonMetadata .metadata) }}
{{- $currentItem := . }}
{{- /* A field flagged .perSegment in a multi-segment setup gets one entry per segment, named "<name>_<segment>"; every other field gets a single entry under its own name. */}}
{{- if and $is_segmented .perSegment }}
{{- range $segment := $segments }}
{{- /* `with` restores the metadata item as the dot inside the segment loop. */}}
{{- with $currentItem }}
{{- /* "loculus.siloDatabaseShared" is defined outside this block; its output is indented by four spaces here. */}}
{{- include "loculus.siloDatabaseShared" . | nindent 4 }}
name: {{ printf "%s_%s" .name $segment | quote}}
{{- end }}
{{- end }}
{{- else }}
{{- include "loculus.siloDatabaseShared" . | nindent 4 }}
name: {{ .name }}
{{- end }}
{{- end }}
primaryKey: accessionVersion
{{- /* When .silo is set, it is rendered as YAML indented by two spaces. */}}
{{ if .silo}}
{{- .silo | toYaml | nindent 2 }}
{{ end }}
{{- /* Literal (non-templated) metadata field list; none of these fields generate an index. */}}
- name: sample_id
type: string
generateIndex: false
- name: batch_id
type: string
generateIndex: false
- name: sequencing_well_position
type: string
generateIndex: false
- name: location_code
type: string
generateIndex: false
- name: sampling_date
type: date
generateIndex: false
- name: sequencing_date
type: string
generateIndex: false
- name: flow_cell_serial_number
type: string
generateIndex: false
- name: read_length
type: int
generateIndex: false
- name: primer_protocol
type: string
generateIndex: false
- name: location_name
type: string
generateIndex: false
- name: primer_protocol_name
type: string
generateIndex: false
- name: nextclade_reference
type: string
generateIndex: false
- name: read_id
type: string
generateIndex: false
opennessLevel: OPEN
instanceName: wise-sarsCoV2
features: []
primaryKey: read_id
{{- end }}
9 changes: 9 additions & 0 deletions kubernetes/loculus/templates/lapis-silo-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ spec:
{{- else }}
value: "http://loculus-backend-service:8079/{{ $key }}"
{{- end }}
- name: AWS_DEFAULT_REGION
value: eu-central-1
- name: AWS_ACCESS_KEY
value: AKIA6AB5EFK3N6KDJJ52
- name: AWS_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: wise-short-read-sequence-bucket
key: secret-access-key
volumeMounts:
- name: lapis-silo-database-config-processed
mountPath: /preprocessing/input/reference_genomes.json
Expand Down
7 changes: 6 additions & 1 deletion kubernetes/loculus/values_preview_server.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,9 @@ reduceResourceRequest: true
previewDocs: false
robotsNoindexHeader: true
disableEnaSubmission: false
additionalHeadHTML: '<script defer data-domain="loculus.org" src="https://plausible.io/js/script.js"></script>'
additionalHeadHTML: '<script defer data-domain="loculus.org" src="https://plausible.io/js/script.js"></script>'
wise-short-read-sequence-bucket:
type: sealedsecret
clusterWide: "true"
data:
secret-access-key: 'AgB9YyY/cXTfmi44zSfySs2mRjEeZfaye4lZlNL//mMuc7kaQZcEaBQ2N0C4UBQBN3zz61T+3YrR64PSMcRC97GaEGj/fMrxc8WUW9AMzLZEHIXZRmrM+BChPCA/MGoN/ekUpBuWZnTlh48fGxQg4GlCHFrnq3fpztoHiSrmED6Q7FuWOliuWnRqObmyh7xs+6AwGcs0NRhH5yQVAjwZlL9/m8LN4Cjr1mA7yedYuSYd5Ztdy5LMHOukWH9tD+NKdH8X/BfIP6axQCUA4wUiiGWI+mXfBC1dXDaueblu1zTejloLJ3CpS9BGuzS2uxC5ac9xVifA6hljyWD8oPRQ7Rzi7Uv3gdMTUFXZBCLMLK9YQntqhqnvkroWdg4kn7J4VOKbLiHz6JiLCbiELPhpbEuvZFGQl4psANSg1ODOuaibcdMiwfJc1vnSCEzQ27ura/ubZ2v6QcEz5c1jDasG26e+n5xSHtzn1aKHPaRdBTQJa5F5TPYbFiIZbWY9+1mabNxRSVCeAusmeXCyDDI7z4NqxrLBQW5NDGZx1vvDYrPiAwcjjbNX+y7P7apPsvi23n4MNWbd5WCZ6ETyG7pirNsGmkS8kNvjlUlXdkcCF5tAvGwvfPtndxeizul4sxg98eKXSOzeiMbgmpAry42OPSgF7HbHMvurWCltIkdrutb54TIstei1qjV5MBmEXVGSM+m4Zzj1u6YoiC/DV6grnuaCE8ZCf2nWHFtiy26oBo7+7NLsHE7uROfR'

0 comments on commit 6ef81be

Please sign in to comment.