diff --git a/kubernetes/loculus/silo_import_job.sh b/kubernetes/loculus/silo_import_job.sh
index c4c30396f..15e207646 100755
--- a/kubernetes/loculus/silo_import_job.sh
+++ b/kubernetes/loculus/silo_import_job.sh
@@ -148,14 +148,44 @@ download_data() {
echo
}
+extract_short_read_files_from_s3() {
+    # Build the SILO input file by concatenating the content of every S3 object
+    # referenced in the released-data records.
+    # Input from https://backend-wise-seqs.loculus.org/test/get-released-data
+    #
+    # Globals (read): AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION,
+    #                 new_input_data_path, silo_input_data_path
+    # Outputs:        appends each downloaded object to $silo_input_data_path
+    # Returns:        0 on success, 1 if link extraction or any download fails
+    local s3_links_file temp_file s3_link
+
+    # Note: `aws configure set` persists these values in the AWS CLI
+    # configuration of the current user.
+    aws configure set aws_access_key_id "$AWS_ACCESS_KEY"
+    aws configure set aws_secret_access_key "$AWS_SECRET_ACCESS_KEY"
+    aws configure set region "$AWS_DEFAULT_REGION"
+
+    # Use an unpredictable temporary file instead of a fixed name in the
+    # working directory, and remove it on every exit path below.
+    s3_links_file=$(mktemp) || return 1
+
+    # Extract S3 links from the metadata. `// empty` skips records without an
+    # s3Link instead of emitting the literal string "null".
+    if ! jq -r '.metadata.s3Link // empty' "$new_input_data_path" > "$s3_links_file"; then
+        echo "Failed to extract S3 links from $new_input_data_path" >&2
+        rm -f "$s3_links_file"
+        return 1
+    fi
+
+    # Make sure the output file exists even when there is nothing to download
+    touch "$silo_input_data_path"
+
+    # Loop through each S3 link and append the content to the output file
+    while IFS= read -r s3_link; do
+        # Temporary file for downloaded content
+        if ! temp_file=$(mktemp); then
+            rm -f "$s3_links_file"
+            return 1
+        fi
+
+        # Download the ndjson file from S3; abort rather than silently
+        # producing an incomplete input file.
+        if ! aws s3 cp "$s3_link" "$temp_file"; then
+            echo "Failed to download $s3_link" >&2
+            rm -f "$temp_file" "$s3_links_file"
+            return 1
+        fi
+
+        # Append only fully downloaded content to the output file
+        cat "$temp_file" >> "$silo_input_data_path"
+
+        # Clean up the temporary file
+        rm -f "$temp_file"
+    done < "$s3_links_file"
+
+    rm -f "$s3_links_file"
+}
+
preprocessing() {
echo "Starting preprocessing"
rm -f "$silo_input_data_path"
- # This is necessary because the silo preprocessing is configured to expect the input data
- # at /preprocessing/input/data.ndjson.zst
- cp "$new_input_data_path" "$silo_input_data_path"
+ # Read the records in $new_input_data_path, download every S3 object referenced by
+ # their metadata.s3Link field and concatenate the contents into $silo_input_data_path
+ extract_short_read_files_from_s3
set +e
time /app/siloApi --preprocessing
diff --git a/kubernetes/loculus/templates/_siloDatabaseConfig.tpl b/kubernetes/loculus/templates/_siloDatabaseConfig.tpl
index bc22a36fd..34606d008 100644
--- a/kubernetes/loculus/templates/_siloDatabaseConfig.tpl
+++ b/kubernetes/loculus/templates/_siloDatabaseConfig.tpl
@@ -12,27 +12,48 @@
{{- define "loculus.siloDatabaseConfig" }}
+{{- /*
+  Renders the `schema` section of the SILO database config.
+  The output is static: a fixed list of metadata fields, opennessLevel OPEN,
+  instanceName wise-sarsCoV2, an empty features list and read_id as primary key.
+  NOTE(review): this template no longer reads anything from the context passed by
+  its caller, so every caller gets the same schema - confirm this is intended.
+  NOTE(review): sampling_date is typed `date` while sequencing_date is typed
+  `string` - confirm this is intended.
+*/}}
schema:
- {{- $segments := .nucleotideSequences | default (list "main")}}
- {{- $is_segmented := gt (len $segments) 1 }}
- instanceName: {{ .organismName }}
- opennessLevel: OPEN
metadata:
- {{- range (concat .commonMetadata .metadata) }}
- {{- $currentItem := . }}
- {{- if and $is_segmented .perSegment }}
- {{- range $segment := $segments }}
- {{- with $currentItem }}
- {{- include "loculus.siloDatabaseShared" . | nindent 4 }}
- name: {{ printf "%s_%s" .name $segment | quote}}
- {{- end }}
- {{- end }}
- {{- else }}
- {{- include "loculus.siloDatabaseShared" . | nindent 4 }}
- name: {{ .name }}
- {{- end }}
- {{- end }}
- primaryKey: accessionVersion
-{{ if .silo}}
- {{- .silo | toYaml | nindent 2 }}
-{{ end }}
+ - name: sample_id
+ type: string
+ generateIndex: false
+ - name: batch_id
+ type: string
+ generateIndex: false
+ - name: sequencing_well_position
+ type: string
+ generateIndex: false
+ - name: location_code
+ type: string
+ generateIndex: false
+ - name: sampling_date
+ type: date
+ generateIndex: false
+ - name: sequencing_date
+ type: string
+ generateIndex: false
+ - name: flow_cell_serial_number
+ type: string
+ generateIndex: false
+ - name: read_length
+ type: int
+ generateIndex: false
+ - name: primer_protocol
+ type: string
+ generateIndex: false
+ - name: location_name
+ type: string
+ generateIndex: false
+ - name: primer_protocol_name
+ type: string
+ generateIndex: false
+ - name: nextclade_reference
+ type: string
+ generateIndex: false
+ - name: read_id
+ type: string
+ generateIndex: false
+ opennessLevel: OPEN
+ instanceName: wise-sarsCoV2
+ features: []
+ primaryKey: read_id
{{- end }}
diff --git a/kubernetes/loculus/templates/lapis-silo-deployment.yaml b/kubernetes/loculus/templates/lapis-silo-deployment.yaml
index c5df2c873..ec21d7a63 100644
--- a/kubernetes/loculus/templates/lapis-silo-deployment.yaml
+++ b/kubernetes/loculus/templates/lapis-silo-deployment.yaml
@@ -84,6 +84,15 @@ spec:
{{- else }}
value: "http://loculus-backend-service:8079/{{ $key }}"
{{- end }}
+ # AWS settings read by silo_import_job.sh (extract_short_read_files_from_s3),
+ # which passes them to `aws configure set` before downloading from S3.
+ - name: AWS_DEFAULT_REGION
+ value: eu-central-1
+ # NOTE(review): the access key ID is committed here in plain text; consider
+ # sourcing it from the same secret as the secret access key.
+ - name: AWS_ACCESS_KEY
+ value: AKIA6AB5EFK3N6KDJJ52
+ # The secret access key is taken from the `secret-access-key` entry of the
+ # `wise-short-read-sequence-bucket` secret.
+ - name: AWS_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: wise-short-read-sequence-bucket
+ key: secret-access-key
volumeMounts:
- name: lapis-silo-database-config-processed
mountPath: /preprocessing/input/reference_genomes.json
diff --git a/kubernetes/loculus/values_preview_server.yaml b/kubernetes/loculus/values_preview_server.yaml
index 7452a3b99..6660bb684 100644
--- a/kubernetes/loculus/values_preview_server.yaml
+++ b/kubernetes/loculus/values_preview_server.yaml
@@ -34,4 +34,9 @@ reduceResourceRequest: true
previewDocs: false
robotsNoindexHeader: true
disableEnaSubmission: false
-additionalHeadHTML: ''
\ No newline at end of file
+additionalHeadHTML: ''
+wise-short-read-sequence-bucket:
+ type: sealedsecret
+ clusterWide: "true"
+ data:
+ secret-access-key: 'AgB9YyY/cXTfmi44zSfySs2mRjEeZfaye4lZlNL//mMuc7kaQZcEaBQ2N0C4UBQBN3zz61T+3YrR64PSMcRC97GaEGj/fMrxc8WUW9AMzLZEHIXZRmrM+BChPCA/MGoN/ekUpBuWZnTlh48fGxQg4GlCHFrnq3fpztoHiSrmED6Q7FuWOliuWnRqObmyh7xs+6AwGcs0NRhH5yQVAjwZlL9/m8LN4Cjr1mA7yedYuSYd5Ztdy5LMHOukWH9tD+NKdH8X/BfIP6axQCUA4wUiiGWI+mXfBC1dXDaueblu1zTejloLJ3CpS9BGuzS2uxC5ac9xVifA6hljyWD8oPRQ7Rzi7Uv3gdMTUFXZBCLMLK9YQntqhqnvkroWdg4kn7J4VOKbLiHz6JiLCbiELPhpbEuvZFGQl4psANSg1ODOuaibcdMiwfJc1vnSCEzQ27ura/ubZ2v6QcEz5c1jDasG26e+n5xSHtzn1aKHPaRdBTQJa5F5TPYbFiIZbWY9+1mabNxRSVCeAusmeXCyDDI7z4NqxrLBQW5NDGZx1vvDYrPiAwcjjbNX+y7P7apPsvi23n4MNWbd5WCZ6ETyG7pirNsGmkS8kNvjlUlXdkcCF5tAvGwvfPtndxeizul4sxg98eKXSOzeiMbgmpAry42OPSgF7HbHMvurWCltIkdrutb54TIstei1qjV5MBmEXVGSM+m4Zzj1u6YoiC/DV6grnuaCE8ZCf2nWHFtiy26oBo7+7NLsHE7uROfR'
\ No newline at end of file