Patch summary (free text ahead of the first `diff --git` header).

bin/cache-version (new file): streams the zstd-compressed TSV found at the
S3 URL given as the first argument, keeps its first two lines, selects the
`nextclade_version` and `dataset_version` columns by header name, and prints
the last remaining line as a JSON object with the keys `nextclade_version`
and `nextclade_dataset_version`. SIGPIPE is ignored (presumably because
`head -n 2` closes the pipe while upstream commands are still writing) and
stderr of the whole pipeline is discarded.

bin/use-nextclade-cache: the path to the Nextclade executable becomes a
required third argument and the optional reference wildcard moves to the
fourth. After the renew-flag check, the script prints "false" and exits 0
when the output of `<nextclade> --version` differs from the
`nextclade_version` reported by bin/cache-version for
`<s3_dst>/nextclade<reference>.tsv.zst`.

workflow/snakemake_rules/nextclade.smk: the `use_nextclade_cache` rule
declares a `nextclade` input and passes it to bin/use-nextclade-cache.

NOTE(review): leading whitespace below was reconstructed assuming 4-space
indentation. Confirm that the context lines match the target files, or
apply with `git apply --ignore-whitespace`.

diff --git a/bin/cache-version b/bin/cache-version
new file mode 100644
index 00000000..13940869
--- /dev/null
+++ b/bin/cache-version
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+s3_url="${1:?An S3 URL is required as the first argument}"
+
+
+trap '' SIGPIPE
+
+(aws s3 cp "${s3_url}" - \
+    | zstd -T0 -dcq \
+    | head -n 2 \
+    | tsv-select -H -f 'nextclade_version,dataset_version' \
+    | tail -n 1 \
+    | jq --raw-input --slurp '
+        split("\n")
+        | map(split("\t"))
+        | .[0:-1]
+        | map( { "nextclade_version": .[0], "nextclade_dataset_version": .[1] } )
+        | .[0]') \
+    2> /dev/null
diff --git a/bin/use-nextclade-cache b/bin/use-nextclade-cache
index 0a783dd1..8dadb9a9 100755
--- a/bin/use-nextclade-cache
+++ b/bin/use-nextclade-cache
@@ -1,19 +1,31 @@
 #!/bin/bash
 set -euo pipefail
 
-vendored="$(dirname "$0")"/../vendored
+bin="$(dirname "$0")"
+vendored="$bin"/../vendored
 
 main() {
     s3_dst="${1:?A destination s3:// URL where the renew file is hosted is required as the first argument.}"
     s3_src="${2:?A source s3:// URL where the fallback renew file is hosted is required as the second argument.}"
+    nextclade="${3:?A path to the Nextclade executable is required as the third argument}"
     # Nextclade dataset reference wildcard
-    reference="${3:-}"
+    reference="${4:-}"
+
 
     if renew-flag-exists; then
         echo "[INFO] Found renew flag" >&2
         echo "false"
         exit 0
     fi
 
+    cache_versions="$(get-cache-version-info)"
+    cache_nextclade_version="$(echo "$cache_versions" | jq -r .nextclade_version)"
+    current_nextclade_version="$("$nextclade" --version)"
+    if [[ "$cache_nextclade_version" != "$current_nextclade_version" ]]; then
+        echo "[INFO] Current Nextclade version (${current_nextclade_version}) is different from cache version (${cache_nextclade_version})" >&2
+        echo "false"
+        exit 0
+    fi
+
     echo "true"
 }
 
@@ -25,4 +37,13 @@ renew-flag-exists() {
     "$vendored"/s3-object-exists "${dst_renew_file}" || "$vendored"/s3-object-exists "${src_renew_file}"
 }
 
+get-cache-version-info() {
+    # TODO: Update to check a separate file for version info
+    # Currently just checks the first row of the nextclade.tsv file
+    local version_file="nextclade${reference}.tsv.zst"
+    local dst_version_file="${s3_dst}/${version_file}"
+
+    echo "$("$bin"/cache-version "$dst_version_file")"
+}
+
 main "$@"
diff --git a/workflow/snakemake_rules/nextclade.smk b/workflow/snakemake_rules/nextclade.smk
index db56ecbf..3c366034 100644
--- a/workflow/snakemake_rules/nextclade.smk
+++ b/workflow/snakemake_rules/nextclade.smk
@@ -63,6 +63,8 @@ if config.get("s3_dst") and config.get("s3_src"):
     ruleorder: download_previous_alignment_from_s3 > create_empty_nextclade_aligned
 
     rule use_nextclade_cache:
+        input:
+            nextclade="nextclade",
         params:
             dst_source=config["s3_dst"],
             src_source=config["s3_src"],
@@ -73,6 +75,7 @@ if config.get("s3_dst") and config.get("s3_src"):
             ./bin/use-nextclade-cache \
                 {params.dst_source:q} \
                 {params.src_source:q} \
+                {input.nextclade:q} \
                 {wildcards.reference:q} \
                 > {output.use_nextclade_cache}
             """