From 63bc9bd13c05c23fcec0037c7e57af21f85d8055 Mon Sep 17 00:00:00 2001 From: Jover Date: Fri, 6 Oct 2023 17:07:39 -0700 Subject: [PATCH] ingest: Add default target rule Adds the default rule to rename the subset metadata TSV to the final metadata TSV if the Nextclade parameters are not defined in the config. This allows users to run the core ingest workflow with the simplest command of `nextstrain build .` --- ingest/README.md | 12 ++++++++++++ ingest/Snakefile | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/ingest/README.md b/ingest/README.md index b7116b7..ce5068d 100644 --- a/ingest/README.md +++ b/ingest/README.md @@ -7,6 +7,18 @@ If you have another data source or private data that needs to be formatted for the phylogenetic workflow, then you can use a similar workflow to curate your own data. +## Run + +From within the `ingest` directory, run the workflow with: + +``` +nextstrain build . +``` + +This produces a `results` directory with the following outputs: +- sequences.fasta +- metadata.tsv + ## Config The config directory contains all of the default configurations for the ingest workflow. diff --git a/ingest/Snakefile b/ingest/Snakefile index fb0a17a..121fe54 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -1,9 +1,31 @@ # Use default configuration values. Override with Snakemake's --configfile/--config options. configfile: "config/defaults.yaml" +rule all: + input: + "results/sequences.fasta", + "results/metadata.tsv", + include: "rules/fetch_from_ncbi.smk" include: "rules/curate.smk" + +# If included, the nextclade rules will create the final metadata TSV by +# joining the Nextclade output with the metadata. +# However, if not including nextclade, we have to rename the subset metadata TSV +# to the final metadata TSV. if "nextclade" in config: include: "rules/nextclade.smk" + +else: + + rule create_final_metadata: + input: + metadata="results/subset_metadata.tsv" + output: + metadata="results/metadata.tsv" + shell: + """ + mv {input.metadata} {output.metadata} + """