
Merge pull request #3 from nextstrain/snakeformat
Run snakefmt on snakefiles
joverlee521 authored Sep 18, 2023
2 parents c336044 + 1eaf21a commit 9a18402
Showing 5 changed files with 44 additions and 31 deletions.
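
For context: snakefmt is the black-based autoformatter for Snakemake files, and the hunks below show its signature changes, namely spaces around = removed inside rule keywords, trailing commas added, and top-level rules separated by two blank lines. A minimal sketch of how the formatter is typically run (flags assumed from its black-style CLI; check snakefmt --help for your version):

    # Reformat every Snakefile and *.smk file under the current directory in place
    snakefmt .

    # Verify formatting without rewriting files, e.g. in CI
    snakefmt --check .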
1 change: 1 addition & 0 deletions ingest/Snakefile
@@ -1,5 +1,6 @@
 # Use default configuration values. Override with Snakemake's --configfile/--config options.
 configfile: "config/defaults.yaml"
 
+
 include: "rules/fetch_from_ncbi.smk"
 include: "rules/curate.smk"
32 changes: 18 additions & 14 deletions ingest/rules/curate.smk
@@ -6,31 +6,34 @@ from NCBI and outputs the clean data as two separate files:
   - results/sequences.fasta
 """
 
+
 # The following two rules can be ignored if you choose not to use the
 # generalized geolocation rules that are shared across pathogens.
 # The Nextstrain team will try to maintain a generalized set of geolocation
 # rules that can then be overridden by local geolocation rules per pathogen repo.
 rule fetch_general_geolocation_rules:
     output:
-        general_geolocation_rules = "data/general-geolocation-rules.tsv"
+        general_geolocation_rules="data/general-geolocation-rules.tsv",
     params:
-        geolocation_rules_url = config["curate"]["geolocation_rules_url"]
+        geolocation_rules_url=config["curate"]["geolocation_rules_url"],
     shell:
         """
         curl {params.geolocation_rules_url} > {output.general_geolocation_rules}
         """
 
+
 rule concat_geolocation_rules:
     input:
-        general_geolocation_rules = "data/general-geolocation-rules.tsv",
-        local_geolocation_rules = config["curate"]["local_geolocation_rules"]
+        general_geolocation_rules="data/general-geolocation-rules.tsv",
+        local_geolocation_rules=config["curate"]["local_geolocation_rules"],
     output:
-        all_geolocation_rules = "data/all-geolocation-rules.tsv"
+        all_geolocation_rules="data/all-geolocation-rules.tsv",
     shell:
         """
         cat {input.general_geolocation_rules} {input.local_geolocation_rules} >> {output.all_geolocation_rules}
         """
 
+
 # This curate pipeline is based on existing pipelines for pathogen repos using NCBI data.
 # You may want to add and/or remove steps from the pipeline for custom metadata
 # curation for your pathogen. Note that the curate pipeline is streaming NDJSON
@@ -40,15 +43,15 @@ rule concat_geolocation_rules:
 # separate files: a metadata TSV and a sequences FASTA.
 rule curate:
     input:
-        sequences_ndjson = "data/ncbi.ndjson",
+        sequences_ndjson="data/ncbi.ndjson",
         # Change the geolocation_rules input path if you are removing the above two rules
-        all_geolocation_rules = "data/all-geolocation-rules.tsv",
-        annotations = config["curate"]["annotations"]
+        all_geolocation_rules="data/all-geolocation-rules.tsv",
+        annotations=config["curate"]["annotations"],
     output:
-        metadata = "results/all_metadata.tsv",
-        sequences = "results/sequences.fasta"
+        metadata="results/all_metadata.tsv",
+        sequences="results/sequences.fasta",
     log:
-        "logs/curate.txt"
+        "logs/curate.txt",
     benchmark:
         "benchmarks/curate.txt"
     params:
@@ -94,13 +97,14 @@ rule curate:
             --output-seq-field {params.sequence_field} ) 2>> {log}
         """
 
+
 rule subset_metadata:
     input:
-        metadata="results/all_metadata.tsv"
+        metadata="results/all_metadata.tsv",
     output:
-        subset_metadata="results/subset_metadata.tsv"
+        subset_metadata="results/subset_metadata.tsv",
     params:
-        metadata_fields=config["curate"]["metadata_columns"]
+        metadata_fields=config["curate"]["metadata_columns"],
     shell:
         """
         tsv-select -H -f {params.metadata_fields} \
34 changes: 19 additions & 15 deletions ingest/rules/fetch_from_ncbi.smk
@@ -22,11 +22,12 @@ to the other approaches.
 ########################## 1. Fetch from Entrez ###########################
 ###########################################################################
 
+
 rule fetch_from_ncbi_entrez:
     params:
-        term = config["entrez_search_term"]
+        term=config["entrez_search_term"],
     output:
-        genbank = "data/genbank.gb"
+        genbank="data/genbank.gb",
     # Allow retries in case of network errors
     retries: 5
     benchmark:
@@ -41,9 +42,9 @@ rule fetch_from_ncbi_entrez:
 
 rule parse_genbank_to_ndjson:
     input:
-        genbank = "data/genbank.gb"
+        genbank="data/genbank.gb",
     output:
-        ndjson = "data/ncbi.ndjson"
+        ndjson="data/ncbi.ndjson",
     benchmark:
         "benchmarks/parse_genbank_to_ndjson.txt"
     shell:
@@ -56,11 +57,12 @@ rule parse_genbank_to_ndjson:
 ####################### 2. Fetch from NCBI Datasets #######################
 ###########################################################################
 
+
 rule fetch_ncbi_dataset_package:
     params:
-        ncbi_taxon_id = config["ncbi_taxon_id"],
+        ncbi_taxon_id=config["ncbi_taxon_id"],
     output:
-        dataset_package = temp("data/ncbi_dataset.zip")
+        dataset_package=temp("data/ncbi_dataset.zip"),
     # Allow retries in case of network errors
     retries: 5
     benchmark:
@@ -75,9 +77,9 @@ rule fetch_ncbi_dataset_package:
 
 rule extract_ncbi_dataset_sequences:
     input:
-        dataset_package = "data/ncbi_dataset.zip"
+        dataset_package="data/ncbi_dataset.zip",
     output:
-        ncbi_dataset_sequences = temp("data/ncbi_dataset_sequences.fasta")
+        ncbi_dataset_sequences=temp("data/ncbi_dataset_sequences.fasta"),
     benchmark:
         "benchmarks/extract_ncbi_dataset_sequences.txt"
     shell:
@@ -122,11 +124,13 @@ def _get_ncbi_dataset_field_mnemonics(provided_fields: list) -> str:
 
 rule format_ncbi_dataset_report:
     input:
-        dataset_package = "data/ncbi_dataset.zip"
+        dataset_package="data/ncbi_dataset.zip",
     output:
-        ncbi_dataset_tsv = temp("data/ncbi_dataset_report.tsv")
+        ncbi_dataset_tsv=temp("data/ncbi_dataset_report.tsv"),
     params:
-        fields_to_include = _get_ncbi_dataset_field_mnemonics(config["ncbi_dataset_fields"])
+        fields_to_include=_get_ncbi_dataset_field_mnemonics(
+            config["ncbi_dataset_fields"]
+        ),
     benchmark:
         "benchmarks/format_ncbi_dataset_report.txt"
     shell:
@@ -144,12 +148,12 @@ rule format_ncbi_dataset_report:
 # data that we host on data.nextstrain.org
 rule format_ncbi_datasets_ndjson:
     input:
-        ncbi_dataset_sequences = "data/ncbi_dataset_sequences.fasta",
-        ncbi_dataset_tsv = "data/ncbi_dataset_report.tsv",
+        ncbi_dataset_sequences="data/ncbi_dataset_sequences.fasta",
+        ncbi_dataset_tsv="data/ncbi_dataset_report.tsv",
     output:
-        ndjson = "data/ncbi.ndjson",
+        ndjson="data/ncbi.ndjson",
     log:
-        "logs/format_ncbi_datasets_ndjson.txt"
+        "logs/format_ncbi_datasets_ndjson.txt",
     benchmark:
         "benchmarks/format_ncbi_datasets_ndjson.txt"
     shell:
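
The last two hunk headers reference the helper _get_ncbi_dataset_field_mnemonics, whose body falls outside this diff. As a hypothetical sketch only, assuming the helper joins field mnemonics into the comma-separated list that NCBI's dataformat tool expects:

    # Hypothetical sketch; the real helper in fetch_from_ncbi.smk may differ.
    def _get_ncbi_dataset_field_mnemonics(provided_fields: list) -> str:
        """Join NCBI Datasets report field mnemonics into one comma-separated
        string, keeping the accession first so downstream rules can rely on it."""
        fields = ["accession"]  # assumption: accession is always the first field
        fields += [field for field in provided_fields if field != "accession"]
        return ",".join(fields)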
4 changes: 3 additions & 1 deletion nextclade/Snakefile
@@ -1,10 +1,12 @@
 # Use default configuration values. Override with Snakemake's --configfile/--config options.
 configfile: "config/defaults.yaml"
 
+
 rule all:
     input:
         # Fill in path to the final exported Auspice JSON
-        auspice_json = ""
+        auspice_json="",
 
+
 include: "rules/preprocess.smk"
 include: "rules/prepare_sequences.smk"
4 changes: 3 additions & 1 deletion phylogenetic/Snakefile
@@ -1,10 +1,12 @@
 # Use default configuration values. Override with Snakemake's --configfile/--config options.
 configfile: "config/defaults.yaml"
 
+
 rule all:
     input:
         # Fill in path to the final exported Auspice JSON
-        auspice_json = ""
+        auspice_json="",
 
+
 include: "rules/prepare_sequences.smk"
 include: "rules/construct_phylogeny.smk"
