diff --git a/phylogenetic/defaults/config_dengue.yaml b/phylogenetic/defaults/config_dengue.yaml
index e34e557..6fdffe0 100644
--- a/phylogenetic/defaults/config_dengue.yaml
+++ b/phylogenetic/defaults/config_dengue.yaml
@@ -14,12 +14,7 @@ filter:
   min_length:
     genome: 5000
     E: 1000
-  sequences_per_group:
-    all: '36'
-    denv1: '36'
-    denv2: '36'
-    denv3: '36'
-    denv4: '36'
+  subsample_max_sequences: '4000'
 
 traits:
   sampling_bias_correction: '3'
diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk
index 44f4224..0192210 100644
--- a/phylogenetic/rules/prepare_sequences.smk
+++ b/phylogenetic/rules/prepare_sequences.smk
@@ -60,7 +60,7 @@ rule filter:
         sequences = "results/{gene}/filtered_{serotype}.fasta"
     params:
         group_by = config['filter']['group_by'],
-        sequences_per_group = lambda wildcards: config['filter']['sequences_per_group'][wildcards.serotype],
+        subsample_max_sequences = config['filter']['subsample_max_sequences'],
         min_length = lambda wildcard: config['filter']['min_length'][wildcard.gene],
         strain_id = config.get("strain_id_field", "strain"),
     shell:
@@ -73,7 +73,7 @@ rule filter:
             --include {input.include} \
             --output {output.sequences} \
             --group-by {params.group_by} \
-            --sequences-per-group {params.sequences_per_group} \
+            --subsample-max-sequences {params.subsample_max_sequences} \
             --min-length {params.min_length} \
             --exclude-where country=? region=? date=? is_lab_host='true' \
             --query-columns is_lab_host:str