-
Notifications
You must be signed in to change notification settings - Fork 2
/
Snakefile
144 lines (123 loc) · 5.04 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Use /bin/bash as the executable for the workflow's shell commands.
shell.executable('/bin/bash')
import collections
import csv
import itertools
import os
from pathlib import Path
import re
import subprocess
import textwrap
from pandas.core.common import flatten
import psutil
# Total system memory in GiB minus one, truncated to an integer
# (psutil reports `total` in bytes).
ALL_MEMORY = int((psutil.virtual_memory().total / 1024 ** 3) - 1)

# Load the configuration file first: until `configfile:` has run, `config`
# only holds values passed on the command line, so any `config.get(...)`
# placed above this line silently ignores config/config.yaml.
configfile: 'config/config.yaml'

# Read after `configfile:` so config/config.yaml can override the default,
# and before the include below so the name exists if rules/config.smk uses it.
MEM_TMPDIR = config.get('MEM_TMPDIR', '/dev/shm')

include: 'rules/config.smk'

# Directory and executable names, each overridable through the config.
TMPDIR = config.get('TMPDIR', '/tmp')
PIGZ = config.get('PIGZ', 'pigz')
SORT = config.get('SORT', 'sort')
PICARD = config.get('PICARD', 'picard')
SAMTOOLS = config.get('SAMTOOLS', 'samtools')
DROPCACHE = config.get('DROPCACHE', 'dropcache')

# Sample names as strings, with excluded samples removed.  The lists are
# sorted because set iteration order is not reproducible between runs.
# NOTE(review): the single-end list is filtered by 'exclude_samples_pe' while
# the paired-end list is filtered by 'exclude_samples' -- confirm that these
# are the intended config keys.
samples_se = sorted(
    str(x)
    for x in set(config.get('samples_se', [])) - set(config.get('exclude_samples_pe', []))
)
samples_pe = sorted(
    str(x)
    for x in set(config.get('samples_pe', [])) - set(config.get('exclude_samples', []))
)
samples_fastq = samples_se + samples_pe
samples_fasta = sorted(str(x) for x in set(config.get('samples_fasta', [])))
samples_all = samples_fastq + samples_fasta

# A missing or empty benchmark key yields an empty list instead of a
# TypeError from sorted(None).
benchmark_samples_se = sorted(config.get('benchmark_samples_se') or [])
benchmark_samples_pe = sorted(config.get('benchmark_samples_pe') or [])
benchmark_samples_all = benchmark_samples_se + benchmark_samples_pe

# Filename suffixes appended to a paired-end sample name, one per mate file.
paired_suffix = config.get('paired_suffix', ('_1', '_2'))
# None when the config does not set TAXONOMY_DB.
TAXONOMY_DB = config.get('TAXONOMY_DB')
def fastq_bz2_input(wildcards):
    """Return the ``.fastq.bz2`` input path(s) for the sample ``wildcards.seq``.

    Args:
        wildcards: object whose ``seq`` attribute is the sample name.

    Returns:
        For a sample in ``samples_pe``, a list with one path per entry of
        ``paired_suffix``; for a sample in ``samples_se`` or
        ``samples_fasta``, the single string ``fastq/<seq>.fastq.bz2``.

    Raises:
        ValueError: if the sample is in none of the three sample lists.
    """
    seq = wildcards.seq
    if seq in samples_pe:
        return expand('fastq/{seq}{pair}.fastq.bz2', seq=seq, pair=paired_suffix)
    if seq in samples_se or seq in samples_fasta:
        return 'fastq/{}.fastq.bz2'.format(seq)
    # Name the offending sample so a misconfigured run is easy to diagnose.
    raise ValueError(
        'fastq_bz2_input: sample {!r} is not listed in samples_pe, '
        'samples_se or samples_fasta'.format(seq)
    )
def fastx_bz2_input(wildcards):
    """Return the bzip2-compressed input path(s) for the sample ``wildcards.seq``.

    Args:
        wildcards: object whose ``seq`` attribute is the sample name.

    Returns:
        For a sample in ``samples_pe``, a list of ``.fastq.bz2`` paths, one
        per entry of ``paired_suffix``; for a sample in ``samples_se``, the
        string ``fastq/<seq>.fastq.bz2``; for a sample in ``samples_fasta``,
        the string ``fastq/<seq>.fasta.bz2``.

    Raises:
        ValueError: if the sample is in none of the three sample lists.
    """
    seq = wildcards.seq
    if seq in samples_pe:
        return expand('fastq/{seq}{pair}.fastq.bz2', seq=seq, pair=paired_suffix)
    if seq in samples_se:
        return 'fastq/{}.fastq.bz2'.format(seq)
    if seq in samples_fasta:
        return 'fastq/{}.fasta.bz2'.format(seq)
    # Name the offending sample so a misconfigured run is easy to diagnose.
    raise ValueError(
        'fastx_bz2_input: sample {!r} is not listed in samples_pe, '
        'samples_se or samples_fasta'.format(seq)
    )
def fastq_both_input(wildcards):
    """Return the single FASTQ input path for the sample ``wildcards.seq``.

    Args:
        wildcards: object whose ``seq`` attribute is the sample name.

    Returns:
        ``fastq/<seq>.both.fastq`` for a sample in ``samples_pe``;
        ``fastq/<seq>.fastq`` for a sample in ``samples_se`` or
        ``samples_fasta``.

    Raises:
        ValueError: if the sample is in none of the three sample lists.
    """
    seq = wildcards.seq
    if seq in samples_pe:
        return 'fastq/{}.both.fastq'.format(seq)
    if seq in samples_se or seq in samples_fasta:
        return 'fastq/{}.fastq'.format(seq)
    # Name the offending sample so a misconfigured run is easy to diagnose.
    raise ValueError(
        'fastq_both_input: sample {!r} is not listed in samples_pe, '
        'samples_se or samples_fasta'.format(seq)
    )
def fastx_both_input(wildcards):
    """Return the single sequence input path for the sample ``wildcards.seq``.

    Args:
        wildcards: object whose ``seq`` attribute is the sample name.

    Returns:
        ``fastq/<seq>.both.fastq`` for a sample in ``samples_pe``;
        ``fastq/<seq>.fastq`` for a sample in ``samples_se``;
        ``fastq/<seq>.fasta`` for a sample in ``samples_fasta``.

    Raises:
        ValueError: if the sample is in none of the three sample lists.
    """
    seq = wildcards.seq
    if seq in samples_pe:
        return 'fastq/{}.both.fastq'.format(seq)
    if seq in samples_se:
        return 'fastq/{}.fastq'.format(seq)
    if seq in samples_fasta:
        return 'fastq/{}.fasta'.format(seq)
    # Name the offending sample so a misconfigured run is easy to diagnose.
    raise ValueError(
        'fastx_both_input: sample {!r} is not listed in samples_pe, '
        'samples_se or samples_fasta'.format(seq)
    )
def fastq_input(wildcards):
    """Return the uncompressed FASTQ input path(s) for the sample ``wildcards.seq``.

    Args:
        wildcards: object whose ``seq`` attribute is the sample name.

    Returns:
        For a sample in ``samples_pe``, a list with one
        ``fastq/<seq><suffix>.fastq`` path per entry of ``paired_suffix``;
        for a sample in ``samples_se`` or ``samples_fasta``, the single
        string ``fastq/<seq>.fastq``.

    Raises:
        ValueError: if the sample is in none of the three sample lists.
    """
    seq = wildcards.seq
    if seq in samples_pe:
        # Let expand() substitute both fields, as fastq_bz2_input does,
        # instead of pre-formatting the pattern with escaped braces.
        return expand('fastq/{seq}{pair}.fastq', seq=seq, pair=paired_suffix)
    if seq in samples_se or seq in samples_fasta:
        return 'fastq/{}.fastq'.format(seq)
    # Name the offending sample so a misconfigured run is easy to diagnose.
    raise ValueError(
        'fastq_input: sample {!r} is not listed in samples_pe, '
        'samples_se or samples_fasta'.format(seq)
    )
def fastx_input(wildcards):
    """Return the uncompressed sequence input path(s) for the sample ``wildcards.seq``.

    Args:
        wildcards: object whose ``seq`` attribute is the sample name.

    Returns:
        For a sample in ``samples_pe``, a list with one
        ``fastq/<seq><suffix>.fastq`` path per entry of ``paired_suffix``;
        for a sample in ``samples_se``, the string ``fastq/<seq>.fastq``;
        for a sample in ``samples_fasta``, the string ``fastq/<seq>.fasta``.

    Raises:
        ValueError: if the sample is in none of the three sample lists.
    """
    seq = wildcards.seq
    if seq in samples_pe:
        # Let expand() substitute both fields, as fastx_bz2_input does,
        # instead of pre-formatting the pattern with escaped braces.
        return expand('fastq/{seq}{pair}.fastq', seq=seq, pair=paired_suffix)
    if seq in samples_se:
        return 'fastq/{}.fastq'.format(seq)
    if seq in samples_fasta:
        return 'fastq/{}.fasta'.format(seq)
    # Name the offending sample so a misconfigured run is easy to diagnose.
    raise ValueError(
        'fastx_input: sample {!r} is not listed in samples_pe, '
        'samples_se or samples_fasta'.format(seq)
    )
# Rule files, included in this order.
include: 'rules/download.smk'
include: 'rules/bam_to_fastq.smk'
include: 'rules/prepare_fastq.smk'
include: 'rules/refseqc.smk'
include: 'rules/blast.smk'
include: 'rules/bracken.smk'
include: 'rules/centrifuge.smk'

# CLARK_EXECUTION selects which CLARK rule file is loaded.
# NOTE(review): any value other than 'multiple' or 'single' loads neither
# file -- confirm whether that is intended.
clark_execution = config.get('CLARK_EXECUTION', 'single')
if clark_execution == 'multiple':
    include: 'rules/clark_multiple.smk'
elif clark_execution == 'single':
    include: 'rules/clark.smk'

include: 'rules/diamond.smk'
include: 'rules/acdiamond.smk'
include: 'rules/gottcha.smk'
include: 'rules/kaiju.smk'
include: 'rules/karp.smk'
include: 'rules/kslam.smk'

# KRAKEN_EXECUTION selects which Kraken rule file is loaded.
# NOTE(review): as above, an unrecognised value loads neither file.
kraken_execution = config.get('KRAKEN_EXECUTION', 'single')
if kraken_execution == 'multiple':
    include: 'rules/kraken_multiple.smk'
elif kraken_execution == 'single':
    include: 'rules/kraken.smk'

include: 'rules/kraken2.smk'
include: 'rules/krakenhll.smk'
include: 'rules/metaothello.smk'
include: 'rules/metaphlan2.smk'
include: 'rules/mmseqs2.smk'
include: 'rules/motus.smk'
include: 'rules/pathseq.smk'
include: 'rules/prophyle.smk'
include: 'rules/taxmaps.smk'

# One flat list of every classifier target.  The *_ALL names are not defined
# in this file; presumably the rule files included above provide them.
ALL_CLASSIFIERS_ALL = list(flatten([
    MEGABLAST_ALL,
    CENTRIFUGE_ALL,
    CLARK_ALL,
    DIAMOND_ALL,
    GOTTCHA_ALL,
    KAIJU_ALL,
    KSLAM_ALL,
    KRAKEN_ALL,
    KRAKEN2_ALL,
    KRAKENHLL_ALL,
    METAOTHELLO_ALL,
    MMSEQS2_ALL,
    MOTUS_ALL,
    PATHSEQ_ALL,
    PROPHYLE_ALL,
    TAXMAPS_ALL,
]))

# Diagnostic output goes to stderr: this runs every time the workflow is
# parsed, and writing to stdout would corrupt machine-readable output such
# as `snakemake --dag | dot`.
import sys
print(ALL_CLASSIFIERS_ALL, file=sys.stderr)

include: 'rules/reports.smk'
# Target rule requesting every classifier output.  It reuses
# ALL_CLASSIFIERS_ALL, the flattened list built above from the same sixteen
# *_ALL collections, so the two target lists cannot drift apart.
rule all:
    input:
        ALL_CLASSIFIERS_ALL