# -*- mode: Yaml; -*-
# Source: mikkelsen.yml (forked from ginolhac/chip-seq)
# Timestamp: 2016-09-05T13:14:19.825611
#
# Default options.
# Can also be specific for a set of samples, libraries, and lanes,
# by including the "Options" hierarchy at the same level as those
# samples, libraries, or lanes below. This does not include
# "Features", which may only be specified globally.
Options:
  # Sequencing platform, see SAM/BAM reference for valid values
  Platform: Illumina
  # Quality offset for Phred scores, either 33 (Sanger/Illumina 1.8+)
  # or 64 (Illumina 1.3+ / 1.5+). For Bowtie2 it is also possible to
  # specify 'Solexa', to handle reads on the Solexa scale. This is
  # used during adapter-trimming and sequence alignment
  QualityOffset: 33
  # Split a lane into multiple entries, one for each (pair of) file(s)
  # found using the search-string specified for a given lane. Each
  # lane is named by adding a number to the end of the given barcode.
  SplitLanesByFilenames: yes
  # Compression format for FASTQ reads; 'gz' for GZip, 'bz2' for BZip2
  CompressionFormat: gz

  # Settings for trimming of reads, see AdapterRemoval man-page
  AdapterRemoval:
    # Adapter sequences, set and uncomment to override defaults
    # --adapter1: AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG
    # --adapter2: AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
    # Some BAM pipeline defaults differ from AR defaults;
    # To override, change these value(s):
    --mm: 3
    --minlength: 25
    # Extra features enabled by default; change 'yes' to 'no' to disable
    # (read collapsing is switched off in this makefile)
    --collapse: no
    --trimns: yes
    --trimqualities: yes

  # Settings for aligners supported by the pipeline
  Aligners:
    # Choice of aligner software to use, either "BWA" or "Bowtie2"
    Program: BWA

    # Settings for mappings performed using BWA
    BWA:
      # One of "backtrack", "bwasw", or "mem"; see the BWA documentation
      # for a description of each algorithm (defaults to 'backtrack')
      Algorithm: backtrack
      # Filter aligned reads with a mapping quality (Phred) below this value
      MinQuality: 0
      # Filter reads that did not map to the reference sequence
      FilterUnmappedReads: yes
      # May be disabled ("no") for aDNA alignments, as post-mortem damage
      # localizes to the seed region, which BWA expects to have few
      # errors (sets "-l"). See http://pmid.us/22574660
      UseSeed: yes
      # Additional command-line options may be specified for the "aln"
      # call(s), as described for Bowtie2 below.

    # Settings for mappings performed using Bowtie2
    Bowtie2:
      # Filter aligned reads with a mapping quality (Phred) below this value
      MinQuality: 0
      # Filter reads that did not map to the reference sequence
      FilterUnmappedReads: yes
      # Examples of how to add additional command-line options
      # --trim5: 5
      # --trim3: 5
      # Note that the colon is required, even if no value is specified
      --very-sensitive:
      # Example of how to specify multiple values for an option
      # --rg:
      #   - CN:SequencingCenterNameHere
      #   - DS:DescriptionOfReadGroup

  # Mark / filter PCR duplicates. If set to 'filter', PCR duplicates are
  # removed from the output files; if set to 'mark', PCR duplicates are
  # flagged with bit 0x400, and not removed from the output files; if set to
  # 'no', the reads are assumed to not have been amplified. Collapsed reads
  # are filtered using the command 'paleomix rmdup_duplicates', while "normal"
  # reads are filtered using Picard MarkDuplicates.
  PCRDuplicates: mark

  # Carry out quality base re-scaling of libraries using mapDamage
  # This will be done using the options set for mapDamage below
  RescaleQualities: no

  # Command-line options for mapDamage; note that the long-form
  # options are expected; --length, not -l, etc. Uncomment the
  # "mapDamage" line adding command-line options below.
  mapDamage:
    # By default, the pipeline will downsample the input to 100k hits
    # when running mapDamage; remove to use all hits
    --downsample: 100000

  # Set to 'yes' to exclude a type of trimmed reads from alignment / analysis;
  # possible read-types reflect the output of AdapterRemoval
  ExcludeReads:
    Single: no              # Single-ended reads / Orphaned paired-ended reads
    Paired: no              # Paired ended reads
    Singleton: no           # Paired reads for which the mate was discarded
    Collapsed: no           # Overlapping paired-ended reads collapsed into a
                            # single sequence by AdapterRemoval
    CollapsedTruncated: no  # Like 'Collapsed', except that the reads were
                            # truncated due to the presence of ambiguous
                            # bases or low quality bases at read termini.

  # Optional steps to perform during processing
  Features:
    RawBAM: yes         # Generate BAM from the raw libraries (no indel realignment)
                        #   Location: {Destination}/{Target}.{Genome}.bam
    RealignedBAM: no    # Generate indel-realigned BAM using the GATK Indel realigner
                        #   Location: {Destination}/{Target}.{Genome}.realigned.bam
    mapDamage: no       # Generate mapDamage plot for each (unrealigned) library
                        #   Location: {Destination}/{Target}.{Genome}.mapDamage/{Library}/
    Coverage: yes       # Generate coverage information for the raw BAM (wo/ indel realignment)
                        #   Location: {Destination}/{Target}.{Genome}.coverage
    Depths: no          # Generate histogram of number of sites with a given read-depth
                        #   Location: {Destination}/{Target}.{Genome}.depths
    Summary: yes        # Generate summary table for each target
                        #   Location: {Destination}/{Target}.summary
    DuplicateHist: no   # Generate histogram of PCR duplicates, for use with PreSeq
                        #   Location: {Destination}/{Target}.{Genome}.duphist/{Library}/
# Map of prefixes by name, each having a Path key, which specifies the
# location of the BWA/Bowtie2 index, an optional label, and an optional
# set of regions for which additional statistics are produced.
Prefixes:
  # Uncomment and replace 'NAME_OF_PREFIX' with name of the prefix; this name
  # is used in summary statistics and as part of output filenames.
  # NAME_OF_PREFIX:
    # Uncomment and replace 'PATH_TO_PREFIX' with the path to .fasta file
    # containing the references against which reads are to be mapped.
    # Path: PATH_TO_PREFIX
  GRCm38.p3:
    Path: /scratch/users/aginolhac/chip-seq/Mikkelsen/references/GRCm38.p3.fasta

    # (Optional) Uncomment and replace 'PATH_TO_BEDFILE' with the path to a
    # .bed file listing extra regions for which coverage / depth statistics
    # should be calculated; if no names are specified for the BED records,
    # results are named after the chromosome / contig. Change 'NAME' to the
    # name to be used in summary statistics and output filenames.
    # RegionsOfInterest:
    #   NAME: PATH_TO_BEDFILE
# Mapping targets are specified using the following structure. Uncomment and
# replace 'NAME_OF_TARGET' with the desired prefix for filenames.
#NAME_OF_TARGET:
# Uncomment and replace 'NAME_OF_SAMPLE' with the name of this sample.
# NAME_OF_SAMPLE:
# Uncomment and replace 'NAME_OF_LIBRARY' with the name of this library.
# NAME_OF_LIBRARY:
# Uncomment and replace 'NAME_OF_LANE' with the name of this lane,
# and replace 'PATH_WITH_WILDCARDS' with the path to the FASTQ files
# to be trimmed and mapped for this lane (may include wildcards).
# NAME_OF_LANE: PATH_WITH_WILDCARDS
# Target > sample > library > lanes; nine lanes, one FASTQ file each.
Mikkelsen_3T3L1_WCE:
  SAMN00011733:
    3T3L1_WCE:
      SRR040087: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040087.fastq.gz
      SRR040088: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040088.fastq.gz
      SRR040089: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040089.fastq.gz
      SRR040090: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040090.fastq.gz
      SRR040091: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040091.fastq.gz
      SRR040092: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040092.fastq.gz
      SRR040093: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040093.fastq.gz
      SRR040094: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040094.fastq.gz
      SRR040095: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040095.fastq.gz
# Target > sample > library > lanes; four lanes, one FASTQ file each.
Mikkelsen_3T3L1_t2_H3K4me3:
  SAMN00011741:
    3T3L1_t2_H3K4me3:
      SRR040113: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040113.fastq.gz
      SRR040114: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040114.fastq.gz
      SRR040115: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040115.fastq.gz
      SRR040116: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040116.fastq.gz
# Target > sample > library > lanes; two lanes, one FASTQ file each.
Mikkelsen_3T3L1_t2_H3K27ac:
  SAMN00011744:
    3T3L1_t2_H3K27ac:
      SRR040121: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040121.fastq.gz
      SRR040122: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040122.fastq.gz
# Target > sample > library > lanes; three lanes, one FASTQ file each.
Mikkelsen_3T3L1_t3_H3K4me3:
  SAMN00011748:
    3T3L1_t3_H3K4me3:
      SRR040131: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040131.fastq.gz
      SRR040132: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040132.fastq.gz
      SRR040133: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040133.fastq.gz
# Target > sample > library > lanes; two lanes, one FASTQ file each.
Mikkelsen_3T3L1_t3_H3K27ac:
  SAMN00011751:
    3T3L1_t3_H3K27ac:
      SRR040140: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040140.fastq.gz
      SRR040141: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040141.fastq.gz