# mikkelsen.yml

# -*- mode: Yaml; -*-
# Timestamp: 2016-09-05T13:14:19.825611
#
# Default options.
# Can also be specific for a set of samples, libraries, and lanes,
# by including the "Options" hierarchy at the same level as those
# samples, libraries, or lanes below. This does not include
# "Features", which may only be specified globally.
Options:
  # Sequencing platform, see SAM/BAM reference for valid values
  Platform: Illumina
  # Quality offset for Phred scores, either 33 (Sanger/Illumina 1.8+)
  # or 64 (Illumina 1.3+ / 1.5+). For Bowtie2 it is also possible to
  # specify 'Solexa', to handle reads on the Solexa scale. This is
  # used during adapter-trimming and sequence alignment
  QualityOffset: 33
  # Split a lane into multiple entries, one for each (pair of) file(s)
  # found using the search-string specified for a given lane. Each
  # lane is named by adding a number to the end of the given barcode.
  SplitLanesByFilenames: yes
  # Compression format for FASTQ reads; 'gz' for GZip, 'bz2' for BZip2
  CompressionFormat: gz

  # Settings for trimming of reads, see AdapterRemoval man-page
  AdapterRemoval:
     # Adapter sequences, set and uncomment to override defaults
#     --adapter1: AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG
#     --adapter2: AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
     # Some BAM pipeline defaults differ from AR defaults;
     # To override, change these value(s):
     --mm: 3
     --minlength: 25
     # Extra features enabled by default; change 'yes' to 'no' to disable
     --collapse: no
     --trimns: yes
     --trimqualities: yes

  # Settings for aligners supported by the pipeline
  Aligners:
    # Choice of aligner software to use, either "BWA" or "Bowtie2"
    Program: BWA

    # Settings for mappings performed using BWA
    BWA:
      # One of "backtrack", "bwasw", or "mem"; see the BWA documentation
      # for a description of each algorithm (defaults to 'backtrack')
      Algorithm: backtrack
      # Filter aligned reads with a mapping quality (Phred) below this value
      MinQuality: 0
      # Filter reads that did not map to the reference sequence
      FilterUnmappedReads: yes
      # May be disabled ("no") for aDNA alignments, as post-mortem damage
      # localizes to the seed region, which BWA expects to have few
      # errors (sets "-l"). See http://pmid.us/22574660
      UseSeed: yes
      # Additional command-line options may be specified for the "aln"
      # call(s), as described for Bowtie2 below.

    # Settings for mappings performed using Bowtie2
    Bowtie2:
      # Filter aligned reads with a mapping quality (Phred) below this value
      MinQuality: 0
      # Filter reads that did not map to the reference sequence
      FilterUnmappedReads: yes
      # Examples of how to add additional command-line options
#      --trim5: 5
#      --trim3: 5
      # Note that the colon is required, even if no value is specified
      --very-sensitive:
      # Example of how to specify multiple values for an option
#      --rg:
#        - CN:SequencingCenterNameHere
#        - DS:DescriptionOfReadGroup

  # Mark / filter PCR duplicates. If set to 'filter', PCR duplicates are
  # removed from the output files; if set to 'mark', PCR duplicates are
  # flagged with bit 0x400, and not removed from the output files; if set to
  # 'no', the reads are assumed to not have been amplified. Collapsed reads
  # are filtered using the command 'paleomix rmdup_collapsed', while "normal"
  # reads are filtered using Picard MarkDuplicates.
  PCRDuplicates: mark

  # Carry out quality base re-scaling of libraries using mapDamage
  # This will be done using the options set for mapDamage below
  RescaleQualities: no

  # Command-line options for mapDamage; note that the long-form
  # options are expected; --length, not -l, etc. Add any extra
  # command-line options under the "mapDamage" key below.
  mapDamage:
    # By default, the pipeline will downsample the input to 100k hits
    # when running mapDamage; remove to use all hits
    --downsample: 100000

  # Set to 'yes' to exclude a type of trimmed reads from alignment / analysis;
  # possible read-types reflect the output of AdapterRemoval
  ExcludeReads:
    Single: no              # Single-ended reads / Orphaned paired-ended reads
    Paired: no              # Paired ended reads
    Singleton: no           # Paired reads for which the mate was discarded
    Collapsed: no           # Overlapping paired-ended reads collapsed into a
                            # single sequence by AdapterRemoval
    CollapsedTruncated: no  # Like 'Collapsed', except that the reads were
                            # truncated due to the presence of ambiguous
                            # bases or low quality bases at read termini.

  # Optional steps to perform during processing
  Features:
    RawBAM: yes          # Generate BAM from the raw libraries (no indel realignment)
                        #   Location: {Destination}/{Target}.{Genome}.bam
    RealignedBAM: no    # Generate indel-realigned BAM using the GATK Indel realigner
                        #   Location: {Destination}/{Target}.{Genome}.realigned.bam
    mapDamage: no       # Generate mapDamage plot for each (unrealigned) library
                        #   Location: {Destination}/{Target}.{Genome}.mapDamage/{Library}/
    Coverage: yes       # Generate coverage information for the raw BAM (wo/ indel realignment)
                        #   Location: {Destination}/{Target}.{Genome}.coverage
    Depths: no          # Generate histogram of number of sites with a given read-depth
                        #   Location: {Destination}/{Target}.{Genome}.depths
    Summary: yes        # Generate summary table for each target
                        #   Location: {Destination}/{Target}.summary
    DuplicateHist: no   # Generate histogram of PCR duplicates, for use with PreSeq
                        #   Location: {Destination}/{Target}.{Genome}.duphist/{Library}/


# Map of prefixes by name, each having a Path key, which specifies the
# location of the BWA/Bowtie2 index, an optional label, and an optional
# set of regions for which additional statistics are produced.
Prefixes:
  # Uncomment and replace 'NAME_OF_PREFIX' with name of the prefix; this name
  # is used in summary statistics and as part of output filenames.
#  NAME_OF_PREFIX:
    # Uncomment and replace 'PATH_TO_PREFIX' with the path to .fasta file
    # containing the references against which reads are to be mapped.
#    Path: PATH_TO_PREFIX
  GRCm38.p3:
    Path: /scratch/users/aginolhac/chip-seq/Mikkelsen/references/GRCm38.p3.fasta

    # (Optional) Uncomment and replace 'PATH_TO_BEDFILE' with the path to a
    # .bed file listing extra regions for which coverage / depth statistics
    # should be calculated; if no names are specified for the BED records,
    # results are named after the chromosome / contig. Change 'NAME' to the
    # name to be used in summary statistics and output filenames.
#    RegionsOfInterest:
#      NAME: PATH_TO_BEDFILE


# Mapping targets are specified using the following structure. Uncomment and
# replace 'NAME_OF_TARGET' with the desired prefix for filenames.
#NAME_OF_TARGET:
   #  Uncomment and replace 'NAME_OF_SAMPLE' with the name of this sample.
#  NAME_OF_SAMPLE:
     #  Uncomment and replace 'NAME_OF_LIBRARY' with the name of this library.
#    NAME_OF_LIBRARY:
       # Uncomment and replace 'NAME_OF_LANE' with the name of this lane,
       # and replace 'PATH_WITH_WILDCARDS' with the path to the FASTQ files
       # to be trimmed and mapped for this lane (may include wildcards).
#      NAME_OF_LANE: PATH_WITH_WILDCARDS

# Targets for the Mikkelsen 3T3L1 data set. Each entry follows the
# Target > Sample > Library > Lane layout of the commented template above;
# every lane is keyed by the name of its FASTQ file and points at a single
# gzipped FASTQ file.
Mikkelsen_3T3L1_WCE:
  SAMN00011733:
    3T3L1_WCE:
      SRR040087: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040087.fastq.gz
      SRR040088: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040088.fastq.gz
      SRR040089: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040089.fastq.gz
      SRR040090: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040090.fastq.gz
      SRR040091: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040091.fastq.gz
      SRR040092: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040092.fastq.gz
      SRR040093: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040093.fastq.gz
      SRR040094: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040094.fastq.gz
      SRR040095: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040095.fastq.gz
Mikkelsen_3T3L1_t2_H3K4me3:
  SAMN00011741:
    3T3L1_t2_H3K4me3:
      SRR040113: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040113.fastq.gz
      SRR040114: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040114.fastq.gz
      SRR040115: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040115.fastq.gz
      SRR040116: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040116.fastq.gz
Mikkelsen_3T3L1_t2_H3K27ac:
  SAMN00011744:
    3T3L1_t2_H3K27ac:
      SRR040121: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040121.fastq.gz
      SRR040122: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040122.fastq.gz
Mikkelsen_3T3L1_t3_H3K4me3:
  SAMN00011748:
    3T3L1_t3_H3K4me3:
      SRR040131: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040131.fastq.gz
      SRR040132: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040132.fastq.gz
      SRR040133: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040133.fastq.gz
Mikkelsen_3T3L1_t3_H3K27ac:
  SAMN00011751:
    3T3L1_t3_H3K27ac:
      SRR040140: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040140.fastq.gz
      SRR040141: /scratch/users/aginolhac/chip-seq/Mikkelsen/fastq/SRR040141.fastq.gz