forked from galaxyproject/tools-iuc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request galaxyproject#5791 from npinter/add-peakzilla
Add Peakzilla
- Loading branch information
Showing
7 changed files
with
19,710 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
name: peakzilla | ||
owner: iuc | ||
categories: | ||
- ChIP-seq | ||
description: Peakzilla identifies sites of enrichment and transcription factor binding sites from ChIP-seq and ChIP-exo experiments. | ||
long_description: | | ||
Peakzilla is a tool for identifying sites of enrichment and transcription factor binding sites from transcription factor ChIP-seq and ChIP-exo experiments at high accuracy and resolution. It is designed to work with data from any species and is suitable for both single and paired end data from any sequencing platform. | ||
homepage_url: https://github.com/steinmann/peakzilla | ||
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/peakzilla | ||
type: unrestricted |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
<tool id="peakzilla" name="Peakzilla" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> | ||
<description>Identify transcription factor binding sites from ChIP-seq and ChIP-exo experiments</description> | ||
<macros> | ||
<token name="@TOOL_VERSION@">1.0</token> | ||
<token name="@VERSION_SUFFIX@">0</token> | ||
</macros> | ||
<requirements> | ||
<requirement type="package" version="2.7">python</requirement> | ||
<requirement type="package" version="@TOOL_VERSION@">peakzilla</requirement> | ||
</requirements> | ||
<stdio> | ||
<exit_code range="1:" level="fatal" description="Generic error"/> | ||
<regex match="ValueError: cannot convert float NaN to integer" | ||
level="fatal" | ||
description="No peaks detected or input data error"/> | ||
</stdio> | ||
<command> | ||
<![CDATA[ | ||
peakzilla.py | ||
#if $options.model_peaks | ||
-m '$options.model_peaks' | ||
#end if | ||
#if $options.enrichment_cutoff | ||
-c '$options.enrichment_cutoff' | ||
#end if | ||
#if $options.score_cutoff | ||
-s '$options.score_cutoff' | ||
#end if | ||
#if $options.fragment_size | ||
-f '$options.fragment_size' | ||
#end if | ||
#if $options.gaussian | ||
-e | ||
#end if | ||
#if $options.bedpe | ||
-p | ||
#end if | ||
#if $outputs.negative | ||
-n | ||
#end if | ||
#if $outputs.log | ||
-l log.txt | ||
#end if | ||
'$chip_bed' '$input_bed' > '$results' | ||
]]> | ||
</command> | ||
<inputs> | ||
<param name="chip_bed" type="data" format="bed" label="ChIP Dataset in BED format"/> | ||
<param name="input_bed" type="data" format="bed" label="Input Dataset in BED format"/> | ||
<section name="options" title="Optional Parameters" expanded="false"> | ||
<param name="model_peaks" type="integer" optional="true" min="1" label="Number of most highly enriched regions used to estimate peak size (Default: 200)"/> | ||
<param name="enrichment_cutoff" type="integer" optional="true" min="1" label="Minimum cutoff for fold enrichment (Default: 2)"/> | ||
<param name="score_cutoff" type="integer" optional="true" min="1" label="Minimum cutoff for peak score (Default: 1)"/> | ||
<param name="fragment_size" type="integer" optional="true" min="1" label="Manually set fragment size in bp" help="If not set, it will be estimated from data"/> | ||
<param name="gaussian" type="boolean" checked="false" label="Use empirical model estimate instead of gaussian"/> | ||
<param name="bedpe" type="boolean" checked="false" label="Input is paired end and in BEDPE format"/> | ||
</section> | ||
<section name="outputs" title="Output Options" expanded="false"> | ||
<param name="negative" type="boolean" checked="false" label="Output negative peaks in control sample"/> | ||
<param name="log" type="boolean" checked="false" label="Output log file"/> | ||
</section> | ||
</inputs> | ||
<outputs> | ||
<data name="results" format="tabular" label="${tool.name} on ${on_string}"/> | ||
<data name="log" format="txt" from_work_dir="log.txt" label="Log file for ${tool.name} on ${on_string}"> | ||
<filter>log</filter> | ||
</data> | ||
<data name="negative_peaks" format="tabular" from_work_dir="negative_peaks.tsv" label="Negative peaks for ${tool.name} on ${on_string}"> | ||
<filter>negative</filter> | ||
</data> | ||
</outputs> | ||
<tests> | ||
<test expect_num_outputs="3"> | ||
<param name="chip_bed" value="chip.bed" /> | ||
<param name="input_bed" value="input.bed" /> | ||
<param name="model_peaks" value="200" /> | ||
<param name="fragment_size" value="50" /> | ||
<param name="enrichment_cutoff" value="2" /> | ||
<param name="score_cutoff" value="1" /> | ||
<param name="log" value="true" /> | ||
<param name="negative" value="true" /> | ||
<output name="results" file="results.tsv"> | ||
<assert_contents> | ||
<has_text text="Peak_1" /> | ||
</assert_contents> | ||
</output> | ||
<output name="log" file="log.txt" lines_diff="30"> | ||
<assert_contents> | ||
<has_text text="9569"/> | ||
</assert_contents> | ||
</output> | ||
<output name="negative_peaks" file="negative_peaks.tsv"> | ||
<assert_contents> | ||
<has_text text="Chromosome" /> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
</tests> | ||
<help><![CDATA[ | ||
**Peakzilla** | ||
Peakzilla identifies sites of enrichment and transcription factor binding sites from transcription factor ChIP-seq and ChIP-exo experiments at high accuracy and resolution. | ||
It is designed to perform equally well for data from any species. All necessary parameters are estimated from the data. Peakzilla is suitable for both single and | ||
paired-end data from any sequencing platform. | ||
Note that peakzilla is not suited for the identification of broad regions of enrichment (e.g. ChIP-seq for histone marks), we recommend using MACS instead: Zhang et al. | ||
Model-based Analysis of ChIP-Seq (MACS). Genome Biol (2008) 9(9):R137 | ||
*INPUT FORMAT* | ||
Peakzilla accepts BED formatted alignments as input. | ||
For conversation to BED format and working with BED files and alignments in | ||
general I highly recommend: | ||
* bowtie (http://bowtie-bio.sourceforge.net/) | ||
* SAMtools (http://samtools.sourceforge.net/) | ||
* bedtools (http://code.google.com/p/bedtools/) | ||
*WORKFLOW EXAMPLE* | ||
# use bowtie to map uniquely mappable reads to the genome | ||
bowtie -p4 -m1 --sam genome_index input.fastq input.sam | ||
bowtie -p4 -m1 --sam genome_index chip.fastq chip.sam | ||
# convert to BAM format | ||
samtools view -bS input.sam > input.bam | ||
samtools view -bS chip.sam > chip.bam | ||
# convert to BED format | ||
bamToBed -i input.bam > input.bed | ||
bamToBed -i chip.bam > chip.bed | ||
# run peakzilla | ||
python peakzilla.py chip.bed input.bed > chip_peaks.tsv | ||
# Comparison of 2 datasets | ||
# Determine significant peaks with a score threshold of 10 | ||
python peakzilla.py -s 10 chip1.bed input1.bed > chip1_s10_peaks.tsv | ||
# Determine enriched regions with a score threshold of 2 | ||
python peakzilla.py -s 2 chip2.bed input2.bed > chip2_s2_peaks.tsv | ||
# Overlap significant peaks from chip1 with enriched regions from chip2 | ||
intersectBed -a chip1_s10_peaks.tsv -b chip2_s2_peaks.tsv > intersect_peaks.tsv | ||
For example datasets as well as an example of a computational pipeline for the comparative analysis of ChIP-seq datasets, please refer to our | ||
publication: Bardet AF et al. A computational pipeline for comparative ChIP-seq analyses. Nature Protocols (2011) 7(1):45-61 (http://www.starklab.org/data/bardet_natprotoc_2011/) | ||
*OPTIONS* | ||
One of peakzilla's design goals is to learn all the necessary information | ||
from the data. The usage of the options should therefore not be required. | ||
*OUTPUT FORMAT* | ||
* Results are printed as a table of tab-delimited values to stdout | ||
* Logs are appended to logs.txt in the current directory or a custom directory/filename specified by the -l option | ||
* Enriched regions in the control sample are written to negative_peaks.tsv or a custom directory/filename specified by the -n option | ||
* Columns represent Chromosome / Start / End / Name / Summit / Score / ChIP / Control / FoldEnrichment / DistributionScore / FDR (%) | ||
]]></help> | ||
<citations> | ||
<citation type="doi">10.1038/nprot.2011.420</citation> | ||
</citations> | ||
</tool> |
Oops, something went wrong.