-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #62 from broadinstitute/dp-nigeria
Nigeria preparations: add reference guided assembly pipelines for short-term use Spring 2015 at run.edu.ng and ucad.sn.
- Loading branch information
Showing
8 changed files
with
221 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,18 +9,19 @@ | |
|
||
__author__ = 'Daniel Park <[email protected]>' | ||
|
||
import os.path | ||
|
||
configfile: "config.json" | ||
pipesDir = os.path.join(os.path.expanduser(config['binDir']), 'pipes', 'rules') | ||
|
||
include: config["binDir"]+"/pipes/rules/common.rules" | ||
|
||
include: os.path.join(pipesDir, 'common.rules') | ||
set_env_vars() | ||
|
||
include: config["binDir"]+"/pipes/rules/demux.rules" | ||
include: config["binDir"]+"/pipes/rules/hs_deplete.rules" | ||
include: config["binDir"]+"/pipes/rules/assembly.rules" | ||
include: config["binDir"]+"/pipes/rules/interhost.rules" | ||
include: config["binDir"]+"/pipes/rules/reports.rules" | ||
include: os.path.join(pipesDir, 'demux.rules') | ||
include: os.path.join(pipesDir, 'hs_deplete.rules') | ||
include: os.path.join(pipesDir, 'assembly.rules') | ||
include: os.path.join(pipesDir, 'interhost.rules') | ||
include: os.path.join(pipesDir, 'reports.rules') | ||
|
||
rule all: | ||
input: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
""" | ||
This performs reference-assisted assembly of viral genomes, starting | ||
from fastqs produced directly from MiSeq machines. | ||
Copy this Snakefile and config.json into your analysis directory and | ||
customize them as needed. | ||
This operates on the SampleSheet.csv file that is used as input to the MiSeq | ||
machine and all of the fastq files that are emitted by that machine. Put | ||
the SampleSheet.csv in the project directory and put the fastq files in a | ||
subdirectory called "data". Copy the config.json to the project directory. | ||
Then type "ref_assisted" and wait a few hours for aligned BAMs, VCFs, and | ||
FASTAs. | ||
This is designed for use on a single Linux computer (e.g. Ubuntu 14.04 LTS) with: | ||
apt-get install: | ||
python3 python3-pip python-software-properties | ||
zlib zlib1g zlib1g-dev | ||
libblas3gf libblas-dev liblapack3gf liblapack-dev | ||
libatlas-dev libatlas3-base libatlas3gf-base libatlas-base-dev | ||
gfortran git oracle-java8-installer | ||
libncurses5-dev python3-nose | ||
pip3 install -r requirements.txt | ||
pip3 install snakemake==3.2 | ||
""" | ||
|
||
__author__ = 'Daniel Park <[email protected]>' | ||
|
||
import os, os.path, time, hashlib, base64 | ||
|
||
configfile: "config.json" | ||
|
||
# Copy every entry of the optional "env_vars" config section into the
# environment of this process; nothing happens when the section is absent.
env_overrides = config.get('env_vars', {})
for var_name, var_value in env_overrides.items():
    os.environ[var_name] = var_value
|
||
def get_sample_list(fname):
    """Yield the sample ID of every data row in a MiSeq sample sheet.

    Everything up to and including the column-header row (a line that
    starts with ``Sample_ID``) is ignored.  For each row after it, the
    first comma-separated field is yielded.  Blank (whitespace-only) lines
    are skipped, so a trailing empty line in the file does not produce an
    empty sample name.

    fname: path of the sample sheet CSV file.
    """
    with open(fname, 'rt') as inf:
        header_seen = False
        for line in inf:
            if line.startswith('Sample_ID'):
                header_seen = True
                continue
            # The raw line still carries its newline and is therefore
            # always truthy; the emptiness test must use the stripped text.
            row = line.strip()
            if header_seen and row:
                yield row.split(',')[0]
|
||
def get_sample_info(sample, fname):
    """Look up the sample sheet row that belongs to one sample.

    Lines before the column-header row (a line that starts with
    ``Sample_ID``) are ignored.  Each non-blank line after it is a data
    row; rows are numbered from 1 in file order.

    sample: value to match against the ``Sample_ID`` column.
    fname:  path of the sample sheet CSV file.

    Returns a dict mapping each header column name to the row's value,
    plus the key ``'n'`` holding the 1-based number of the row, for the
    first row whose ``Sample_ID`` equals ``sample``.  Returns None when no
    row matches.
    """
    with open(fname, 'rt') as inf:
        header = None
        n = 0
        for line in inf:
            if line.startswith('Sample_ID'):
                header = line.strip().split(',')
                continue
            # The raw line always contains its newline, so emptiness has
            # to be tested on the stripped text.  Blank lines must not be
            # counted, otherwise 'n' drifts away from the row's position.
            row = line.strip()
            if not (header and row):
                continue
            n += 1
            out = dict(zip(header, row.split(',')))
            out['n'] = n
            if out['Sample_ID'] == sample:
                return out
    return None
|
||
def get_file_date(fname):
    """Return the last-modification date of ``fname`` as ``YYYY-MM-DD``.

    The modification time is interpreted in the local time zone.
    """
    modified_at = os.path.getmtime(fname)
    local_struct = time.localtime(modified_at)
    return time.strftime("%Y-%m-%d", local_struct)
|
||
def make_run_hash(fname, length=6):
    """Return a short identifier for the run described by ``fname``.

    When the config has a ``flowcell`` entry, that value is returned as is
    (``length`` is not applied to it).  Otherwise the text of ``fname`` is
    UTF-8 encoded and SHA-1 hashed, the digest is base32 encoded, and the
    first ``length`` characters of that text are returned.
    """
    if 'flowcell' in config:
        return config['flowcell']
    with open(fname, 'rt') as inf:
        sheet_text = inf.read()
    digest = hashlib.sha1(sheet_text.encode('utf-8')).digest()
    encoded = base64.b32encode(digest).decode('utf-8')
    return encoded[:length]
|
||
############################################## | ||
|
||
# Default target: request the {dataDir}/{sample}.fasta file of every sample
# named in the sample sheet that config["samples"] points to.
rule all:
    input:
        expand(
            "{dataDir}/{sample}.fasta",
            dataDir=config["dataDir"],
            sample=get_sample_list(config["samples"])
        )
|
||
############################################## | ||
|
||
def get_fastq_filenames(wildcards):
    """Return the read 1 and read 2 fastq paths for a sample.

    ``wildcards`` must provide ``sample`` and ``dir``.  The file names
    follow the pattern ``{sample}_S{n}_L001_R{1|2}_001.fastq`` where ``n``
    is the sample's row number from the sample sheet; ``.gz`` is appended
    when the config sets ``fastqs_gzipped`` to a true value.
    """
    info = get_sample_info(wildcards.sample, config["samples"])
    suffix = ".gz" if config.get('fastqs_gzipped', False) else ""
    name_template = '{sample}_S{idx}_L001_R{dir}_001.fastq{suffix}'
    fastq_paths = []
    for direction in ('1', '2'):
        basename = name_template.format(
            sample=wildcards.sample, idx=info['n'], dir=direction,
            suffix=suffix)
        fastq_paths.append(os.path.join(wildcards.dir, basename))
    return fastq_paths
# Convert a sample's pair of fastq files into one unaligned BAM by calling
# read_utils.py fastq_to_bam, passing read-group metadata as Picard options.
rule bams_from_fastq:
    input:
        get_fastq_filenames
    output:
        '{dir}/{sample}.raw.bam'
    resources:
        mem=4
    params:
        logid="{sample}",
        center=config["seq_center"]
    run:
        # These locals are not used directly below; they are referenced by
        # name from inside the shell() command template.
        info = get_sample_info(wildcards.sample, config["samples"])
        hash = make_run_hash(config["samples"])
        # The run date is taken from the first input fastq's mtime.
        run_date = get_file_date(input[0])
        shell(
            "{config[binDir]}/read_utils.py fastq_to_bam {input} {output} "
            "--sampleName {wildcards.sample} --picardOptions "
            "SEQUENCING_CENTER={params.center} "
            "RUN_DATE={run_date} "
            "PLATFORM_UNIT={hash}.1.{info[index]}-{info[index2]} "
            "READ_GROUP_NAME={hash} "
            "PLATFORM=illumina SORT_ORDER=queryname"
        )
|
||
############################################## | ||
|
||
# Run assembly.py refine_assembly on a sample's raw BAM against the
# configured reference genome, writing a realigned BAM, a gzipped VCF and
# a FASTA for the sample.
rule ref_guided_consensus:
    input:
        '{dir}/{sample}.raw.bam'
    output:
        '{dir}/{sample}.realigned.bam',
        '{dir}/{sample}.vcf.gz',
        '{dir}/{sample}.fasta'
    resources:
        mem=4
    params:
        LSF='-W 4:00',
        logid="{sample}",
        refGenome=os.path.expanduser(config["ref_genome"]),
        novoalign_options=config["refine_options"]["novoalign"],
        min_coverage=config["refine_options"]["min_coverage"]
    # output[0] = realigned BAM, output[1] = VCF, output[2] = FASTA
    shell:
        "{config[binDir]}/assembly.py refine_assembly "
        "{params.refGenome} {input} {output[2]} "
        "--outBam {output[0]} "
        "--outVcf {output[1]} "
        "--keep_all_reads "
        "--chr_names {wildcards.sample} "
        "--min_coverage {params.min_coverage} "
        "--novo_params '{params.novoalign_options}' "
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{ | ||
"samples": "SampleSheet.csv", | ||
|
||
"refine_options": { | ||
"novoalign": "-r Random -l 30 -g 40 -x 20 -t 502", | ||
"min_coverage": "3" | ||
}, | ||
|
||
"seq_center": "ACEGID_RUN", | ||
"fastqs_gzipped": false, | ||
"n_cores": 4, | ||
"max_ram": 16, | ||
"ref_genome": "~/resources/ref_genome/genome.fasta", | ||
|
||
"dataDir": "data", | ||
"logDir": "log", | ||
"binDir": "~/viral-ngs", | ||
"venvDir": "~/venv", | ||
"project": "viral_ngs" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#!/bin/bash
#
# Run the ref_assisted Snakefile with snakemake on this machine.
# Must be started from the directory that contains config.json.
# All command-line arguments are passed through to snakemake.

# load config dirs from config.json; stop if a value cannot be read,
# rather than handing empty arguments to snakemake
BIN_DIR=$(python -c 'import json,os.path;f=open("config.json");print(os.path.expanduser(json.load(f)["binDir"]));f.close()') || exit 1
N_CORES=$(python -c 'import json;f=open("config.json");print(json.load(f)["n_cores"]);f.close()') || exit 1
MAX_RAM=$(python -c 'import json;f=open("config.json");print(json.load(f)["max_ram"]);f.close()') || exit 1

# execute snakemake on this machine with specified resources
# (expansions are quoted so a binDir containing spaces stays one argument)
snakemake --timestamp --rerun-incomplete --keep-going \
    --jobs "$N_CORES" --resources mem="$MAX_RAM" \
    -s "$BIN_DIR/pipes/ref_assisted/Snakefile" \
    "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters