Skip to content

Commit

Permalink
Merge pull request #2 from bayraktar1/OXFORD
Browse files Browse the repository at this point in the history
Oxford
  • Loading branch information
bayraktar1 authored Feb 27, 2022
2 parents cd73eeb + 03433d5 commit b8d7d5a
Show file tree
Hide file tree
Showing 3 changed files with 183 additions and 1 deletion.
16 changes: 16 additions & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ existing_annotation: "/exports/sascstudent/dbayraktar/Data/selas_data/AdV-lumc00
# minimap2 parameters
minimap2_opts: "-uf -k14"

minimap_index_opts: " -k14 "

# TALON parameters
mincounts: "5"

Expand All @@ -28,7 +30,21 @@ flair_collapse_quality: 1

flair_abundance_quality: 1

# OXFORD parameters

oxford_poly_context: 24

oxford_max_poly_run: 8

oxford_minimum_mapping_quality: 40

oxford_stringtie_opts: " -c 1 -f 0.01 "

oxford_merge_opts: " -m 50 -c 0 -f 0.01 "

oxford_count_opts: " -L -p 10 "

oxford_avg_len: 1400

# threads
threads: 10
138 changes: 137 additions & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ rule all:
# bed_concatenated = OUTDIR / "FLAIR" / "concatenated_all_corrected.bed"
# gtf = OUTDIR / "FLAIR" / "COLLAPSE" / "flair.collapse.isoforms.gtf"
# config = OUTDIR / "FLAIR" / "manifest.tsv"
abundance = OUTDIR / "FLAIR" / "quantify" / "flair_counts_matrix.tsv"
# abundance = OUTDIR / "FLAIR" / "quantify" / "flair_counts_matrix.tsv"
abundance = OUTDIR / "OXFORD" / "abundance" / "transcript_count_matrix.csv"


rule minimap2_align:
Expand Down Expand Up @@ -52,6 +53,21 @@ rule minimap2_align:
{input.fq} > {output.sam_files}
'''

rule build_minimap_index:
input:
genome = genome_fasta
output:
index = OUTDIR / "alignments" / "genome_index.mmi"
params:
opts = config["minimap_index_opts"]
singularity:
'docker://quay.io/biocontainers/minimap2:2.17--h5bf99c6_4'
threads: 10
shell:
"""
minimap2 -t {threads} {params.opts} -I 1000G -d {output.index} {input.genome}
"""

rule sam_to_bam:
'''
Converts SAM to BAM.
Expand Down Expand Up @@ -438,3 +454,123 @@ rule flair_quantify:
--quality {params.quality} \
--output {output.abundance}
'''


#OXFORD

ContextFilter = """AlnContext: { Ref: "%s", LeftShift: -%d, RightShift: %d, RegexEnd: "[Aa]{%d,}", Stranded: True, Invert: True, Tsv: "alignments/internal_priming_fail.tsv"} """ % (genome_fasta,
config["oxford_poly_context"], config["oxford_poly_context"], config["oxford_max_poly_run"])

rule oxford_filter:
input:
genome = genome_fasta,
fq = READSDIR / "{sample}.fastq",
index = rules.build_minimap_index.output.index
params:
min_mq = config["oxford_minimum_mapping_quality"],
flt = lambda x: ContextFilter,
opts = config["minimap2_opts"]
output:
bam_filtered = OUTDIR / "OXFORD" / "filtered"/ "{sample}.bam"
threads: 10
conda:
"envs/oxford_filter.yaml"
shell:
'''
minimap2 -t {threads} -ax splice {params.opts} {input.index} {input.fq}\
| samtools view -q {params.min_mq} -F 2304 -Sb -\
| seqkit bam -j {threads} -x -T '{params.flt}' -\
| samtools sort -@ {threads} -o {output.bam_filtered} -;
samtools index {output.bam_filtered}
'''

rule oxford_run_stringtie:
input:
bam_filtered = rules.oxford_filter.output.bam_filtered,
annotation = existing_annotation,
params:
opts = config["oxford_stringtie_opts"],
output:
gff = OUTDIR / "OXFORD" / "stringtie_output" / "{sample}.gff",
singularity:
"docker://quay.io/biocontainers/stringtie:2.2.1--hecb563c_2"
threads: 10
shell:
'''
stringtie --rf -G {input.annotation} -L -p {threads} {params.opts} -o {output.gff} {input.bam_filtered}
'''

rule oxford_merge_stringtie:
input:
gff_files = expand(OUTDIR / "OXFORD" / "stringtie_output" / "{sample}.gff", sample=[".".join(read.name.split('.')[:-1]) for read in READS]),
annotation = existing_annotation
params:
opts = config["oxford_merge_opts"],
output:
merged_gff = OUTDIR / "OXFORD" / "stringtie_output" / "oxford_merged.gtf"
singularity:
"docker://quay.io/biocontainers/stringtie:2.2.1--hecb563c_2"
threads: 10
shell:
'''
stringtie \
--merge {input.gff_files} \
-G {input.annotation} \
{params.opts} \
-o {output.merged_gff}
'''

rule oxford_abundance:
input:
bam = rules.oxford_filter.output.bam_filtered,
merged_gtf = rules.oxford_merge_stringtie.output.merged_gff
params:
opts = config["oxford_count_opts"],
output:
count_gtf = OUTDIR / "OXFORD" / "abundance" / "{sample}.gtf"
singularity:
"docker://quay.io/biocontainers/stringtie:2.2.1--hecb563c_2"
threads: 10
shell:
'''
stringtie \
-G {input.merged_gtf} \
-e \
{params.opts} \
-o {output.count_gtf} \
{input.bam}
'''

rule oxford_config:
input:
count_gtf = expand(OUTDIR / "OXFORD" / "abundance" / "{sample}.gtf", sample=[".".join(read.name.split('.')[:-1]) for read in READS]),
params:
datasetnames= [".".join(read.name.split('.')[:-1]) for read in READS]
output:
config = OUTDIR / "OXFORD" / "abundance" / "oxford_config.tab"
threads: 1
run:
for read, name in zip(input.count_gtf, params.datasetnames):
with open(output.config, 'a+') as config:
config.write("%s\t%s\n" % (name, read))

rule oxford_calc_abundance:
input:
config_file = rules.oxford_config.output.config
params:
avg_len = config["oxford_avg_len"],
output:
transcripts = OUTDIR / "OXFORD" / "abundance" / "transcript_count_matrix.csv",
genes = OUTDIR / "OXFORD" / "abundance" / "gene_count_matrix.csv",
threads: 10
singularity:
"docker://quay.io/biocontainers/stringtie:2.2.1--hecb563c_2"
shell:
'''
prepDE.py \
-i {input.config_file} \
-l {params.avg_len} \
-g {output.genes} \
-t {output.transcripts}
'''

30 changes: 30 additions & 0 deletions workflow/envs/oxford_filter.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
name: oxford_filter
channels:
- bioconda
- defaults
dependencies:
- _libgcc_mutex=0.1=main
- _openmp_mutex=4.5=1_gnu
- bzip2=1.0.8=h7b6447c_0
- c-ares=1.18.1=h7f8727e_0
- ca-certificates=2021.10.26=h06a4308_2
- curl=7.62.0=hbc83047_0
- htslib=1.9=h4da6232_3
- k8=0.2.5=h9a82719_1
- libcurl=7.62.0=h20c2e04_0
- libdeflate=1.2=h516909a_1
- libev=4.33=h7f8727e_1
- libgcc=7.2.0=h69d50b8_2
- libgcc-ng=9.3.0=h5101ec6_17
- libgomp=9.3.0=h5101ec6_17
- libnghttp2=1.46.0=hce63b2e_0
- libssh2=1.9.0=h1ba5d50_1
- libstdcxx-ng=9.3.0=hd4cf53a_17
- minimap2=2.17=h5bf99c6_4
- ncurses=6.1=he6710b0_1
- openssl=1.1.1m=h7f8727e_0
- samtools=1.9=h10a08f8_12
- seqkit=2.1.0=h9ee0642_0
- xz=5.2.5=h7b6447c_0
- zlib=1.2.11=h7f8727e_4
prefix: /exports/sascstudent/dbayraktar/miniconda3/envs/oxford_filter

0 comments on commit b8d7d5a

Please sign in to comment.