diff --git a/config/config.yaml b/config/config.yaml index 50037f1..5268c5a 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -15,6 +15,8 @@ existing_annotation: "/exports/sascstudent/dbayraktar/Data/selas_data/AdV-lumc00 # minimap2 parameters minimap2_opts: "-uf -k14" +minimap_index_opts: " -k14 " + # TALON parameters mincounts: "5" @@ -28,7 +30,21 @@ flair_collapse_quality: 1 flair_abundance_quality: 1 +# OXFORD parameters + +oxford_poly_context: 24 + +oxford_max_poly_run: 8 + +oxford_minimum_mapping_quality: 40 + +oxford_stringtie_opts: " -c 1 -f 0.01 " + +oxford_merge_opts: " -m 50 -c 0 -f 0.01 " + +oxford_count_opts: " -L -p 10 " +oxford_avg_len: 1400 # threads threads: 10 \ No newline at end of file diff --git a/workflow/Snakefile b/workflow/Snakefile index a19b591..a566ac7 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -24,7 +24,8 @@ rule all: # bed_concatenated = OUTDIR / "FLAIR" / "concatenated_all_corrected.bed" # gtf = OUTDIR / "FLAIR" / "COLLAPSE" / "flair.collapse.isoforms.gtf" # config = OUTDIR / "FLAIR" / "manifest.tsv" - abundance = OUTDIR / "FLAIR" / "quantify" / "flair_counts_matrix.tsv" + # abundance = OUTDIR / "FLAIR" / "quantify" / "flair_counts_matrix.tsv" + abundance = OUTDIR / "OXFORD" / "abundance" / "transcript_count_matrix.csv" rule minimap2_align: @@ -52,6 +53,21 @@ rule minimap2_align: {input.fq} > {output.sam_files} ''' +rule build_minimap_index: + input: + genome = genome_fasta + output: + index = OUTDIR / "alignments" / "genome_index.mmi" + params: + opts = config["minimap_index_opts"] + singularity: + 'docker://quay.io/biocontainers/minimap2:2.17--h5bf99c6_4' + threads: 10 + shell: + """ + minimap2 -t {threads} {params.opts} -I 1000G -d {output.index} {input.genome} + """ + rule sam_to_bam: ''' Converts SAM to BAM. @@ -438,3 +454,123 @@ rule flair_quantify: --quality {params.quality} \ --output {output.abundance} ''' + + +#OXFORD + +ContextFilter = """AlnContext: { Ref: "%s", LeftShift: -%d, RightShift: %d, RegexEnd: "[Aa]{%d,}", Stranded: True, Invert: True, Tsv: "alignments/internal_priming_fail.tsv"} """ % (genome_fasta, +config["oxford_poly_context"], config["oxford_poly_context"], config["oxford_max_poly_run"]) + +rule oxford_filter: + input: + genome = genome_fasta, + fq = READSDIR / "{sample}.fastq", + index = rules.build_minimap_index.output.index + params: + min_mq = config["oxford_minimum_mapping_quality"], + flt = lambda x: ContextFilter, + opts = config["minimap2_opts"] + output: + bam_filtered = OUTDIR / "OXFORD" / "filtered"/ "{sample}.bam" + threads: 10 + conda: + "envs/oxford_filter.yaml" + shell: + ''' + minimap2 -t {threads} -ax splice {params.opts} {input.index} {input.fq}\ + | samtools view -q {params.min_mq} -F 2304 -Sb -\ + | seqkit bam -j {threads} -x -T '{params.flt}' -\ + | samtools sort -@ {threads} -o {output.bam_filtered} -; + samtools index {output.bam_filtered} + ''' + +rule oxford_run_stringtie: + input: + bam_filtered = rules.oxford_filter.output.bam_filtered, + annotation = existing_annotation, + params: + opts = config["oxford_stringtie_opts"], + output: + gff = OUTDIR / "OXFORD" / "stringtie_output" / "{sample}.gff", + singularity: + "docker://quay.io/biocontainers/stringtie:2.2.1--hecb563c_2" + threads: 10 + shell: + ''' + stringtie --rf -G {input.annotation} -L -p {threads} {params.opts} -o {output.gff} {input.bam_filtered} + ''' + +rule oxford_merge_stringtie: + input: + gff_files = expand(OUTDIR / "OXFORD" / "stringtie_output" / "{sample}.gff", sample=[".".join(read.name.split('.')[:-1]) for read in READS]), + annotation = existing_annotation + params: + opts = config["oxford_merge_opts"], + output: + merged_gff = OUTDIR / "OXFORD" / "stringtie_output" / "oxford_merged.gtf" + singularity: + "docker://quay.io/biocontainers/stringtie:2.2.1--hecb563c_2" + threads: 10 + shell: + ''' + stringtie \ + --merge {input.gff_files} \ + -G {input.annotation} \ + {params.opts} \ + -o {output.merged_gff} + ''' + +rule oxford_abundance: + input: + bam = rules.oxford_filter.output.bam_filtered, + merged_gtf = rules.oxford_merge_stringtie.output.merged_gff + params: + opts = config["oxford_count_opts"], + output: + count_gtf = OUTDIR / "OXFORD" / "abundance" / "{sample}.gtf" + singularity: + "docker://quay.io/biocontainers/stringtie:2.2.1--hecb563c_2" + threads: 10 + shell: + ''' + stringtie \ + -G {input.merged_gtf} \ + -e \ + {params.opts} \ + -o {output.count_gtf} \ + {input.bam} + ''' + +rule oxford_config: + input: + count_gtf = expand(OUTDIR / "OXFORD" / "abundance" / "{sample}.gtf", sample=[".".join(read.name.split('.')[:-1]) for read in READS]), + params: + datasetnames= [".".join(read.name.split('.')[:-1]) for read in READS] + output: + config = OUTDIR / "OXFORD" / "abundance" / "oxford_config.tab" + threads: 1 + run: + for read, name in zip(input.count_gtf, params.datasetnames): + with open(output.config, 'a+') as config: + config.write("%s\t%s\n" % (name, read)) + +rule oxford_calc_abundance: + input: + config_file = rules.oxford_config.output.config + params: + avg_len = config["oxford_avg_len"], + output: + transcripts = OUTDIR / "OXFORD" / "abundance" / "transcript_count_matrix.csv", + genes = OUTDIR / "OXFORD" / "abundance" / "gene_count_matrix.csv", + threads: 10 + singularity: + "docker://quay.io/biocontainers/stringtie:2.2.1--hecb563c_2" + shell: + ''' + prepDE.py \ + -i {input.config_file} \ + -l {params.avg_len} \ + -g {output.genes} \ + -t {output.transcripts} + ''' + diff --git a/workflow/envs/oxford_filter.yaml b/workflow/envs/oxford_filter.yaml new file mode 100644 index 0000000..af3e1d4 --- /dev/null +++ b/workflow/envs/oxford_filter.yaml @@ -0,0 +1,30 @@ +name: oxford_filter +channels: + - bioconda + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=4.5=1_gnu + - bzip2=1.0.8=h7b6447c_0 + - c-ares=1.18.1=h7f8727e_0 + - ca-certificates=2021.10.26=h06a4308_2 + - curl=7.62.0=hbc83047_0 + - htslib=1.9=h4da6232_3 + - k8=0.2.5=h9a82719_1 + - libcurl=7.62.0=h20c2e04_0 + - libdeflate=1.2=h516909a_1 + - libev=4.33=h7f8727e_1 + - libgcc=7.2.0=h69d50b8_2 + - libgcc-ng=9.3.0=h5101ec6_17 + - libgomp=9.3.0=h5101ec6_17 + - libnghttp2=1.46.0=hce63b2e_0 + - libssh2=1.9.0=h1ba5d50_1 + - libstdcxx-ng=9.3.0=hd4cf53a_17 + - minimap2=2.17=h5bf99c6_4 + - ncurses=6.1=he6710b0_1 + - openssl=1.1.1m=h7f8727e_0 + - samtools=1.9=h10a08f8_12 + - seqkit=2.1.0=h9ee0642_0 + - xz=5.2.5=h7b6447c_0 + - zlib=1.2.11=h7f8727e_4 +prefix: /exports/sascstudent/dbayraktar/miniconda3/envs/oxford_filter