From 931e223fb4930ec97beccdbc786d78d3326b359a Mon Sep 17 00:00:00 2001 From: slsevilla Date: Thu, 29 Jul 2021 11:08:02 -0400 Subject: [PATCH 1/9] fix col naming --- workflow/scripts/03_transcript_types.Rmd | 57 +++++++++++------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/workflow/scripts/03_transcript_types.Rmd b/workflow/scripts/03_transcript_types.Rmd index 04891f9..9dd60ad 100755 --- a/workflow/scripts/03_transcript_types.Rmd +++ b/workflow/scripts/03_transcript_types.Rmd @@ -2,15 +2,15 @@ output: html_document params: f_talon_data: - value: "~/../../Volumes/sevillas2/rbl3_test3/05_talon/counts/SIRV_talon_abundance_filtered.tsv" + value: "~/../../Volumes/dgcr8demo/05_talon/counts/SIRV_talon_abundance_filtered.tsv" u_talon_data: - value: "~/../../Volumes/sevillas2/rbl3_test3/05_talon/counts/SIRV_talon_abundance.tsv" + value: "~/../../Volumes/dgcr8demo/05_talon/counts/SIRV_talon_abundance.tsv" u_flair_data: - value: "~/../../Volumes/sevillas2/rbl3_test3/06_flair/counts/flair_counts_matrix.tsv" + value: "~/../../Volumes/dgcr8demo/06_flair/counts/flair_counts_matrix.tsv" output_dir: - value: "~/../../Volumes/sevillas2/rbl3_test3/08_report" + value: "~/../../Volumes/dgcr8demo/08_report" log_dir: - value: "~/../../Volumes/sevillas2/rbl3_test3/log" + value: "~/../../Volumes/dgcr8demo/log" perc_sim: value: 20 num_match: @@ -20,7 +20,7 @@ params: clean_up: value: "Y" deg_list: - value: "~/../../Volumes/sevillas2/rbl3_test3/07_deg/deg_iso_wt_ko.dicer.txt ~/../../Volumes/sevillas2/rbl3_test3/07_deg/deg_iso_wt_ko.drosha.txt" + value: "~/../../Volumes/dgcr8demo/07_deg/deg_iso_wt_ko.dicer.txt ~/../../Volumes/dgcr8demo/07_deg/deg_iso_wt_ko.drosha.txt" editor_options: chunk_output_type: console --- @@ -55,12 +55,31 @@ For complete workflow documentation, review the [Wiki](https://github.com/RBL-NC Both parameters will impact the results below. For more detailed information on the parameters, review the snakemake_config.yaml file within the ``r params$log_dir`` directory. +## Transcript Type +### Talon data + **TALON identified transcripts are summarized prior to filtering by type, with the following key:** + +* ISM: incomplete splice match + +* NIC: novel in catalog + +* NNC: novel not in catalog + +Two files are generated, including: +* A counts matrix was created, located at ```r talon_unfiltered_data```, which is previewed below: +```{r, echo=FALSE, message=FALSE} +#text files +talon_df = read.csv(talon_unfiltered_data,sep="\t") +kable(head(talon_df)) +``` + +* A transcript summary was generated from this unfiltered data, log10 transformed: ```{r, echo=FALSE} #plot transcript read lengths, read counts, exon counts TranscriptFeatures <- function(df_in,title_in,type_in) { abund_df = read.csv(df_in,sep="\t") - + p1 = ggplot(abund_df, aes(x=transcript_novelty, y=length, fill=transcript_novelty)) + geom_violin(trim=FALSE)+ geom_boxplot(width=0.1, fill="white")+ @@ -81,7 +100,7 @@ TranscriptFeatures <- function(df_in,title_in,type_in) { transcript_nov = aggregate(get(colnames(abund_df[12]))~transcript_novelty,abund_df,sum) colnames(transcript_nov)[2] <- colnames(abund_df[12]) - for (i in range(13:ncol(abund_df))){ + for (i in (13:ncol(abund_df))){ transcript_nov[,colnames(abund_df[i])] = aggregate(get(colnames(abund_df[i])) ~transcript_novelty,abund_df,sum)[2] @@ -118,29 +137,7 @@ TranscriptFeatures <- function(df_in,title_in,type_in) { #save plot ggsave(paste0(out_dir,"/img/",type_in,".png"),p_final) } -``` -## Transcript Type -### Talon data - **TALON identified transcripts are summarized prior to filtering by type, with the following key:** - -* ISM: incomplete splice match - -* NIC: novel in catalog - -* NNC: novel not in catalog - -Two files are generated, including: - -* A counts matrix was created, located at ```r talon_unfiltered_data```, which is previewed below: -```{r, echo=FALSE, message=FALSE} -#text files -talon_df = read.csv(talon_unfiltered_data,sep="\t") -kable(head(talon_df)) -``` - -* A transcript summary was generated from this unfiltered data, log10 transformed: -```{r, echo=FALSE, message=FALSE} #png files TranscriptFeatures(talon_unfiltered_data," Unfiltered Data","unfiltered_abundance") ``` From c22f1ec71c9d12c9675f5d7462a6b21eb3059fce Mon Sep 17 00:00:00 2001 From: slsevilla Date: Thu, 29 Jul 2021 12:56:24 -0400 Subject: [PATCH 2/9] remove output not needed for analysis --- workflow/Snakefile | 80 ++++++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 46 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 3204e3d..c010891 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -35,7 +35,7 @@ num_match = config['numberMatches'] #annotation FA with wildcards #if wildcards are used for bc, check if masking is required def get_annotation_fa(wildcards): - if hasattr('wildcards', 'masked_flag'): + if hasattr(wildcards, 'masked_flag'): if (wildcards.masked_flag == "masked"): anno_file=join(out_dir,"00_tmp","annotation_files","masked.fa") else: @@ -58,16 +58,10 @@ def get_bam_input(wildcards): #annotation GTF with wildcards def get_annotation_gtf(wildcards): - if hasattr('wildcards', 'masked_flag'): - if (wildcards.masked_flag == "masked"): - anno_file=join(out_dir,"00_tmp","annotation_files","masked.gtf") - else: - anno_file=join(out_dir,"00_tmp","annotation_files","unmasked.gtf") + if (masked_refs == "Y"): + anno_file=join(out_dir,"00_tmp","annotation_files","masked.gtf") else: - if (masked_refs == "Y"): - anno_file=join(out_dir,"00_tmp","annotation_files","masked.gtf") - else: - anno_file=join(out_dir,"00_tmp","annotation_files","unmasked.gtf") + anno_file=join(out_dir,"00_tmp","annotation_files","unmasked.gtf") return(anno_file) #talon config @@ -211,9 +205,9 @@ rule all: input_annotation, #input fastq files - expand(join(fastq_dir,'{bc}.fastq'),bc=bc_list), - expand(join(out_dir,'01_fastq','{bc}.fastq.gz'),bc=bc_list), - expand(join(out_dir,'01_fastq_trimmed','{bc}.fastq.gz'),bc=bc_list), + #expand(join(fastq_dir,'{bc}.fastq'),bc=bc_list), + #expand(join(out_dir,'01_fastq','{bc}.fastq.gz'),bc=bc_list), + #xpand(join(out_dir,'01_fastq_trimmed','{bc}.fastq.gz'),bc=bc_list), #sam files input_sam, @@ -253,12 +247,6 @@ rule all: # #squanti # #join(out_dir,'tbd'), - #collapsed files - #expand(join(out_dir,'collapsed','{bc}.collapsed.gff'),bc=bc_list), - #expand(join(out_dir,'collapsed','{bc}.collapsed.rep.fq'),bc=bc_list), - #expand(join(out_dir,'collapsed','{bc}.collapsed.group.txt'),bc=bc_list), - #expand(join(out_dir,'collapsed','{bc}.ignored_ids.txt'),bc=bc_list), - #common and other SMK if source_dir == "": include: "rules/common.smk" @@ -316,7 +304,7 @@ rule handle_fastq: params: rname = "01_fq", output: - zip = join(out_dir,'01_fastq','{bc}.fastq.gz') + zip = temp(join(out_dir,'01_fastq','{bc}.fastq.gz')) shell: """ gzip -c {input.f1} > {output.zip} @@ -335,7 +323,7 @@ rule adaptor_trim: envmodules: config['singularity'], output: - o1 = join(out_dir,'01_fastq_trimmed','{bc}.fastq.gz') + o1 = temp(join(out_dir,'01_fastq_trimmed','{bc}.fastq.gz')) shell: ''' {params.sing_param} {params.doc} porechop -i {input.f1} -o {output.o1} -t 2 @@ -358,8 +346,8 @@ rule create_sam: config['minimap2'], config['samtools'] output: - sam = join(out_dir,'02_sam','{bc}_{masked_flag}.sam'), - sorted = join(out_dir,'02_sam','{bc}_{masked_flag}.sorted.sam') + sam = temp(join(out_dir,'02_sam','{bc}_{masked_flag}.sam')), + sorted = temp(join(out_dir,'02_sam','{bc}_{masked_flag}.sorted.sam')) shell: ''' minimap2 \ @@ -387,7 +375,7 @@ if (clean_up=="Y"): envmodules: config['singularity'], output: - o1 = join(out_dir,'02_sam_corrected','{bc}_{masked_flag}.sorted_clean.sam') + o1 = temp(join(out_dir,'02_sam_corrected','{bc}_{masked_flag}.sorted_clean.sam')) shell: ''' {params.sing_param} {params.doc} TranscriptClean.py --sam {input.f1} --genome {params.anno_fa} \ @@ -402,7 +390,7 @@ rule create_bam: envmodules: config['samtools'] output: - bam = join(out_dir,'03_bam','{bc}_{masked_flag}.bam'), + bam = temp(join(out_dir,'03_bam','{bc}_{masked_flag}.bam')), sorted = join(out_dir, '03_bam','{bc}_{masked_flag}.sorted.bam') shell: """ @@ -423,7 +411,7 @@ rule qc_fastq: envmodules: config['fastqc'] output: - o1 = join(out_dir, '04_qc','fastqc','{bc}_fastqc.html') + o1 = temp(join(out_dir, '04_qc','fastqc','{bc}_fastqc.html')) shell: """ fastqc {input.fq} -o {params.base} @@ -442,7 +430,7 @@ rule qc_samstats: envmodules: config['samtools'] output: - o1 = join(out_dir, '04_qc','samtools','{bc}_{masked_flag}_samstats.txt') + o1 = temp(join(out_dir, '04_qc','samtools','{bc}_{masked_flag}_samstats.txt')) shell: """ samtools view -h {input.f1} | samtools stats - > {output.o1} @@ -489,12 +477,12 @@ rule qc_alignment: config['samtools'], config['R'] output: - bam_a = join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_align_len.txt'), - bam_u = join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_unalign_len.txt'), - png_align = join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_aligned.png'), - png_unalign = join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_unaligned.png'), - txt_align = join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_aligned.txt'), - txt_unalign = join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_unaligned.txt'), + bam_a = temp(join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_align_len.txt')), + bam_u = temp(join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_unalign_len.txt')), + png_align = temp(join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_aligned.png')), + png_unalign = temp(join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_unaligned.png')), + txt_align = temp(join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_aligned.txt')), + txt_unalign = temp(join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_unaligned.txt')), shell: """ samtools view -F 4 {input.f1} | awk '{{print length($10)}}' > {output.bam_a}; \ @@ -584,7 +572,7 @@ rule talon_db: anno_gtf = get_annotation_gtf, base = join(out_dir,'05_talon',build_id), output: - o1 = join(out_dir,'05_talon', build_id + '.db') + o1 = temp(join(out_dir,'05_talon', build_id + '.db')) envmodules: config['singularity'], shell: @@ -621,8 +609,8 @@ rule talon_prime: base_sample = join(out_dir,'05_talon','sam_labeled','{bc}'), p_len = primer_len, output: - o1 = join(out_dir,'05_talon','sam_labeled','{bc}_labeled.sam'), - o2 = join(out_dir,'05_talon','sam_labeled','{bc}_read_labels.tsv'), + o1 = temp(join(out_dir,'05_talon','sam_labeled','{bc}_labeled.sam')), + o2 = temp(join(out_dir,'05_talon','sam_labeled','{bc}_read_labels.tsv')), envmodules: config['singularity'], shell: @@ -805,7 +793,7 @@ rule merge_fq: rname = "06.1_flair_merge", base = join(out_dir,'01_fastq_trimmed') output: - o1 = join(out_dir,'06_flair','merged.fastq.gz') + o1 = temp(join(out_dir,'06_flair','merged.fastq.gz')) shell: ''' zcat {params.base}/*.fastq.gz | gzip -n -> {output.o1} @@ -839,14 +827,14 @@ rule flair_isoforms: envmodules: config['singularity'], output: - o1 = join(out_dir,'06_flair','isoforms','merged_flair.isoforms.fa'), - o2 = join(out_dir,'06_flair','isoforms','merged_flair.isoforms.gtf'), - o3 = join(out_dir,'06_flair','isoforms','merged_flair_all_corrected.bed'), - o4 = join(out_dir,'06_flair','isoforms','merged_flair_all_inconsistent.bed'), - o5 = join(out_dir,'06_flair','isoforms','merged_flair.bam'), - o6 = join(out_dir,'06_flair','isoforms','merged_flair.bed'), - o7 = join(out_dir,'06_flair','isoforms','merged_flair.isoforms.bed'), - o8 = join(out_dir,'06_flair','isoforms','merged_flair.sam'), + o1 = temp(join(out_dir,'06_flair','isoforms','merged_flair.isoforms.fa')), + o2 = temp(join(out_dir,'06_flair','isoforms','merged_flair.isoforms.gtf')), + o3 = temp(join(out_dir,'06_flair','isoforms','merged_flair_all_corrected.bed')), + o4 = temp(join(out_dir,'06_flair','isoforms','merged_flair_all_inconsistent.bed')), + o5 = temp(join(out_dir,'06_flair','isoforms','merged_flair.bam')), + o6 = temp(join(out_dir,'06_flair','isoforms','merged_flair.bed')), + o7 = temp(join(out_dir,'06_flair','isoforms','merged_flair.isoforms.bed')), + o8 = temp(join(out_dir,'06_flair','isoforms','merged_flair.sam')), shell: ''' {params.sing_param} {params.doc} flair.py 123 -g {params.anno_fa} \ @@ -877,7 +865,7 @@ rule flair_fastq: params: zip = join(out_dir,'06_flair','fastq','{bc}.fastq.gz') output: - o1 = join(out_dir,'06_flair','fastq','{bc}.fastq') + o1 = temp(join(out_dir,'06_flair','fastq','{bc}.fastq')) shell: ''' cp {input.f1} {params.zip}; \ From e102ed7a913e10e585263456b3fce7a7e15b05d1 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Thu, 29 Jul 2021 12:57:02 -0400 Subject: [PATCH 3/9] rename report rule --- workflow/Snakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index c010891..a3eef99 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -940,14 +940,14 @@ rule flair_deg: {params.sing_param} {params.docs} bash -c "python3 /opt2/flair/bin/diff_iso_usage.py {input.f1} {params.groupid} {output.o1}" ''' -rule abundance_plots: +rule final_report: input: unfilt = join(out_dir,'05_talon','counts', build_id + '_talon_abundance.tsv'), filt = join(out_dir,'05_talon','counts', build_id + '_talon_abundance_filtered.tsv'), flair = join(out_dir,'06_flair','counts','flair_counts_matrix.tsv'), degs = expand(join(out_dir,'07_deg','deg_iso_{group_id}.txt'),group_id=deg_list) params: - rname = "07_abundance_plots", + rname = "07_final_report", R = join(source_dir,"workflow","scripts","03_transcript_types.Rmd"), base = join(out_dir,'08_report'), log = join(out_dir,'log'), From 1aa2317987c221318a32b2482b6eb72e76c9415a Mon Sep 17 00:00:00 2001 From: slsevilla Date: Thu, 29 Jul 2021 16:33:54 -0400 Subject: [PATCH 4/9] create bam outputs filtered by transcript novelty --- workflow/Snakefile | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/workflow/Snakefile b/workflow/Snakefile index a3eef99..5b63154 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -232,6 +232,7 @@ rule all: join(out_dir,'05_talon','counts', build_id + '_whitelist.txt'), join(out_dir,'05_talon','counts', build_id + '_talon_abundance_filtered.tsv'), join(out_dir,'05_talon','gtf', build_id + '_talon.gtf'), + expand(join(out_dir,'05_talon','transcript_filtered','{bc}_Known.bam'),bc=bc_list), #flair join(out_dir,'06_flair','merged.fastq.gz'), @@ -784,6 +785,48 @@ rule talon_gtf: --o {params.base} ''' +if (masked_refs == "Y"): + rule create_masked_outputs: + ''' + http://broadinstitute.github.io/picard/command-line-overview.html#FilterSamReads + + Using talon annotations, BAM files are created for the transcript_novelty category. The following + are categories that can be found in a sample: Antisense, Genomic, ISM, Known, NIC, NNC + ''' + input: + anno = join(out_dir,'05_talon','annotate', build_id + '_talon_read_annot.tsv'), + sam = join(out_dir,'05_talon','sam_labeled','{bc}_labeled.sam') + params: + rname = "todo", + base = join(out_dir,'05_talon','transcript_filtered','{bc}_') + envmodules: + config['java'], + config['picard'], + config['samtools'], + output: + bam = temp(join(out_dir,'05_talon','transcript_filtered','{bc}_labeled.bam')), + cat_list = temp(join(out_dir,'05_talon','transcript_filtered','{bc}_category_list.txt')), + filt = join(out_dir,'05_talon','transcript_filtered','{bc}_Known.bam'), + shell: + """ + #determine which transcript categories are present in samples, skip header + awk '(NR>1)' {input.anno} | cut -f17 - | sort | uniq > {output.cat_list}; + + #create bam + samtools view -bS {input.sam}> {output.bam}; + + #create read list, create subset bam file + while read p; do \ + cat {input.anno} | awk -v p="$p" '$17 == '"p"'' | cut -f1 > {params.base}${{p}}_readlist.txt; + + java -jar $PICARDJARPATH/picard.jar FilterSamReads \ + I={output.bam} \ + O={params.base}${{p}}.bam \ + READ_LIST_FILE={params.base}${{p}}_readlist.txt \ + FILTER=includeReadList + done <{output.cat_list}; + """ + rule merge_fq: ''' ''' From c7759862ba64c9ae2e6c64dddc520a859539a290 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Fri, 30 Jul 2021 11:24:46 -0400 Subject: [PATCH 5/9] change output req for create_masked_outputs --- workflow/Snakefile | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 5b63154..3a10cdd 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -232,8 +232,8 @@ rule all: join(out_dir,'05_talon','counts', build_id + '_whitelist.txt'), join(out_dir,'05_talon','counts', build_id + '_talon_abundance_filtered.tsv'), join(out_dir,'05_talon','gtf', build_id + '_talon.gtf'), - expand(join(out_dir,'05_talon','transcript_filtered','{bc}_Known.bam'),bc=bc_list), - + expand(join(out_dir,'05_talon','transcript_filtered','{bc}_category_list.txt'),bc=bc_list), + #flair join(out_dir,'06_flair','merged.fastq.gz'), join(out_dir,'06_flair','isoforms','merged_flair.isoforms.fa'), @@ -803,10 +803,10 @@ if (masked_refs == "Y"): config['java'], config['picard'], config['samtools'], + config['bedtools'] output: bam = temp(join(out_dir,'05_talon','transcript_filtered','{bc}_labeled.bam')), cat_list = temp(join(out_dir,'05_talon','transcript_filtered','{bc}_category_list.txt')), - filt = join(out_dir,'05_talon','transcript_filtered','{bc}_Known.bam'), shell: """ #determine which transcript categories are present in samples, skip header @@ -815,7 +815,7 @@ if (masked_refs == "Y"): #create bam samtools view -bS {input.sam}> {output.bam}; - #create read list, create subset bam file + #create read list, create subset bam file, sam file while read p; do \ cat {input.anno} | awk -v p="$p" '$17 == '"p"'' | cut -f1 > {params.base}${{p}}_readlist.txt; @@ -823,8 +823,13 @@ if (masked_refs == "Y"): I={output.bam} \ O={params.base}${{p}}.bam \ READ_LIST_FILE={params.base}${{p}}_readlist.txt \ - FILTER=includeReadList - done <{output.cat_list}; + FILTER=includeReadList; + + samtools view -h -o {params.base}${{p}}.sam {params.base}${{p}}.bam; + + bedtools bamtofastq -i {params.base}${{p}}.bam -fq {params.base}${{p}}.fastq; + done <{output.cat_list}; + """ rule merge_fq: From 5f408792c432c81d2ad6af64fd6eabb60efd413c Mon Sep 17 00:00:00 2001 From: slsevilla Date: Fri, 30 Jul 2021 11:33:00 -0400 Subject: [PATCH 6/9] add config req for java and picard --- config/snakemake_config.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/config/snakemake_config.yaml b/config/snakemake_config.yaml index 0b620dc..07965e3 100644 --- a/config/snakemake_config.yaml +++ b/config/snakemake_config.yaml @@ -46,4 +46,6 @@ fastqc: "fastqc/0.11.9" multiqc: "multiqc/1.9" Qt: "Qt/5.13.2" samtools: "samtools/1.11" -R: "R/4.0" \ No newline at end of file +R: "R/4.0" +java: "java/12.0.1" +picard: "picard/2.25.0" \ No newline at end of file From 67d305f29f798fb3bb3f0d9fef951e04494c8fd3 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Fri, 30 Jul 2021 11:35:00 -0400 Subject: [PATCH 7/9] remove unused code, unhide rule all req --- workflow/Snakefile | 38 ++++---------------------------------- 1 file changed, 4 insertions(+), 34 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 3a10cdd..2bb3e31 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -205,9 +205,9 @@ rule all: input_annotation, #input fastq files - #expand(join(fastq_dir,'{bc}.fastq'),bc=bc_list), - #expand(join(out_dir,'01_fastq','{bc}.fastq.gz'),bc=bc_list), - #xpand(join(out_dir,'01_fastq_trimmed','{bc}.fastq.gz'),bc=bc_list), + expand(join(fastq_dir,'{bc}.fastq'),bc=bc_list), + expand(join(out_dir,'01_fastq','{bc}.fastq.gz'),bc=bc_list), + expand(join(out_dir,'01_fastq_trimmed','{bc}.fastq.gz'),bc=bc_list), #sam files input_sam, @@ -1023,34 +1023,4 @@ rule final_report: clean_up = "{params.clean}", \ num_match = "{params.num_match}", \ deg_list = "{input.degs}"))' - ''' - -# rule squanti: -# ''' -# remove params: -# --polyA_motif_list polyA.list -# --cage_peak {params.cage} \ - -# for short read data only: -# --expression rsemQuantification.chr13.isoforms.results -# -c star.SJ.out.tab \ - -# sqanti3_qc.py --gtf {input.gtf} {params.anno} --fl_count {input.counts} --isoAnnotLite --gff3 {params.gff} - -# ''' -# input: -# gtf = join(out_dir,'05_talon','gtf', build_id + '_talon.gtf'), -# counts = join(out_dir,'05_talon','counts', build_id + '_talon_abundance_filtered.tsv') -# params: -# rname = "11_sqanti", -# anno = anno_gtf + " " + anno_fa, -# gff = anno_gff -# container: "docker://nciccbr/ccbr_sqanti_3:latest" -# output: -# o1 = join(out_dir,'tbd') -# shell: -# ''' -# sqanti3_qc.py --gtf /data/sevillas2/rbl3/09_gtf/SIRV_talon.gtf /data/CCBR_Pipeliner/db/PipeDB/Indices/GTFs/hg38/gencode.v30.annotation.gtf \ -# /data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_30/ref.fa --fl_count /data/sevillas2/rbl3/08_counts/SIRV_talon_abundance_filtered.tsv \ -# --isoAnnotLite --gff3 /data/CCBR/projects/rbl3/dependencies/Homo_sapiens_GRCh38_Ensembl_86.gff3 -# ''' \ No newline at end of file + ''' \ No newline at end of file From a64ccfe10fefcec8f9aff1070a3ee0ae8770bd95 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Fri, 30 Jul 2021 11:42:49 -0400 Subject: [PATCH 8/9] comment out not req outputs --- workflow/Snakefile | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 2bb3e31..3b0234d 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -204,28 +204,28 @@ rule all: #input annotation files input_annotation, - #input fastq files - expand(join(fastq_dir,'{bc}.fastq'),bc=bc_list), - expand(join(out_dir,'01_fastq','{bc}.fastq.gz'),bc=bc_list), - expand(join(out_dir,'01_fastq_trimmed','{bc}.fastq.gz'),bc=bc_list), + # #input fastq files + # expand(join(fastq_dir,'{bc}.fastq'),bc=bc_list), + # expand(join(out_dir,'01_fastq','{bc}.fastq.gz'),bc=bc_list), + # expand(join(out_dir,'01_fastq_trimmed','{bc}.fastq.gz'),bc=bc_list), - #sam files - input_sam, + # #sam files + # input_sam, #bam files expand(join(out_dir,'03_bam','{bc}_{masked_flag}.sorted.bam'),bc=bc_list, masked_flag=masked_list), #qc - expand(join(out_dir, '04_qc','fastqc','{bc}_fastqc.html'), bc=bc_list), - expand(join(out_dir, '04_qc','samtools','{bc}_{masked_flag}_samstats.txt'), bc=bc_list, masked_flag=masked_list), + # expand(join(out_dir, '04_qc','fastqc','{bc}_fastqc.html'), bc=bc_list), + # expand(join(out_dir, '04_qc','samtools','{bc}_{masked_flag}_samstats.txt'), bc=bc_list, masked_flag=masked_list), join(out_dir,'04_qc','multiqc_report.html'), - expand(join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_align_len.txt'), bc=bc_list, masked_flag=masked_list), + # expand(join(out_dir, '04_qc', 'alignment','{bc}_{masked_flag}_align_len.txt'), bc=bc_list, masked_flag=masked_list), join(out_dir,'04_qc','qc_report.html'), #talon join(out_dir,'05_talon', 'talon_config.csv'), - join(out_dir,'05_talon', build_id + '.db'), - expand(join(out_dir,'05_talon','sam_labeled','{bc}_labeled.sam'),bc=bc_list), + # join(out_dir,'05_talon', build_id + '.db'), + # expand(join(out_dir,'05_talon','sam_labeled','{bc}_labeled.sam'),bc=bc_list), join(out_dir,'05_talon','annotate', build_id + '_talon_read_annot.tsv'), join(out_dir,'05_talon','counts', build_id + '_talon_summary.tsv'), join(out_dir,'05_talon','counts', build_id + '_talon_abundance.tsv'), @@ -235,10 +235,10 @@ rule all: expand(join(out_dir,'05_talon','transcript_filtered','{bc}_category_list.txt'),bc=bc_list), #flair - join(out_dir,'06_flair','merged.fastq.gz'), - join(out_dir,'06_flair','isoforms','merged_flair.isoforms.fa'), + # join(out_dir,'06_flair','merged.fastq.gz'), + # join(out_dir,'06_flair','isoforms','merged_flair.isoforms.fa'), join(out_dir,'06_flair','flair_config.csv'), - expand(join(out_dir,'06_flair','fastq','{bc}.fastq'),bc=bc_list), + # expand(join(out_dir,'06_flair','fastq','{bc}.fastq'),bc=bc_list), join(out_dir,'06_flair','counts','flair_counts_matrix.tsv'), expand(join(out_dir,'07_deg','deg_iso_{group_id}.txt'),group_id=deg_list), From b7696a54434ff72351d1474e974efbdb7742ed02 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Fri, 30 Jul 2021 11:43:53 -0400 Subject: [PATCH 9/9] update to v1.3 --- config/snakemake_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/snakemake_config.yaml b/config/snakemake_config.yaml index 07965e3..0c137ec 100644 --- a/config/snakemake_config.yaml +++ b/config/snakemake_config.yaml @@ -1,6 +1,6 @@ # Global configuration file for the pipeline #path to rbl3 directory -sourceDir: "ls /data/RBL_NCI/Pipelines/Talon_Flair/v1.0/RBL_RBL3/" +sourceDir: "ls /data/RBL_NCI/Pipelines/Talon_Flair/v1.3/RBL_RBL3/" #path to output directory outputDir: "/path/to/output/dir/"