Skip to content

Commit

Permalink
feat: Allow bam input files (#94)
Browse files Browse the repository at this point in the history
This PR allows bam files as input via the fields `bam_single` or
`bam_paired` in `units.tsv`.

---------

Co-authored-by: Johannes Köster <[email protected]>
  • Loading branch information
fxwiegand and johanneskoester authored Jun 5, 2024
1 parent 6d98a39 commit 4a1f983
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .test/config/units.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sample unit fragment_len_mean fragment_len_sd fq1 fq2
sample unit fragment_len_mean fragment_len_sd fq1 fq2 bam_single bam_paired
A 1 ngs-test-data/reads/a.chr21.1.fq ngs-test-data/reads/a.chr21.2.fq
B 1 ngs-test-data/reads/b.chr21.1.fq ngs-test-data/reads/b.chr21.2.fq
B 2 300 14 ngs-test-data/reads/b.chr21.1.fq
Expand Down
2 changes: 1 addition & 1 deletion .test/three_prime/config/units.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sample unit fragment_len_mean fragment_len_sd fq1 fq2
sample unit fragment_len_mean fragment_len_sd fq1 fq2 bam_single bam_paired
SRR8309096 u1 430 43 quant_seq_test_data/SRR8309096.fastq.gz
SRR8309094 u1 430 43 quant_seq_test_data/SRR8309094.fastq.gz
SRR8309095 u1 430 43 quant_seq_test_data/SRR8309095.fastq.gz
Expand Down
2 changes: 1 addition & 1 deletion config/units.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sample unit fragment_len_mean fragment_len_sd fq1 fq2
sample unit fragment_len_mean fragment_len_sd fq1 fq2 bam_single bam_paired
A 1 raw/a.chr21.1.fq raw/a.chr21.2.fq
B 1 raw/b.chr21.1.fq raw/b.chr21.2.fq
B 2 300 14 raw/b.chr21.1.fq
Expand Down
1 change: 1 addition & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ include: "rules/diffexp.smk"
include: "rules/diffsplice.smk"
include: "rules/enrichment.smk"
include: "rules/datavzrd.smk"
include: "rules/bam.smk"


rule all:
Expand Down
33 changes: 33 additions & 0 deletions workflow/rules/bam.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
rule bam_paired_to_fastq:
    # Turn the BAM registered for a unit in the `bam_paired` column into a
    # pair of FASTQ files under results/fastq/.
    input:
        # The BAM path is read from the `units` table: the row matching this
        # job's sample/unit wildcards, column `bam_paired`.
        lookup(
            within=units,
            query="sample == '{sample}' & unit == '{unit}'",
            cols="bam_paired",
        ),
    output:
        "results/fastq/{sample}-{unit}.1.fq.gz",
        "results/fastq/{sample}-{unit}.2.fq.gz",
    log:
        "logs/fastq/{sample}-{unit}.separate.log",
    threads: 3
    params:
        # Passed through to the wrapper unchanged.
        # NOTE(review): presumably forwarded to `samtools fastq` as its `-n`
        # option - confirm against the wrapper documentation.
        fastq="-n",
    wrapper:
        "v3.10.2/bio/samtools/fastq/separate"


rule bam_single_to_fastq:
    # Turn the BAM registered for a unit in the `bam_single` column into a
    # single FASTQ file under results/fastq/.
    input:
        # The BAM path is read from the `units` table: the row matching this
        # job's sample/unit wildcards, column `bam_single`.
        lookup(
            within=units,
            query="sample == '{sample}' & unit == '{unit}'",
            cols="bam_single",
        ),
    output:
        "results/fastq/{sample}-{unit}.fq.gz",
    log:
        "logs/fastq/{sample}-{unit}.interleaved.log",
    threads: 3
    wrapper:
        "v3.10.2/bio/samtools/fastq/interleaved"
11 changes: 9 additions & 2 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,20 @@ def get_model(wildcards):

def is_single_end(sample, unit):
    """Determine whether unit is single-end.

    A unit counts as single-end only when it has neither a second FASTQ file
    (``fq2``) nor a paired-end BAM file (``bam_paired``) in the unit sheet.

    Args:
        sample: Sample identifier, first component of the ``units`` row key.
        unit: Unit identifier, second component of the ``units`` row key.

    Returns:
        True if both ``fq2`` and ``bam_paired`` are missing for the unit,
        False otherwise.
    """
    # `units` is the module-level unit sheet, addressed by (sample, unit).
    fq2_not_present = pd.isnull(units.loc[(sample, unit), "fq2"])
    bam_paired_not_present = pd.isnull(units.loc[(sample, unit), "bam_paired"])
    # Both checks must hold: returning on `fq2` alone would classify a unit
    # that only provides a paired-end BAM as single-end.
    return fq2_not_present and bam_paired_not_present


def get_fastqs(wildcards):
"""Get raw FASTQ files from unit sheet."""
if is_single_end(wildcards.sample, wildcards.unit):
if not pd.isnull(units.loc[(wildcards.sample, wildcards.unit), "bam_single"]):
return f"results/fastq/{wildcards.sample}-{wildcards.unit}.fq.gz"
elif not pd.isnull(units.loc[(wildcards.sample, wildcards.unit), "bam_paired"]):
fqfrombam1 = f"results/fastq/{wildcards.sample}-{wildcards.unit}.1.fq.gz"
fqfrombam2 = f"results/fastq/{wildcards.sample}-{wildcards.unit}.2.fq.gz"
return [fqfrombam1, fqfrombam2]
elif is_single_end(wildcards.sample, wildcards.unit):
return units.loc[(wildcards.sample, wildcards.unit), "fq1"]
else:
u = units.loc[(wildcards.sample, wildcards.unit), ["fq1", "fq2"]].dropna()
Expand Down
8 changes: 5 additions & 3 deletions workflow/rules/quant.smk
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
rule kallisto_index:
input:
fasta="resources/transcriptome.cdna.without_poly_a.fasta"
if is_3prime_experiment
else "resources/transcriptome.cdna.fasta",
fasta=(
"resources/transcriptome.cdna.without_poly_a.fasta"
if is_3prime_experiment
else "resources/transcriptome.cdna.fasta"
),
output:
index="results/kallisto_cdna/transcripts.cdna.idx",
log:
Expand Down
11 changes: 8 additions & 3 deletions workflow/schemas/units.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,16 @@ properties:
description: unit id
fq1:
type: string
description: path to FASTQ file
description: path to FASTQ file (leave empty when using bam_single or bam_paired)
fq2:
type: string
description: path to second FASTQ file (leave empty in case of single-end)
description: path to second FASTQ file (leave empty in case of single-end or usage of bam_single or bam_paired)
bam_single:
type: string
description: path to single bam file (leave empty in case of usage of fastq files)
bam_paired:
type: string
description: path to paired bam file (leave empty in case of usage of fastq files)
required:
- sample
- unit
- fq1

0 comments on commit 4a1f983

Please sign in to comment.