diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e4e0f2fb..12e46347 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -42,6 +42,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + with: + fetch-depth: 0 - name: Formatting uses: github/super-linter@v5 env: diff --git a/.test/config/units.tsv b/.test/config/units.tsv index 8154e6ce..75d9b71c 100644 --- a/.test/config/units.tsv +++ b/.test/config/units.tsv @@ -1,4 +1,4 @@ -sample unit fragment_len_mean fragment_len_sd fq1 fq2 +sample unit fragment_len_mean fragment_len_sd fq1 fq2 bam_single bam_paired A 1 ngs-test-data/reads/a.chr21.1.fq ngs-test-data/reads/a.chr21.2.fq B 1 ngs-test-data/reads/b.chr21.1.fq ngs-test-data/reads/b.chr21.2.fq B 2 300 14 ngs-test-data/reads/b.chr21.1.fq diff --git a/.test/three_prime/config/units.tsv b/.test/three_prime/config/units.tsv index e83b4e11..0bf90874 100644 --- a/.test/three_prime/config/units.tsv +++ b/.test/three_prime/config/units.tsv @@ -1,4 +1,4 @@ -sample unit fragment_len_mean fragment_len_sd fq1 fq2 +sample unit fragment_len_mean fragment_len_sd fq1 fq2 bam_single bam_paired SRR8309096 u1 430 43 quant_seq_test_data/SRR8309096.fastq.gz SRR8309094 u1 430 43 quant_seq_test_data/SRR8309094.fastq.gz SRR8309095 u1 430 43 quant_seq_test_data/SRR8309095.fastq.gz diff --git a/CHANGELOG.md b/CHANGELOG.md index bc2b25c1..15a861a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [2.6.0](https://github.com/snakemake-workflows/rna-seq-kallisto-sleuth/compare/v2.5.5...v2.6.0) (2024-06-05) + + +### Features + +* Allow bam input files ([#94](https://github.com/snakemake-workflows/rna-seq-kallisto-sleuth/issues/94)) ([4a1f983](https://github.com/snakemake-workflows/rna-seq-kallisto-sleuth/commit/4a1f98320ab1b5f099941f3cd62acef7f861d631)) + ## [2.5.4](https://github.com/snakemake-workflows/rna-seq-kallisto-sleuth/compare/v2.5.3...v2.5.4) (2024-01-31) diff --git a/config/units.tsv 
b/config/units.tsv index 488b4271..a9c2b2ab 100644 --- a/config/units.tsv +++ b/config/units.tsv @@ -1,4 +1,4 @@ -sample unit fragment_len_mean fragment_len_sd fq1 fq2 +sample unit fragment_len_mean fragment_len_sd fq1 fq2 bam_single bam_paired A 1 raw/a.chr21.1.fq raw/a.chr21.2.fq B 1 raw/b.chr21.1.fq raw/b.chr21.2.fq B 2 300 14 raw/b.chr21.1.fq diff --git a/workflow/Snakefile b/workflow/Snakefile index 1d3c8401..276afc26 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -25,6 +25,7 @@ include: "rules/diffexp.smk" include: "rules/diffsplice.smk" include: "rules/enrichment.smk" include: "rules/datavzrd.smk" +include: "rules/bam.smk" rule all: diff --git a/workflow/rules/bam.smk b/workflow/rules/bam.smk new file mode 100644 index 00000000..f1bffd1e --- /dev/null +++ b/workflow/rules/bam.smk @@ -0,0 +1,33 @@ +rule bam_paired_to_fastq: + input: + lookup( + query="sample == '{sample}' & unit == '{unit}'", + within=units, + cols="bam_paired", + ), + output: + "results/fastq/{sample}-{unit}.1.fq.gz", + "results/fastq/{sample}-{unit}.2.fq.gz", + log: + "logs/fastq/{sample}-{unit}.separate.log", + params: + fastq="-n", + threads: 3 + wrapper: + "v3.10.2/bio/samtools/fastq/separate" + + +rule bam_single_to_fastq: + input: + lookup( + query="sample == '{sample}' & unit == '{unit}'", + within=units, + cols="bam_single", + ), + output: + "results/fastq/{sample}-{unit}.fq.gz", + log: + "logs/fastq/{sample}-{unit}.interleaved.log", + threads: 3 + wrapper: + "v3.10.2/bio/samtools/fastq/interleaved" diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 845345cb..cf6f5355 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -88,13 +88,20 @@ def get_model(wildcards): def is_single_end(sample, unit): """Determine whether unit is single-end.""" + bam_paired_not_present = pd.isnull(units.loc[(sample, unit), "bam_paired"]) fq2_not_present = pd.isnull(units.loc[(sample, unit), "fq2"]) - return fq2_not_present + return fq2_not_present and 
bam_paired_not_present def get_fastqs(wildcards): """Get raw FASTQ files from unit sheet.""" - if is_single_end(wildcards.sample, wildcards.unit): + if not pd.isnull(units.loc[(wildcards.sample, wildcards.unit), "bam_single"]): + return f"results/fastq/{wildcards.sample}-{wildcards.unit}.fq.gz" + elif not pd.isnull(units.loc[(wildcards.sample, wildcards.unit), "bam_paired"]): + fqfrombam1 = f"results/fastq/{wildcards.sample}-{wildcards.unit}.1.fq.gz" + fqfrombam2 = f"results/fastq/{wildcards.sample}-{wildcards.unit}.2.fq.gz" + return [fqfrombam1, fqfrombam2] + elif is_single_end(wildcards.sample, wildcards.unit): return units.loc[(wildcards.sample, wildcards.unit), "fq1"] else: u = units.loc[(wildcards.sample, wildcards.unit), ["fq1", "fq2"]].dropna() diff --git a/workflow/schemas/units.schema.yaml b/workflow/schemas/units.schema.yaml index 308253b0..569c8cba 100644 --- a/workflow/schemas/units.schema.yaml +++ b/workflow/schemas/units.schema.yaml @@ -10,11 +10,16 @@ properties: description: unit id fq1: type: string - description: path to FASTQ file + description: path to FASTQ file (leave empty when using bam_single or bam_paired) fq2: type: string - description: path to second FASTQ file (leave empty in case of single-end) + description: path to second FASTQ file (leave empty for single-end data or when using bam_single or bam_paired) + bam_single: + type: string + description: path to BAM file with single-end reads (leave empty when using FASTQ files) + bam_paired: + type: string + description: path to BAM file with paired-end reads (leave empty when using FASTQ files) required: - sample - unit - - fq1