Skip to content

Commit

Permalink
feat!: igvf outputs (kircherlab#129)
Browse files Browse the repository at this point in the history
* refactor: removed statistics from final barcode to oligo map

* refactor outputs

* fix scripts due to renaming headers

* fix assignment statistic due to new output

* refactor!: moving files. not attached counts are not used as well as median for scaling

* adding logs

---------

Co-authored-by: Max Schubach <[email protected]>
  • Loading branch information
visze and Max Schubach authored Oct 28, 2024
1 parent e647309 commit b112d9c
Show file tree
Hide file tree
Showing 19 changed files with 426 additions and 253 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ logs
!config/*
!resources
!resources/**
resources/**/.local
resources/**/.cache
resources/**/.ipython
!workflow
!workflow/**
!.gitattributes
Expand All @@ -27,4 +30,4 @@ mix_data
*report.html
*.simg
*results
.DS_Store
.DS_Store
18 changes: 18 additions & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,15 @@ rule all:
"results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged.combined.tsv.gz",
"results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged.combined.tsv.gz",
"results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged_barcode_assigned_counts.tsv.gz",
"results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged_barcode_assigned_counts.tsv.gz",
"results/experiments/{project}/reporter_experiment.oligo.{condition}.{assignment}.{config}.all.tsv.gz",
"results/experiments/{project}/reporter_experiment.barcode.{condition}.{assignment}.{config}.all.tsv.gz",
]
),
getOutputProjectConditionAssignmentConfigThreshold_helper(
[
"results/experiments/{project}/reporter_experiment.barcode.{condition}.{assignment}.{config}.min_oligo_threshold_{threshold}.tsv.gz",
"results/experiments/{project}/reporter_experiment.oligo.{condition}.{assignment}.{config}.min_oligo_threshold_{threshold}.tsv.gz",
]
),
# assignment statistic
Expand Down Expand Up @@ -263,6 +272,15 @@ rule all_experiments:
"results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged.combined.tsv.gz",
"results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged.combined.tsv.gz",
"results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged_barcode_assigned_counts.tsv.gz",
"results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged_barcode_assigned_counts.tsv.gz",
"results/experiments/{project}/reporter_experiment.oligo.{condition}.{assignment}.{config}.all.tsv.gz",
"results/experiments/{project}/reporter_experiment.barcode.{condition}.{assignment}.{config}.all.tsv.gz",
]
),
getOutputProjectConditionAssignmentConfigThreshold_helper(
[
"results/experiments/{project}/reporter_experiment.barcode.{condition}.{assignment}.{config}.min_oligo_threshold_{threshold}.tsv.gz",
"results/experiments/{project}/reporter_experiment.oligo.{condition}.{assignment}.{config}.min_oligo_threshold_{threshold}.tsv.gz",
]
),
# assignment statistic
Expand Down
66 changes: 58 additions & 8 deletions workflow/rules/assigned_counts.smk
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ rule assigned_counts_assignBarcodes:
script=getScript("count/merge_BC_and_assignment.py"),
output:
counts="results/experiments/{project}/assigned_counts/{assignment}/{condition}_{replicate}_{type}_final_counts.config.{config}.tsv.gz",
stats="results/experiments/{project}/statistic/assigned_counts/{assignment}/{condition}_{replicate}_{type}_{config}.statistic.tsv.gz",
statistic=temp(
"results/experiments/{project}/statistic/assigned_counts/{assignment}/{condition}_{replicate}_{type}_{config}.statistic.tsv.gz"
),
params:
name="{condition}_{replicate}_{type}",
log:
Expand All @@ -79,7 +81,7 @@ rule assigned_counts_assignBarcodes:
python {input.script} --counts {input.counts} \
--assignment {input.association} \
--output {output.counts} \
--statistic {output.stats} \
--statistic {output.statistic} \
--name {params.name} &> {log}
"""

Expand All @@ -97,7 +99,9 @@ rule assigned_counts_dna_rna_merge:
output:
counts="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_{replicate}_merged_assigned_counts.tsv.gz",
bc_counts="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_{replicate}_barcode_assigned_counts.tsv.gz",
stats="results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/{condition}_{replicate}_merged_assigned_counts.statistic.tsv.gz",
statistic=temp(
"results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/{condition}_{replicate}_merged_assigned_counts.statistic.tsv.gz"
),
params:
minRNACounts=lambda wc: config["experiments"][wc.project]["configs"][
wc.config
Expand All @@ -116,7 +120,7 @@ rule assigned_counts_dna_rna_merge:
--assignment {input.association} \
--output {output.counts} \
--bcOutput {output.bc_counts} \
--statistic {output.stats} &> {log}
--statistic {output.statistic} &> {log}
"""


Expand All @@ -137,7 +141,6 @@ rule assigned_counts_make_master_tables:
all="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged.tsv.gz",
thresh="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged.tsv.gz",
params:
cond="{condition}",
files=lambda wc: ",".join(
expand(
"results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_{replicate}_merged_assigned_counts.tsv.gz",
Expand All @@ -161,7 +164,6 @@ rule assigned_counts_make_master_tables:
shell:
"""
Rscript {input.script} \
--condition {params.cond} \
--threshold {params.thresh} \
--files {params.files} \
--replicates {params.replicates} \
Expand All @@ -185,7 +187,8 @@ rule assigned_counts_combine_replicates_barcode_output:
),
script=getScript("count/merge_replicates_barcode_counts.py"),
output:
bc_merged="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged_barcode_assigned_counts.tsv.gz",
bc_merged_thresh="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged_barcode_assigned_counts.tsv.gz",
bc_merged_all="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged_barcode_assigned_counts.tsv.gz",
params:
thresh=lambda wc: config["experiments"][wc.project]["configs"][wc.config][
"filter"
Expand Down Expand Up @@ -218,7 +221,8 @@ rule assigned_counts_combine_replicates_barcode_output:
python {input.script} {params.bc_counts} \
--threshold {params.thresh} \
{params.replicates} \
--output {output.bc_merged} &> {log}
--output-threshold {output.bc_merged_thresh} \
--output {output.bc_merged_all} &> {log}
"""


Expand Down Expand Up @@ -250,3 +254,49 @@ rule assigned_counts_combine_replicates:
{params.label_file} \
--output {output} &> {log}
"""


rule assigned_counts_copy_final_all_files:
    """
    Copy the final merged oligo and barcode count tables (all replicates,
    without the minThreshold filter) into the project's top-level folder so
    that it is clear which files to use.
    """
    conda:
        "../envs/default.yaml"
    input:
        # Plain patterns instead of constant lambdas: every wildcard used here
        # ({project}, {assignment}, {config}, {condition}) also occurs in the
        # output, so Snakemake can fill them in directly.
        all="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged.tsv.gz",
        bc_all="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged_barcode_assigned_counts.tsv.gz",
    output:
        all="results/experiments/{project}/reporter_experiment.oligo.{condition}.{assignment}.{config}.all.tsv.gz",
        bc_all="results/experiments/{project}/reporter_experiment.barcode.{condition}.{assignment}.{config}.all.tsv.gz",
    log:
        temp(
            "results/logs/assigned_counts/copy_final_all_files.{project}.{condition}.{assignment}.{config}.log"
        ),
    shell:
        # The first cp truncates the log (&>), the second appends to it (&>>).
        """
        cp {input.all} {output.all} &> {log}
        cp {input.bc_all} {output.bc_all} &>> {log}
        """


rule assigned_counts_copy_final_thresh_files:
    """
    Copy the final minThreshold-filtered oligo and barcode count tables (all
    replicates merged) into the project's top-level folder so that it is clear
    which files to use.
    """
    conda:
        "../envs/default.yaml"
    input:
        # Plain patterns instead of constant lambdas: every wildcard used here
        # ({project}, {assignment}, {config}, {condition}) also occurs in the
        # output, so Snakemake can fill them in directly.
        # NOTE(review): {threshold} only appears in the output/log names; the
        # inputs do not depend on it, so this rule itself does not verify that
        # the requested threshold matches the one used for filtering. Presumably
        # targets are always built with the configured bc_threshold - confirm.
        thresh="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged.tsv.gz",
        bc_thresh="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged_barcode_assigned_counts.tsv.gz",
    output:
        thresh="results/experiments/{project}/reporter_experiment.oligo.{condition}.{assignment}.{config}.min_oligo_threshold_{threshold}.tsv.gz",
        bc_thresh="results/experiments/{project}/reporter_experiment.barcode.{condition}.{assignment}.{config}.min_oligo_threshold_{threshold}.tsv.gz",
    log:
        temp(
            "results/logs/assigned_counts/copy_final_thresh_files.{project}.{condition}.{assignment}.{config}.{threshold}.log"
        ),
    shell:
        # The first cp truncates the log (&>), the second appends to it (&>>).
        """
        cp {input.thresh} {output.thresh} &> {log}
        cp {input.bc_thresh} {output.bc_thresh} &>> {log}
        """
2 changes: 1 addition & 1 deletion workflow/rules/assignment.smk
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ rule assignment_filter:
python {input.script} \
-m {params.min_support} -f {params.fraction} {params.unknown_other} {params.ambiguous} | \
tee >(gzip -c > {output.ambigous}) | \
awk -v "OFS=\\t" -F"\\t" '{{ if (($2 != \"ambiguous\") && ($2 != \"other\")) {{ print $0 }} }}' | \
awk -v "OFS=\\t" -F"\\t" '{{ if (($2 != \"ambiguous\") && ($2 != \"other\")) {{ print $1,$2 }} }}' | \
gzip -c > {output.final} 2> {log.err};
gzip -l {output.final} | awk 'NR==2 {{exit($2==0)}}' || {{ echo "Error: Empty barcode file {output.final}. No barcodes detected!" >> {log.err}; exit 1; }}
"""
2 changes: 1 addition & 1 deletion workflow/rules/assignment/statistic.smk
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ rule assignment_statistic_assignment:
conda:
"../../envs/r.yaml"
input:
bc="results/assignment/{assignment}/assignment_barcodes.{assignment_config}.tsv.gz",
bc="results/assignment/{assignment}/assignment_barcodes_with_ambigous.{assignment_config}.tsv.gz",
script=getScript("assignment/statistic_assignment.R"),
output:
stats="results/assignment/{assignment}/statistic/assignment.{assignment_config}.tsv.gz",
Expand Down
28 changes: 28 additions & 0 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,34 @@ def getOutputProjectConditionAssignmentConfig_helper(files):
return output


def getOutputProjectConditionAssignmentConfigThreshold_helper(files):
    """
    Expand the given file patterns over all projects of the workflow config.

    For every project, each of its conditions and each of its configs, the
    wildcards {project}, {condition}, {assignment}, {config} and {threshold}
    are filled in. {assignment} takes the values returned by
    getProjectAssignments(project); {threshold} is read from
    config["experiments"][project]["configs"][<config>]["filter"]["bc_threshold"].

    :param files: iterable of path patterns containing the wildcards above.
    :return: flat list with all expanded paths.

    A project is abandoned as soon as MissingAssignmentInConfigException is
    raised while processing it; paths collected up to that point are kept.
    """
    expanded_paths = []
    for project_name in getProjects():
        try:
            for condition_name in getConditions(project_name):
                for config_name in getConfigs(project_name):
                    # barcode threshold configured in this config's filter section
                    filter_settings = config["experiments"][project_name]["configs"][
                        config_name
                    ]["filter"]
                    bc_threshold = filter_settings["bc_threshold"]
                    for pattern in files:
                        expanded_paths.extend(
                            expand(
                                pattern,
                                project=project_name,
                                condition=condition_name,
                                assignment=getProjectAssignments(project_name),
                                config=config_name,
                                threshold=bc_threshold,
                            )
                        )
        except MissingAssignmentInConfigException:
            # skip (the remainder of) this project and go on with the next one
            continue
    return expanded_paths


def getOutputProjectAssignmentConfig_helper(files, betweenReplicates=False):
"""
Inserts {project}, {assignment} and {config} (from configs of project) from config into given file.
Expand Down
4 changes: 3 additions & 1 deletion workflow/rules/statistic/assigned_counts.smk
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ rule statistic_assigned_counts_combine_stats_dna_rna_merge:
),
script=getScript("count/merge_statistic_tables.py"),
output:
"results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/combined/{condition}_merged_assigned_counts.statistic.tsv.gz",
temp(
"results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/combined/{condition}_merged_assigned_counts.statistic.tsv.gz"
),
params:
cond="{condition}",
statistic=lambda wc: " ".join(
Expand Down
12 changes: 6 additions & 6 deletions workflow/rules/statistic/bc_overlap.smk
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ rule statistic_bc_overlap_combine_counts:
conda:
"../../envs/default.yaml"
input:
stats=lambda wc: expand(
statistic=lambda wc: expand(
"results/experiments/{{project}}/statistic/bc_overlap/counts/overlapBCandCounts.{condition}_{type}.{config}.tsv",
type=["DNA", "RNA"],
condition=getConditions(wc.project),
Expand All @@ -74,8 +74,8 @@ rule statistic_bc_overlap_combine_counts:
"""
set +o pipefail;
(
cat {input.stats[0]} | head -n 1;
for i in {input.stats}; do
cat {input.statistic[0]} | head -n 1;
for i in {input.statistic}; do
cat $i | tail -n +2
done;
) > {output} 2> {log}
Expand All @@ -86,7 +86,7 @@ rule statistic_bc_overlap_combine_assigned_counts:
conda:
"../../envs/default.yaml"
input:
stats=lambda wc: expand(
statistic=lambda wc: expand(
"results/experiments/{{project}}/statistic/bc_overlap/assigned_counts/{{assignment}}/overlapBCandCounts.{condition}_{type}.{{config}}.tsv",
type=["DNA", "RNA"],
condition=getConditions(wc.project),
Expand All @@ -111,8 +111,8 @@ rule statistic_bc_overlap_combine_assigned_counts:
"""
set +o pipefail;
(
cat {input.stats[0]} | head -n 1;
for i in {input.stats}; do
cat {input.statistic[0]} | head -n 1;
for i in {input.statistic}; do
cat $i | tail -n +2
done;
) > {output} 2> {log}
Expand Down
12 changes: 9 additions & 3 deletions workflow/rules/statistic/correlation.smk
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ rule statistic_correlation_bc_counts:
"Plot": "Ratio",
},
),
"results/experiments/{project}/statistic/barcode/{raw_or_assigned}/{condition}_{config}_barcode_correlation.tsv",
temp(
"results/experiments/{project}/statistic/barcode/{raw_or_assigned}/{condition}_{config}_barcode_correlation.tsv"
),
params:
replicates=lambda wc: ",".join(
getMergedCounts(wc.project, wc.raw_or_assigned, wc.condition, wc.config)[1]
Expand Down Expand Up @@ -287,8 +289,12 @@ rule statistic_correlation_calculate:
),
},
),
"results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/{condition}_correlation.tsv",
"results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/{condition}_correlation_minThreshold.tsv",
temp(
"results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/{condition}_correlation.tsv"
),
temp(
"results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/{condition}_correlation_minThreshold.tsv"
),
params:
cond="{condition}",
files=lambda wc: ",".join(
Expand Down
Loading

0 comments on commit b112d9c

Please sign in to comment.