From b112d9c12216e2c15c38af9ac47612b2dc05d7a5 Mon Sep 17 00:00:00 2001
From: Max <visze@users.noreply.github.com>
Date: Mon, 28 Oct 2024 14:36:58 +0100
Subject: [PATCH] feat!: igvf outputs (#129)

* refactor: removed statistics from final barcode to oligo map

* refactor outputs

* fix scripts due to renaming headers

* fix assignment statistic due to new output

* refactor!: moving files. not attched counts are not used as well as median for scaling

* adding logs

---------

Co-authored-by: Max Schubach <max.schubach@bih-charite.de>
---
 .gitignore                                    |   5 +-
 workflow/Snakefile                            |  18 ++
 workflow/rules/assigned_counts.smk            |  66 +++++++-
 workflow/rules/assignment.smk                 |   2 +-
 workflow/rules/assignment/statistic.smk       |   2 +-
 workflow/rules/common.smk                     |  28 ++++
 workflow/rules/statistic/assigned_counts.smk  |   4 +-
 workflow/rules/statistic/bc_overlap.smk       |  12 +-
 workflow/rules/statistic/correlation.smk      |  12 +-
 .../scripts/assignment/statistic_assignment.R |  82 ++++-----
 workflow/scripts/count/combine_replicates.py  |  22 ++-
 workflow/scripts/count/make_master_tables.R   | 156 +++++++++++-------
 workflow/scripts/count/merge_label.py         |  68 +++++---
 .../count/merge_replicates_barcode_counts.py  | 131 ++++++++-------
 .../count/plot_perInsertCounts_correlation.R  |  14 +-
 .../count/plot_perInsertCounts_stats.R        |  19 +--
 .../variants/correlateVariantTables.py        |   2 +-
 .../variants/generateMasterVariantTable.py    |  28 ++--
 .../scripts/variants/generateVariantTable.py  |   8 +-
 19 files changed, 426 insertions(+), 253 deletions(-)

diff --git a/.gitignore b/.gitignore
index 38d5e812..9267b83a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,9 @@ logs
 !config/*
 !resources
 !resources/**
+resources/**/.local
+resources/**/.cache
+resources/**/.ipython
 !workflow
 !workflow/**
 !.gitattributes
@@ -27,4 +30,4 @@ mix_data
 *report.html
 *.simg
 *results
-.DS_Store
\ No newline at end of file
+.DS_Store
diff --git a/workflow/Snakefile b/workflow/Snakefile
index c9eef965..6abde68b 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -134,6 +134,15 @@ rule all:
                 "results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged.combined.tsv.gz",
                 "results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged.combined.tsv.gz",
                 "results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged_barcode_assigned_counts.tsv.gz",
+                "results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged_barcode_assigned_counts.tsv.gz",
+                "results/experiments/{project}/reporter_experiment.oligo.{condition}.{assignment}.{config}.all.tsv.gz",
+                "results/experiments/{project}/reporter_experiment.barcode.{condition}.{assignment}.{config}.all.tsv.gz",
+            ]
+        ),
+        getOutputProjectConditionAssignmentConfigThreshold_helper(
+            [
+                "results/experiments/{project}/reporter_experiment.barcode.{condition}.{assignment}.{config}.min_oligo_threshold_{threshold}.tsv.gz",
+                "results/experiments/{project}/reporter_experiment.oligo.{condition}.{assignment}.{config}.min_oligo_threshold_{threshold}.tsv.gz",
             ]
         ),
         # assignment statistic
@@ -263,6 +272,15 @@ rule all_experiments:
                 "results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged.combined.tsv.gz",
                 "results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged.combined.tsv.gz",
                 "results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged_barcode_assigned_counts.tsv.gz",
+                "results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged_barcode_assigned_counts.tsv.gz",
+                "results/experiments/{project}/reporter_experiment.oligo.{condition}.{assignment}.{config}.all.tsv.gz",
+                "results/experiments/{project}/reporter_experiment.barcode.{condition}.{assignment}.{config}.all.tsv.gz",
+            ]
+        ),
+        getOutputProjectConditionAssignmentConfigThreshold_helper(
+            [
+                "results/experiments/{project}/reporter_experiment.barcode.{condition}.{assignment}.{config}.min_oligo_threshold_{threshold}.tsv.gz",
+                "results/experiments/{project}/reporter_experiment.oligo.{condition}.{assignment}.{config}.min_oligo_threshold_{threshold}.tsv.gz",
             ]
         ),
         # assignment statistic
diff --git a/workflow/rules/assigned_counts.smk b/workflow/rules/assigned_counts.smk
index ef0f1197..cb2c0699 100644
--- a/workflow/rules/assigned_counts.smk
+++ b/workflow/rules/assigned_counts.smk
@@ -67,7 +67,9 @@ rule assigned_counts_assignBarcodes:
         script=getScript("count/merge_BC_and_assignment.py"),
     output:
         counts="results/experiments/{project}/assigned_counts/{assignment}/{condition}_{replicate}_{type}_final_counts.config.{config}.tsv.gz",
-        stats="results/experiments/{project}/statistic/assigned_counts/{assignment}/{condition}_{replicate}_{type}_{config}.statistic.tsv.gz",
+        statistic=temp(
+            "results/experiments/{project}/statistic/assigned_counts/{assignment}/{condition}_{replicate}_{type}_{config}.statistic.tsv.gz"
+        ),
     params:
         name="{condition}_{replicate}_{type}",
     log:
@@ -79,7 +81,7 @@ rule assigned_counts_assignBarcodes:
         python {input.script} --counts {input.counts} \
         --assignment {input.association} \
         --output {output.counts} \
-        --statistic {output.stats} \
+        --statistic {output.statistic} \
         --name {params.name} &> {log}
         """
 
@@ -97,7 +99,9 @@ rule assigned_counts_dna_rna_merge:
     output:
         counts="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_{replicate}_merged_assigned_counts.tsv.gz",
         bc_counts="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_{replicate}_barcode_assigned_counts.tsv.gz",
-        stats="results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/{condition}_{replicate}_merged_assigned_counts.statistic.tsv.gz",
+        statistic=temp(
+            "results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/{condition}_{replicate}_merged_assigned_counts.statistic.tsv.gz"
+        ),
     params:
         minRNACounts=lambda wc: config["experiments"][wc.project]["configs"][
             wc.config
@@ -116,7 +120,7 @@ rule assigned_counts_dna_rna_merge:
         --assignment {input.association} \
         --output {output.counts} \
         --bcOutput {output.bc_counts} \
-        --statistic {output.stats} &> {log}
+        --statistic {output.statistic} &> {log}
         """
 
 
@@ -137,7 +141,6 @@ rule assigned_counts_make_master_tables:
         all="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged.tsv.gz",
         thresh="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged.tsv.gz",
     params:
-        cond="{condition}",
         files=lambda wc: ",".join(
             expand(
                 "results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_{replicate}_merged_assigned_counts.tsv.gz",
@@ -161,7 +164,6 @@ rule assigned_counts_make_master_tables:
     shell:
         """
         Rscript {input.script} \
-        --condition {params.cond} \
         --threshold {params.thresh} \
         --files {params.files} \
         --replicates {params.replicates} \
@@ -185,7 +187,8 @@ rule assigned_counts_combine_replicates_barcode_output:
         ),
         script=getScript("count/merge_replicates_barcode_counts.py"),
     output:
-        bc_merged="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged_barcode_assigned_counts.tsv.gz",
+        bc_merged_thresh="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged_barcode_assigned_counts.tsv.gz",
+        bc_merged_all="results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged_barcode_assigned_counts.tsv.gz",
     params:
         thresh=lambda wc: config["experiments"][wc.project]["configs"][wc.config][
             "filter"
@@ -218,7 +221,8 @@ rule assigned_counts_combine_replicates_barcode_output:
         python {input.script} {params.bc_counts} \
         --threshold {params.thresh} \
         {params.replicates}  \
-        --output {output.bc_merged} &> {log}
+        --output-threshold {output.bc_merged_thresh} \
+        --output {output.bc_merged_all} &> {log}
         """
 
 
@@ -250,3 +254,49 @@ rule assigned_counts_combine_replicates:
         {params.label_file} \
         --output {output}  &> {log}
         """
+
+
+rule assigned_counts_copy_final_all_files:
+    """
+    Will copy final files to the main folder so that it is creal which files to use.
+    """
+    conda:
+        "../envs/default.yaml"
+    input:
+        all=lambda wc: "results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged.tsv.gz",
+        bc_all=lambda wc: "results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_merged_barcode_assigned_counts.tsv.gz",
+    output:
+        all="results/experiments/{project}/reporter_experiment.oligo.{condition}.{assignment}.{config}.all.tsv.gz",
+        bc_all="results/experiments/{project}/reporter_experiment.barcode.{condition}.{assignment}.{config}.all.tsv.gz",
+    log:
+        temp(
+            "results/logs/assigned_counts/copy_final_all_files.{project}.{condition}.{assignment}.{config}.log"
+        ),
+    shell:
+        """
+        cp {input.all} {output.all} &> {log}
+        cp {input.bc_all} {output.bc_all} &>> {log}
+        """
+
+
+rule assigned_counts_copy_final_thresh_files:
+    """
+    Will copy final files to the main folder so that it is creal which files to use.
+    """
+    conda:
+        "../envs/default.yaml"
+    input:
+        thresh=lambda wc: "results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged.tsv.gz",
+        bc_thresh=lambda wc: "results/experiments/{project}/assigned_counts/{assignment}/{config}/{condition}_allreps_minThreshold_merged_barcode_assigned_counts.tsv.gz",
+    output:
+        thresh="results/experiments/{project}/reporter_experiment.oligo.{condition}.{assignment}.{config}.min_oligo_threshold_{threshold}.tsv.gz",
+        bc_thresh="results/experiments/{project}/reporter_experiment.barcode.{condition}.{assignment}.{config}.min_oligo_threshold_{threshold}.tsv.gz",
+    log:
+        temp(
+            "results/logs/assigned_counts/copy_final_thresh_files.{project}.{condition}.{assignment}.{config}.{threshold}.log"
+        ),
+    shell:
+        """
+        cp {input.thresh} {output.thresh} &> {log}
+        cp {input.bc_thresh} {output.bc_thresh} &>> {log}
+        """
diff --git a/workflow/rules/assignment.smk b/workflow/rules/assignment.smk
index f3d7a572..620e2f20 100644
--- a/workflow/rules/assignment.smk
+++ b/workflow/rules/assignment.smk
@@ -248,7 +248,7 @@ rule assignment_filter:
         python {input.script} \
         -m {params.min_support} -f {params.fraction} {params.unknown_other} {params.ambiguous} | \
         tee >(gzip -c > {output.ambigous}) | \
-        awk -v "OFS=\\t"  -F"\\t" '{{ if (($2 != \"ambiguous\") && ($2 != \"other\")) {{ print $0 }} }}' | \
+        awk -v "OFS=\\t"  -F"\\t" '{{ if (($2 != \"ambiguous\") && ($2 != \"other\")) {{ print $1,$2 }} }}' | \
         gzip -c > {output.final} 2> {log.err};
         gzip -l {output.final} | awk 'NR==2 {{exit($2==0)}}' || {{ echo "Error: Empty barcode file {output.final}. No barcodes detected!" >> {log.err}; exit 1; }}
         """
diff --git a/workflow/rules/assignment/statistic.smk b/workflow/rules/assignment/statistic.smk
index 35b6c64f..3af3cfad 100644
--- a/workflow/rules/assignment/statistic.smk
+++ b/workflow/rules/assignment/statistic.smk
@@ -48,7 +48,7 @@ rule assignment_statistic_assignment:
     conda:
         "../../envs/r.yaml"
     input:
-        bc="results/assignment/{assignment}/assignment_barcodes.{assignment_config}.tsv.gz",
+        bc="results/assignment/{assignment}/assignment_barcodes_with_ambigous.{assignment_config}.tsv.gz",
         script=getScript("assignment/statistic_assignment.R"),
     output:
         stats="results/assignment/{assignment}/statistic/assignment.{assignment_config}.tsv.gz",
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index c0615c9b..63be7717 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -374,6 +374,34 @@ def getOutputProjectConditionAssignmentConfig_helper(files):
     return output
 
 
+def getOutputProjectConditionAssignmentConfigThreshold_helper(files):
+    """
+    Inserts {project}, {condition}, {assignment} {config} (from configs of project) and Threshold from config into given file.
+    """
+    output = []
+    projects = getProjects()
+    for project in projects:
+        try:
+            conditions = getConditions(project)
+            for condition in conditions:
+                for conf in getConfigs(project):
+                    threshold = config["experiments"][project]["configs"][conf][
+                        "filter"
+                    ]["bc_threshold"]
+                    for file in files:
+                        output += expand(
+                            file,
+                            project=project,
+                            condition=condition,
+                            assignment=getProjectAssignments(project),
+                            config=conf,
+                            threshold=threshold,
+                        )
+        except MissingAssignmentInConfigException:
+            continue
+    return output
+
+
 def getOutputProjectAssignmentConfig_helper(files, betweenReplicates=False):
     """
     Inserts {project}, {assignment} and {config} (from configs of project) from config into given file.
diff --git a/workflow/rules/statistic/assigned_counts.smk b/workflow/rules/statistic/assigned_counts.smk
index 9a32af44..15ae1ad7 100644
--- a/workflow/rules/statistic/assigned_counts.smk
+++ b/workflow/rules/statistic/assigned_counts.smk
@@ -86,7 +86,9 @@ rule statistic_assigned_counts_combine_stats_dna_rna_merge:
         ),
         script=getScript("count/merge_statistic_tables.py"),
     output:
-        "results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/combined/{condition}_merged_assigned_counts.statistic.tsv.gz",
+        temp(
+            "results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/combined/{condition}_merged_assigned_counts.statistic.tsv.gz"
+        ),
     params:
         cond="{condition}",
         statistic=lambda wc: " ".join(
diff --git a/workflow/rules/statistic/bc_overlap.smk b/workflow/rules/statistic/bc_overlap.smk
index 77e01cb8..7ac65bd1 100644
--- a/workflow/rules/statistic/bc_overlap.smk
+++ b/workflow/rules/statistic/bc_overlap.smk
@@ -50,7 +50,7 @@ rule statistic_bc_overlap_combine_counts:
     conda:
         "../../envs/default.yaml"
     input:
-        stats=lambda wc: expand(
+        statistic=lambda wc: expand(
             "results/experiments/{{project}}/statistic/bc_overlap/counts/overlapBCandCounts.{condition}_{type}.{config}.tsv",
             type=["DNA", "RNA"],
             condition=getConditions(wc.project),
@@ -74,8 +74,8 @@ rule statistic_bc_overlap_combine_counts:
         """
         set +o pipefail;
         (
-            cat {input.stats[0]} | head -n 1;
-            for i in {input.stats}; do
+            cat {input.statistic[0]} | head -n 1;
+            for i in {input.statistic}; do
                 cat $i | tail -n +2
             done;
         ) > {output} 2> {log}
@@ -86,7 +86,7 @@ rule statistic_bc_overlap_combine_assigned_counts:
     conda:
         "../../envs/default.yaml"
     input:
-        stats=lambda wc: expand(
+        statistic=lambda wc: expand(
             "results/experiments/{{project}}/statistic/bc_overlap/assigned_counts/{{assignment}}/overlapBCandCounts.{condition}_{type}.{{config}}.tsv",
             type=["DNA", "RNA"],
             condition=getConditions(wc.project),
@@ -111,8 +111,8 @@ rule statistic_bc_overlap_combine_assigned_counts:
         """
         set +o pipefail;
         (
-            cat {input.stats[0]} | head -n 1;
-            for i in {input.stats}; do
+            cat {input.statistic[0]} | head -n 1;
+            for i in {input.statistic}; do
                 cat $i | tail -n +2
             done;
         ) > {output} 2> {log}
diff --git a/workflow/rules/statistic/correlation.smk b/workflow/rules/statistic/correlation.smk
index 4175ec4a..675c8b6a 100644
--- a/workflow/rules/statistic/correlation.smk
+++ b/workflow/rules/statistic/correlation.smk
@@ -45,7 +45,9 @@ rule statistic_correlation_bc_counts:
                 "Plot": "Ratio",
             },
         ),
-        "results/experiments/{project}/statistic/barcode/{raw_or_assigned}/{condition}_{config}_barcode_correlation.tsv",
+        temp(
+            "results/experiments/{project}/statistic/barcode/{raw_or_assigned}/{condition}_{config}_barcode_correlation.tsv"
+        ),
     params:
         replicates=lambda wc: ",".join(
             getMergedCounts(wc.project, wc.raw_or_assigned, wc.condition, wc.config)[1]
@@ -287,8 +289,12 @@ rule statistic_correlation_calculate:
                 ),
             },
         ),
-        "results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/{condition}_correlation.tsv",
-        "results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/{condition}_correlation_minThreshold.tsv",
+        temp(
+            "results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/{condition}_correlation.tsv"
+        ),
+        temp(
+            "results/experiments/{project}/statistic/assigned_counts/{assignment}/{config}/{condition}_correlation_minThreshold.tsv"
+        ),
     params:
         cond="{condition}",
         files=lambda wc: ",".join(
diff --git a/workflow/scripts/assignment/statistic_assignment.R b/workflow/scripts/assignment/statistic_assignment.R
index 9beb9092..e699d586 100644
--- a/workflow/scripts/assignment/statistic_assignment.R
+++ b/workflow/scripts/assignment/statistic_assignment.R
@@ -3,38 +3,38 @@ library(optparse)
 
 
 option_list <- list(
-    make_option(c("-i", "--input"),
-        type = "character",
-        help = "Assignment parcode file"
-    ),
-    make_option(c("-p", "--plot"),
-        type = "character",
-        help = "histogram plot"
-    ),
-    make_option(c("-s", "--statistic"),
-        type = "character",
-        help = "Statistics"
-    )
+  make_option(c("-i", "--input"),
+    type = "character",
+    help = "Assignment parcode file"
+  ),
+  make_option(c("-p", "--plot"),
+    type = "character",
+    help = "histogram plot"
+  ),
+  make_option(c("-s", "--statistic"),
+    type = "character",
+    help = "Statistics"
+  )
 )
 
 arguments <- parse_args(OptionParser(option_list = option_list), positional_arguments = TRUE)
 opt <- arguments$options
 
 if (!"input" %in% names(opt)) {
-    stop("--input parameter must be provided. See script usage (--help)")
+  stop("--input parameter must be provided. See script usage (--help)")
 }
 if (!"plot" %in% names(opt)) {
-    stop("--plot parameter must be provided. See script usage (--help)")
+  stop("--plot parameter must be provided. See script usage (--help)")
 }
 if (!"statistic" %in% names(opt)) {
-    stop("--statistic parameter must be provided. See script usage (--help)")
+  stop("--statistic parameter must be provided. See script usage (--help)")
 }
 
 bcs <- read_delim(opt$input, delim = "\t", escape_double = FALSE, col_names = FALSE, trim_ws = TRUE)
 
 bcs <- bcs %>%
-    filter(!(X2 %in% c("other", "ambiguous"))) %>%
-    separate(X4, c("support", "all"), sep = "/", convert = TRUE)
+  filter(!(X2 %in% c("other", "ambiguous"))) %>%
+  separate(X4, c("support", "all"), sep = "/", convert = TRUE)
 
 
 
@@ -42,13 +42,13 @@ avg_support <- mean(bcs$support / bcs$all)
 median_support <- median(bcs$support / bcs$all)
 
 bcs <- bcs %>%
-    group_by(X2) %>%
-    count() %>%
-    ungroup()
+  group_by(X2) %>%
+  count() %>%
+  ungroup()
 
 oligos_support <- bcs %>%
-    filter(n >= 15) %>%
-    nrow()
+  filter(n >= 15) %>%
+  nrow()
 
 output <- data.frame(Value = c(avg_support, median_support, oligos_support))
 
@@ -57,23 +57,25 @@ rownames(output) <- c("Average support of BC for Oligo:", "Median support of BC
 write.table(output, file = opt$statistic, quote = FALSE, sep = "\t", row.names = TRUE, col.names = FALSE)
 
 p <- ggplot(bcs, aes(n)) +
-    geom_histogram(bins = 150) +
-    xlab("Number of BCs") +
-    ylab("Number of Oligos") +
-    geom_vline(aes(xintercept = mean(bcs$n)), col = "red", linewidth = 2) +
-    geom_text(
-        aes(
-            label = sprintf("\nmean = %.2f", mean(bcs$n)),
-            x = Inf, y = Inf
-        ), hjust = 1.1, vjust = 1.3, colour = "red"
-    ) +
-    geom_vline(aes(xintercept = median(bcs$n)), col = "blue", linewidth = 2) +
-    geom_text(
-        aes(
-            label = sprintf("median = %.2f\n", median(bcs$n)),
-            x = Inf, y = Inf
-        ), hjust = 1.1, vjust = 1.3, colour = "blue"
-    ) +
-    theme_bw()
+  geom_histogram(bins = 150) +
+  xlab("Number of BCs") +
+  ylab("Number of Oligos") +
+  geom_vline(aes(xintercept = mean(bcs$n)), col = "red", linewidth = 2) +
+  geom_text(
+    aes(
+      label = sprintf("\nmean = %.2f", mean(bcs$n)),
+      x = Inf, y = Inf
+    ),
+    hjust = 1.1, vjust = 1.3, colour = "red"
+  ) +
+  geom_vline(aes(xintercept = median(bcs$n)), col = "blue", linewidth = 2) +
+  geom_text(
+    aes(
+      label = sprintf("median = %.2f\n", median(bcs$n)),
+      x = Inf, y = Inf
+    ),
+    hjust = 1.1, vjust = 1.3, colour = "blue"
+  ) +
+  theme_bw()
 
 ggsave(opt$plot, p, type = "cairo", dpi = 300)
diff --git a/workflow/scripts/count/combine_replicates.py b/workflow/scripts/count/combine_replicates.py
index 7e3e69d5..d5af1c44 100755
--- a/workflow/scripts/count/combine_replicates.py
+++ b/workflow/scripts/count/combine_replicates.py
@@ -42,22 +42,21 @@ def cli(input_file, label_file, output_file):
     df_out.to_csv(output_file, sep="\t", index=False)
 
 def combine_replicates(label_file, df_allreps, total_dna_counts, total_rna_counts):
-    df_allreps = df_allreps.groupby(by=["condition", "name"]).aggregate(
+    df_allreps = df_allreps.groupby(by=["oligo_name"]).aggregate(
         {
             "replicate": "count",
             "dna_counts": ["sum", "mean"],
             "rna_counts": ["sum", "mean"],
             "dna_normalized": "mean",
             "rna_normalized": "mean",
-            "ratio": "mean",
-            "log2": "mean",
-            "n_obs_bc": ["sum", "mean"],
+            "log2FoldChange": "mean",
+            "n_bc": ["sum", "mean"],
         }
     )
     
     df_allreps = df_allreps.reset_index()
-    df_out = df_allreps.iloc[:, 0:2]
-    df_out.columns = ["condition", "name"]
+    df_out = df_allreps.iloc[:, 0:1]
+    df_out.columns = ["oligo_name"]
 
     df_out["replicates"] = df_allreps.replicate["count"]
 
@@ -70,20 +69,19 @@ def combine_replicates(label_file, df_allreps, total_dna_counts, total_rna_count
     df_out["dna_normalized"] = df_out["dna_counts"] / total_dna_counts * scaling
     df_out["rna_normalized"] = df_out["rna_counts"] / total_rna_counts * scaling
 
-    df_out["ratio"] = df_out["rna_normalized"] / df_out["dna_normalized"]
-    df_out["log2"] = np.log2(df_out.ratio)
+    df_out["log2FoldChange"] = np.log2(df_out["rna_normalized"] / df_out["dna_normalized"])
 
     df_out["mean_dna_counts"] = df_allreps.dna_counts["mean"]
     df_out["mean_rna_counts"] = df_allreps.rna_counts["mean"]
     df_out["mean_dna_normalized"] = df_allreps.dna_normalized["mean"]
     df_out["mean_rna_normalized"] = df_allreps.rna_normalized["mean"]
-    df_out["mean_ratio"] = df_allreps.ratio["mean"]
-    df_out["mean_log2"] = df_allreps.log2["mean"]
+    
+    df_out["mean_log2FoldChange"] = df_allreps.log2FoldChange["mean"]
 
 
     if label_file:
-        df_labels = pd.read_csv(label_file, sep="\t", names=["name", "Label"])
-        df_out = df_out.join(df_labels.set_index("name"), on="name")
+        df_labels = pd.read_csv(label_file, sep="\t", names=["oligo_name", "Label"])
+        df_out = df_out.join(df_labels.set_index("oligo_name"), on="oligo_name")
     return df_out
 
 
diff --git a/workflow/scripts/count/make_master_tables.R b/workflow/scripts/count/make_master_tables.R
index 569337f9..03195627 100644
--- a/workflow/scripts/count/make_master_tables.R
+++ b/workflow/scripts/count/make_master_tables.R
@@ -1,4 +1,4 @@
-#Adapted from Vikram Agarwal by Gracie Gordon
+# Adapted from Vikram Agarwal by Gracie Gordon and Max Schubach
 
 
 library(dplyr)
@@ -6,30 +6,39 @@ library(optparse)
 
 
 option_list <- list(
-    make_option(c("-c", "--condition"), type="character",
-        help="Condition name"),
-    make_option(c("-l", "--label"), type="character",
-        help="Label file. (optional)"),
-    make_option(c("-f", "--files"), type="character",
-        help="Comma separated input files of assigned counts"),
-    make_option(c("-r", "--replicates"), type="character",
-        help="Comma separated name of the replicates (same order than files)"),
-    make_option(c("-a", "--output-all"), type="character",
-        help="Output file of master table. No BC threshold filter (optional)."),
-    make_option(c("-o", "--output"), type="character",
-        help="Output file of master table filtered by --threshold"),
-    make_option(c("-s", "--statistic"), type="character",
-        help="Statistic of master table and filtered master table"),
-    make_option(c("-t", "--threshold"), type="integer", default=10,
-        help="Number of required barcodes (default 10)")
+  make_option(c("-l", "--label"),
+    type = "character",
+    help = "Label file. (optional)"
+  ),
+  make_option(c("-f", "--files"),
+    type = "character",
+    help = "Comma separated input files of assigned counts"
+  ),
+  make_option(c("-r", "--replicates"),
+    type = "character",
+    help = "Comma separated name of the replicates (same order than files)"
+  ),
+  make_option(c("-a", "--output-all"),
+    type = "character",
+    help = "Output file of master table. No BC threshold filter (optional)."
+  ),
+  make_option(c("-o", "--output"),
+    type = "character",
+    help = "Output file of master table filtered by --threshold"
+  ),
+  make_option(c("-s", "--statistic"),
+    type = "character",
+    help = "Statistic of master table and filtered master table"
+  ),
+  make_option(c("-t", "--threshold"),
+    type = "integer", default = 10,
+    help = "Number of required barcodes (default 10)"
+  )
 )
 
-arguments <- parse_args(OptionParser(option_list=option_list), positional_arguments=TRUE)
+arguments <- parse_args(OptionParser(option_list = option_list), positional_arguments = TRUE)
 opt <- arguments$options
 
-if (!"condition" %in% names(opt)) {
-  stop("--condition parameter must be provided. See script usage (--help)")
-}
 if (!"files" %in% names(opt)) {
   stop("--files parameter must be provided. See script usage (--help)")
 }
@@ -44,69 +53,88 @@ if (!"statistic" %in% names(opt)) {
 }
 
 
-
-#exp=args[1]
-cond=opt$cond
-thresh=opt$threshold
-files <- strsplit(opt$files,",")[[1]]
-replicates=strsplit(opt$replicates,",")[[1]]
+# exp=args[1]
+cond <- opt$cond
+thresh <- opt$threshold
+files <- strsplit(opt$files, ",")[[1]]
+replicates <- strsplit(opt$replicates, ",")[[1]]
 if (length(files) != length(replicates)) {
-    stop("Number of input files must be euqal to number of replicates")
+  stop("Number of input files must be euqal to number of replicates")
 }
 
-outfile=opt$output
-avg_outfile=opt$statistic
-#out=args[3]
+outfile <- opt$output
+avg_outfile <- opt$statistic
 
-##MAKE MASTER TABLE
-masterTable = data.frame()
-for (i in 1:length(files)){
-   file=files[i]
-   rep=replicates[i]
+precision <- 4
 
-   table = as.data.frame(read.table(file,header=TRUE),stringsAsFactors=FALSE)
-   table$condition = cond
-   table$replicate = rep
-
-   masterTable <- masterTable %>% bind_rows(table)
-}
+## MAKE MASTER TABLE
+master_table <- data.frame()
+for (i in seq_len(length(files))) {
+  file <- files[i]
+  rep <- replicates[i]
 
-masterTable <- masterTable %>% group_by(condition,replicate) %>%
-                select(condition, replicate, name, dna_counts, rna_counts, dna_normalized, rna_normalized, ratio, log2, n_obs_bc)
+  table <- as.data.frame(read.table(file, header = TRUE), stringsAsFactors = FALSE)
+  table$replicate <- rep
 
-masterTableFiltered <- masterTable %>%
-                      filter(n_obs_bc >= thresh) %>%
-                      mutate(ratio = ratio/median(ratio), log2 = round(log2(ratio),8))
-masterTable <- masterTable %>% mutate(ratio = ratio/median(ratio), log2 = round(log2(ratio),8))
+  master_table <- master_table %>% bind_rows(table)
+}
 
-writeFile <- function(file,table) {
+master_table <- master_table %>%
+  group_by(replicate) %>%
+  filter(!oligo_name %in% c("no_BC")) %>%
+  mutate(
+    dna_normalized = round(dna_normalized, precision),
+    rna_normalized = round(rna_normalized, precision),
+    ratio = round(ratio, precision),
+    log2FoldChange = round(log2FoldChange, precision)
+  )
+
+master_table_filtered <- master_table %>%
+  filter(n_bc >= thresh)
+# TODO use this? Maybe not because it normalizes across all replicates
+# master_table_filtered %>% mutate(ratio = ratio / median(ratio))
+
+# TODO use this? Maybe not because it normalizes across all replicates
+# master_table %>% mutate(ratio = ratio / median(ratio))
+
+write_file <- function(file, table) {
   gz <- gzfile(file, "w")
-  write.table(table,file=gz,quote=FALSE,sep='\t',row.names=FALSE,col.names=TRUE )
+  write.table(
+    table,
+    file = gz, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE
+  )
   close(gz)
 }
 
-writeFile(outfile,masterTableFiltered)
+write_file(
+  outfile,
+  master_table_filtered %>%
+    select(replicate, oligo_name, dna_counts, rna_counts, dna_normalized, rna_normalized, log2FoldChange, n_bc)
+)
 
 if ("output-all" %in% names(opt)) {
-  writeFile(opt$`output-all`, masterTable)
+  write_file(
+    opt$`output-all`,
+    master_table %>%
+      select(replicate, oligo_name, dna_counts, rna_counts, dna_normalized, rna_normalized, log2FoldChange, n_bc)
+  )
 }
 
-##MAKE AVERAGED ACROSS REPLICATES
-makeAverageAcrossReplicates <- function(table, name) {
+## MAKE AVERAGED ACROSS REPLICATES
+make_average_across_replicates <- function(table, name) {
   avg <- table %>% summarize(
-                      mean_ratio=mean(ratio),
-                      mean_log2=log2(mean(ratio)),
-                      mean_n_obs_bc=mean(n_obs_bc)
-                    )
-  avg$BC_filter = name
+    mean_ratio = mean(ratio),
+    mean_log2FoldChange = mean(log2FoldChange),
+    mean_n_bc = mean(n_bc)
+  )
+  avg$BC_filter <- name
 
   return(avg)
 }
 
-all_avg <- makeAverageAcrossReplicates(masterTable, 'None') %>%
-            bind_rows(makeAverageAcrossReplicates(masterTableFiltered, paste0("n_obs_bc >= ", thresh)))
-
-writeFile(avg_outfile, all_avg)
+all_avg <- make_average_across_replicates(master_table, "None") %>%
+  bind_rows(make_average_across_replicates(master_table_filtered, paste0("n_bc >= ", thresh)))
 
+write_file(avg_outfile, all_avg)
 
-print('done')
+print("done")
diff --git a/workflow/scripts/count/merge_label.py b/workflow/scripts/count/merge_label.py
index 2a0b16d3..3117f1d3 100644
--- a/workflow/scripts/count/merge_label.py
+++ b/workflow/scripts/count/merge_label.py
@@ -39,6 +39,21 @@
     type=int,
     help="Minimum number of DNA counts required per barcode. If 0 pesudocounts are used.",
 )
+@click.option(
+    "--inlclude-not-assigned-for-normalization/--exclude-not-assigned-for-normalization",
+    "normalize_with_not_assigned",
+    default=False,
+    show_default=True,
+    help="Use barcodes that are not assigned for normalization.",
+)
+@click.option(
+    "--scaling",
+    "scaling",
+    default=10**6,
+    show_default=True,
+    type=float,
+    help="Scaling parameter. Usually counts per million (10**6).",
+)
 @click.option(
     "--output",
     "output_file",
@@ -64,6 +79,8 @@ def cli(
     assignment_file,
     minRNACounts,
     minDNACounts,
+    normalize_with_not_assigned,
+    scaling,
     output_file,
     statistic_file,
     bc_output_file,
@@ -97,19 +114,19 @@ def cli(
         header=None,
         usecols=[0, 1],
         sep="\t",
-        names=["Barcode", "Oligo"],
+        names=["barcode", "oligo"],
     )
     # drop duplicated barcodes!!!
-    assoc.drop_duplicates("Barcode", keep=False, inplace=True)
-    assoc.set_index("Barcode", inplace=True)
+    assoc.drop_duplicates("barcode", keep=False, inplace=True)
+    assoc.set_index("barcode", inplace=True)
 
-    statistic["oligos design"] = assoc.Oligo.nunique()
+    statistic["oligos design"] = assoc.oligo.nunique()
     statistic[["barcodes design"]] = assoc.shape[0]
 
     # get count df
     click.echo("Read count file...")
     counts = pd.read_csv(
-        counts_file, sep="\t", header=None, names=["Barcode", "dna_count", "rna_count"]
+        counts_file, sep="\t", header=None, names=["barcode", "dna_count", "rna_count"]
     )
     # filter
     counts = counts[
@@ -120,11 +137,11 @@ def cli(
 
     # fill in labels from dictionary
     click.echo("Combine assignment with count file...")
-    counts = pd.merge(assoc, counts, how="right", on="Barcode")
-    counts.Oligo.fillna("no_BC", inplace=True)
-    counts.rename(columns={"Oligo": "name"}, inplace=True)
-    # counts = counts[['name', 'Barcode', 'dna_count', 'rna_count']]
-    statistic[["unknown barcodes dna/rna"]] = counts[counts.name == "no_BC"].shape[0]
+    counts = pd.merge(assoc, counts, how="right", on="barcode")
+    counts.oligo.fillna("no_BC", inplace=True)
+    counts.rename(columns={"oligo": "oligo_name"}, inplace=True)
+    # counts = counts[['oligo_name', 'barcode', 'dna_count', 'rna_count']]
+    statistic[["unknown barcodes dna/rna"]] = counts[counts.oligo_name == "no_BC"].shape[0]
 
     statistic["matched barcodes"] = (
         statistic["barcodes dna/rna"] - statistic["unknown barcodes dna/rna"]
@@ -137,14 +154,18 @@ def cli(
         counts.to_csv(bc_output_file, index=False, sep="\t", compression="gzip")
 
     # remove Barcorde. Not needed anymore
-    counts.drop(["Barcode"], axis=1, inplace=True)
+    counts.drop(["barcode"], axis=1, inplace=True)
 
     # number of DNA and RNA counts
     total_dna_counts = sum(counts["dna_count"])
     total_rna_counts = sum(counts["rna_count"])
 
+    unknown_dna_counts = sum(counts[counts.oligo_name == "no_BC"]["dna_count"])
+    unknown_rna_counts = sum(counts[counts.oligo_name == "no_BC"]["rna_count"])
+
     statistic["total dna counts"] = total_dna_counts
     statistic["total rna counts"] = total_rna_counts
+
     statistic["avg dna counts per bc"] = (
         statistic["total dna counts"] / statistic["barcodes dna/rna"]
     )
@@ -152,9 +173,18 @@ def cli(
         statistic["total rna counts"] / statistic["barcodes dna/rna"]
     )
 
-    grouped_label = counts.groupby("name").agg(
+    # group by oligo name and inlcude or exclude not assigned barcodes depending on flag given
+    if normalize_with_not_assigned:
+        grouped_label = counts.groupby("oligo_name").agg(
         {"dna_count": ["sum", "count"], "rna_count": ["sum", "count"]}
-    )
+        )
+    else:
+        grouped_label = counts[counts.oligo_name != "no_BC"].groupby("oligo_name").agg(
+        {"dna_count": ["sum", "count"], "rna_count": ["sum", "count"]}
+        )
+        total_dna_counts -= unknown_dna_counts
+        total_rna_counts -= unknown_rna_counts
+    
     grouped_label.reset_index(inplace=True)
 
     # add pseudo BC counts to total number of counts if needed
@@ -169,18 +199,16 @@ def cli(
 
     click.echo(grouped_label.head())
 
-    output["name"] = grouped_label["name"]
+    output["oligo_name"] = grouped_label["oligo_name"]
     output["dna_counts"] = grouped_label.dna_count["sum"]
     output["rna_counts"] = grouped_label.rna_count["sum"]
 
-    statistic["oligos dna/rna"] = len(grouped_label) - 1
+    statistic["oligos dna/rna"] = len(grouped_label) -1 if normalize_with_not_assigned else len(grouped_label)
     statistic["avg dna/rna barcodes per oligo"] = (
-        sum(grouped_label[grouped_label["name"] != "no_BC"].dna_count["count"])
+        sum(grouped_label[grouped_label["oligo_name"] != "no_BC"].dna_count["count"])
         / statistic["oligos dna/rna"]
     )
 
-    # scaling = 10**min([len(str(total_dna_counts))-1,len(str(total_rna_counts))-1])
-    scaling = 10**6
 
     output["dna_normalized"] = (
         (
@@ -207,9 +235,9 @@ def cli(
     )
 
     output["ratio"] = output["rna_normalized"] / output["dna_normalized"]
-    output["log2"] = np.log2(output.ratio)
+    output["log2FoldChange"] = np.log2(output.ratio)
 
-    output["n_obs_bc"] = grouped_label.dna_count["count"]
+    output["n_bc"] = grouped_label.dna_count["count"]
 
     click.echo(output_file)
 
diff --git a/workflow/scripts/count/merge_replicates_barcode_counts.py b/workflow/scripts/count/merge_replicates_barcode_counts.py
index a5f79056..a5495264 100644
--- a/workflow/scripts/count/merge_replicates_barcode_counts.py
+++ b/workflow/scripts/count/merge_replicates_barcode_counts.py
@@ -29,82 +29,93 @@
 )
 @click.option(
     "--output",
+    "output_threshold_file",
+    required=True,
+    type=click.Path(writable=True),
+    help="Output file.",
+)
+@click.option(
+    "--output-threshold",
     "output_file",
     required=True,
     type=click.Path(writable=True),
     help="Output file.",
 )
-def cli(counts_files, bc_thresh, replicates, output_file):
-	"""
-	Merge the associated barcode count files of all replicates.
-	"""
-
-	# ensure there are as many replicates as there are files
-	if len(replicates) != len(counts_files):
-		raise (
-			click.BadParameter(
-				"Number of replicates ({}) doesn't equal the number of files ({}).".format(
-					len(replicates), len(counts_files)
-				)
-			)
-		)
+def cli(counts_files, bc_thresh, replicates, output_threshold_file, output_file):
+    """
+    Merge the associated barcode count files of all replicates.
+    """
 
-	# check if every file exists
-	for file in counts_files:
-		if not os.path.exists(file):
-			raise (click.BadParameter("{}: file not found".format(file)))
+    # ensure there are as many replicates as there are files
+    if len(replicates) != len(counts_files):
+        raise (
+            click.BadParameter(
+                "Number of replicates ({}) doesn't equal the number of files ({}).".format(
+                    len(replicates), len(counts_files)
+                )
+            )
+        )
 
-	all_reps = []
-	for file in counts_files:
-		curr_rep = -1
-		# find the replicate name of the current file
-		for rep in replicates:
-			if rep in os.path.basename(file).split("_")[1]:
-				curr_rep = rep
-				break
-		if curr_rep == -1:
-			raise (click.BadParameter("{}: incorrect file".format(file)))
-		df = pd.read_csv(file, sep="\t")
-		df['replicate'] = curr_rep
-		all_reps.append(df)
+    # check if every file exists
+    for file in counts_files:
+        if not os.path.exists(file):
+            raise (click.BadParameter("{}: file not found".format(file)))
 
-	df = pd.concat(all_reps)
-	df = df[df["name"] != "no_BC"]
+    all_reps = []
+    for file in counts_files:
+        curr_rep = -1
+        # find the replicate name of the current file
+        for rep in replicates:
+            if rep in os.path.basename(file).split("_")[1]:
+                curr_rep = rep
+                break
+        if curr_rep == -1:
+            raise (click.BadParameter("{}: incorrect file".format(file)))
+        df = pd.read_csv(file, sep="\t")
+        df['replicate'] = curr_rep
+        all_reps.append(df)
 
-	# only keep oligo's with a number of barcodes of at least the given threshold
-	df_filtered = df.groupby(["name", "replicate"]).filter(lambda x: len(x) >= bc_thresh)
+    df = pd.concat(all_reps)
+    df = df[df["oligo_name"] != "no_BC"]
 
-	# pivot table to make a dna and rna count column for every replicate
-	df_filtered = df_filtered.pivot_table(
-		values=["dna_count", "rna_count"],
-		index=["Barcode", "name"],
-		columns="replicate",
-		aggfunc='first'
-	)
-	df_filtered = df_filtered.sort_values("name")
+    def pivot_table(df, replicates):
+        # pivot table to make a dna and rna count column for every replicate
+        df = df.pivot_table(
+            values=["dna_count", "rna_count"],
+            index=["barcode", "oligo_name"],
+            columns="replicate",
+            aggfunc='first'
+        )
+        df = df.sort_values("oligo_name")
 
 
-	# order columns to have dna then rna count of each replicate
-	col_order = sum(
-		[
-			["dna_count_" + rep, "rna_count_" + rep]
-			for rep in replicates
-		],
-		[],
-	)
+        # order columns to have dna then rna count of each replicate
+        col_order = sum(
+            [
+                ["dna_count_" + rep, "rna_count_" + rep]
+                for rep in replicates
+            ],
+            [],
+        )
 
-	df_filtered = df_filtered.reset_index()
+        df = df.reset_index()
 
-	df_filtered.columns = ['_'.join(col).strip() if col[1] else col[0] for col in df_filtered.columns.values]
-		
-	df_filtered = df_filtered[["Barcode", "name"] + col_order]
+        df.columns = ['_'.join(col).strip() if col[1] else col[0] for col in df.columns.values]
+            
+        df = df[["barcode", "oligo_name"] + col_order]
 
-	for col in col_order:
-		df_filtered[col] = df_filtered[col].astype('Int64')
+        for col in col_order:
+            df[col] = df[col].astype('Int32')
+        
+        return df
 
-	# write to output file
-	df_filtered.to_csv(output_file, sep="\t", index=False, compression="gzip")
+    # only keep oligo's with a number of barcodes of at least the given threshold
+    df_filtered = df.groupby(["oligo_name", "replicate"]).filter(lambda x: len(x) >= bc_thresh)
+    
+    # write to output file
+    pivot_table(df_filtered,replicates).to_csv(output_threshold_file, sep="\t", index=False, compression="gzip")
+    pivot_table(df,replicates).to_csv(output_file, sep="\t", index=False, compression="gzip")
 
 
 if __name__ == "__main__":
-	cli()
+    cli()
diff --git a/workflow/scripts/count/plot_perInsertCounts_correlation.R b/workflow/scripts/count/plot_perInsertCounts_correlation.R
index 6042e2d1..9e07dd73 100644
--- a/workflow/scripts/count/plot_perInsertCounts_correlation.R
+++ b/workflow/scripts/count/plot_perInsertCounts_correlation.R
@@ -64,7 +64,7 @@ if ("label" %in% names(opt)) {
     header = TRUE,
     stringsAsFactors = FALSE
   ))
-  colnames(label_f) <- c("name", "label")
+  colnames(label_f) <- c("oligo_name", "label")
   use_labels <- TRUE
 } else {
   use_labels <- FALSE
@@ -281,7 +281,7 @@ read_data <- function(file) {
     header = TRUE,
     stringsAsFactors = FALSE
   ) %>%
-    filter(name != "no_BC") %>%
+    filter(oligo_name != "no_BC") %>%
     mutate(
       dna_normalized_log2 = log2(dna_normalized),
       rna_normalized_log2 = log2(rna_normalized),
@@ -306,7 +306,7 @@ for (n in 1:(data %>% nrow())) {
 
 if (use_labels) {
   all <- all %>%
-    left_join(label_f, by = c("name")) %>%
+    left_join(label_f, by = c("oligo_name")) %>%
     mutate(label = replace_na(label, "NA"))
 } else {
   if (nrow(all) > 0) { # can be 0 when no BCs are assigned
@@ -339,13 +339,13 @@ if (data %>% nrow() > 1 && nrow(all) > 1) {
     n_oligos_r2 <- data2 %>% nrow()
 
     n_oligos_r1_thres <- data1 %>%
-      filter(n_obs_bc >= thresh) %>%
+      filter(n_bc >= thresh) %>%
       nrow()
     n_oligos_r2_thres <- data2 %>%
-      filter(n_obs_bc >= thresh) %>%
+      filter(n_bc >= thresh) %>%
       nrow()
 
-    res <- data1 %>% inner_join(data2, by = c("name"))
+    res <- data1 %>% inner_join(data2, by = c("oligo_name"))
 
     plots_correlations_dna[[i]] <-
       plot_correlations_dna(res, cond, r1, r2, "pairwise")
@@ -369,7 +369,7 @@ if (data %>% nrow() > 1 && nrow(all) > 1) {
 
     # Min Threshold
     res <-
-      res %>% filter(n_obs_bc.x >= thresh, n_obs_bc.y >= thresh)
+      res %>% filter(n_bc.x >= thresh, n_bc.y >= thresh)
     plots_cor_min_thresh_dna[[i]] <-
       plot_correlations_dna(res, cond, r1, r2, "pairwise_minThreshold")
     plots_cor_min_thresh_rna[[i]] <-
diff --git a/workflow/scripts/count/plot_perInsertCounts_stats.R b/workflow/scripts/count/plot_perInsertCounts_stats.R
index 93f68988..00528297 100644
--- a/workflow/scripts/count/plot_perInsertCounts_stats.R
+++ b/workflow/scripts/count/plot_perInsertCounts_stats.R
@@ -65,7 +65,7 @@ if ("label" %in% names(opt)) {
     header = TRUE,
     stringsAsFactors = FALSE
   ))
-  colnames(label_f) <- c("name", "label")
+  colnames(label_f) <- c("oligo_name", "label")
   use_labels <- TRUE
 } else {
   use_labels <- FALSE
@@ -95,11 +95,10 @@ read_data <- function(file) {
     header = TRUE,
     stringsAsFactors = FALSE
   ) %>%
-    filter(name != "no_BC") %>%
+    filter(oligo_name != "no_BC") %>%
     mutate(
       dna_normalized_log2 = log2(dna_normalized),
       rna_normalized_log2 = log2(rna_normalized),
-      ratio_log2 = log2(ratio)
     )
   return(data)
 }
@@ -117,7 +116,7 @@ for (n in 1:(data %>% nrow())) {
 
 if (use_labels) {
   all <- all %>%
-    left_join(label_f, by = c("name")) %>%
+    left_join(label_f, by = c("oligo_name")) %>%
     mutate(label = replace_na(label, "NA"))
 } else {
   all$label <- "NA"
@@ -128,7 +127,7 @@ print("Histogram plots, RNA/DNA correlation plots, Violin plots")
 
 plot_median_dna_rna_cor <- function(data) {
   data <- data %>%
-    group_by(name) %>%
+    group_by(oligo_name) %>%
     summarise(dna_normalized = median(log10(dna_normalized)), rna_normalized = median(log10(rna_normalized)), n = n())
   data <- data %>% filter(n == length(replicates))
   p <- ggplot(data, aes(x = dna_normalized, y = rna_normalized)) +
@@ -151,7 +150,7 @@ plot_median_dna_rna_cor <- function(data) {
 }
 
 plot_group_bc_per_insert <- function(data) {
-  bp <- ggplot(data, aes(x = label, y = log2, fill = label)) +
+  bp <- ggplot(data, aes(x = label, y = log2FoldChange, fill = label)) +
     geom_violin() +
     geom_boxplot(width = 0.1, fill = "white") +
     xlab("insert") +
@@ -180,7 +179,7 @@ ggsave(sprintf("%s_dna_vs_rna.png", outdir),
   width = 10, height = 10
 )
 ggsave(sprintf("%s_dna_vs_rna_minThreshold.png", outdir),
-  plot_median_dna_rna_cor(all %>% filter(n_obs_bc >= thresh)),
+  plot_median_dna_rna_cor(all %>% filter(n_bc >= thresh)),
   width = 10, height = 10
 )
 
@@ -196,9 +195,9 @@ for (n in 1:(data %>% nrow())) {
   assigned_counts <- all %>% filter(replicate == rep)
 
   # Histograms
-  intercept <- median(assigned_counts$n_obs_bc)
+  intercept <- median(assigned_counts$n_bc)
   hist_plot_list[[n]] <-
-    ggplot(assigned_counts, aes(x = n_obs_bc)) +
+    ggplot(assigned_counts, aes(x = n_bc)) +
     geom_histogram(bins = 300) +
     geom_vline(xintercept = intercept, colour = "red") +
     xlim(0, 300) +
@@ -210,7 +209,7 @@ for (n in 1:(data %>% nrow())) {
     ggtitle(paste("replicate", rep, sep = " "))
 
   box_plot_insert_thresh_list[[n]] <-
-    plot_group_bc_per_insert(assigned_counts %>% filter(n_obs_bc >= thresh)) +
+    plot_group_bc_per_insert(assigned_counts %>% filter(n_bc >= thresh)) +
     ggtitle(paste("replicate", rep, sep = " "))
 }
 
diff --git a/workflow/scripts/variants/correlateVariantTables.py b/workflow/scripts/variants/correlateVariantTables.py
index a1d69069..b79e9f33 100644
--- a/workflow/scripts/variants/correlateVariantTables.py
+++ b/workflow/scripts/variants/correlateVariantTables.py
@@ -32,7 +32,7 @@
 def cli(condition, variants, bc_threshold, output_file):
 
     def filterOnThreshold(variants, threshold):
-        return(variants.query('n_obs_bc_ALT >= %d & n_obs_bc_REF >= %d' % (threshold, threshold)))
+        return(variants.query('n_bc_ALT >= %d & n_bc_REF >= %d' % (threshold, threshold)))
 
     output = pd.DataFrame()
     for i in range(len(variants)):
diff --git a/workflow/scripts/variants/generateMasterVariantTable.py b/workflow/scripts/variants/generateMasterVariantTable.py
index f77c90f0..0828466c 100644
--- a/workflow/scripts/variants/generateMasterVariantTable.py
+++ b/workflow/scripts/variants/generateMasterVariantTable.py
@@ -49,18 +49,18 @@ def cli(input_files, minRNACounts, minDNACounts, output_file):
     click.echo("Create new expression values...")
     df = df.groupby(['ID', 'REF', 'ALT']).agg(dna_counts_REF=('dna_counts_REF', sum),
                                               rna_counts_REF=('rna_counts_REF', sum),
-                                              n_obs_bc_REF=('n_obs_bc_REF', sum),
+                                              n_bc_REF=('n_bc_REF', sum),
                                               dna_counts_ALT=('dna_counts_ALT', sum),
                                               rna_counts_ALT=('rna_counts_ALT', sum),
-                                              n_obs_bc_ALT=('n_obs_bc_ALT', sum)
+                                              n_bc_ALT=('n_bc_ALT', sum)
                                               ).reset_index()
 
     scaling = 10**6
 
-    df_total = df[['dna_counts_REF', 'rna_counts_REF', 'n_obs_bc_REF',
-                   'dna_counts_ALT', 'rna_counts_ALT', 'n_obs_bc_ALT']].sum()
+    df_total = df[['dna_counts_REF', 'rna_counts_REF', 'n_bc_REF',
+                   'dna_counts_ALT', 'rna_counts_ALT', 'n_bc_ALT']].sum()
 
-    total_bc = df_total['n_obs_bc_REF'] + df_total['n_obs_bc_ALT']
+    total_bc = df_total['n_bc_REF'] + df_total['n_bc_ALT']
     total_dna = df_total['dna_counts_REF'] + df_total['dna_counts_ALT'] + total_bc * pseudocountDNA
     total_rna = df_total['rna_counts_REF'] + df_total['rna_counts_ALT'] + total_bc * pseudocountRNA
 
@@ -68,8 +68,8 @@ def cli(input_files, minRNACounts, minDNACounts, output_file):
 
     def normalize(df, total, count_type, ref_alt, pseudocount, scaling):
         return(((
-            df["%s_counts_%s" % (count_type, ref_alt)] + pseudocount * df['n_obs_bc_%s' % ref_alt]
-        )/df['n_obs_bc_%s' % ref_alt])/total*scaling)
+            df["%s_counts_%s" % (count_type, ref_alt)] + pseudocount * df['n_bc_%s' % ref_alt]
+        )/df['n_bc_%s' % ref_alt])/total*scaling)
 
     for ref_alt in ["REF", "ALT"]:
         for count_type in ["dna", "rna"]:
@@ -80,20 +80,20 @@ def normalize(df, total, count_type, ref_alt, pseudocount, scaling):
                 df, total, count_type, ref_alt, pseudocount, scaling)
 
         df['ratio_%s' % ref_alt] = df['rna_normalized_%s' % ref_alt] / df['dna_normalized_%s' % ref_alt]
-        df['log2_%s' % ref_alt] = np.log2(df['ratio_%s' % ref_alt])
+        df['log2FoldChange_%s' % ref_alt] = np.log2(df['ratio_%s' % ref_alt])
 
-    df["log2_expression"] = np.log2(df['ratio_ALT']/df['ratio_REF'])
+    df["log2FoldChange_expression"] = np.log2(df['ratio_ALT']/df['ratio_REF'])
 
     # fill NA and set correct output types
     df.fillna(0, inplace=True)
-    df = df.astype(dtype={'dna_counts_REF': 'int64', 'rna_counts_REF': 'int64', 'n_obs_bc_REF': 'int64',
-                          'dna_counts_ALT': 'int64', 'rna_counts_ALT': 'int64', 'n_obs_bc_ALT': 'int64'}, copy=False)
+    df = df.astype(dtype={'dna_counts_REF': 'int64', 'rna_counts_REF': 'int64', 'n_bc_REF': 'int64',
+                          'dna_counts_ALT': 'int64', 'rna_counts_ALT': 'int64', 'n_bc_ALT': 'int64'}, copy=False)
 
     df = df.reindex(columns=["ID", "REF", "ALT", "dna_counts_REF", "rna_counts_REF",
-                             "dna_normalized_REF", "rna_normalized_REF", "ratio_REF", "log2_REF",
-                             "n_obs_bc_REF", "dna_counts_ALT", "rna_counts_ALT",
+                             "dna_normalized_REF", "rna_normalized_REF", "ratio_REF", "log2FoldChange_REF",
+                             "n_bc_REF", "dna_counts_ALT", "rna_counts_ALT",
                              "dna_normalized_ALT", "rna_normalized_ALT", "ratio_ALT",
-                             "log2_ALT", "n_obs_bc_ALT", "log2_expression"])
+                             "log2FoldChange_ALT", "n_bc_ALT", "log2FoldChange_expression"])
 
     # write output
     click.echo("Write files...")
diff --git a/workflow/scripts/variants/generateVariantTable.py b/workflow/scripts/variants/generateVariantTable.py
index fa2d719a..bf465248 100644
--- a/workflow/scripts/variants/generateVariantTable.py
+++ b/workflow/scripts/variants/generateVariantTable.py
@@ -32,18 +32,18 @@ def cli(counts_file, declaration_file, output_file):
 
     # get count df
     click.echo("Read count file...")
-    # expected column names: name dna_counts rna_counts dna_normalized rna_normalized ratio log2 n_obs_bc
+    # expected column names: name dna_counts rna_counts dna_normalized rna_normalized ratio log2FoldChangeFoldChange n_bc
     counts = pd.read_csv(counts_file, header=0, sep="\t", index_col=0)
     # join ref with index of counts
     output = declaration.join(counts, on='REF')
     # join again for the alt columns
     output = output.join(counts, on='ALT', lsuffix='_REF', rsuffix='_ALT')
-    output["log2_expression"] = np.log2(output['ratio_ALT']/output['ratio_REF'])
+    output["log2FoldChange_expression"] = np.log2(output['ratio_ALT']/output['ratio_REF'])
 
     # fill NA and set correct output types
     output.fillna(0, inplace=True)
-    output = output.astype(dtype={'dna_counts_REF': 'int64', 'rna_counts_REF': 'int64', 'n_obs_bc_REF': 'int64',
-                                  'dna_counts_ALT': 'int64', 'rna_counts_ALT': 'int64', 'n_obs_bc_ALT': 'int64'},
+    output = output.astype(dtype={'dna_counts_REF': 'int64', 'rna_counts_REF': 'int64', 'n_bc_REF': 'int64',
+                                  'dna_counts_ALT': 'int64', 'rna_counts_ALT': 'int64', 'n_bc_ALT': 'int64'},
                            copy=False)
 
     # write output