Skip to content

Commit

Permalink
feat(normalisation): adding plot file normalisation to the compare pi…
Browse files Browse the repository at this point in the history
…peline

We add normalisation to the beginning of the plot compare pipeline.

fix no normalisation issue
  • Loading branch information
sbastkowski committed Feb 25, 2024
1 parent 8c7ac03 commit 9559db5
Show file tree
Hide file tree
Showing 15 changed files with 309 additions and 133 deletions.
Binary file modified docs/pipeline_combine.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion docs/pipeline_combine.pu
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ split again
end split

#lightblue:start tradis pipeline compare;

:normalise plot files;
split
: create count file for
original plot and original embl;
Expand Down
7 changes: 3 additions & 4 deletions quatradis/comparison/essentiality_analysis.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import csv
import os
from dataclasses import dataclass
from tempfile import mkstemp

from quatradis.comparison.essentiality import GeneEssentiality
from quatradis.util.file_handle_helpers import ensure_output_dir_exists
Expand Down Expand Up @@ -33,7 +32,7 @@ def get_all_gene_names(control_files, condition_files):
gene_names1 = [r[1] for r in reader if len(r) > 1 and r[1] != "gene_name"]
all_gene_names = all_gene_names.union(set(gene_names1))

for f in control_files:
for filename in control_files:
with open(filename, "r") as fileh:
reader = csv.reader(fileh, delimiter="\t", quotechar='"')
gene_names2 = [r[1] for r in reader if len(r) > 1 and r[1] != "gene_name"]
Expand All @@ -51,7 +50,7 @@ def all_gene_essentiality(input: EssentialityInput, analysis_type, verbose=False
genes_ess = {g: GeneEssentiality() for g in all_gene_names}
if analysis_type == "original":
for f in input.only_ess_condition_files:
ess_gene_names = gene_names_from_essentiality_file(f, verbose)
ess_gene_names = gene_names_from_essentiality_file(f)
if verbose:
print("ess_gene_names condition: " + str(len(ess_gene_names)))
print("genes_ess: " + str(len(genes_ess)))
Expand All @@ -60,7 +59,7 @@ def all_gene_essentiality(input: EssentialityInput, analysis_type, verbose=False
genes_ess[e].condition += 1
genes_ess[e].number_of_reps = len(input.only_ess_condition_files)
for f in input.only_ess_control_files:
ess_gene_names = gene_names_from_essentiality_file(f, verbose)
ess_gene_names = gene_names_from_essentiality_file(f)
if verbose:
print("ess_gene_names control: " + str(len(ess_gene_names)))
print("genes_ess: " + str(len(genes_ess)))
Expand Down
2 changes: 1 addition & 1 deletion quatradis/comparison/scatterplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def __init__(self, conditions, controls, window_size, prefix, normalise=False, v
self.normalise = normalise

if normalise:
n = NormalisePlots(self.conditions + self.controls, 0.0000001, verbose=self.verbose)
n = NormalisePlots(self.conditions + self.controls, 0.0000001, output_dir="normalised", verbose=self.verbose)
plotfiles, max_reads = n.create_normalised_files()
self.conditions = plotfiles[0:len(self.conditions)]
self.controls = plotfiles[len(self.conditions):]
Expand Down
2 changes: 1 addition & 1 deletion quatradis/comparison/split.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def get_combined_file_path(self):
return self._construct_file_path("combined")

def _create_split_plot_file(self, forward, reverse, filename):
p = PlotFromValuesGenerator(forward, reverse, filename, self.gzipped)
p = PlotFromValuesGenerator(forward, reverse, filename)
p.construct_file()

def create_all_files(self):
Expand Down
2 changes: 1 addition & 1 deletion quatradis/comparison/tradis_comparison.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,4 +119,4 @@ abline(v=2, col="red")


#write results
write.table(diff,file=opt$output,append=FALSE, quote=TRUE, sep=",", row.names=FALSE, col.names=c("locus_tag","gene_name","function","logFC","logCPM","PValue","q.value"))
write.table(diff,file=opt$output,append=FALSE, quote=FALSE, sep=",", row.names=FALSE, col.names=c("locus_tag","gene_name","function","logFC","logCPM","PValue","q.value"))
72 changes: 40 additions & 32 deletions quatradis/pipelines/compare.smk
Original file line number Diff line number Diff line change
@@ -1,21 +1,30 @@
import os
import shutil

input_files=[]
norm_files=[]
plotnames=[]
controlnames=[]
conditionnames=[]
plot_lut={}
norm_lut={}
for p in config["condition_files"]:
input_files.append(p)
plotname = os.path.basename(p).split('.')[0]
plotnames.append(plotname)
conditionnames.append(plotname)
plot_lut[plotname]=p


for p in config["control_files"]:
input_files.append(p)
plotname = os.path.basename(p).split('.')[0]
plotnames.append(plotname)
controlnames.append(plotname)
plot_lut[plotname]=p

for p in input_files:
plotname = os.path.basename(p).split('.')[0]
n=os.path.join(config["output_dir"], "analysis", plotname, "original.plot.gz")
norm_files.append(n)
norm_lut[plotname]=n

ZCAT_CMD='gzcat'
if not shutil.which('gzcat'):
Expand All @@ -24,14 +33,27 @@ if not shutil.which('gzcat'):
else:
raise Error("Couldn't find gzcat or zcat on your system. Please install and try again.")

def make_no_normalise_cmd():
cmds = []
for i in range(len(input_files)):
out_dir = os.path.join(config["output_dir"], "analysis", plotname)
cmds.append("mkdir -p " + out_dir)
cmds.append("if [[ $(file " + input_files[i] + " | grep ASCII | wc -l | cut -f 1) > 0 ]]; then gzip -c " + input_files[i] + " > " + norm_files[i] + "; else cp " + input_files[i] + " " + norm_files[i] + "; fi")

return "; ".join(cmds)

def make_normalise_cmd():
return "tradis plot normalise -o " + os.path.join(config["output_dir"], "analysis") + " -n original.plot.gz " + \
("--minimum_proportion_insertions=" + config["minimum_proportion_insertions"] if config["minimum_proportion_insertions"] else "") + \
" " + " ".join(input_files)

rule finish:
input:
expand(os.path.join(config["output_dir"], "gene_report.tsv")),
expand(os.path.join(config["output_dir"], "comparison", "{type}", "plot_absscatter.png"), type=["original", "forward", "reverse", "combined"]),
#expand(os.path.join(config["output_dir"],"comparison","{type}","{type}.compare.csv"), type=["original", "forward", "reverse", "combined"]),
expand(os.path.join(config["output_dir"],"comparison","{type}","essentiality.csv"), type=["original", "forward", "reverse", "combined"]),
expand(os.path.join(config["output_dir"],"analysis","{plot}","{type}.count.tsv.essen.csv"), type=["original", "combined", "forward", "reverse"], plot=plotnames)
#expand(os.path.join(config["output_dir"], "comparison", "{type}", "{type}.compare.csv"), type=["original", "forward", "reverse", "combined"]),
expand(os.path.join(config["output_dir"], "comparison", "{type}", "essentiality.csv"), type=["original", "forward", "reverse", "combined"]),
expand(os.path.join(config["output_dir"], "analysis", "{plot}", "{type}.count.tsv.essen.csv"), type=["original", "combined", "forward", "reverse"], plot=plotnames)
run:
print("All done!")

Expand All @@ -49,10 +71,21 @@ rule prepare_embl:
prime_feature_size="--prime_feature_size=" + config["prime_feature_size"] if config["prime_feature_size"] else ""
shell: "tradis compare prepare_embl --output={output} {params.minimum_threshold} {params.window_size} {params.window_interval} {params.prime_feature_size} --emblfile {input.embl} {input.plot}"

rule normalise:
input:
input_files
output:
norm_files
message: "Normalising plot files"
log: os.path.join(config["output_dir"], "analysis", "normalise.log")
params:
cmd=make_normalise_cmd() if config["normalise"] else make_no_normalise_cmd()
shell: "{params.cmd}"


rule split_plots:
input:
p=lambda wildcards: plot_lut[wildcards.plot]
p=lambda wildcards: norm_lut[wildcards.plot]
output:
c=os.path.join(config["output_dir"], "analysis", "{plot}", "combined.plot.gz"),
f=os.path.join(config["output_dir"], "analysis", "{plot}", "forward.plot.gz"),
Expand All @@ -74,34 +107,9 @@ rule count_plots:
params:
output_dir=os.path.join(config["output_dir"], "analysis", "{plot}"),
suffix="count.tsv"
wildcard_constraints:
type="(?!original).*"
shell: "tradis plot count -o {params.output_dir} -s {params.suffix} {input.embl} {input.p}"


rule copy_original:
input:
lambda wildcards: plot_lut[wildcards.plot]
output:
os.path.join(config["output_dir"],"analysis","{plot}","original.plot.gz")
message: "Copying original plot file to target folder: {input}"
params:
output_dir=os.path.join(config["output_dir"],"analysis","{plot}")
shell: "if [[ $(file {input} | grep ASCII | wc -l | cut -f 1) > 0 ]]; then gzip -c {input} > {output}; else cp {input} {output}; fi"


rule count_original_plots:
input:
p=rules.copy_original.output,
embl=config["annotations"]
output:
os.path.join(config["output_dir"], "analysis", "{plot}", "original.count.tsv")
message: "Analysing original plot and embl file {input.p}, {input.embl}"
params:
output_dir=os.path.join(config["output_dir"], "analysis", "{plot}")
shell: "tradis plot count -o {params.output_dir} -s count.tsv {input.embl} {input.p}"


rule essentiality:
input:
c=os.path.join(config["output_dir"], "analysis", "{plot}", "{type}.count.tsv"),
Expand Down
Loading

0 comments on commit 9559db5

Please sign in to comment.