diff --git a/.gitignore b/.gitignore
index ab529cb..38600d0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -105,6 +105,9 @@ ENV/
.DS_Store
+# Idea project settings
+.idea/
+
# mkdocs documentation
/site
diff --git a/.idea/MyAlbaTradis.iml b/.idea/MyAlbaTradis.iml
new file mode 100644
index 0000000..6f63a63
--- /dev/null
+++ b/.idea/MyAlbaTradis.iml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
new file mode 100644
index 0000000..15a15b2
--- /dev/null
+++ b/.idea/encodings.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..a2e120d
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..bdfde4d
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
new file mode 100644
index 0000000..841520d
--- /dev/null
+++ b/.idea/workspace.xml
@@ -0,0 +1,347 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1552061465245
+
+
+ 1552061465245
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index d07d3df..ec6370f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,3 +6,4 @@ RUN Rscript -e "source('http://bioconductor.org/biocLite.R')" -e "biocLite(c('ed
# AlbaTraDIS
RUN pip3 install cython
RUN pip3 install git+git://github.com/quadram-institute-bioscience/albatradis.git
+WORKDIR /work
diff --git a/README.md b/README.md
index bd26212..92b2bc2 100644
--- a/README.md
+++ b/README.md
@@ -96,6 +96,9 @@ optional arguments:
--pvalue PVALUE, -p PVALUE
Dont report anything above this p-value (default:
0.05)
+ --qvalue QVALUE, -q QVALUE
+ Dont report anything above this q-value (default:
+ 0.05)
--strict_signal, -g A result must be present in the combined plots to be
returned (default: False)
--use_annotation, -a Use the reference annotation rather than a sliding
diff --git a/albatradis/AlbaTraDIS.py b/albatradis/AlbaTraDIS.py
index a9ca59a..3453116 100644
--- a/albatradis/AlbaTraDIS.py
+++ b/albatradis/AlbaTraDIS.py
@@ -19,6 +19,7 @@ def __init__(self, options):
self.verbose = options.verbose
self.minimum_logfc = options.minimum_logfc
self.pvalue = options.pvalue
+ self.qvalue = options.qvalue
self.prefix = options.prefix
self.minimum_logcpm = options.minimum_logcpm
self.iterations = options.iterations
@@ -49,14 +50,14 @@ def run(self):
report_decreased_insertions = n.decreased_insertion_reporting()
if self.iterations == 1:
- bi = BlockInsertions(self.logger, plotfiles, self.minimum_threshold, self.window_size, self.window_interval, self.verbose, self.minimum_logfc, self.pvalue, self.prefix, self.minimum_logcpm, self.minimum_block, self.span_gaps, self.emblfile, report_decreased_insertions,self.strict_signal,self.use_annotation, self.prime_feature_size)
+ bi = BlockInsertions(self.logger, plotfiles, self.minimum_threshold, self.window_size, self.window_interval, self.verbose, self.minimum_logfc, self.pvalue, self.qvalue, self.prefix, self.minimum_logcpm, self.minimum_block, self.span_gaps, self.emblfile, report_decreased_insertions,self.strict_signal,self.use_annotation, self.prime_feature_size)
bi.run()
self.blocks = bi.blocks
plotfiles = bi.output_plots.values()
else:
for i in range(1,self.iterations+1):
- bi = BlockInsertions(self.logger, plotfiles, self.minimum_threshold, self.window_size, self.window_interval, self.verbose, self.minimum_logfc, self.pvalue, self.prefix + "_" +str(i), self.minimum_logcpm, self.minimum_block, self.span_gaps, self.emblfile, report_decreased_insertions,self.strict_signal,self.use_annotation, self.prime_feature_size)
+ bi = BlockInsertions(self.logger, plotfiles, self.minimum_threshold, self.window_size, self.window_interval, self.verbose, self.minimum_logfc, self.pvalue, self.qvalue, self.prefix + "_" +str(i), self.minimum_logcpm, self.minimum_block, self.span_gaps, self.emblfile, report_decreased_insertions,self.strict_signal,self.use_annotation, self.prime_feature_size)
bi.run()
self.blocks = bi.blocks
plotfiles = bi.output_plots.values()
diff --git a/albatradis/BlockInsertions.py b/albatradis/BlockInsertions.py
index ba799f5..66685ac 100644
--- a/albatradis/BlockInsertions.py
+++ b/albatradis/BlockInsertions.py
@@ -30,7 +30,7 @@ def __init__(self, forward, reverse, combined, embl_filename):
self.embl_filename = embl_filename
class BlockInsertions:
- def __init__(self, logger,plotfiles, minimum_threshold, window_size, window_interval, verbose, minimum_logfc, pvalue, prefix, minimum_logcpm, minimum_block,span_gaps, emblfile, report_decreased_insertions, strict_signal, use_annotation, prime_feature_size):
+ def __init__(self, logger,plotfiles, minimum_threshold, window_size, window_interval, verbose, minimum_logfc, pvalue, qvalue, prefix, minimum_logcpm, minimum_block,span_gaps, emblfile, report_decreased_insertions, strict_signal, use_annotation, prime_feature_size):
self.logger = logger
self.plotfiles = plotfiles
self.minimum_threshold = minimum_threshold
@@ -39,6 +39,7 @@ def __init__(self, logger,plotfiles, minimum_threshold, window_size, window_inte
self.verbose = verbose
self.minimum_logfc = minimum_logfc
self.pvalue = pvalue
+ self.qvalue = qvalue
self.prefix = prefix
self.minimum_logcpm = minimum_logcpm
self.minimum_block = minimum_block
@@ -136,7 +137,7 @@ def generate_logfc_plot(self, analysis_type, essentiality_files):
t = TradisComparison(files[:mid],files[mid:], self.verbose, self.minimum_block, only_ess_files[:mid], only_ess_files[mid:])
t.run()
- p = PlotLog(t.output_filename, self.genome_length, self.minimum_logfc, self.pvalue, self.minimum_logcpm, self.window_size, self.span_gaps, self.report_decreased_insertions, annotation_files[0])
+ p = PlotLog(t.output_filename, self.genome_length, self.minimum_logfc, self.pvalue, self.qvalue, self.minimum_logcpm, self.window_size, self.span_gaps, self.report_decreased_insertions, annotation_files[0])
p.construct_plot_file()
renamed_csv_file = os.path.join(self.prefix, analysis_type + ".csv")
renamed_plot_file = os.path.join(self.prefix, analysis_type + ".plot")
diff --git a/albatradis/PlotLog.py b/albatradis/PlotLog.py
index e3075a8..155139a 100644
--- a/albatradis/PlotLog.py
+++ b/albatradis/PlotLog.py
@@ -13,11 +13,12 @@ def __init__(self, start, end, logfc_value):
self.logfc_value = logfc_value
class PlotLog:
- def __init__(self, comparison_filename, genome_length, minimum_logfc, pvalue, minimum_logcpm, window_size, span_gaps, report_decreased_insertions, embl_file):
+ def __init__(self, comparison_filename, genome_length, minimum_logfc, pvalue, qvalue, minimum_logcpm, window_size, span_gaps, report_decreased_insertions, embl_file):
self.comparison_filename = comparison_filename
self.genome_length = genome_length
self.minimum_logfc = minimum_logfc
self.pvalue = pvalue
+ self.qvalue = qvalue
self.minimum_logcpm = minimum_logcpm
self.window_size = window_size
self.span_gaps = span_gaps
@@ -35,6 +36,7 @@ def construct_plot_file(self):
self.create_csv(forward_logfc, reverse_logfc)
+
return self
def create_csv(self, forward_logfc, reverse_logfc):
@@ -134,16 +136,26 @@ def read_comparison_file(self):
if logfc == 'logFC':
continue
- logfc = int(float(row[3]))
+ logfc = float(row[3])
+ temp_pval = float(row[5])
+ temp_logcpm = float(row[4])
+ temp_qval = float(row[6])
+
+
if not self.report_decreased_insertions and logfc < 0:
logfc = 0
- if numpy.absolute(logfc) < self.minimum_logfc or int(float(row[5])) >= self.pvalue:
+ if numpy.absolute(logfc) < self.minimum_logfc or temp_pval >= self.pvalue:
logfc = 0
- if numpy.absolute(int(float(row[4]))) < self.minimum_logcpm:
+ if numpy.absolute(temp_logcpm) < self.minimum_logcpm:
logfc = 0
+
+ if temp_qval >= self.qvalue:
+ logfc = 0
+
+
# Get coordinates
gene_name = row[0]
diff --git a/albatradis/PresenceAbsence.py b/albatradis/PresenceAbsence.py
index 6010d8e..7a730a2 100644
--- a/albatradis/PresenceAbsence.py
+++ b/albatradis/PresenceAbsence.py
@@ -6,6 +6,7 @@
from dendropy.utility.textprocessing import StringIO
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import squareform
+from scipy.cluster import hierarchy
import matplotlib
matplotlib.use('agg')
@@ -50,7 +51,7 @@ def create_output_files(self):
self.create_dot_graph_genes(os.path.join(self.prefix, 'logfc.dot'), self.filtered_gene_names, self.filtered_reports_to_gene_logfc)
- self.plot_distance_matrix(os.path.join(self.prefix, 'distance_matrix_dendrogram.png'))
+ self.plot_distance_matrix(os.path.join(self.prefix, 'distance_matrix_dendrogram.tre'))
self.create_nj_newick(os.path.join(self.prefix, 'nj_newick_tree.tre'))
HeatMap(self.reports_to_gene_logfc, self.gene_names,os.path.join(self.prefix, 'full_heatmap.png')).create_heat_map()
@@ -141,16 +142,17 @@ def distance_matrix(self):
dist_row.append(self.pair_wise_distance(a,b))
distances.append(dist_row)
return distances
-
+
def plot_distance_matrix(self, outputfile):
mat = numpy.array(self.distance_matrix())
dists = squareform(mat)
linkage_matrix = linkage(dists, "single")
- dendrogram(linkage_matrix, labels=self.genereports )
plt.title("Distance matrix")
- plt.savefig(outputfile)
-
- return self
+ tree = hierarchy.to_tree(linkage_matrix, False)
+ ntree = self.get_newick(tree, "", tree.dist, self.genereports)
+ with open(outputfile, 'w') as fh:
+ fh.write(ntree)
+
def nj_distance_matrix_str(self):
@@ -174,5 +176,18 @@ def create_nj_newick(self,outputfile):
fh.write(nj_tree.as_string("newick"))
return self
-
-
+
+
+ def get_newick(self, node, newick, parentdist, leaf_names):
+ if node.is_leaf():
+ return "%s:%.2f%s" % (leaf_names[node.id], parentdist - node.dist, newick)
+ else:
+ if len(newick) > 0:
+ newick = "):%.2f%s" % (parentdist - node.dist, newick)
+ else:
+ newick = ");"
+ newick = self.get_newick(node.get_left(), newick, node.dist, leaf_names)
+ newick = self.get_newick(node.get_right(), ",%s" % (newick), node.dist, leaf_names)
+ newick = "(%s" % (newick)
+ return newick
+
diff --git a/albatradis/tests/AlbaTraDIS_test.py b/albatradis/tests/AlbaTraDIS_test.py
index 955dad7..fbdb306 100644
--- a/albatradis/tests/AlbaTraDIS_test.py
+++ b/albatradis/tests/AlbaTraDIS_test.py
@@ -10,7 +10,7 @@
data_dir = os.path.join(test_modules_dir, 'data','albatradis')
class TestOptions:
- def __init__(self, plotfiles, minimum_threshold, window_size,window_interval, verbose, prefix, minimum_logcpm, minimum_logfc, pvalue, iterations, dont_normalise_plots,minimum_block,span_gaps, emblfile, minimum_proportion_insertions, strict_signal, use_annotation, prime_feature_size):
+ def __init__(self, plotfiles, minimum_threshold, window_size,window_interval, verbose, prefix, minimum_logcpm, minimum_logfc, pvalue, qvalue, iterations, dont_normalise_plots,minimum_block,span_gaps, emblfile, minimum_proportion_insertions, strict_signal, use_annotation, prime_feature_size):
self.plotfiles = plotfiles
self.minimum_threshold = minimum_threshold
self.window_size = window_size
@@ -20,6 +20,7 @@ def __init__(self, plotfiles, minimum_threshold, window_size,window_interval, ve
self.minimum_logcpm = minimum_logcpm
self.minimum_logfc = minimum_logfc
self.pvalue = pvalue
+ self.qvalue = qvalue
self.iterations = iterations
self.dont_normalise_plots = dont_normalise_plots
self.minimum_block = minimum_block
@@ -37,7 +38,7 @@ def test_small_real(self):
control = os.path.join(data_dir, 'small_control.insert_site_plot.gz')
emblfile = os.path.join(data_dir, 'annotation.embl')
- t = AlbaTraDIS(TestOptions([case, control], 3, 100, 100, False, 'testoutput', 1, 1, 1, 1, True,1,0, emblfile, 0.1, False, False, 100))
+ t = AlbaTraDIS(TestOptions([case, control], 3, 100, 100, False, 'testoutput', 1, 1, 1, 1, 1, True,1,0, emblfile, 0.1, False, False, 100))
self.assertTrue(t.run())
self.assertTrue(os.path.exists('testoutput'))
self.assertTrue(os.path.exists('testoutput/gene_report.csv'))
@@ -48,7 +49,7 @@ def test_ignore_decreased_insertions(self):
control = os.path.join(data_dir, 'small_control_high_insertions.insert_site_plot.gz')
emblfile = os.path.join(data_dir, 'annotation.embl')
- t = AlbaTraDIS(TestOptions([case, control], 3, 100, 100, False, 'testoutputx', 1, 1, 1, 1, False,1,0, emblfile, 0.9999, False, False, 100))
+ t = AlbaTraDIS(TestOptions([case, control], 3, 100, 100, False, 'testoutputx', 1, 1, 1, 1, 1, False,1,0, emblfile, 0.9999, False, False, 100))
self.assertTrue(t.run())
self.assertFalse(os.path.exists('.output.pdf'))
self.assertFalse(os.path.exists('.output.csv'))
@@ -61,7 +62,7 @@ def test_small_use_annotation(self):
control = os.path.join(data_dir, 'small_control.insert_site_plot.gz')
emblfile = os.path.join(data_dir, 'annotation.embl')
- t = AlbaTraDIS(TestOptions([case, control], 3, 100, 100, False, 'testoutput', 1, 1, 1, 1, True,1,0, emblfile, 0.1, False, True, 100))
+ t = AlbaTraDIS(TestOptions([case, control], 3, 100, 100, False, 'testoutput', 1, 1, 1, 1, 1, True,1,0, emblfile, 0.1, False, True, 100))
self.assertTrue(t.run())
self.assertTrue(os.path.exists('testoutput'))
shutil.rmtree("testoutput")
diff --git a/albatradis/tests/PlotLog_test.py b/albatradis/tests/PlotLog_test.py
index 02b7397..0234e0a 100644
--- a/albatradis/tests/PlotLog_test.py
+++ b/albatradis/tests/PlotLog_test.py
@@ -10,32 +10,31 @@
class TestPlotLog(unittest.TestCase):
def test_span_gaps_merge(self):
- p = PlotLog('x', 20, 2, 0.05, 1, 4, 1, True, None)
+ p = PlotLog('x', 20, 2, 0.05, 0.05, 1, 4, 1, True, None)
l = p.span_block_gaps([0,0,0,9,9,9,9,0,0,0,0,8,8,8,8,0,0,0,0,0])
self.assertEqual([0,0,0,9,9,9,9,2,2,2,2,8,8,8,8,0,0,0,0,0],l )
def test_span_gaps_nomerge(self):
- p = PlotLog('x', 20, 2, 0.05, 1, 4, 1, True, None)
+ p = PlotLog('x', 20, 2, 0.05, 0.05, 1, 4, 1, True, None)
l = p.span_block_gaps([0,0,0,9,9,9,0,0,0,0,0,8,8,8,8,0,0,0,0,0])
self.assertEqual([0,0,0,9,9,9,0,0,0,0,0,8,8,8,8,0,0,0,0,0],l )
def test_span_gaps_block_at_end(self):
- p = PlotLog('x', 20, 2, 0.05, 1, 4, 1, True, None)
+ p = PlotLog('x', 20, 2, 0.05, 0.05, 1, 4, 1, True, None)
l = p.span_block_gaps([0,0,0,9,9,9,0,0,0,0,0,8,8,8,8,0,0,7,7,7])
self.assertEqual([0,0,0,9,9,9,0,0,0,0,0,8,8,8,8,2,2,7,7,7],l )
def test_span_gaps_merge_neg(self):
- p = PlotLog('x', 20, 2, 0.05, 1, 4, 1, True, None)
+ p = PlotLog('x', 20, 2, 0.05, 0.05, 1, 4, 1, True, None)
l = p.span_block_gaps([0,0,0,-9,-9,-9,-9,0,0,0,0,-8,-8,-8,-8,0,0,0,0,0])
self.assertEqual([0,0,0,-9,-9,-9,-9,-2,-2,-2,-2,-8,-8,-8,-8,0,0,0,0,0],l )
def test_span_gaps_nomerge_neg(self):
- p = PlotLog('x', 20, 2, 0.05, 1, 4, 1, True, None)
+ p = PlotLog('x', 20, 2, 0.05, 0.05, 1, 4, 1, True, None)
l = p.span_block_gaps([0,0,0,-9,-9,-9,0,0,0,0,0,-8,-8,-8,-8,0,0,0,0,0])
self.assertEqual([0,0,0,-9,-9,-9,0,0,0,0,0,-8,-8,-8,-8,0,0,0,0,0],l )
def test_span_gaps_block_at_end_neg(self):
- p = PlotLog('x', 20, 2, 0.05, 1, 4, 1, True, None)
+ p = PlotLog('x', 20, 2, 0.05, 0.05, 1, 4, 1, True, None)
l = p.span_block_gaps([0,0,0,-9,-9,-9,0,0,0,0,0,-8,-8,-8,-8,0,0,-7,-7,-7])
self.assertEqual([0,0,0,-9,-9,-9,0,0,0,0,0,-8,-8,-8,-8,-2,-2,-7,-7,-7],l )
-
\ No newline at end of file
diff --git a/albatradis/tests/PresenceAbsence_test.py b/albatradis/tests/PresenceAbsence_test.py
index 5138646..16cbb0f 100644
--- a/albatradis/tests/PresenceAbsence_test.py
+++ b/albatradis/tests/PresenceAbsence_test.py
@@ -2,6 +2,8 @@
import os
import filecmp
import shutil
+
+
from albatradis.PresenceAbsence import PresenceAbsence
class ErrorReadingFile (Exception): pass
@@ -9,6 +11,7 @@ class InvalidFileFormat (Exception): pass
data_dir = os.path.join('albatradis','tests', 'data','presenceabsence')
+
class TestPresenceAbsence(unittest.TestCase):
def test_valid_file(self):
@@ -20,17 +23,22 @@ def test_valid_file(self):
emblfile = os.path.join(data_dir, 'reference.embl')
all_outputfile = os.path.join('testoutput', 'all_logfc.csv')
- filtered_outputfile =os.path.join('testoutput', 'filtered_logfc.csv')
+ filtered_outputfile = os.path.join('testoutput', 'filtered_logfc.csv')
+ dendrogram = os.path.join('testoutput', 'distance_matrix_dendrogram.tre')
+ nj_tree = os.path.join('testoutput', 'nj_newick_tree.tre')
p = PresenceAbsence(genereports, emblfile, False, 'testoutput')
-
+ exp_lfc=os.path.join(data_dir, 'expected_logfc.csv')
+
self.assertEqual(962, len(p.gene_names))
self.assertEqual(1079, len(p.features))
self.assertTrue(p.create_output_files())
-
+ self.assertTrue(os.path.exists(exp_lfc))
self.assertTrue(all_outputfile)
self.assertTrue(filtered_outputfile)
- self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'expected_logfc.csv'), all_outputfile))
+ self.assertTrue(dendrogram)
+ self.assertTrue(nj_tree)
+ self.assertTrue(filecmp.cmp(exp_lfc, all_outputfile))
#shutil.rmtree('testoutput')
diff --git a/scripts/albatradis b/scripts/albatradis
index 4f45a03..28ffbbc 100755
--- a/scripts/albatradis
+++ b/scripts/albatradis
@@ -30,6 +30,7 @@ parser.add_argument('--minimum_proportion_insertions', '-d', help='If the propor
parser.add_argument('--dont_normalise_plots', '-n', action='store_true', help='Dont normalise input plots', default = False)
parser.add_argument('--prefix', '-o', help='Output directory prefix', type=str, default='output')
parser.add_argument('--pvalue', '-p', help='Dont report anything above this p-value', type=float, default=0.05)
+parser.add_argument('--qvalue', '-q', help='Dont report anything above this q-value', type=float, default=0.05)
parser.add_argument('--strict_signal', '-g', action='store_true', help='A result must be present in the combined plots to be returned', default = False)
parser.add_argument('--use_annotation', '-a', help='Use the reference annotation rather than a sliding window', action='store_true', default = False)
parser.add_argument('--prime_feature_size', '-z', help='Feature size when adding 5/3 prime block when --use_annotation', type=int, default=198)