Skip to content

Commit

Permalink
Merge pull request #5 from sbastkowski/master
Browse files Browse the repository at this point in the history
Added qvalue option and dendrogram being saved as newick
  • Loading branch information
andrewjpage authored Mar 31, 2019
2 parents 449d007 + dcbe3a7 commit c366bfe
Show file tree
Hide file tree
Showing 17 changed files with 457 additions and 31 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ ENV/

.DS_Store

# Idea project settings
.idea/

# mkdocs documentation
/site

Expand Down
12 changes: 12 additions & 0 deletions .idea/MyAlbaTradis.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/encodings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

347 changes: 347 additions & 0 deletions .idea/workspace.xml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ RUN Rscript -e "source('http://bioconductor.org/biocLite.R')" -e "biocLite(c('ed
# AlbaTraDIS
RUN pip3 install cython
RUN pip3 install git+git://github.com/quadram-institute-bioscience/albatradis.git
WORKDIR /work
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ optional arguments:
--pvalue PVALUE, -p PVALUE
Dont report anything above this p-value (default:
0.05)
--qvalue QVALUE, -q QVALUE
Dont report anything above this q-value (default:
0.05)
--strict_signal, -g A result must be present in the combined plots to be
returned (default: False)
--use_annotation, -a Use the reference annotation rather than a sliding
Expand Down
5 changes: 3 additions & 2 deletions albatradis/AlbaTraDIS.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def __init__(self, options):
self.verbose = options.verbose
self.minimum_logfc = options.minimum_logfc
self.pvalue = options.pvalue
self.qvalue = options.qvalue
self.prefix = options.prefix
self.minimum_logcpm = options.minimum_logcpm
self.iterations = options.iterations
Expand Down Expand Up @@ -49,14 +50,14 @@ def run(self):
report_decreased_insertions = n.decreased_insertion_reporting()

if self.iterations == 1:
bi = BlockInsertions(self.logger, plotfiles, self.minimum_threshold, self.window_size, self.window_interval, self.verbose, self.minimum_logfc, self.pvalue, self.prefix, self.minimum_logcpm, self.minimum_block, self.span_gaps, self.emblfile, report_decreased_insertions,self.strict_signal,self.use_annotation, self.prime_feature_size)
bi = BlockInsertions(self.logger, plotfiles, self.minimum_threshold, self.window_size, self.window_interval, self.verbose, self.minimum_logfc, self.pvalue, self.qvalue, self.prefix, self.minimum_logcpm, self.minimum_block, self.span_gaps, self.emblfile, report_decreased_insertions,self.strict_signal,self.use_annotation, self.prime_feature_size)
bi.run()
self.blocks = bi.blocks
plotfiles = bi.output_plots.values()
else:

for i in range(1,self.iterations+1):
bi = BlockInsertions(self.logger, plotfiles, self.minimum_threshold, self.window_size, self.window_interval, self.verbose, self.minimum_logfc, self.pvalue, self.prefix + "_" +str(i), self.minimum_logcpm, self.minimum_block, self.span_gaps, self.emblfile, report_decreased_insertions,self.strict_signal,self.use_annotation, self.prime_feature_size)
bi = BlockInsertions(self.logger, plotfiles, self.minimum_threshold, self.window_size, self.window_interval, self.verbose, self.minimum_logfc, self.pvalue, self.qvalue, self.prefix + "_" +str(i), self.minimum_logcpm, self.minimum_block, self.span_gaps, self.emblfile, report_decreased_insertions,self.strict_signal,self.use_annotation, self.prime_feature_size)
bi.run()
self.blocks = bi.blocks
plotfiles = bi.output_plots.values()
Expand Down
5 changes: 3 additions & 2 deletions albatradis/BlockInsertions.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def __init__(self, forward, reverse, combined, embl_filename):
self.embl_filename = embl_filename

class BlockInsertions:
def __init__(self, logger,plotfiles, minimum_threshold, window_size, window_interval, verbose, minimum_logfc, pvalue, prefix, minimum_logcpm, minimum_block,span_gaps, emblfile, report_decreased_insertions, strict_signal, use_annotation, prime_feature_size):
def __init__(self, logger,plotfiles, minimum_threshold, window_size, window_interval, verbose, minimum_logfc, pvalue, qvalue, prefix, minimum_logcpm, minimum_block,span_gaps, emblfile, report_decreased_insertions, strict_signal, use_annotation, prime_feature_size):
self.logger = logger
self.plotfiles = plotfiles
self.minimum_threshold = minimum_threshold
Expand All @@ -39,6 +39,7 @@ def __init__(self, logger,plotfiles, minimum_threshold, window_size, window_inte
self.verbose = verbose
self.minimum_logfc = minimum_logfc
self.pvalue = pvalue
self.qvalue = qvalue
self.prefix = prefix
self.minimum_logcpm = minimum_logcpm
self.minimum_block = minimum_block
Expand Down Expand Up @@ -136,7 +137,7 @@ def generate_logfc_plot(self, analysis_type, essentiality_files):

t = TradisComparison(files[:mid],files[mid:], self.verbose, self.minimum_block, only_ess_files[:mid], only_ess_files[mid:])
t.run()
p = PlotLog(t.output_filename, self.genome_length, self.minimum_logfc, self.pvalue, self.minimum_logcpm, self.window_size, self.span_gaps, self.report_decreased_insertions, annotation_files[0])
p = PlotLog(t.output_filename, self.genome_length, self.minimum_logfc, self.pvalue, self.qvalue, self.minimum_logcpm, self.window_size, self.span_gaps, self.report_decreased_insertions, annotation_files[0])
p.construct_plot_file()
renamed_csv_file = os.path.join(self.prefix, analysis_type + ".csv")
renamed_plot_file = os.path.join(self.prefix, analysis_type + ".plot")
Expand Down
20 changes: 16 additions & 4 deletions albatradis/PlotLog.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@ def __init__(self, start, end, logfc_value):
self.logfc_value = logfc_value

class PlotLog:
def __init__(self, comparison_filename, genome_length, minimum_logfc, pvalue, minimum_logcpm, window_size, span_gaps, report_decreased_insertions, embl_file):
def __init__(self, comparison_filename, genome_length, minimum_logfc, pvalue, qvalue, minimum_logcpm, window_size, span_gaps, report_decreased_insertions, embl_file):
self.comparison_filename = comparison_filename
self.genome_length = genome_length
self.minimum_logfc = minimum_logfc
self.pvalue = pvalue
self.qvalue = qvalue
self.minimum_logcpm = minimum_logcpm
self.window_size = window_size
self.span_gaps = span_gaps
Expand All @@ -35,6 +36,7 @@ def construct_plot_file(self):

self.create_csv(forward_logfc, reverse_logfc)


return self

def create_csv(self, forward_logfc, reverse_logfc):
Expand Down Expand Up @@ -134,16 +136,26 @@ def read_comparison_file(self):
if logfc == 'logFC':
continue

logfc = int(float(row[3]))
logfc = float(row[3])
temp_pval = float(row[5])
temp_logcpm = float(row[4])
temp_qval = float(row[6])



if not self.report_decreased_insertions and logfc < 0:
logfc = 0

if numpy.absolute(logfc) < self.minimum_logfc or int(float(row[5])) >= self.pvalue:
if numpy.absolute(logfc) < self.minimum_logfc or temp_pval >= self.pvalue:
logfc = 0

if numpy.absolute(int(float(row[4]))) < self.minimum_logcpm:
if numpy.absolute(temp_logcpm) < self.minimum_logcpm:
logfc = 0

if temp_qval >= self.qvalue:
logfc = 0



# Get coordinates
gene_name = row[0]
Expand Down
31 changes: 23 additions & 8 deletions albatradis/PresenceAbsence.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from dendropy.utility.textprocessing import StringIO
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import squareform
from scipy.cluster import hierarchy

import matplotlib
matplotlib.use('agg')
Expand Down Expand Up @@ -50,7 +51,7 @@ def create_output_files(self):

self.create_dot_graph_genes(os.path.join(self.prefix, 'logfc.dot'), self.filtered_gene_names, self.filtered_reports_to_gene_logfc)

self.plot_distance_matrix(os.path.join(self.prefix, 'distance_matrix_dendrogram.png'))
self.plot_distance_matrix(os.path.join(self.prefix, 'distance_matrix_dendrogram.tre'))
self.create_nj_newick(os.path.join(self.prefix, 'nj_newick_tree.tre'))

HeatMap(self.reports_to_gene_logfc, self.gene_names,os.path.join(self.prefix, 'full_heatmap.png')).create_heat_map()
Expand Down Expand Up @@ -141,16 +142,17 @@ def distance_matrix(self):
dist_row.append(self.pair_wise_distance(a,b))
distances.append(dist_row)
return distances

def plot_distance_matrix(self, outputfile):
mat = numpy.array(self.distance_matrix())
dists = squareform(mat)
linkage_matrix = linkage(dists, "single")
dendrogram(linkage_matrix, labels=self.genereports )
plt.title("Distance matrix")
plt.savefig(outputfile)

return self
tree = hierarchy.to_tree(linkage_matrix, False)
ntree = self.get_newick(tree, "", tree.dist, self.genereports)
with open(outputfile, 'w') as fh:
fh.write(ntree)



def nj_distance_matrix_str(self):
Expand All @@ -174,5 +176,18 @@ def create_nj_newick(self,outputfile):
fh.write(nj_tree.as_string("newick"))

return self




def get_newick(self, node, newick, parentdist, leaf_names):
if node.is_leaf():
return "%s:%.2f%s" % (leaf_names[node.id], parentdist - node.dist, newick)
else:
if len(newick) > 0:
newick = "):%.2f%s" % (parentdist - node.dist, newick)
else:
newick = ");"
newick = self.get_newick(node.get_left(), newick, node.dist, leaf_names)
newick = self.get_newick(node.get_right(), ",%s" % (newick), node.dist, leaf_names)
newick = "(%s" % (newick)
return newick

9 changes: 5 additions & 4 deletions albatradis/tests/AlbaTraDIS_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
data_dir = os.path.join(test_modules_dir, 'data','albatradis')

class TestOptions:
def __init__(self, plotfiles, minimum_threshold, window_size,window_interval, verbose, prefix, minimum_logcpm, minimum_logfc, pvalue, iterations, dont_normalise_plots,minimum_block,span_gaps, emblfile, minimum_proportion_insertions, strict_signal, use_annotation, prime_feature_size):
def __init__(self, plotfiles, minimum_threshold, window_size,window_interval, verbose, prefix, minimum_logcpm, minimum_logfc, pvalue, qvalue, iterations, dont_normalise_plots,minimum_block,span_gaps, emblfile, minimum_proportion_insertions, strict_signal, use_annotation, prime_feature_size):
self.plotfiles = plotfiles
self.minimum_threshold = minimum_threshold
self.window_size = window_size
Expand All @@ -20,6 +20,7 @@ def __init__(self, plotfiles, minimum_threshold, window_size,window_interval, ve
self.minimum_logcpm = minimum_logcpm
self.minimum_logfc = minimum_logfc
self.pvalue = pvalue
self.qvalue = qvalue
self.iterations = iterations
self.dont_normalise_plots = dont_normalise_plots
self.minimum_block = minimum_block
Expand All @@ -37,7 +38,7 @@ def test_small_real(self):
control = os.path.join(data_dir, 'small_control.insert_site_plot.gz')
emblfile = os.path.join(data_dir, 'annotation.embl')

t = AlbaTraDIS(TestOptions([case, control], 3, 100, 100, False, 'testoutput', 1, 1, 1, 1, True,1,0, emblfile, 0.1, False, False, 100))
t = AlbaTraDIS(TestOptions([case, control], 3, 100, 100, False, 'testoutput', 1, 1, 1, 1, 1, True,1,0, emblfile, 0.1, False, False, 100))
self.assertTrue(t.run())
self.assertTrue(os.path.exists('testoutput'))
self.assertTrue(os.path.exists('testoutput/gene_report.csv'))
Expand All @@ -48,7 +49,7 @@ def test_ignore_decreased_insertions(self):
control = os.path.join(data_dir, 'small_control_high_insertions.insert_site_plot.gz')
emblfile = os.path.join(data_dir, 'annotation.embl')

t = AlbaTraDIS(TestOptions([case, control], 3, 100, 100, False, 'testoutputx', 1, 1, 1, 1, False,1,0, emblfile, 0.9999, False, False, 100))
t = AlbaTraDIS(TestOptions([case, control], 3, 100, 100, False, 'testoutputx', 1, 1, 1, 1, 1, False,1,0, emblfile, 0.9999, False, False, 100))
self.assertTrue(t.run())
self.assertFalse(os.path.exists('.output.pdf'))
self.assertFalse(os.path.exists('.output.csv'))
Expand All @@ -61,7 +62,7 @@ def test_small_use_annotation(self):
control = os.path.join(data_dir, 'small_control.insert_site_plot.gz')
emblfile = os.path.join(data_dir, 'annotation.embl')

t = AlbaTraDIS(TestOptions([case, control], 3, 100, 100, False, 'testoutput', 1, 1, 1, 1, True,1,0, emblfile, 0.1, False, True, 100))
t = AlbaTraDIS(TestOptions([case, control], 3, 100, 100, False, 'testoutput', 1, 1, 1, 1, 1, True,1,0, emblfile, 0.1, False, True, 100))
self.assertTrue(t.run())
self.assertTrue(os.path.exists('testoutput'))
shutil.rmtree("testoutput")
Expand Down
13 changes: 6 additions & 7 deletions albatradis/tests/PlotLog_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,32 +10,31 @@
class TestPlotLog(unittest.TestCase):

def test_span_gaps_merge(self):
p = PlotLog('x', 20, 2, 0.05, 1, 4, 1, True, None)
p = PlotLog('x', 20, 2, 0.05, 0.05, 1, 4, 1, True, None)
l = p.span_block_gaps([0,0,0,9,9,9,9,0,0,0,0,8,8,8,8,0,0,0,0,0])
self.assertEqual([0,0,0,9,9,9,9,2,2,2,2,8,8,8,8,0,0,0,0,0],l )

def test_span_gaps_nomerge(self):
p = PlotLog('x', 20, 2, 0.05, 1, 4, 1, True, None)
p = PlotLog('x', 20, 2, 0.05, 0.05, 1, 4, 1, True, None)
l = p.span_block_gaps([0,0,0,9,9,9,0,0,0,0,0,8,8,8,8,0,0,0,0,0])
self.assertEqual([0,0,0,9,9,9,0,0,0,0,0,8,8,8,8,0,0,0,0,0],l )

def test_span_gaps_block_at_end(self):
p = PlotLog('x', 20, 2, 0.05, 1, 4, 1, True, None)
p = PlotLog('x', 20, 2, 0.05, 0.05, 1, 4, 1, True, None)
l = p.span_block_gaps([0,0,0,9,9,9,0,0,0,0,0,8,8,8,8,0,0,7,7,7])
self.assertEqual([0,0,0,9,9,9,0,0,0,0,0,8,8,8,8,2,2,7,7,7],l )

def test_span_gaps_merge_neg(self):
p = PlotLog('x', 20, 2, 0.05, 1, 4, 1, True, None)
p = PlotLog('x', 20, 2, 0.05, 0.05, 1, 4, 1, True, None)
l = p.span_block_gaps([0,0,0,-9,-9,-9,-9,0,0,0,0,-8,-8,-8,-8,0,0,0,0,0])
self.assertEqual([0,0,0,-9,-9,-9,-9,-2,-2,-2,-2,-8,-8,-8,-8,0,0,0,0,0],l )

def test_span_gaps_nomerge_neg(self):
p = PlotLog('x', 20, 2, 0.05, 1, 4, 1, True, None)
p = PlotLog('x', 20, 2, 0.05, 0.05, 1, 4, 1, True, None)
l = p.span_block_gaps([0,0,0,-9,-9,-9,0,0,0,0,0,-8,-8,-8,-8,0,0,0,0,0])
self.assertEqual([0,0,0,-9,-9,-9,0,0,0,0,0,-8,-8,-8,-8,0,0,0,0,0],l )

def test_span_gaps_block_at_end_neg(self):
p = PlotLog('x', 20, 2, 0.05, 1, 4, 1, True, None)
p = PlotLog('x', 20, 2, 0.05, 0.05, 1, 4, 1, True, None)
l = p.span_block_gaps([0,0,0,-9,-9,-9,0,0,0,0,0,-8,-8,-8,-8,0,0,-7,-7,-7])
self.assertEqual([0,0,0,-9,-9,-9,0,0,0,0,0,-8,-8,-8,-8,-2,-2,-7,-7,-7],l )

16 changes: 12 additions & 4 deletions albatradis/tests/PresenceAbsence_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
import os
import filecmp
import shutil


from albatradis.PresenceAbsence import PresenceAbsence

class ErrorReadingFile (Exception): pass
class InvalidFileFormat (Exception): pass

data_dir = os.path.join('albatradis','tests', 'data','presenceabsence')


class TestPresenceAbsence(unittest.TestCase):

def test_valid_file(self):
Expand All @@ -20,17 +23,22 @@ def test_valid_file(self):
emblfile = os.path.join(data_dir, 'reference.embl')

all_outputfile = os.path.join('testoutput', 'all_logfc.csv')
filtered_outputfile =os.path.join('testoutput', 'filtered_logfc.csv')
filtered_outputfile = os.path.join('testoutput', 'filtered_logfc.csv')
dendrogram = os.path.join('testoutput', 'distance_matrix_dendrogram.tre')
nj_tree = os.path.join('testoutput', 'nj_newick_tree.tre')
p = PresenceAbsence(genereports, emblfile, False, 'testoutput')

exp_lfc=os.path.join(data_dir, 'expected_logfc.csv')

self.assertEqual(962, len(p.gene_names))
self.assertEqual(1079, len(p.features))
self.assertTrue(p.create_output_files())

self.assertTrue(os.path.exists(exp_lfc))

self.assertTrue(all_outputfile)
self.assertTrue(filtered_outputfile)
self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'expected_logfc.csv'), all_outputfile))
self.assertTrue(dendrogram)
self.assertTrue(nj_tree)
self.assertTrue(filecmp.cmp(exp_lfc, all_outputfile))

#shutil.rmtree('testoutput')

1 change: 1 addition & 0 deletions scripts/albatradis
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ parser.add_argument('--minimum_proportion_insertions', '-d', help='If the propor
parser.add_argument('--dont_normalise_plots', '-n', action='store_true', help='Dont normalise input plots', default = False)
parser.add_argument('--prefix', '-o', help='Output directory prefix', type=str, default='output')
parser.add_argument('--pvalue', '-p', help='Dont report anything above this p-value', type=float, default=0.05)
parser.add_argument('--qvalue', '-q', help='Dont report anything above this q-value', type=float, default=0.05)
parser.add_argument('--strict_signal', '-g', action='store_true', help='A result must be present in the combined plots to be returned', default = False)
parser.add_argument('--use_annotation', '-a', help='Use the reference annotation rather than a sliding window', action='store_true', default = False)
parser.add_argument('--prime_feature_size', '-z', help='Feature size when adding 5/3 prime block when --use_annotation', type=int, default=198)
Expand Down

0 comments on commit c366bfe

Please sign in to comment.