Skip to content

Commit

Permalink
improve ensemble speed for large images
Browse files Browse the repository at this point in the history
  • Loading branch information
haberlmatt committed Oct 15, 2018
1 parent 212e616 commit ef4e847
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 22 deletions.
34 changes: 13 additions & 21 deletions EnsemblePredictions.m
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,14 @@
% last argument has to be the outputdirectory where the average files are stored
%
% -----------------------------------------------------------------------------
%% NCMIR, UCSD -- Author: M Haberl -- Data: 10/2017
%% NCMIR, UCSD -- Author: M Haberl -- Date: 10/2017 -- Update: 10/2018
% -----------------------------------------------------------------------------
%

%% Initialize
pkg load hdf5oct
pkg load image

script_dir = fileparts(make_absolute_filename(program_invocation_name()));
addpath(genpath(script_dir));
addpath(genpath(strcat(script_dir,filesep(),'scripts',filesep())));
addpath(genpath(strcat(script_dir,filesep(),'scripts',filesep(),'functions')));
tic

arg_list = argv ();
Expand All @@ -29,7 +25,7 @@
for i = 1:(numel(arg_list)-1)
to_process{i} = arg_list{i};
if ~isdir(arg_list{i})
fprintf('%s not a directory\nPlease use: EnsemblePredictions ./inputdir1 ./inputdir2 ./inputdir3 ./outputdir\n',arg_list{i});
fprintf('%s not a directory\nPlease check if predictions ran successfully or ensure to use: EnsemblePredictions ./inputdir1 ./inputdir2 ./inputdir3 ./outputdir\n',arg_list{i});
return
end
list{i} = filter_files(read_files_in_folder(to_process{i}),'.png');
Expand All @@ -40,21 +36,17 @@

%% =============== Generate ensemble predictions =================================

%merged_file_save=fullfile(outfolder, 'EnsemblePredict.tiff');
%if exist(merged_file_save, 'file'),delete(merged_file_save); end
%outputdir = fileparts(to_process{1}); % Writes automatically in the parent directory of the first prediction folder
total_zplanes = size(list{1},1);
for z = 1:total_zplanes
for proc = 1:numel(to_process)
image_name = fullfile(to_process{proc}, list{proc}(z).name);
cumul_plane(:,:,proc) = imread(image_name); %Cumulate all average predictions of this plane
end
prob_map = uint8(mean(cumul_plane,3));

save_file_save = fullfile(outputdir, list{1}(z).name);
fprintf('Saving Image # %s of %s: %s\n', num2str(z), num2str(total_zplanes),save_file_save);
imwrite(prob_map, save_file_save);
clear cumul_plane prob_map;
pysemble = strcat(script_dir,filesep(),'scripts',filesep(),'functions',filesep(),'ensemble.py');

tempmat_infile = fullfile(fileparts(outputdir),'infolders.txt');
delete(tempmat_infile);

fid = fopen(tempmat_infile, 'a')
for fl = 1:numel(to_process)
fprintf(fid, strcat(fullfile(to_process{fl}),'\n'));
end
fclose(fid);

system(sprintf('%s %s %s',pysemble, tempmat_infile, outputdir));

fprintf('Elapsed time for merging predictions is %06d seconds.\n', round(toc));
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.6.2
v1.6.3rc1
102 changes: 102 additions & 0 deletions scripts/functions/ensemble.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/usr/bin/env python

"""
EnsemblePredictions for CDeep3M
Different predictions coming from folders (e.g. from 1fm, 3fm and 5fm) will be averaged here
flexible number of inputs
last argument has to be the outputdirectory where the average files will be stored
-----------------------------------------------------------------------------
NCMIR, UCSD -- Author: M Haberl -- Date: 10/2018
----------------------------------------------------------------------------
"""
import sys
import os
import argparse
import cv2
import requests
from joblib import Parallel, delayed
# from multiprocessing import Pool, TimeoutError
# import time
import numpy as np
from PIL import Image
from time import time

INSTANCE_TYPE_URL = 'http://169.254.169.254/latest/meta-data/instance-type'

def _get_number_of_tasks_to_run_based_on_instance_type(theargs):
"""Gets instance type and returns number of parallel
tasks to run based on that value. If none are found then
default value of 2 is used.
"""
try:
r = requests.get(theargs.instancetypeurl,
timeout=theargs.instancetypeurltimeout)
if r.status_code is 200:
if 'p3.2xlarge' in r.text:
return 4
if 'p3.8xlarge' in r.text:
return 12
if 'p3.16xlarge' in r.text:
return 20
except Exception as e:
sys.stderr.write('Got exception checking instance type: ' +
str(e) + '\n')
return 4


def _parse_arguments(desc, theargs):
"""Parses command line arguments using argparse
"""
help_formatter = argparse.RawDescriptionHelpFormatter
parser = argparse.ArgumentParser(description=desc,
formatter_class=help_formatter)
parser.add_argument('inputlistfile',
help='File containing list of paths')
parser.add_argument('outputfolder',
help='Path to write output in')
parser.add_argument('--instancetypeurl', default=INSTANCE_TYPE_URL,
help='URL to query for meta data instance type ' +
'(default ' + INSTANCE_TYPE_URL + ')')
parser.add_argument('--instancetypeurltimeout',default='1.0',type=float,
help='Timeout in seconds for checking instancetypeurl' +
' default 1.0')
return parser.parse_args(theargs)

desc = """
Given a file with a list of folders (inputlistfile), average the
identically named .png prediction images from each folder and write
the results into outputfolder.
"""

# Parse arguments
theargs = _parse_arguments(desc, sys.argv[1:])
outfolder = theargs.outputfolder

# Read the list of input prediction folders, one path per line.
# Use a context manager and avoid shadowing the builtin `file`.
with open(theargs.inputlistfile, "r") as listfp:
    infolders = [line.rstrip('\n') for line in listfp]

# The first folder defines the set of file names to merge; the other
# folders are expected to contain identically named .png files.
folder1 = infolders[0]
sys.stdout.write('Reading ' + str(folder1) + ' \n')
filelist1 = [fileb for fileb in os.listdir(folder1) if fileb.endswith('.png')]
print(infolders)
print(filelist1)
sys.stdout.write('Merging ' + str(len(filelist1)) + ' files \n')

def average_img(x):
    """Average plane *x* across all input folders and write the result.

    Loads the x-th .png from every folder listed in the module-level
    `infolders`, stacks the images along the channel axis, takes the
    per-pixel mean, and writes the uint8 result to `outfolder` under
    the same file name.

    :param x: index into the module-level `filelist1` name list
    """
    sys.stdout.write('Loading: ' + str(os.path.join(infolders[0], filelist1[x])) + '\n')
    t0 = time()
    temp = cv2.imread(os.path.join(infolders[0], filelist1[x]))
    for n in range(1, len(infolders)):
        # Stack every prediction of this plane along the 3rd axis so a
        # single mean covers all folders (and the replicated BGR
        # channels cv2.imread produces for grayscale pngs).
        temp = np.dstack((temp, cv2.imread(os.path.join(infolders[n], filelist1[x]))))
    # print() calls for Python 3 compatibility (the originals were
    # Python 2 print statements, a syntax error under Python 3).
    print(time() - t0)
    print(temp.shape)
    arr = np.array(np.mean(temp, axis=2), dtype=np.uint8)
    cv2.imwrite(os.path.join(outfolder, filelist1[x]), arr)
    return

# Decide how many workers to use for this instance, then fan the
# per-plane averaging out across them with joblib.
p_tasks = _get_number_of_tasks_to_run_based_on_instance_type(theargs)
sys.stdout.write('Running ' + str(p_tasks) + ' parallel tasks\n')
results = Parallel(n_jobs=p_tasks)(
    delayed(average_img)(plane) for plane in range(len(filelist1)))

0 comments on commit ef4e847

Please sign in to comment.