Skip to content

Commit

Permalink
improve ensemble speed for large images
Browse files Browse the repository at this point in the history
  • Loading branch information
haberlmatt committed Oct 15, 2018
1 parent 212e616 commit ef4e847
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 22 deletions.
34 changes: 13 additions & 21 deletions EnsemblePredictions.m
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,14 @@
% last argument has to be the outputdirectory where the average files are stored
%
% -----------------------------------------------------------------------------
%% NCMIR, UCSD -- Author: M Haberl -- Data: 10/2017
%% NCMIR, UCSD -- Author: M Haberl -- Date: 10/2017 -- Update: 10/2018
% -----------------------------------------------------------------------------
%

%% Initialize
pkg load hdf5oct
pkg load image

script_dir = fileparts(make_absolute_filename(program_invocation_name()));
addpath(genpath(script_dir));
addpath(genpath(strcat(script_dir,filesep(),'scripts',filesep())));
addpath(genpath(strcat(script_dir,filesep(),'scripts',filesep(),'functions')));
tic

arg_list = argv ();
Expand All @@ -29,7 +25,7 @@
for i = 1:(numel(arg_list)-1)
to_process{i} = arg_list{i};
if ~isdir(arg_list{i})
fprintf('%s not a directory\nPlease use: EnsemblePredictions ./inputdir1 ./inputdir2 ./inputdir3 ./outputdir\n',arg_list{i});
fprintf('%s not a directory\nPlease check if predictions ran successfully or ensure to use: EnsemblePredictions ./inputdir1 ./inputdir2 ./inputdir3 ./outputdir\n',arg_list{i});
return
end
list{i} = filter_files(read_files_in_folder(to_process{i}),'.png');
Expand All @@ -40,21 +36,17 @@

%% =============== Generate ensemble predictions =================================

%merged_file_save=fullfile(outfolder, 'EnsemblePredict.tiff');
%if exist(merged_file_save, 'file'),delete(merged_file_save); end
%outputdir = fileparts(to_process{1}); % Writes automatically in the parent directory of the first prediction folder
total_zplanes = size(list{1},1);
for z = 1:total_zplanes
for proc = 1:numel(to_process)
image_name = fullfile(to_process{proc}, list{proc}(z).name);
cumul_plane(:,:,proc) = imread(image_name); %Cumulate all average predictions of this plane
end
prob_map = uint8(mean(cumul_plane,3));

save_file_save = fullfile(outputdir, list{1}(z).name);
fprintf('Saving Image # %s of %s: %s\n', num2str(z), num2str(total_zplanes),save_file_save);
imwrite(prob_map, save_file_save);
clear cumul_plane prob_map;
pysemble = strcat(script_dir,filesep(),'scripts',filesep(),'functions',filesep(),'ensemble.py');

tempmat_infile = fullfile(fileparts(outputdir),'infolders.txt');
delete(tempmat_infile);

fid = fopen(tempmat_infile, 'a')
for fl = 1:numel(to_process)
fprintf(fid, strcat(fullfile(to_process{fl}),'\n'));
end
fclose(fid);

system(sprintf('%s %s %s',pysemble, tempmat_infile, outputdir));

fprintf('Elapsed time for merging predictions is %06d seconds.\n', round(toc));
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.6.2
v1.6.3rc1
102 changes: 102 additions & 0 deletions scripts/functions/ensemble.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/usr/bin/env python

"""
EnsemblePredictions for CDeep3M
Different predictions coming from folders (e.g. from 1fm, 3fm and 5fm) will be averaged here
flexible number of inputs
last argument has to be the outputdirectory where the average files will be stored
-----------------------------------------------------------------------------
NCMIR, UCSD -- Author: M Haberl -- Date: 10/2018
----------------------------------------------------------------------------
"""
import sys
import os
import argparse
import cv2
import requests
from joblib import Parallel, delayed
# from multiprocessing import Pool, TimeoutError
# import time
import numpy as np
from PIL import Image
from time import time

INSTANCE_TYPE_URL = 'http://169.254.169.254/latest/meta-data/instance-type'

def _get_number_of_tasks_to_run_based_on_instance_type(theargs):
"""Gets instance type and returns number of parallel
tasks to run based on that value. If none are found then
default value of 2 is used.
"""
try:
r = requests.get(theargs.instancetypeurl,
timeout=theargs.instancetypeurltimeout)
if r.status_code is 200:
if 'p3.2xlarge' in r.text:
return 4
if 'p3.8xlarge' in r.text:
return 12
if 'p3.16xlarge' in r.text:
return 20
except Exception as e:
sys.stderr.write('Got exception checking instance type: ' +
str(e) + '\n')
return 4


def _parse_arguments(desc, theargs):
"""Parses command line arguments using argparse
"""
help_formatter = argparse.RawDescriptionHelpFormatter
parser = argparse.ArgumentParser(description=desc,
formatter_class=help_formatter)
parser.add_argument('inputlistfile',
help='File containing list of paths')
parser.add_argument('outputfolder',
help='Path to write output in')
parser.add_argument('--instancetypeurl', default=INSTANCE_TYPE_URL,
help='URL to query for meta data instance type ' +
'(default ' + INSTANCE_TYPE_URL + ')')
parser.add_argument('--instancetypeurltimeout',default='1.0',type=float,
help='Timeout in seconds for checking instancetypeurl' +
' default 1.0')
return parser.parse_args(theargs)

desc = """
Given a file with a list of folders (inputlistfile), average the
identically named .png prediction images from each folder and write
the results into outputfolder.
"""

# Parse arguments
theargs = _parse_arguments(desc, sys.argv[1:])
outfolder = theargs.outputfolder

# Read the list of input prediction folders, one path per line.
# Use a context manager and avoid shadowing the builtin `file`.
with open(theargs.inputlistfile, "r") as listfp:
    infolders = [line.rstrip('\n') for line in listfp]

# The first folder defines the set of file names to merge; the other
# folders are expected to contain identically named .png files.
folder1 = infolders[0]
sys.stdout.write('Reading ' + str(folder1) + ' \n')
filelist1 = [fileb for fileb in os.listdir(folder1) if fileb.endswith('.png')]
print(infolders)
print(filelist1)
sys.stdout.write('Merging ' + str(len(filelist1)) + ' files \n')

def average_img(x):
    """Average plane *x* across all input folders and write the result.

    Loads the x-th .png from every folder listed in the module-level
    `infolders`, stacks the images along the channel axis, takes the
    per-pixel mean, and writes the uint8 result to `outfolder` under
    the same file name.

    :param x: index into the module-level `filelist1` name list
    """
    sys.stdout.write('Loading: ' + str(os.path.join(infolders[0], filelist1[x])) + '\n')
    t0 = time()
    temp = cv2.imread(os.path.join(infolders[0], filelist1[x]))
    for n in range(1, len(infolders)):
        # Stack every prediction of this plane along the 3rd axis so a
        # single mean covers all folders (and the replicated BGR
        # channels cv2.imread produces for grayscale pngs).
        temp = np.dstack((temp, cv2.imread(os.path.join(infolders[n], filelist1[x]))))
    # print() calls for Python 3 compatibility (the originals were
    # Python 2 print statements, a syntax error under Python 3).
    print(time() - t0)
    print(temp.shape)
    arr = np.array(np.mean(temp, axis=2), dtype=np.uint8)
    cv2.imwrite(os.path.join(outfolder, filelist1[x]), arr)
    return

# Decide how many workers to use for this instance, then fan the
# per-plane averaging out across them with joblib.
p_tasks = _get_number_of_tasks_to_run_based_on_instance_type(theargs)
sys.stdout.write('Running ' + str(p_tasks) + ' parallel tasks\n')
results = Parallel(n_jobs=p_tasks)(
    delayed(average_img)(plane) for plane in range(len(filelist1)))

0 comments on commit ef4e847

Please sign in to comment.