This repository has been archived by the owner on Feb 6, 2020. It is now read-only.

Backend feature integration #99

Open · wants to merge 18 commits into master
python/core/Makefile: 7 changes (5 additions, 2 deletions)

@@ -8,7 +8,7 @@ MKL_FLAGS = -static-intel -mkl=sequential -DZNN_USE_MKL_FFT -DZNN_USE_MKL_NATIVE_FFT
 FFTW_FLAGS = -lfftw3 -lfftw3f
 OPT_FLAGS = -O3 -std=c++1y -DZNN_CUBE_POOL_LOCKFREE -DNDEBUG
 SINGLE_FLAG = -DZNN_USE_FLOATS
-DONT_CACHE_FLAG = -DZNN_DONT_CACHE_FFTS
+NOCACHE_FLAG = -DZNN_DONT_CACHE_FFTS
 OTH_FLAGS = -Wall -shared -fPIC -Wno-unused-result -Wno-unused-local-typedefs -DBOOST_DISABLE_ASSERTS
 LIBS = -lpthread -pthread -lpython2.7 -lboost_python -lboost_numpy

@@ -20,7 +20,10 @@ endif
 pyznn: $(SFL)
 	$(CPP) -o $(ODIR)/pyznn.so $(SFL) $(CPP_FLAGS) $(INC_FLAGS) $(FFTW_FLAGS) $(LIB_FLAGS) $(OPT_FLAGS) $(SINGLE_FLAG) $(OTH_FLAGS) $(LIBS)

-.PHONY: double mkl mkl_forward mkl_double clean
+.PHONY: mkl_forward double mkl mkl_double clean

+mkl_forward: $(SFL)
+	/opt/intel/bin/icc -o $(ODIR)/pyznn.so $(SFL) $(MKL_FLAGS) $(INC_FLAGS) $(LIB_FLAGS) $(OPT_FLAGS) $(SINGLE_FLAG) $(NOCACHE_FLAG) $(OTH_FLAGS) $(LIBS)
+
 double: $(SFL)
 	$(CPP) -o $(ODIR)/pyznn.so $(SFL) $(CPP_FLAGS) $(INC_FLAGS) $(FFTW_FLAGS) $(LIB_FLAGS) $(OPT_FLAGS) $(OTH_FLAGS) $(LIBS)
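Note on the new target: mkl_forward reuses the MKL flags but adds $(NOCACHE_FLAG), i.e. -DZNN_DONT_CACHE_FFTS, which by its name disables FFT caching, presumably to bound memory in forward-only use. Invoke it as make mkl_forward; the Intel compiler path /opt/intel/bin/icc is hard-coded in the recipe.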
python/core/pyznn.cpp: 20 changes (12 additions, 8 deletions)

@@ -116,7 +116,10 @@ std::shared_ptr< network > CNet_Init(
         reinterpret_cast<std::int64_t*>(outsz_a.get_data())[2]
         );
     if ( tc == 0 )
-        tc = std::thread::hardware_concurrency();
+    {
+        tc = std::thread::hardware_concurrency();
+        std::cout << "thread number: " << tc << std::endl;
+    }

     // force fft or optimize
     if ( force_fft )
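One caveat the new logging does not cover: std::thread::hardware_concurrency() is itself permitted to return 0 when the value cannot be determined, in which case tc remains 0. A minimal defensive sketch (resolve_thread_count is a hypothetical helper, not part of this PR):

#include <cstddef>
#include <thread>

// Hypothetical helper: resolve a requested thread count, falling back to
// the detected hardware concurrency and clamping to at least one thread,
// since hardware_concurrency() may legally return 0.
inline std::size_t resolve_thread_count( std::size_t requested )
{
    if ( requested != 0 ) return requested;
    std::size_t hw = std::thread::hardware_concurrency();
    return hw ? hw : 1;
}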
@@ -160,11 +163,12 @@ std::shared_ptr< network > CNet_Init(
 std::shared_ptr<network> CNet_loadopts( bp::tuple const & opts,
                                         std::string const net_config_file,
                                         np::ndarray const & outsz_a,
-                                        std::size_t const tc,
+                                        std::size_t tc,
                                         bool const is_optimize = true,
                                         std::uint8_t const phs = 0,
                                         bool const force_fft = false )
 {
+    if ( tc == 0 ) tc = std::thread::hardware_concurrency();

     bp::list node_opts_list = bp::extract<bp::list>( opts[0] );
     bp::list edge_opts_list = bp::extract<bp::list>( opts[1] );
@@ -365,18 +369,18 @@ BOOST_PYTHON_MODULE(pyznn)
     bp::class_<network, boost::shared_ptr<network>, boost::noncopyable>("CNet",bp::no_init)
         .def("__init__", bp::make_constructor(&CNet_Init))
         .def("__init__", bp::make_constructor(&CNet_loadopts))
-       .def("get_fov", &CNet_fov)
-       .def("forward", &CNet_forward)
-       .def("backward", &CNet_backward)
-       .def("set_eta", &network::set_eta)
-       .def("set_phase", &CNet_set_phase)
+        .def("get_fov", &CNet_fov)
+        .def("forward", &CNet_forward)
+        .def("backward", &CNet_backward)
+        .def("set_eta", &network::set_eta)
+        .def("set_phase", &CNet_set_phase)
         .def("set_momentum", &network::set_momentum)
         .def("set_weight_decay", &network::set_weight_decay )
         .def("get_inputs_setsz", &CNet_get_inputs_setsz)
         .def("get_input_num", &CNet_get_input_num)
         .def("get_outputs_setsz", &CNet_get_outputs_setsz)
         .def("get_output_num", &CNet_get_output_num)
-       .def("get_opts", &CNet_getopts)
+        .def("get_opts", &CNet_getopts)
         ;
     def("get_rand_error", pyget_rand_error);
 }
python/core/pyznn_utils.hpp: 76 changes (64 additions, 12 deletions)

@@ -111,10 +111,10 @@ bp::tuple bias_string_to_np( std::string const & bin,
 //Same thing for convolution filters
 // Assumes the input size is THREE dimensional
 bp::tuple filter_string_to_np( std::string const & bin,
-                               std::vector<std::size_t> size,
-                               std::size_t nodes_in,
-                               std::size_t nodes_out,
-                               bp::object const & self)
+                               std::vector<std::size_t> size,
+                               std::size_t nodes_in,
+                               std::size_t nodes_out,
+                               bp::object const & self )
 {
     real const * data = reinterpret_cast<real const *>(bin.data());

@@ -129,7 +129,7 @@ bp::tuple filter_string_to_np( std::string const & bin,
         //values
         np::from_data(data,
                       np::dtype::get_builtin<real>(),
-                      bp::make_tuple(nodes_in, nodes_out, size[0],size[1],size[2]),
+                      bp::make_tuple(nodes_in,nodes_out,size[0],size[1],size[2]),
                       bp::make_tuple(nodes_out*size[0]*size[1]*size[2]*sizeof(real),
                                      size[0]*size[1]*size[2]*sizeof(real),
                                      size[1]*size[2]*sizeof(real),
@@ -140,7 +140,7 @@ bp::tuple filter_string_to_np( std::string const & bin,
         //momentum values
         np::from_data(momentum,
                       np::dtype::get_builtin<real>(),
-                      bp::make_tuple(nodes_in, nodes_out, size[0],size[1],size[2]),
+                      bp::make_tuple(nodes_in,nodes_out,size[0],size[1],size[2]),
                       bp::make_tuple(nodes_out*size[0]*size[1]*size[2]*sizeof(real),
                                      size[0]*size[1]*size[2]*sizeof(real),
                                      size[1]*size[2]*sizeof(real),
@@ -151,6 +151,45 @@ bp::tuple filter_string_to_np( std::string const & bin,
         );
 };

+// one-to-one edges
+bp::tuple one_to_one_filter_string_to_np( std::string const & bin,
+                                          std::vector<std::size_t> size,
+                                          std::size_t n,
+                                          bp::object const & self )
+{
+    real const * data = reinterpret_cast<real const *>(bin.data());
+
+    //momentum values stored immediately after array values
+    std::size_t gap = bin.size() / (2 * sizeof(real));
+    real const * momentum = data + gap;
+
+    //Debug
+    //print_data_string(data, bin.size() / (2 * sizeof(real)));
+
+    return bp::make_tuple(
+        //values
+        np::from_data(data,
+                      np::dtype::get_builtin<real>(),
+                      bp::make_tuple(n,size[0],size[1],size[2]),
+                      bp::make_tuple(size[0]*size[1]*size[2]*sizeof(real),
+                                     size[1]*size[2]*sizeof(real),
+                                     size[2]*sizeof(real),
+                                     sizeof(real)),
+                      self
+                      ).copy(),
+        //momentum values
+        np::from_data(momentum,
+                      np::dtype::get_builtin<real>(),
+                      bp::make_tuple(n,size[0],size[1],size[2]),
+                      bp::make_tuple(size[0]*size[1]*size[2]*sizeof(real),
+                                     size[1]*size[2]*sizeof(real),
+                                     size[2]*sizeof(real),
+                                     sizeof(real)),
+                      self
+                      ).copy()
+        );
+};
+
 //Finds the number of nodes for all node groups specified within a vector
 // of options. This is useful in importing the convolution filters
 std::map<std::string, std::size_t> extract_layer_sizes( std::vector<options> opts )
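Both converters hand np::from_data a shape tuple plus a hand-written tuple of byte strides for a C-contiguous block: the innermost axis steps sizeof(real) bytes, and each outer axis steps the product of all inner extents. A self-contained sketch of that computation (the element size and shape values are illustrative assumptions):

#include <array>
#include <cstddef>
#include <iostream>

int main()
{
    // Illustrative stand-ins: a 4-byte element (e.g. float) and an
    // (n, z, y, x) shape like the one passed to bp::make_tuple above.
    constexpr std::size_t elem = 4;
    std::array<std::size_t, 4> shape   = {8, 3, 5, 5};
    std::array<std::size_t, 4> strides = {};

    // Row-major (C-order) strides: the innermost axis moves one element,
    // each outer axis moves the product of all inner extents.
    std::size_t step = elem;
    for ( int d = 3; d >= 0; --d )
    {
        strides[d] = step;
        step *= shape[d];
    }

    for ( auto s : strides ) std::cout << s << ' ';  // prints: 300 100 20 4
    std::cout << '\n';
}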
@@ -214,9 +253,11 @@ bp::dict edge_opt_to_dict( options const opt,
 {
     bp::dict res;
     std::vector<std::size_t> size;
-    std::string input_layer = "";
+    std::string input_layer  = "";
     std::string output_layer = "";

+    auto type = opt.require_as<std::string>("type");
+
     //First do a conversion of all fields except
     // biases and filters to gather necessary information
     // (size of filters, # input and output filters)
@@ -255,13 +296,24 @@
             //Debug
             // res["raw_filters"] = p.second;
             //std::cout << opt.require_as<std::string>("name") << std::endl;
-            std::size_t nodes_in = layer_sizes[input_layer];
+            std::size_t nodes_in  = layer_sizes[input_layer];
             std::size_t nodes_out = layer_sizes[output_layer];

-            res[p.first] = filter_string_to_np(p.second, size,
-                                               nodes_in,
-                                               nodes_out,
-                                               self);
+            if ( type == "conv" )
+            {
+                res[p.first] = filter_string_to_np( p.second, size,
+                                                    nodes_in, nodes_out, self );
+            }
+            else if ( type == "normalize" || type == "scale" )
+            {
+                res[p.first] = one_to_one_filter_string_to_np( p.second, size,
+                                                               nodes_in, self );
+            }
+            else
+            {
+                throw std::logic_error(HERE() +
+                                       "unknown filter edges type: " + type);
+            }
         }
     }
     return res;
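The two branches return differently shaped arrays: filter_string_to_np views conv filters as (nodes_in, nodes_out, z, y, x), while one_to_one_filter_string_to_np returns (n, z, y, x), one filter per node, matching the normalize and scale edge types this dispatch introduces.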
python/front_end/zsample.py: 33 changes (5 additions, 28 deletions)

@@ -156,32 +156,9 @@ def get_random_sample(self):

         return ( subinputs, subtlbls, submsks )

-    def _get_balance_weight(self, arr, msk=None):
-        mask_empty = msk is None or msk.size == 0
-        if mask_empty:
-            values = arr
-        else:
-            values = arr[ np.nonzero(msk) ]
-
-        # number of nonzero elements
-        pn = float( np.count_nonzero(values) )
-        # total number of elements
-        num = float( np.size(values) )
-        # number of zero elements
-        zn = num - pn
-
-        if pn==0 or zn==0:
-            return 1,1
-        else:
-            # weight of positive and zero
-            wp = 0.5 * num / pn
-            wz = 0.5 * num / zn
-
-            return wp, wz
-
     # ZNNv1 uses different normalization
     # This method is only temporary (for reproducing paper results)
-    def _get_balance_weight_v1(self, arr, msk=None):
+    def _get_balance_weight(self, arr, msk=None):
         mask_empty = msk is None or msk.size == 0
         if mask_empty:
             values = arr
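For context on the consolidation: the removed variant weighted the two classes as wp = 0.5 * num / pn and wz = 0.5 * num / zn (pn nonzero voxels, zn zero voxels out of num total), so each class contributes half of the total weight mass. With num = 100 and pn = 20, for instance, wp = 2.5 and wz = 0.625, and indeed 20 * 2.5 = 80 * 0.625 = 50. The ZNNv1-style method that now takes over the _get_balance_weight name normalizes differently, per its comment, and is kept for reproducing paper results; its body is elided in this diff.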
@@ -354,9 +331,9 @@ def _prepare_rebalance_weights(self, taffs, tmsks):

             msk = tmsks[k] if tmsks[k].size != 0 else np.zeros((3,0,0,0))

-            self.zwps[k], self.zwzs[k] = self._get_balance_weight_v1(aff[2,:,:,:], msk[2,:,:,:])
-            self.ywps[k], self.ywzs[k] = self._get_balance_weight_v1(aff[1,:,:,:], msk[1,:,:,:])
-            self.xwps[k], self.xwzs[k] = self._get_balance_weight_v1(aff[0,:,:,:], msk[0,:,:,:])
+            self.zwps[k], self.zwzs[k] = self._get_balance_weight(aff[2,:,:,:], msk[2,:,:,:])
+            self.ywps[k], self.ywzs[k] = self._get_balance_weight(aff[1,:,:,:], msk[1,:,:,:])
+            self.xwps[k], self.xwzs[k] = self._get_balance_weight(aff[0,:,:,:], msk[0,:,:,:])

         return

@@ -459,7 +436,7 @@ def _rebalance_bdr(self, sublbl, submsk, wp, wz):

         # recompute weight for patch rebalance
         if self.pars['rebalance_mode'] and 'patch' in self.pars['rebalance_mode']:
-            wp, wz = self._get_balance_weight_v1( sublbl,submsk )
+            wp, wz = self._get_balance_weight( sublbl,submsk )

         if self.pars['rebalance_mode']:
             weight[0,:,:,:][sublbl[0,:,:,:]> 0] = wp
src/cpp/training_test.cpp: 26 changes (24 additions, 2 deletions)

@@ -44,11 +44,33 @@ int main(int argc, char** argv)

     size_t tc = std::thread::hardware_concurrency();

-    if ( argc == 6 )
+    if ( argc >= 6 )
     {
         tc = atoi(argv[5]);
     }

-    parallel_network::network::optimize(nodes, edges, {z,y,x}, tc , 10);
+    size_t n = 10;
+
+    if ( argc >= 7 )
+    {
+        n = atoi(argv[6]);
+    }
+
+    int forward = 0;
+
+    if ( argc == 8 )
+    {
+        forward = atoi(argv[7]);
+    }
+
+    if ( forward )
+    {
+        std::cout << "optimize forward" << std::endl;
+        parallel_network::network::optimize_forward(nodes,edges,{z,y,x},tc,n);
+    }
+    else
+    {
+        std::cout << "optimize training" << std::endl;
+        parallel_network::network::optimize(nodes,edges,{z,y,x},tc,n);
+    }

 }
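The test binary now accepts up to three optional trailing arguments instead of one: argv[5] sets the thread count tc (defaulting to std::thread::hardware_concurrency()), argv[6] sets the optimization round count n (previously hard-coded to 10), and a nonzero argv[7] selects optimize_forward instead of the full training optimize. Note that atoi performs no error checking, so a malformed argument silently becomes 0.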
src/include/cube/cube.hpp: 1 change (1 addition, 0 deletions)

@@ -49,6 +49,7 @@ template <typename T> using ccube_p = std::shared_ptr<ccube<T>>;
 template <typename T> using qube_p = std::shared_ptr<qube<T>>;
 template <typename T> using cqube_p = std::shared_ptr<cqube<T>>;

+template <typename T> using tensor = std::vector<cube_p<T>>;

 template <typename T>
 inline vec3i size( cube<T> const & a )
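The new tensor<T> alias names exactly the container returned by the read_tensor helper added to cube_io.hpp below: a std::vector of shared-pointer cubes.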
src/include/cube/cube_io.hpp: 32 changes (32 additions, 0 deletions)

@@ -43,6 +43,38 @@ inline cube_p<T> read( std::string const & fname, vec3i const & sz )
     return ret;
 }

+template<typename F, typename T>
+inline std::vector<cube_p<T>> read_tensor( std::string const & fname,
+                                           vec3i const & sz,
+                                           size_t n )
+{
+    std::vector<cube_p<T>> ret;
+
+    FILE* fvol = fopen(fname.c_str(), "rb"); // binary mode; "r" alone corrupts data where line endings are translated
+
+    STRONG_ASSERT(fvol);
+
+    for ( size_t i = 0; i < n; ++i )
+    {
+        auto c = get_cube<T>(sz);
+        F v;
+
+        for ( long_t z = 0; z < sz[0]; ++z )
+            for ( long_t y = 0; y < sz[1]; ++y )
+                for ( long_t x = 0; x < sz[2]; ++x )
+                {
+                    static_cast<void>(fread(&v, sizeof(F), 1, fvol));
+                    (*c)[z][y][x] = static_cast<T>(v); // stored on disk as F, converted to T
+                }
+
+        ret.push_back(c);
+    }
+
+    fclose(fvol);
+
+    return ret;
+}
+
 inline bool export_size_info( std::string const & fname,
                               vec3i const & sz, size_t n = 0 )
 {
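A self-contained sketch of the same read-and-widen pattern on a plain std::vector, with the fread return value checked rather than discarded (the file name, types, and count are illustrative assumptions, not part of the PR):

#include <cstdio>
#include <vector>

// Read `count` values stored on disk as type F, converting each to T.
// Returns fewer than `count` elements on an open failure or a short read.
template <typename F, typename T>
std::vector<T> read_raw( const char* fname, std::size_t count )
{
    std::vector<T> out;
    out.reserve(count);

    std::FILE* f = std::fopen(fname, "rb");  // binary mode
    if ( !f ) return out;

    F v;
    for ( std::size_t i = 0; i < count; ++i )
    {
        if ( std::fread(&v, sizeof(F), 1, f) != 1 ) break;  // short read
        out.push_back(static_cast<T>(v));
    }

    std::fclose(f);
    return out;
}

// e.g. read_raw<float, double>("vol.raw", 27) reads 27 single-precision
// values from a hypothetical file and widens them to double.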
src/include/cube/cube_operators.hpp: 32 changes (30 additions, 2 deletions)

@@ -362,6 +362,21 @@ inline cube_p<T> exp( cube<T> const & c )
     return r;
 }

+template<typename T>
+inline cube_p<T> sqrt( cube<T> const & c )
+{
+    auto r = get_cube<T>(size(c));
+    T* dest = r->data();
+    const T* src = c.data();
+
+    for ( size_t i = 0; i < c.num_elements(); ++i )
+    {
+        dest[i] = std::sqrt(src[i]);
+    }
+
+    return r;
+}
+
 template<typename T>
 inline cube_p<T> sparse_explode( cube<T> const & v,
                                  vec3i const & sparse,
@@ -615,8 +630,6 @@ inline cube_p<T> mirror_boundary( cube<T> const & c,
     auto rp = get_cube<T>(vec3i(rx,ry,rz));
     cube<T>& r = *rp;

-    r = 0;
-
     // copy original volume
     for ( long_t x = 0; x < vx; ++x )
     for ( long_t y = 0; y < vy; ++y )
@@ -653,4 +666,19 @@ inline cube_p<T> mirror_boundary( cube<T> const & c,
     return rp;
 }

+template<typename T>
+inline T mean( cube<T> const & c )
+{
+    ZI_ASSERT(c.num_elements());
+    return sum(c)/c.num_elements();
+}
+
+template<typename T>
+inline T variance( cube<T> const & c )
+{
+    auto m = mean(c);
+    auto squared = c*c;
+    return mean(*squared) - m*m;
+}
+
 }} // namespace znn::v4
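The new variance relies on the identity Var(x) = E[x^2] - (E[x])^2, evaluated with two passes over the cube. A tiny self-contained check of the arithmetic (values are illustrative); note that this shortcut can lose precision to cancellation when the mean is large relative to the spread, where a running (Welford-style) computation is the numerically stable alternative:

#include <cassert>
#include <vector>

int main()
{
    std::vector<double> v = {1.0, 2.0, 3.0};

    double s = 0.0, s2 = 0.0;
    for ( double x : v ) { s += x; s2 += x * x; }

    double m   = s / v.size();           // E[x] = 2
    double var = s2 / v.size() - m * m;  // E[x^2] - E[x]^2 = 14/3 - 4 = 2/3

    assert(var > 0.666 && var < 0.667);  // population variance
}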