CUDA interface

gpufit · Mar 23, 2018 · 433ce73 · 433ce73
1 parent cdcff3e
commit 433ce73
Show file tree

Hide file tree

Showing 12 changed files with 527 additions and 30 deletions.
diff --git a/Gpufit/Gpufit.def b/Gpufit/Gpufit.def
@@ -4,4 +4,5 @@ EXPORTS
     gpufit_get_last_error @2
     gpufit_get_cuda_version @3
     gpufit_cuda_available @4
-    gpufit_portable_interface @5
+    gpufit_portable_interface @5
+    gpufit_cuda_interface @6
diff --git a/Gpufit/constants.h b/Gpufit/constants.h
@@ -23,4 +23,6 @@ enum FitState { CONVERGED = 0, MAX_ITERATION = 1, SINGULAR_HESSIAN = 2, NEG_CURV
 // return state
 enum ReturnState { OK = 0, ERROR = -1 };
 
+enum DataLocation { HOST = 0, DEVICE = 1 };
+
 #endif
diff --git a/Gpufit/examples/CMakeLists.txt b/Gpufit/examples/CMakeLists.txt
@@ -7,8 +7,18 @@ function( add_example module name )
 	set_property( TARGET ${name} PROPERTY FOLDER GpufitExamples )
 endfunction()
 
+function( add_cuda_example module name )
+	cuda_add_executable( ${name} ${name}.cu )
+	target_link_libraries( ${name} ${module} )
+	set_property( TARGET ${name}
+		PROPERTY RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}" )
+	set_property( TARGET ${name} PROPERTY FOLDER GpufitExamples )
+endfunction()
+
 # Examples
 
 add_example( Gpufit Simple_Example )
 add_example( Gpufit Linear_Regression_Example )
 add_example( Gpufit Gauss_Fit_2D_Example )
+
+add_cuda_example( Gpufit CUDA_Interface_Example )
diff --git a/Gpufit/examples/CUDA_Interface_Example.cu b/Gpufit/examples/CUDA_Interface_Example.cu
@@ -0,0 +1,321 @@
+#include "../gpufit.h"
+
+#include <vector>
+#include <random>
+#include <iostream>
+#include <chrono>
+#include <numeric>
+#include <math.h>
+#include <cuda_runtime.h>
+#include <stdexcept>
+
+#define CUDA_CHECK_STATUS( cuda_function_call ) \
+    if (cudaError_t const status = cuda_function_call) \
+    { \
+        throw std::runtime_error( cudaGetErrorString( status ) ) ; \
+    }
+
+void generate_gauss_2d(
+    std::vector<float> const & x_coordinates,
+    std::vector<float> const & y_coordinates,
+    std::vector<float> const & gauss_params, 
+    std::vector<float> & output_values)
+{
+	// Generates a Gaussian 2D function at a set of X and Y coordinates.  The Gaussian is defined by
+    // an array of five parameters.
+
+	// x_coordinates: Vector of X coordinates.
+	// y_coordinates: Vector of Y coordinates.
+	// gauss_params:  Vector of function parameters.
+	// output_values: Output vector containing the values of the Gaussian function at the
+	//                corresponding X, Y coordinates.
+
+	// gauss_params[0]: Amplitude
+	// gauss_params[1]: Center X position
+	// guass_params[2]: Center Y position
+	// gauss_params[3]: Gaussian width (standard deviation)
+	// gauss_params[4]: Baseline offset
+
+	// This code assumes that x_coordinates.size == y_coordinates.size == output_values.size
+
+	for (size_t i = 0; i < x_coordinates.size(); i++)
+	{
+
+		float arg = -(   (x_coordinates[i] - gauss_params[1]) * (x_coordinates[i] - gauss_params[1]) 
+		               + (y_coordinates[i] - gauss_params[2]) * (y_coordinates[i] - gauss_params[2])   ) 
+					 / (2.f * gauss_params[3] * gauss_params[3]);
+
+		output_values[i] = gauss_params[0] * exp(arg) + gauss_params[4];
+
+	}
+}
+
+void gauss_fit_2d_example()
+{
+	/*
+        This example generates test data in form of 10000 two dimensional Gaussian
+        peaks with the size of 5x5 data points per peak. It is noised by Poisson
+        distributed noise. The initial guesses were randomized, within a specified
+        range of the true value. The GAUSS_2D model is fitted to the test data sets
+        using the MLE estimator.
+
+        The console output shows
+         - the execution time,
+         - the ratio of converged fits including ratios of not converged fits for 
+           different reasons,
+         - the values of the true parameters and the mean values of the fitted
+           parameters including their standard deviation,
+         - the mean chi square value
+         - and the mean number of iterations needed.
+
+		True parameters and noise and number of fits is the same as for the Matlab/Python 2D Gaussian examples.
+	*/
+
+
+	// number of fits, fit points and parameters
+	size_t const n_fits = 10;
+	size_t const size_x = 50;
+	size_t const n_points_per_fit = size_x * size_x;
+	size_t const n_model_parameters = 5;
+
+	// true parameters (amplitude, center x position, center y position, width, offset)
+	std::vector< float > true_parameters{ 10.f, 14.5f, 14.5f, 3.f, 10.f}; 
+
+	std::cout << "generate example data" << std::endl;
+
+	// initialize random number generator
+	std::mt19937 rng;
+	rng.seed(0);
+	std::uniform_real_distribution< float> uniform_dist(0, 1);
+
+	// initial parameters (randomized)
+	std::vector< float > initial_parameters(n_fits * n_model_parameters);
+	for (size_t i = 0; i < n_fits; i++)
+	{
+		for (size_t j = 0; j < n_model_parameters; j++)
+		{
+			if (j == 1 || j == 2)
+			{
+				initial_parameters[i * n_model_parameters + j]
+                    = true_parameters[j] + true_parameters[3] 
+                    * (-0.2f + 0.4f * uniform_dist(rng));
+			}
+			else
+			{
+				initial_parameters[i * n_model_parameters + j]
+                    = true_parameters[j] * (0.8f + 0.4f * uniform_dist(rng));
+			}
+		}
+	}
+
+	// generate x and y values
+	std::vector< float > x(n_points_per_fit);
+	std::vector< float > y(n_points_per_fit);
+	for (size_t i = 0; i < size_x; i++)
+	{
+		for (size_t j = 0; j < size_x; j++) {
+			x[i * size_x + j] = static_cast<float>(j);
+			y[i * size_x + j] = static_cast<float>(i);
+		}
+	}
+
+	// generate test data with Poisson noise
+	std::vector< float > temp(n_points_per_fit);
+	generate_gauss_2d(x, y, true_parameters, temp);
+
+	std::vector< float > data(n_fits * n_points_per_fit);
+	for (size_t i = 0; i < n_fits; i++)
+	{
+		for (size_t j = 0; j < n_points_per_fit; j++)
+		{
+			std::poisson_distribution< int > poisson_dist(temp[j]);
+			data[i * n_points_per_fit + j] = static_cast<float>(poisson_dist(rng));
+		}
+	}
+
+	// tolerance
+	float const tolerance = 0.001f;
+
+	// maximum number of iterations
+	int const max_number_iterations = 20;
+
+	// estimator ID
+	int const estimator_id = MLE;
+
+	// model ID
+	int const model_id = GAUSS_2D;
+
+	// parameters to fit (all of them)
+	std::vector< int > parameters_to_fit(n_model_parameters, 1);
+
+	// output parameters
+	std::vector< float > output_parameters(n_fits * n_model_parameters);
+	std::vector< int > output_states(n_fits);
+	std::vector< float > output_chi_square(n_fits);
+	std::vector< int > output_number_iterations(n_fits);
+
+    float * gpu_data;
+    CUDA_CHECK_STATUS(cudaMalloc(
+        &gpu_data,
+        data.size() * sizeof(float)));
+    CUDA_CHECK_STATUS(cudaMemcpy(
+        gpu_data,
+        data.data(),
+        data.size() * sizeof(float),
+        cudaMemcpyHostToDevice));
+
+    float * gpu_initial_parameters;
+    CUDA_CHECK_STATUS(cudaMalloc(
+        &gpu_initial_parameters,
+        initial_parameters.size() * sizeof(float)));
+    CUDA_CHECK_STATUS(cudaMemcpy(
+        gpu_initial_parameters,
+        initial_parameters.data(),
+        initial_parameters.size() * sizeof(float),
+        cudaMemcpyHostToDevice));
+
+    float * gpu_weights;
+    CUDA_CHECK_STATUS(cudaMalloc(
+        &gpu_weights,
+        data.size() * sizeof(float)));
+    cudaMemset(gpu_weights, 1, data.size() * sizeof(float));
+
+    char * gpu_user_info;
+    CUDA_CHECK_STATUS(cudaMalloc(
+        &gpu_user_info,
+        data.size() * sizeof(float)));
+
+	// call to gpufit (C interface)
+	std::chrono::high_resolution_clock::time_point time_0 = std::chrono::high_resolution_clock::now();
+	int const status = gpufit_cuda_interface
+        (
+            n_fits,
+            n_points_per_fit,
+            gpu_data,
+            gpu_weights,
+            model_id,
+            gpu_initial_parameters,
+            tolerance,
+            max_number_iterations,
+            parameters_to_fit.data(),
+            estimator_id,
+            data.size() * sizeof(float),
+            gpu_user_info,
+            output_parameters.data(),
+            output_states.data(),
+            output_chi_square.data(),
+            output_number_iterations.data()
+        );
+	std::chrono::high_resolution_clock::time_point time_1 = std::chrono::high_resolution_clock::now();
+
+    CUDA_CHECK_STATUS(cudaFree(gpu_data));
+    CUDA_CHECK_STATUS(cudaFree(gpu_initial_parameters));
+
+	// check status
+	if (status != ReturnState::OK)
+	{
+		throw std::runtime_error(gpufit_get_last_error());
+	}
+
+	// print execution time
+	std::cout << "execution time "
+        << std::chrono::duration_cast<std::chrono::milliseconds>(time_1 - time_0).count() << " ms" << std::endl;
+
+	// get fit states
+	std::vector< int > output_states_histogram(5, 0);
+	for (std::vector< int >::iterator it = output_states.begin(); it != output_states.end(); ++it)
+	{
+		output_states_histogram[*it]++;
+	}
+
+	std::cout << "ratio converged              " << (float)output_states_histogram[0] / n_fits << "\n";
+	std::cout << "ratio max iteration exceeded " << (float)output_states_histogram[1] / n_fits << "\n";
+	std::cout << "ratio singular hessian       " << (float)output_states_histogram[2] / n_fits << "\n";
+	std::cout << "ratio neg curvature MLE      " << (float)output_states_histogram[3] / n_fits << "\n";
+	std::cout << "ratio gpu not read           " << (float)output_states_histogram[4] / n_fits << "\n";
+
+	// compute mean of fitted parameters for converged fits
+	std::vector< float > output_parameters_mean(n_model_parameters, 0);
+	for (size_t i = 0; i != n_fits; i++)
+	{
+		if (output_states[i] == FitState::CONVERGED)
+		{
+			for (size_t j = 0; j < n_model_parameters; j++)
+			{
+				output_parameters_mean[j] += output_parameters[i * n_model_parameters + j];
+			}
+		}
+	}
+	// normalize
+	for (size_t j = 0; j < n_model_parameters; j++)
+	{
+		output_parameters_mean[j] /= output_states_histogram[0];
+	}
+
+	// compute std of fitted parameters for converged fits
+	std::vector< float > output_parameters_std(n_model_parameters, 0);
+	for (size_t i = 0; i != n_fits; i++)
+	{
+		if (output_states[i] == FitState::CONVERGED)
+		{
+			for (size_t j = 0; j < n_model_parameters; j++)
+			{
+				output_parameters_std[j]
+                    += (output_parameters[i * n_model_parameters + j] - output_parameters_mean[j])
+                    *  (output_parameters[i * n_model_parameters + j] - output_parameters_mean[j]);
+			}
+		}
+	}
+	// normalize and take square root
+	for (size_t j = 0; j < n_model_parameters; j++)
+	{
+		output_parameters_std[j] = sqrt(output_parameters_std[j] / output_states_histogram[0]);
+	}
+
+	// print true value, fitted mean and std for every parameter
+	for (size_t j = 0; j < n_model_parameters; j++)
+	{
+		std::cout
+            << "parameter "     << j
+            << " true "         << true_parameters[j]
+            << " fitted mean "  << output_parameters_mean[j]
+            << " std "          << output_parameters_std[j] << std::endl;
+	}
+
+	// compute mean chi-square for those converged
+	float  output_chi_square_mean = 0;
+	for (size_t i = 0; i != n_fits; i++)
+	{
+		if (output_states[i] == FitState::CONVERGED)
+		{
+			output_chi_square_mean += output_chi_square[i];
+		}
+	}
+	output_chi_square_mean /= static_cast<float>(output_states_histogram[0]);
+	std::cout << "mean chi square " << output_chi_square_mean << std::endl;
+
+	// compute mean number of iterations for those converged
+	float  output_number_iterations_mean = 0;
+	for (size_t i = 0; i != n_fits; i++)
+	{
+		if (output_states[i] == FitState::CONVERGED)
+		{
+			output_number_iterations_mean += static_cast<float>(output_number_iterations[i]);
+		}
+	}
+	// normalize
+	output_number_iterations_mean /= static_cast<float>(output_states_histogram[0]);
+	std::cout << "mean number of iterations " << output_number_iterations_mean << std::endl;
+
+}
+
+int main(int argc, char *argv[])
+{
+	gauss_fit_2d_example();
+
+    std::cout << std::endl << "Example completed!" << std::endl;
+    std::cout << "Press ENTER to exit" << std::endl;
+    std::getchar();
+
+	return 0;
+}