-
Notifications
You must be signed in to change notification settings - Fork 11
/
mp6.cc
103 lines (74 loc) · 3.35 KB
/
mp6.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#include <wb.h>
// Check ec2-174-129-21-232.compute-1.amazonaws.com:8080/mp/6 for more information
/*
 * wbCheck(stmt): evaluate a CUDA runtime call and bail out of the enclosing
 * function with -1 if it did not return cudaSuccess.
 *
 * - `stmt` is parenthesized so that any expression can be passed safely.
 * - The result is stored in a uniquely named local (wbCheckErr_) so that it
 *   cannot shadow a caller variable named `err` that appears inside `stmt`.
 * - The human-readable CUDA error string is logged alongside the statement
 *   text so the failure can be diagnosed from the log alone.
 *
 * Must only be used inside functions that return an int-compatible type.
 */
#define wbCheck(stmt) do { \
    cudaError_t wbCheckErr_ = (stmt); \
    if (wbCheckErr_ != cudaSuccess) { \
        wbLog(ERROR, "Failed to run stmt ", #stmt); \
        wbLog(ERROR, "Got CUDA error ... ", cudaGetErrorString(wbCheckErr_)); \
        return -1; \
    } \
} while(0)
/* Side length of the (square) convolution mask. */
#define Mask_width 5
/*
 * Number of mask cells on each side of the centre element.
 * The expansion is parenthesized so that the macro behaves as a single value
 * inside larger expressions (e.g. `2 * Mask_radius` is 4, not 2*5/2 == 5).
 */
#define Mask_radius (Mask_width / 2)
//@@ INSERT CODE HERE
/*
 * Host driver for the image-convolution MP.
 *
 * Reads an input image (input file 0) and a 5x5 mask (input file 1), allocates
 * device buffers for the input image, the output image and the mask, copies
 * the inputs to the device, leaves a slot for the kernel launch, copies the
 * output image back to the host and hands it to wbSolution for checking.
 *
 * Returns 0 on success, or -1 (via wbCheck) as soon as any CUDA runtime call
 * fails. On such an early exit the remaining buffers are not released
 * explicitly; the process is about to terminate anyway.
 */
int main(int argc, char* argv[]) {
    wbArg_t arg;
    int maskRows;
    int maskColumns;
    int imageChannels;
    int imageWidth;
    int imageHeight;
    const char * inputImageFile;
    const char * inputMaskFile;
    wbImage_t inputImage;
    wbImage_t outputImage;
    float * hostInputImageData;
    float * hostOutputImageData;
    float * hostMaskData;
    /* Device pointers start out null so they never hold indeterminate values. */
    float * deviceInputImageData = NULL;
    float * deviceOutputImageData = NULL;
    float * deviceMaskData = NULL;

    arg = wbArg_read(argc, argv); /* parse the input arguments */

    inputImageFile = wbArg_getInputFile(arg, 0);
    inputMaskFile = wbArg_getInputFile(arg, 1);

    inputImage = wbImport(inputImageFile);
    hostMaskData = (float *) wbImport(inputMaskFile, &maskRows, &maskColumns);

    assert(maskRows == 5); /* mask height is fixed to 5 in this mp */
    assert(maskColumns == 5); /* mask width is fixed to 5 in this mp */

    imageWidth = wbImage_getWidth(inputImage);
    imageHeight = wbImage_getHeight(inputImage);
    imageChannels = wbImage_getChannels(inputImage);

    outputImage = wbImage_new(imageWidth, imageHeight, imageChannels);

    hostInputImageData = wbImage_getData(inputImage);
    hostOutputImageData = wbImage_getData(outputImage);

    /*
     * Compute the buffer sizes once, widening to size_t before multiplying so
     * that the product cannot overflow `int` for large images.
     */
    const size_t imageBytes =
        (size_t) imageWidth * (size_t) imageHeight * (size_t) imageChannels * sizeof(float);
    const size_t maskBytes =
        (size_t) maskRows * (size_t) maskColumns * sizeof(float);

    wbTime_start(GPU, "Doing GPU Computation (memory + compute)");

    wbTime_start(GPU, "Doing GPU memory allocation");
    wbCheck(cudaMalloc((void **) &deviceInputImageData, imageBytes));
    wbCheck(cudaMalloc((void **) &deviceOutputImageData, imageBytes));
    wbCheck(cudaMalloc((void **) &deviceMaskData, maskBytes));
    wbTime_stop(GPU, "Doing GPU memory allocation");

    wbTime_start(Copy, "Copying data to the GPU");
    wbCheck(cudaMemcpy(deviceInputImageData,
                       hostInputImageData,
                       imageBytes,
                       cudaMemcpyHostToDevice));
    wbCheck(cudaMemcpy(deviceMaskData,
                       hostMaskData,
                       maskBytes,
                       cudaMemcpyHostToDevice));
    wbTime_stop(Copy, "Copying data to the GPU");

    wbTime_start(Compute, "Doing the computation on the GPU");
    //@@ INSERT CODE HERE
    /* Kernel launches do not return a status: pick up launch errors here. */
    wbCheck(cudaGetLastError());
    /*
     * Launches are asynchronous; wait for completion so that the Compute
     * timer covers the actual execution and any in-kernel fault is reported
     * at this point rather than by the copy below.
     */
    wbCheck(cudaDeviceSynchronize());
    wbTime_stop(Compute, "Doing the computation on the GPU");

    wbTime_start(Copy, "Copying data from the GPU");
    wbCheck(cudaMemcpy(hostOutputImageData,
                       deviceOutputImageData,
                       imageBytes,
                       cudaMemcpyDeviceToHost));
    wbTime_stop(Copy, "Copying data from the GPU");

    wbTime_stop(GPU, "Doing GPU Computation (memory + compute)");

    wbSolution(arg, outputImage);

    /*
     * Cleanup is deliberately not wrapped in wbCheck: an early return on one
     * failed free would skip releasing the remaining resources.
     */
    cudaFree(deviceInputImageData);
    cudaFree(deviceOutputImageData);
    cudaFree(deviceMaskData);

    free(hostMaskData);
    wbImage_delete(outputImage);
    wbImage_delete(inputImage);

    return 0;
}