Skip to content

Commit

Permalink
Merge pull request #183 from AdaptiveParticles/patch_182
Browse files Browse the repository at this point in the history
fix #182
  • Loading branch information
joeljonsson authored Nov 10, 2023
2 parents 2e95a62 + 1b3f29f commit c5e4310
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 16 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ option(APR_BUILD_STATIC_LIB "Builds static library" ON)
option(APR_BUILD_EXAMPLES "Build APR examples" OFF)
option(APR_USE_LIBTIFF "Use LibTIFF" ON)
option(APR_TESTS "Build APR tests" OFF)
option(APR_PREFER_EXTERNAL_GTEST "When found, use the installed GTEST libs instead of included sources" ON)
option(APR_PREFER_EXTERNAL_GTEST "When found, use the installed GTEST libs instead of included sources" OFF)
option(APR_PREFER_EXTERNAL_BLOSC "When found, use the installed BLOSC libs instead of included sources" OFF)
option(APR_USE_CUDA "should APR use CUDA? (experimental - under development)" OFF)
option(APR_USE_OPENMP "should APR use OpenMP?" ON)
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ cmake -DAPR_USE_OPENMP=OFF ..
| APR_TESTS | Build unit tests | OFF |
| APR_BENCHMARK | Build executable performance benchmarks | OFF |
| APR_USE_LIBTIFF | Enable LibTIFF (Required for tests and examples) | ON |
| APR_PREFER_EXTERNAL_GTEST | Use installed gtest instead of included sources | ON |
| APR_PREFER_EXTERNAL_BLOSC | Use installed blosc instead of included sources | ON |
| APR_PREFER_EXTERNAL_GTEST | Use installed gtest instead of included sources | OFF |
| APR_PREFER_EXTERNAL_BLOSC | Use installed blosc instead of included sources | OFF |
| APR_USE_OPENMP | Enable multithreading via OpenMP | ON |
| APR_USE_CUDA | Enable CUDA functionality (under development) | OFF |
| APR_DENOISE | Enable denoising code (requires Eigen3) | OFF |
Expand Down
4 changes: 0 additions & 4 deletions src/data_structures/APR/APR.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,6 @@ class APR {
* @param with_tree include the tree access
*/
void init_cuda(bool with_tree=true) {
gpuAccess.genInfo = &aprInfo;
gpuTreeAccess.genInfo = &treeInfo;
linearAccess.genInfo = &aprInfo;
linearAccessTree.genInfo = &treeInfo;
auto apr_helper = gpuAPRHelper();
if(with_tree) {
auto tree_helper = gpuTreeHelper();
Expand Down
8 changes: 5 additions & 3 deletions src/data_structures/APR/access/GPUAccess.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class GPUAccess {
void init_level_xz_vec(VectorData<uint64_t>& level_xz_vec);

GenInfo* genInfo;
uint64_t total_number_particles() { return genInfo->total_number_particles; }
uint64_t total_number_particles() const { return genInfo->total_number_particles; }

int level_max() const { return genInfo->l_max; }
int level_min() const { return genInfo->l_min; }
Expand Down Expand Up @@ -67,6 +67,7 @@ class GPUAccessHelper {
gpuAccess->init_y_vec(linearAccess->y_vec);
gpuAccess->init_level_xz_vec(linearAccess->level_xz_vec);
gpuAccess->init_xz_end_vec(linearAccess->xz_end_vec);
gpuAccess->genInfo = linearAccess->genInfo;
gpuAccess->copy2Device();
gpuAccess->initialized = true;
}
Expand All @@ -77,6 +78,7 @@ class GPUAccessHelper {
gpuAccess->init_y_vec(linearAccess->y_vec);
gpuAccess->init_level_xz_vec(linearAccess->level_xz_vec);
gpuAccess->init_xz_end_vec(linearAccess->xz_end_vec);
gpuAccess->genInfo = linearAccess->genInfo;
gpuAccess->copy2Device(total_number_particles(tree_access.level_max()), tree_access.gpuAccess);
gpuAccess->initialized = true;
}
Expand All @@ -86,9 +88,9 @@ class GPUAccessHelper {
gpuAccess->copy2Host();
}

uint64_t total_number_particles() { return gpuAccess->genInfo->total_number_particles; }
uint64_t total_number_particles() const { return gpuAccess->total_number_particles(); }

uint64_t total_number_particles(const int level) {
uint64_t total_number_particles(const int level) const {
uint64_t index = linearAccess->level_xz_vec[level] + linearAccess->x_num(level) - 1 + (linearAccess->z_num(level)-1)*linearAccess->x_num(level);
return linearAccess->xz_end_vec[index];
}
Expand Down
6 changes: 2 additions & 4 deletions src/numerics/APRDownsampleGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -899,8 +899,7 @@ __global__ void _fill_ne_rows_tree_cuda(const uint64_t* __restrict__ level_xz_ve
template<int blockSize_z, int blockSize_x>
void compute_ne_rows_tree_cuda(GPUAccessHelper& tree_access, VectorData<int>& ne_count, ScopedCudaMemHandler<int*, JUST_ALLOC>& ne_rows_gpu) {

ne_count.resize(tree_access.level_max() + 3);
ne_count[0] = 0;
ne_count.resize(tree_access.level_max() + 3, 0);

int z_blocks_max = (tree_access.z_num(tree_access.level_max()) + blockSize_z - 1) / blockSize_z;
int num_levels = tree_access.level_max() - tree_access.level_min() + 1;
Expand Down Expand Up @@ -979,8 +978,7 @@ void compute_ne_rows_tree_cuda(GPUAccessHelper& tree_access, VectorData<int>& ne


void compute_ne_rows_tree(GPUAccessHelper& tree_access, VectorData<int>& ne_counter, VectorData<int>& ne_rows) {
ne_counter.resize(tree_access.level_max() + 3);
ne_counter[0] = 0;
ne_counter.resize(tree_access.level_max() + 3, 0);

int z = 0;
int x = 0;
Expand Down
4 changes: 2 additions & 2 deletions src/numerics/miscCuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ __global__ void fill_ne_rows_cuda(const uint64_t* level_xz_vec,
template<int blockSize_z, int blockSize_x>
void compute_ne_rows_cuda(GPUAccessHelper& access, VectorData<int>& ne_count, ScopedCudaMemHandler<int*, JUST_ALLOC>& ne_rows_gpu, int blockSize) {

ne_count.resize(access.level_max()+2);
ne_count.resize(access.level_max()+2, 0);

int stride = blockSize_z * blockSize;

Expand Down Expand Up @@ -360,7 +360,7 @@ inline void add_nonempty(GPUAccessHelper& access, uint64_t& counter, VectorData<


void compute_ne_rows(GPUAccessHelper& access, VectorData<int>& ne_counter, VectorData<int>& ne_rows, int block_size) {
ne_counter.resize(access.level_max()+2);
ne_counter.resize(access.level_max()+2, 0);

int z = 0;
int x = 0;
Expand Down

0 comments on commit c5e4310

Please sign in to comment.