Skip to content

Commit

Permalink
Fixes for CUDA build.
Browse files Browse the repository at this point in the history
  • Loading branch information
elliottslaughter committed Jul 5, 2024
1 parent 888015a commit 4447765
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 43 deletions.
6 changes: 5 additions & 1 deletion src/CudaHelp.hh
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
#define MIN_CTAS_PER_SM 4
#define MAX_REDUCTION_CTAS 1024

#ifdef __CUDACC__
#ifdef USE_CUDA
#include <cuda_runtime.h>
#include "legion.h"
#ifndef __CUDA_HD__
#define __CUDA_HD__ __host__ __device__
#endif

#ifdef __CUDACC__
template<typename REDUCTION>
__device__ __forceinline__
void reduce_double(Legion::DeferredReduction<REDUCTION> result, double value)
Expand Down Expand Up @@ -39,6 +42,7 @@ void reduce_double(Legion::DeferredReduction<REDUCTION> result, double value)
__threadfence_system();
}
}
#endif

#else
#define __CUDA_HD__
Expand Down
10 changes: 5 additions & 5 deletions src/Hydro.cc
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,8 @@ Hydro::Hydro(
tts = new TTS(inp, this);
qcs = new QCS(inp, this);

const double2 vfixx = double2(1., 0.);
const double2 vfixy = double2(0., 1.);
const double2 vfixx = make_double2(1., 0.);
const double2 vfixy = make_double2(0., 1.);
for (int i = 0; i < bcx.size(); ++i)
bcs.push_back(new HydroBC(mesh, vfixx, bcx[i], true/*xplane*/));
for (int i = 0; i < bcy.size(); ++i)
Expand Down Expand Up @@ -331,7 +331,7 @@ void Hydro::init() {
}
else
{
const double2 zero2(0., 0.);
const double2 zero2 = make_double2(0., 0.);
FillLauncher launcher(lrp, lrp, TaskArgument(&zero2,sizeof(zero2)));
launcher.add_field(FID_PU);
runtime->fill_fields(ctx, launcher);
Expand Down Expand Up @@ -375,7 +375,7 @@ Future Hydro::doCycle(
launchffd.argument = TaskArgument(ffdargs, sizeof(ffdargs));
launchffd.predicate = p_not_done;

double2 ffd2args[] = { double2(0., 0.) };
double2 ffd2args[] = { make_double2(0., 0.) };
IndexFillLauncher launchffd2;
launchffd2.launch_space = ispc;
launchffd2.projection = 0;
Expand Down Expand Up @@ -1802,7 +1802,7 @@ void Hydro::initRadialVelTask(
if (pmag > args->eps)
acc_pu[*itr] = args->vel * px / pmag;
else
acc_pu[*itr] = double2(0., 0.);
acc_pu[*itr] = make_double2(0., 0.);
}
}

Expand Down
8 changes: 4 additions & 4 deletions src/Mesh.cc
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ const int SumOp<int>::identity = 0;
template <>
const double SumOp<double>::identity = 0.;
template <>
const double2 SumOp<double2>::identity = double2(0., 0.);
const double2 SumOp<double2>::identity = make_double2(0., 0.);
template <>
const double MinOp<double>::identity = DBL_MAX;
template <>
Expand Down Expand Up @@ -817,7 +817,7 @@ void Mesh::calcCtrsTask(

const IndexSpace& isz = task->regions[1].region.get_index_space();
for (PointIterator itr(runtime, isz); itr(); itr++)
acc_zx[*itr] = double2(0., 0.);
acc_zx[*itr] = make_double2(0., 0.);

const IndexSpace& iss = task->regions[0].region.get_index_space();
for (PointIterator itr(runtime, iss); itr(); itr++)
Expand Down Expand Up @@ -863,7 +863,7 @@ void Mesh::calcCtrsOMPTask(
const Rect<1> rectz = runtime->get_index_space_domain(isz);
#pragma omp parallel for
for (coord_t z = rectz.lo[0]; z <= rectz.hi[0]; z++)
acc_zx[z] = double2(0., 0.);
acc_zx[z] = make_double2(0., 0.);

const IndexSpace& iss = task->regions[0].region.get_index_space();
// This will assert if it is not dense
Expand Down Expand Up @@ -1288,7 +1288,7 @@ void Mesh::calcCtrs(

int zfirst = mapsz[sfirst];
int zlast = (slast < nums ? mapsz[slast] : numz);
fill(&zx[zfirst], &zx[zlast], double2(0., 0.));
fill(&zx[zfirst], &zx[zlast], make_double2(0., 0.));

for (int s = sfirst; s < slast; ++s) {
int p1 = mapsp1[s];
Expand Down
4 changes: 2 additions & 2 deletions src/QCS.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ void QCS::setCornerDivTask(
// [1] Compute a zone-centered velocity
const IndexSpace& isz = task->regions[1].region.get_index_space();
for (PointIterator itz(runtime, isz); itz(); itz++)
acc_zuc[*itz] = double2(0., 0.);
acc_zuc[*itz] = make_double2(0., 0.);

const IndexSpace& iss = task->regions[0].region.get_index_space();
for (PointIterator its(runtime, iss); its(); its++)
Expand Down Expand Up @@ -488,7 +488,7 @@ void QCS::setCornerDivOMPTask(
const Rect<1> rectz = runtime->get_index_space_domain(isz);
#pragma omp parallel for
for (coord_t z = rectz.lo[0]; z <= rectz.hi[0]; z++)
acc_zuc[z] = double2(0., 0.);
acc_zuc[z] = make_double2(0., 0.);

const IndexSpace& iss = task->regions[0].region.get_index_space();
// This will assert if it is not dense
Expand Down
66 changes: 35 additions & 31 deletions src/Vec2.hh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
// This struct is defined with all functions inline,
// to give the compiler maximum opportunity to optimize.

#ifndef __CUDACC__
#ifndef USE_CUDA
struct double2
{
typedef double value_type;
Expand All @@ -37,38 +37,10 @@ struct double2
return(*this);
}

inline double2& operator+=(const double2& v2)
{
x += v2.x;
y += v2.y;
return(*this);
}

inline double2& operator-=(const double2& v2)
{
x -= v2.x;
y -= v2.y;
return(*this);
}

inline double2& operator*=(const double& r)
{
x *= r;
y *= r;
return(*this);
}

inline double2& operator/=(const double& r)
{
x /= r;
y /= r;
return(*this);
}

}; // double2
#endif // __CUDACC__
#endif // USE_CUDA

#ifndef __CUDACC__
#ifndef USE_CUDA
// Already has a declaration in CUDA
inline double2 make_double2(double x_, double y_) {
return(double2(x_, y_));
Expand Down Expand Up @@ -119,20 +91,44 @@ inline double2 operator+(const double2& v1, const double2& v2)
return make_double2(v1.x + v2.x, v1.y + v2.y);
}

// In-place component-wise addition: adds v2 into v1 and returns v1
// by reference so the expression can be chained.
__CUDA_HD__
inline double2& operator+=(double2& v1, const double2& v2)
{
    v1.x = v1.x + v2.x;
    v1.y = v1.y + v2.y;
    return v1;
}

// subtract
// Returns a new vector holding the component-wise difference v1 - v2;
// neither operand is modified.
__CUDA_HD__
inline double2 operator-(const double2& v1, const double2& v2)
{
    const double dx = v1.x - v2.x;
    const double dy = v1.y - v2.y;
    return make_double2(dx, dy);
}

// In-place component-wise subtraction: subtracts v2 from v1 and returns v1
// by reference so the expression can be chained.
__CUDA_HD__
inline double2& operator-=(double2& v1, const double2& v2)
{
    v1.x = v1.x - v2.x;
    v1.y = v1.y - v2.y;
    return v1;
}

// multiply vector by scalar
// Returns a new vector with both components of v scaled by r;
// v itself is not modified.
__CUDA_HD__
inline double2 operator*(const double2& v, const double& r)
{
    const double sx = v.x * r;
    const double sy = v.y * r;
    return make_double2(sx, sy);
}

// In-place scaling: multiplies both components of v by r and returns v
// by reference so the expression can be chained.
__CUDA_HD__
inline double2& operator*=(double2& v, const double& r)
{
    // r is deliberately read through the reference for each component,
    // in x-then-y order, exactly as written.
    v.x = v.x * r;
    v.y = v.y * r;
    return v;
}

// multiply scalar by vector
__CUDA_HD__
inline double2 operator*(const double& r, const double2& v)
Expand All @@ -148,6 +144,14 @@ inline double2 operator/(const double2& v, const double& r)
return make_double2(v.x * rinv, v.y * rinv);
}

// In-place division by a scalar: computes the reciprocal of r once and
// multiplies both components of v by it; returns v by reference.
__CUDA_HD__
inline double2& operator/=(double2& v, const double& r)
{
    const double rinv = 1.0 / r;
    v.x = v.x * rinv;
    v.y = v.y * rinv;
    return v;
}

// other vector operations:

Expand Down

0 comments on commit 4447765

Please sign in to comment.