Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
WeiqunZhang committed Sep 7, 2024
1 parent 477648e commit 3bcea89
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 77 deletions.
2 changes: 1 addition & 1 deletion Src/EB/AMReX_EB_STL_utils.H
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public:
Array<Array4<EB2::Type_t const>,AMREX_SPACEDIM> const& type_arr,
Array4<Real const> const& lst, Geometry const& geom) ;

void prepare (Gpu::PinnedVector<Triangle>&& a_tri_pts); // public for cuda
void prepare (Gpu::PinnedVector<Triangle> a_tri_pts); // public for cuda

private:

Expand Down
149 changes: 73 additions & 76 deletions Src/EB/AMReX_EB_STL_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ STLtools::read_ascii_stl_file (std::string const& fname, Real scale,
}

void
STLtools::prepare (Gpu::PinnedVector<Triangle>&& a_tri_pts)
STLtools::prepare (Gpu::PinnedVector<Triangle> a_tri_pts)
{
ParallelDescriptor::Bcast(&m_num_tri, 1);
if (!ParallelDescriptor::IOProcessor()) {
Expand All @@ -278,19 +278,84 @@ STLtools::prepare (Gpu::PinnedVector<Triangle>&& a_tri_pts)
build_bvh(a_tri_pts.data(), a_tri_pts.data()+a_tri_pts.size());
}

auto const tri0 = a_tri_pts[0];

#ifdef AMREX_USE_GPU
m_tri_pts_d.resize(m_num_tri);
Gpu::copyAsync(Gpu::hostToDevice, a_tri_pts.begin(), a_tri_pts.end(),
m_tri_pts_d.begin());
#else
m_tri_pts_d = std::move(a_tri_pts);
#endif
Triangle const* tri_pts = m_tri_pts_d.data();

m_tri_normals_d.resize(m_num_tri); // xxxxx once it all works. no need for this in case of bvh
XDim3* tri_norm = m_tri_normals_d.data();

// Compute normals in case the STL file does not have valid data for normals
ParallelFor(m_num_tri, [=] AMREX_GPU_DEVICE (int i) noexcept
{
Triangle const& tri = tri_pts[i];
XDim3 vec1{tri.v2.x-tri.v1.x, tri.v2.y-tri.v1.y, tri.v2.z-tri.v1.z};
XDim3 vec2{tri.v3.x-tri.v2.x, tri.v3.y-tri.v2.y, tri.v3.z-tri.v2.z};
XDim3 norm{vec1.y*vec2.z-vec1.z*vec2.y,
vec1.z*vec2.x-vec1.x*vec2.z,
vec1.x*vec2.y-vec1.y*vec2.x};
Real tmp = 1._rt / std::sqrt(norm.x*norm.x + norm.y*norm.y + norm.z*norm.z);
tri_norm[i].x = norm.x * tmp;
tri_norm[i].y = norm.y * tmp;
tri_norm[i].z = norm.z * tmp;
});

ReduceOps<ReduceOpMin,ReduceOpMin,ReduceOpMin,ReduceOpMax,ReduceOpMax,ReduceOpMax> reduce_op;
ReduceData<Real,Real,Real,Real,Real,Real> reduce_data(reduce_op);
using ReduceTuple = typename decltype(reduce_data)::Type;
reduce_op.eval(m_num_tri, reduce_data,
[=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
{
return {amrex::min(tri_pts[i].v1.x,
tri_pts[i].v2.x,
tri_pts[i].v3.x),
amrex::min(tri_pts[i].v1.y,
tri_pts[i].v2.y,
tri_pts[i].v3.y),
amrex::min(tri_pts[i].v1.z,
tri_pts[i].v2.z,
tri_pts[i].v3.z),
amrex::max(tri_pts[i].v1.x,
tri_pts[i].v2.x,
tri_pts[i].v3.x),
amrex::max(tri_pts[i].v1.y,
tri_pts[i].v2.y,
tri_pts[i].v3.y),
amrex::max(tri_pts[i].v1.z,
tri_pts[i].v2.z,
tri_pts[i].v3.z)};
});
auto const& hv = reduce_data.value(reduce_op);
m_ptmin.x = amrex::get<0>(hv);
m_ptmin.y = amrex::get<1>(hv);
m_ptmin.z = amrex::get<2>(hv);
m_ptmax.x = amrex::get<3>(hv);
m_ptmax.y = amrex::get<4>(hv);
m_ptmax.z = amrex::get<5>(hv);

if (amrex::Verbose() > 0) {
amrex::Print() << " Min: " << m_ptmin << " Max: " << m_ptmax << '\n';
}

// Choose a reference point by extending the normal vector of the first
// triangle until it's slightly outside the bounding box.
XDim3 cent0; // centroid of the first triangle
int is_ref_positive;
{
Triangle const& tri = a_tri_pts[0];
cent0 = XDim3{(tri.v1.x + tri.v2.x + tri.v3.x) / 3._rt,
(tri.v1.y + tri.v2.y + tri.v3.y) / 3._rt,
(tri.v1.z + tri.v2.z + tri.v3.z) / 3._rt};
cent0 = XDim3{(tri0.v1.x + tri0.v2.x + tri0.v3.x) / 3._rt,
(tri0.v1.y + tri0.v2.y + tri0.v3.y) / 3._rt,
(tri0.v1.z + tri0.v2.z + tri0.v3.z) / 3._rt};
// We are computing the normal ourselves in case the stl file does
// not have valid data on normal.
XDim3 vec1{tri.v2.x-tri.v1.x, tri.v2.y-tri.v1.y, tri.v2.z-tri.v1.z};
XDim3 vec2{tri.v3.x-tri.v2.x, tri.v3.y-tri.v2.y, tri.v3.z-tri.v2.z};
XDim3 vec1{tri0.v2.x-tri0.v1.x, tri0.v2.y-tri0.v1.y, tri0.v2.z-tri0.v1.z};
XDim3 vec2{tri0.v3.x-tri0.v2.x, tri0.v3.y-tri0.v2.y, tri0.v3.z-tri0.v2.z};
XDim3 norm{vec1.y*vec2.z-vec1.z*vec2.y,
vec1.z*vec2.x-vec1.x*vec2.z,
vec1.x*vec2.y-vec1.y*vec2.x};
Expand Down Expand Up @@ -369,75 +434,6 @@ STLtools::prepare (Gpu::PinnedVector<Triangle>&& a_tri_pts)
}
}

#ifdef AMREX_USE_GPU
//device vectors
m_tri_pts_d.resize(m_num_tri);
Gpu::copyAsync(Gpu::hostToDevice, a_tri_pts.begin(), a_tri_pts.end(),
m_tri_pts_d.begin());
Gpu::streamSynchronize();
a_tri_pts.clear();
#else
m_tri_pts_d = std::move(a_tri_pts);
#endif
// After this, a_tri_pts is no longer valid.

Triangle const* tri_pts = m_tri_pts_d.data();

m_tri_normals_d.resize(m_num_tri); // xxxxx once it all works. no need for this in case of bvh
XDim3* tri_norm = m_tri_normals_d.data();

// Compute normals in case the STL file does not have valid data for normals
ParallelFor(m_num_tri, [=] AMREX_GPU_DEVICE (int i) noexcept
{
Triangle const& tri = tri_pts[i];
XDim3 vec1{tri.v2.x-tri.v1.x, tri.v2.y-tri.v1.y, tri.v2.z-tri.v1.z};
XDim3 vec2{tri.v3.x-tri.v2.x, tri.v3.y-tri.v2.y, tri.v3.z-tri.v2.z};
XDim3 norm{vec1.y*vec2.z-vec1.z*vec2.y,
vec1.z*vec2.x-vec1.x*vec2.z,
vec1.x*vec2.y-vec1.y*vec2.x};
Real tmp = 1._rt / std::sqrt(norm.x*norm.x + norm.y*norm.y + norm.z*norm.z);
tri_norm[i].x = norm.x * tmp;
tri_norm[i].y = norm.y * tmp;
tri_norm[i].z = norm.z * tmp;
});

ReduceOps<ReduceOpMin,ReduceOpMin,ReduceOpMin,ReduceOpMax,ReduceOpMax,ReduceOpMax> reduce_op;
ReduceData<Real,Real,Real,Real,Real,Real> reduce_data(reduce_op);
using ReduceTuple = typename decltype(reduce_data)::Type;
reduce_op.eval(m_num_tri, reduce_data,
[=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
{
return {amrex::min(tri_pts[i].v1.x,
tri_pts[i].v2.x,
tri_pts[i].v3.x),
amrex::min(tri_pts[i].v1.y,
tri_pts[i].v2.y,
tri_pts[i].v3.y),
amrex::min(tri_pts[i].v1.z,
tri_pts[i].v2.z,
tri_pts[i].v3.z),
amrex::max(tri_pts[i].v1.x,
tri_pts[i].v2.x,
tri_pts[i].v3.x),
amrex::max(tri_pts[i].v1.y,
tri_pts[i].v2.y,
tri_pts[i].v3.y),
amrex::max(tri_pts[i].v1.z,
tri_pts[i].v2.z,
tri_pts[i].v3.z)};
});
auto const& hv = reduce_data.value(reduce_op);
m_ptmin.x = amrex::get<0>(hv);
m_ptmin.y = amrex::get<1>(hv);
m_ptmin.z = amrex::get<2>(hv);
m_ptmax.x = amrex::get<3>(hv);
m_ptmax.y = amrex::get<4>(hv);
m_ptmax.z = amrex::get<5>(hv);

if (amrex::Verbose() > 0) {
amrex::Print() << " Min: " << m_ptmin << " Max: " << m_ptmax << '\n';
}

// We now need to figure out if the boundary and the reference is
// outside or inside the object.
XDim3 ptref = m_ptref;
Expand Down Expand Up @@ -518,6 +514,7 @@ STLtools::fill (MultiFab& mf, IntVect const& nghost, Geometry const& geom,
auto const& ma = mf.arrays();

if (m_bvh_optimization) {
amrex::Abort("xxxxx");
char const* bvh_root = nullptr;
ParallelFor(mf, nghost,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
Expand Down

0 comments on commit 3bcea89

Please sign in to comment.