wip

WeiqunZhang · Sep 7, 2024 · 3bcea89 · 3bcea89
1 parent 477648e
commit 3bcea89
Show file tree

Hide file tree

Showing 2 changed files with 74 additions and 77 deletions.
diff --git a/Src/EB/AMReX_EB_STL_utils.H b/Src/EB/AMReX_EB_STL_utils.H
@@ -65,7 +65,7 @@ public:
  Array<Array4<EB2::Type_t const>,AMREX_SPACEDIM> const& type_arr,
  Array4<Real const> const& lst, Geometry const& geom) ;
 
- void prepare (Gpu::PinnedVector<Triangle>&& a_tri_pts); // public for cuda
+ void prepare (Gpu::PinnedVector<Triangle> a_tri_pts); // public for cuda
 
 private:
 

diff --git a/Src/EB/AMReX_EB_STL_utils.cpp b/Src/EB/AMReX_EB_STL_utils.cpp
@@ -266,7 +266,7 @@ STLtools::read_ascii_stl_file (std::string const& fname, Real scale,
 }
 
 void
-STLtools::prepare (Gpu::PinnedVector<Triangle>&& a_tri_pts)
+STLtools::prepare (Gpu::PinnedVector<Triangle> a_tri_pts)
 {
  ParallelDescriptor::Bcast(&m_num_tri, 1);
  if (!ParallelDescriptor::IOProcessor()) {
@@ -278,19 +278,84 @@ STLtools::prepare (Gpu::PinnedVector<Triangle>&& a_tri_pts)
  build_bvh(a_tri_pts.data(), a_tri_pts.data()+a_tri_pts.size());
  }
 
+ auto const tri0 = a_tri_pts[0];
+
+#ifdef AMREX_USE_GPU
+ m_tri_pts_d.resize(m_num_tri);
+ Gpu::copyAsync(Gpu::hostToDevice, a_tri_pts.begin(), a_tri_pts.end(),
+ m_tri_pts_d.begin());
+#else
+ m_tri_pts_d = std::move(a_tri_pts);
+#endif
+ Triangle const* tri_pts = m_tri_pts_d.data();
+
+ m_tri_normals_d.resize(m_num_tri); // xxxxx once it all works. no need for this in case of bvh
+ XDim3* tri_norm = m_tri_normals_d.data();
+
+ // Compute normals in case the STL file does not have valid data for normals
+ ParallelFor(m_num_tri, [=] AMREX_GPU_DEVICE (int i) noexcept
+ {
+ Triangle const& tri = tri_pts[i];
+ XDim3 vec1{tri.v2.x-tri.v1.x, tri.v2.y-tri.v1.y, tri.v2.z-tri.v1.z};
+ XDim3 vec2{tri.v3.x-tri.v2.x, tri.v3.y-tri.v2.y, tri.v3.z-tri.v2.z};
+ XDim3 norm{vec1.y*vec2.z-vec1.z*vec2.y,
+ vec1.z*vec2.x-vec1.x*vec2.z,
+ vec1.x*vec2.y-vec1.y*vec2.x};
+ Real tmp = 1._rt / std::sqrt(norm.x*norm.x + norm.y*norm.y + norm.z*norm.z);
+ tri_norm[i].x = norm.x * tmp;
+ tri_norm[i].y = norm.y * tmp;
+ tri_norm[i].z = norm.z * tmp;
+ });
+
+ ReduceOps<ReduceOpMin,ReduceOpMin,ReduceOpMin,ReduceOpMax,ReduceOpMax,ReduceOpMax> reduce_op;
+ ReduceData<Real,Real,Real,Real,Real,Real> reduce_data(reduce_op);
+ using ReduceTuple = typename decltype(reduce_data)::Type;
+ reduce_op.eval(m_num_tri, reduce_data,
+ [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
+ {
+ return {amrex::min(tri_pts[i].v1.x,
+ tri_pts[i].v2.x,
+ tri_pts[i].v3.x),
+ amrex::min(tri_pts[i].v1.y,
+ tri_pts[i].v2.y,
+ tri_pts[i].v3.y),
+ amrex::min(tri_pts[i].v1.z,
+ tri_pts[i].v2.z,
+ tri_pts[i].v3.z),
+ amrex::max(tri_pts[i].v1.x,
+ tri_pts[i].v2.x,
+ tri_pts[i].v3.x),
+ amrex::max(tri_pts[i].v1.y,
+ tri_pts[i].v2.y,
+ tri_pts[i].v3.y),
+ amrex::max(tri_pts[i].v1.z,
+ tri_pts[i].v2.z,
+ tri_pts[i].v3.z)};
+ });
+ auto const& hv = reduce_data.value(reduce_op);
+ m_ptmin.x = amrex::get<0>(hv);
+ m_ptmin.y = amrex::get<1>(hv);
+ m_ptmin.z = amrex::get<2>(hv);
+ m_ptmax.x = amrex::get<3>(hv);
+ m_ptmax.y = amrex::get<4>(hv);
+ m_ptmax.z = amrex::get<5>(hv);
+
+ if (amrex::Verbose() > 0) {
+ amrex::Print() << " Min: " << m_ptmin << " Max: " << m_ptmax << '\n';
+ }
+
  // Choose a reference point by extending the normal vector of the first
  // triangle until it's slightly outside the bounding box.
  XDim3 cent0; // centroid of the first triangle
  int is_ref_positive;
  {
- Triangle const& tri = a_tri_pts[0];
- cent0 = XDim3{(tri.v1.x + tri.v2.x + tri.v3.x) / 3._rt,
- (tri.v1.y + tri.v2.y + tri.v3.y) / 3._rt,
- (tri.v1.z + tri.v2.z + tri.v3.z) / 3._rt};
+ cent0 = XDim3{(tri0.v1.x + tri0.v2.x + tri0.v3.x) / 3._rt,
+ (tri0.v1.y + tri0.v2.y + tri0.v3.y) / 3._rt,
+ (tri0.v1.z + tri0.v2.z + tri0.v3.z) / 3._rt};
  // We are computing the normal ourselves in case the stl file does
  // not have valid data on normal.
- XDim3 vec1{tri.v2.x-tri.v1.x, tri.v2.y-tri.v1.y, tri.v2.z-tri.v1.z};
- XDim3 vec2{tri.v3.x-tri.v2.x, tri.v3.y-tri.v2.y, tri.v3.z-tri.v2.z};
+ XDim3 vec1{tri0.v2.x-tri0.v1.x, tri0.v2.y-tri0.v1.y, tri0.v2.z-tri0.v1.z};
+ XDim3 vec2{tri0.v3.x-tri0.v2.x, tri0.v3.y-tri0.v2.y, tri0.v3.z-tri0.v2.z};
  XDim3 norm{vec1.y*vec2.z-vec1.z*vec2.y,
  vec1.z*vec2.x-vec1.x*vec2.z,
  vec1.x*vec2.y-vec1.y*vec2.x};
@@ -369,75 +434,6 @@ STLtools::prepare (Gpu::PinnedVector<Triangle>&& a_tri_pts)
  }
  }
 
-#ifdef AMREX_USE_GPU
- //device vectors
- m_tri_pts_d.resize(m_num_tri);
- Gpu::copyAsync(Gpu::hostToDevice, a_tri_pts.begin(), a_tri_pts.end(),
- m_tri_pts_d.begin());
- Gpu::streamSynchronize();
- a_tri_pts.clear();
-#else
- m_tri_pts_d = std::move(a_tri_pts);
-#endif
- // After this, a_tri_pts is no longer valid.
-
- Triangle const* tri_pts = m_tri_pts_d.data();
-
- m_tri_normals_d.resize(m_num_tri); // xxxxx once it all works. no need for this in case of bvh
- XDim3* tri_norm = m_tri_normals_d.data();
-
- // Compute normals in case the STL file does not have valid data for normals
- ParallelFor(m_num_tri, [=] AMREX_GPU_DEVICE (int i) noexcept
- {
- Triangle const& tri = tri_pts[i];
- XDim3 vec1{tri.v2.x-tri.v1.x, tri.v2.y-tri.v1.y, tri.v2.z-tri.v1.z};
- XDim3 vec2{tri.v3.x-tri.v2.x, tri.v3.y-tri.v2.y, tri.v3.z-tri.v2.z};
- XDim3 norm{vec1.y*vec2.z-vec1.z*vec2.y,
- vec1.z*vec2.x-vec1.x*vec2.z,
- vec1.x*vec2.y-vec1.y*vec2.x};
- Real tmp = 1._rt / std::sqrt(norm.x*norm.x + norm.y*norm.y + norm.z*norm.z);
- tri_norm[i].x = norm.x * tmp;
- tri_norm[i].y = norm.y * tmp;
- tri_norm[i].z = norm.z * tmp;
- });
-
- ReduceOps<ReduceOpMin,ReduceOpMin,ReduceOpMin,ReduceOpMax,ReduceOpMax,ReduceOpMax> reduce_op;
- ReduceData<Real,Real,Real,Real,Real,Real> reduce_data(reduce_op);
- using ReduceTuple = typename decltype(reduce_data)::Type;
- reduce_op.eval(m_num_tri, reduce_data,
- [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
- {
- return {amrex::min(tri_pts[i].v1.x,
- tri_pts[i].v2.x,
- tri_pts[i].v3.x),
- amrex::min(tri_pts[i].v1.y,
- tri_pts[i].v2.y,
- tri_pts[i].v3.y),
- amrex::min(tri_pts[i].v1.z,
- tri_pts[i].v2.z,
- tri_pts[i].v3.z),
- amrex::max(tri_pts[i].v1.x,
- tri_pts[i].v2.x,
- tri_pts[i].v3.x),
- amrex::max(tri_pts[i].v1.y,
- tri_pts[i].v2.y,
- tri_pts[i].v3.y),
- amrex::max(tri_pts[i].v1.z,
- tri_pts[i].v2.z,
- tri_pts[i].v3.z)};
- });
- auto const& hv = reduce_data.value(reduce_op);
- m_ptmin.x = amrex::get<0>(hv);
- m_ptmin.y = amrex::get<1>(hv);
- m_ptmin.z = amrex::get<2>(hv);
- m_ptmax.x = amrex::get<3>(hv);
- m_ptmax.y = amrex::get<4>(hv);
- m_ptmax.z = amrex::get<5>(hv);
-
- if (amrex::Verbose() > 0) {
- amrex::Print() << " Min: " << m_ptmin << " Max: " << m_ptmax << '\n';
- }
-
  // We now need to figure out if the boundary and the reference is
  // outside or inside the object.
  XDim3 ptref = m_ptref;
@@ -518,6 +514,7 @@ STLtools::fill (MultiFab& mf, IntVect const& nghost, Geometry const& geom,
  auto const& ma = mf.arrays();
 
  if (m_bvh_optimization) {
+ amrex::Abort("xxxxx");
  char const* bvh_root = nullptr;
  ParallelFor(mf, nghost,
  [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept