diff --git a/assets/zhxxgraphs/arts/wxl_graphs/testppw.zsg b/assets/zhxxgraphs/arts/wxl_graphs/testppw.zsg index 8d39c0a04e..f783c1c674 100644 --- a/assets/zhxxgraphs/arts/wxl_graphs/testppw.zsg +++ b/assets/zhxxgraphs/arts/wxl_graphs/testppw.zsg @@ -28,7 +28,7 @@ ] }, "params": { - "value": 1024 + "value": 32768 }, "uipos": [ 554.8903038891797, @@ -133,7 +133,7 @@ "size": [ null, null, - 1024 + 32768 ], "SRC": [ null, @@ -235,12 +235,11 @@ }, "params": {}, "uipos": [ - 919.2151476391793, - -687.1415889533259 + 896.6418577368992, + -715.985237161795 ], "options": [ - "ONCE", - "collapsed" + "ONCE" ] }, "bd86f34a-x": { @@ -271,7 +270,7 @@ ] }, "params": { - "value": "@pos += @vel * $dt\n@clr = abs(@vel)\n@rad = length(@vel) * 5" + "value": "@pos += @vel * $dt\n@clr = abs(@vel)\n@rad = length(@vel) * 0.5" }, "uipos": [ 3137.990469179422, @@ -449,10 +448,10 @@ } }, "view_rect": { - "x": 2807.079957462128, - "y": -689.9524838598911, - "width": 1478.3999999999992, - "height": 315.99999999999983 + "x": 1168.7828739460383, + "y": -976.7866544773483, + "width": 2887.499999999998, + "height": 617.1874999999997 } } }, diff --git a/projects/gmpm/Utils.hpp b/projects/gmpm/Utils.hpp index db6f41ae95..054a04bce0 100644 --- a/projects/gmpm/Utils.hpp +++ b/projects/gmpm/Utils.hpp @@ -5,6 +5,8 @@ #include #include +#include "zensim/container/HashTable.hpp" + // only use this macro within a zeno::INode::apply() #define RETRIEVE_OBJECT_PTRS(T, STR) \ ([this](const std::string_view str) { \ @@ -20,4 +22,87 @@ return objPtrs; \ })(STR); -namespace zeno {} // namespace zeno \ No newline at end of file +namespace zeno { + +template +inline void spatial_hashing(ExecPol &pol, const TileVectorT &tvs, + const typename TileVectorT::value_type radius, + IndexBucketsT &ibs) { + using namespace zs; + constexpr auto space = ExecPol::exec_tag::value; +#if ZS_ENABLE_CUDA && defined(__CUDACC__) + // ZS_LAMBDA -> __device__ + static_assert(space == execspace_e::cuda, + "specialized policy and compiler not match"); +#else + static_assert(space != execspace_e::cuda, + "specialized policy and compiler not match"); +#endif + + auto allocator = tvs.get_allocator(); + auto mloc = allocator.location; + ibs._dx = radius + radius; + /// table + auto &partition = ibs._table; + using Partition = RM_CVREF_T(partition); + partition = Partition{tvs.size(), tvs.memspace(), tvs.devid()}; + + // clean + pol(range(partition._tableSize), + [table = proxy(partition)] ZS_LAMBDA(size_t i) mutable { + table._table.keys[i] = + Partition::key_t::uniform(Partition::key_scalar_sentinel_v); + table._table.indices[i] = Partition::sentinel_v; + table._table.status[i] = -1; + if (i == 0) + *table._cnt = 0; + }); + // compute sparsity + pol(range(tvs.size()), + [tvs = proxy({}, tvs), + ibs = proxy(ibs)] ZS_LAMBDA(size_t pi) mutable { + auto x = tvs.template pack<3>("pos", pi); + auto coord = ibs.bucketCoord(x); + ibs.table.insert(coord); + }); + auto numCells = partition.size() + 1; + + /// counts + using index_type = typename IndexBucketsT::index_type; + auto &counts = ibs._counts; + counts = counts.clone(mloc); + counts.resize(numCells); + zs::memset(mem_device, counts.data(), 0, sizeof(index_type) * numCells); + + auto tmp = counts; // for index distribution later + pol(range(tvs.size()), + [tvs = proxy({}, tvs), + ibs = proxy(ibs)] ZS_LAMBDA(size_t pi) mutable { + auto pos = tvs.template pack<3>("pos", pi); + auto coord = ibs.bucketCoord(pos); + atomic_add(exec_cuda, (index_type *)&ibs.counts[ibs.table.query(coord)], + (index_type)1); + }); + /// offsets + auto &offsets = ibs._offsets; + offsets = offsets.clone(mloc); + offsets.resize(numCells); + exclusive_scan(pol, std::begin(counts), std::end(counts), + std::begin(offsets)); + /// indices + auto &indices = ibs._indices; + indices = indices.clone(mloc); + indices.resize(tvs.size()); + pol(range(tvs.size()), + [tvs = proxy({}, tvs), counts = proxy(tmp), + ibs = proxy(ibs)] ZS_LAMBDA(size_t pi) mutable { + auto pos = tvs.template pack<3>("pos", pi); + auto coord = ibs.bucketCoord(pos); + auto cellno = ibs.table.query(coord); + auto localno = + atomic_add(exec_cuda, (index_type *)&counts[cellno], (index_type)1); + ibs.indices[ibs.offsets[cellno] + localno] = (index_type)pi; + }); +} + +} // namespace zeno \ No newline at end of file diff --git a/projects/gmpm/mpm/PNW.cu b/projects/gmpm/mpm/PNW.cu index 4672b412a4..a39eb1bf16 100644 --- a/projects/gmpm/mpm/PNW.cu +++ b/projects/gmpm/mpm/PNW.cu @@ -31,6 +31,11 @@ struct ZSParticleNeighborWrangler : INode { } void apply() override { using namespace zs; + + auto ¤tContext = Cuda::context(0); + currentContext.setContext(); + auto cudaPol = cuda_exec().device(0).sync(true); + auto code = get_input("zfxCode")->get(); /// parObjPtr @@ -44,9 +49,12 @@ struct ZSParticleNeighborWrangler : INode { // auto parObjPtr = get_input("ZSParticles"); /// parNeighborPtr + auto neighborParObjPtrs = + RETRIEVE_OBJECT_PTRS(ZenoParticles, "ZSNeighborParticles"); std::shared_ptr parNeighborPtr{}; - if (has_input("ZSNeighborParticles")) - parNeighborPtr = get_input("ZSNeighborParticles"); + if (neighborParObjPtrs.size() > 0) + parNeighborPtr = + std::shared_ptr(neighborParObjPtrs[0], [](void *) {}); else if (!has_input("ZSNeighborParticles")) parNeighborPtr = std::make_shared(*parObjPtr); // copy-ctor else @@ -56,7 +64,15 @@ struct ZSParticleNeighborWrangler : INode { const auto neighborProps = neighborPars.getPropertyTags(); /// ibs (TODO: generate based on neighborPars, when this input is absent) - auto ibsPtr = get_input("ZSIndexBuckets"); + std::shared_ptr ibsPtr{}; + if (has_input("ZSIndexBuckets")) + ibsPtr = get_input("ZSIndexBuckets"); + else if (has_input("ZSIndexBuckets")) + spatial_hashing(cudaPol, neighborPars, + get_input("ZSIndexBuckets")->get(), + ibsPtr->get()); + else + ; const auto &ibs = ibsPtr->get(); zfx::Options opts(zfx::Options::for_cuda); @@ -108,10 +124,6 @@ struct ZSParticleNeighborWrangler : INode { for (auto &&[name, nchns] : neighborProps) def_sym("@@", name.asString(), nchns); - auto ¤tContext = Cuda::context(0); - currentContext.setContext(); - auto cudaPol = cuda_exec().device(0).sync(true); - auto prog = compiler.compile(code, opts); auto jitCode = assembler.assemble(prog->assembly); diff --git a/projects/gmpm/utils/IndexBuckets.cu b/projects/gmpm/utils/IndexBuckets.cu index c28858a309..ba79ba97ea 100644 --- a/projects/gmpm/utils/IndexBuckets.cu +++ b/projects/gmpm/utils/IndexBuckets.cu @@ -1,3 +1,4 @@ +#include "../Utils.hpp" #include "../mpm/Structures.hpp" #include "zensim/cuda/execution/ExecutionPolicy.cuh" #include "zensim/omp/execution/ExecutionPolicy.hpp" @@ -19,77 +20,8 @@ struct MakeZSBuckets : zeno::INode { auto &ibs = out->get(); using namespace zs; - auto allocator = pars.get_allocator(); - auto mloc = allocator.location; - ibs._dx = radius + radius; - /// table - auto &partition = ibs._table; - using Partition = RM_CVREF_T(partition); - partition = Partition{pars.size(), pars.memspace(), pars.devid()}; - - // clean auto cudaPol = cuda_exec().device(0); - cudaPol(range(partition._tableSize), - [table = proxy(partition)] __device__( - size_t i) mutable { - table._table.keys[i] = - Partition::key_t::uniform(Partition::key_scalar_sentinel_v); - table._table.indices[i] = Partition::sentinel_v; - table._table.status[i] = -1; - if (i == 0) - *table._cnt = 0; - }); - // compute sparsity - cudaPol(range(pars.size()), [pars = proxy({}, pars), - ibs = proxy( - ibs)] __device__(size_t pi) mutable { - auto x = pars.template pack<3>("pos", pi); - auto coord = ibs.bucketCoord(x); - ibs.table.insert(coord); - }); - auto numCells = partition.size() + 1; - - /// counts - using index_type = typename ZenoIndexBuckets::buckets_t::index_type; - auto &counts = ibs._counts; - counts = counts.clone(mloc); - counts.resize(numCells); - zs::memset(mem_device, counts.data(), 0, sizeof(index_type) * numCells); -#if 0 - cudaPol(range(counts.size()), - [counts = proxy(counts)] __device__( - size_t i) mutable { counts[i] = 0; }); -#endif - auto tmp = counts; // for index distribution later - cudaPol(range(pars.size()), [pars = proxy({}, pars), - ibs = proxy( - ibs)] __device__(size_t pi) mutable { - auto pos = pars.template pack<3>("pos", pi); - auto coord = ibs.bucketCoord(pos); - atomic_add(exec_cuda, (index_type *)&ibs.counts[ibs.table.query(coord)], - (index_type)1); - }); - /// offsets - auto &offsets = ibs._offsets; - offsets = offsets.clone(mloc); - offsets.resize(numCells); - exclusive_scan(cudaPol, std::begin(counts), std::end(counts), - std::begin(offsets)); - /// indices - auto &indices = ibs._indices; - indices = indices.clone(mloc); - indices.resize(pars.size()); - cudaPol(range(pars.size()), [pars = proxy({}, pars), - counts = proxy(tmp), - ibs = proxy( - ibs)] __device__(size_t pi) mutable { - auto pos = pars.template pack<3>("pos", pi); - auto coord = ibs.bucketCoord(pos); - auto cellno = ibs.table.query(coord); - auto localno = - atomic_add(exec_cuda, (index_type *)&counts[cellno], (index_type)1); - ibs.indices[ibs.offsets[cellno] + localno] = (index_type)pi; - }); + spatial_hashing(cudaPol, pars, radius, ibs); fmt::print("done building index buckets with {} entries, {} buckets\n", ibs.numEntries(), ibs.numBuckets());