From 124918f25531a72bf0df0dbd8842df3fdd46b944 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 2 Jun 2023 14:00:53 -0700 Subject: [PATCH 1/4] initial implementation of multipoint distribution with a normal distribution to the number of point per multipoint --- .../pairwise_point_polygon_distance.cu | 11 +- cpp/include/cuspatial/iterator_factory.cuh | 8 + .../cuspatial_test/geometry_generator.cuh | 138 +++++++++++++++--- cpp/include/cuspatial_test/random.cuh | 2 +- .../utility_test/test_geometry_generators.cu | 51 +++++++ 5 files changed, 181 insertions(+), 29 deletions(-) diff --git a/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu b/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu index c3f84da70..bbb353505 100644 --- a/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu +++ b/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu @@ -49,7 +49,7 @@ void pairwise_point_polygon_distance_benchmark(nvbench::state& state, nvbench::t auto mpoly_generator_param = multipolygon_generator_parameter{ num_pairs, num_polygons_per_multipolygon, num_holes_per_polygon, num_edges_per_ring}; - auto mpoint_generator_param = multipoint_generator_parameter{ + auto mpoint_generator_param = multipoint_generator_parameter_idendity{ num_pairs, num_points_per_multipoint, vec_2d{-1, -1}, vec_2d{0, 0}}; auto multipolygons = generate_multipolygon_array(mpoly_generator_param, stream); @@ -74,10 +74,11 @@ void pairwise_point_polygon_distance_benchmark(nvbench::state& state, nvbench::t state.add_global_memory_reads( mpoly_generator_param.num_coords() + mpoint_generator_param.num_points(), "CoordinatesReadSize"); - state.add_global_memory_reads( - (mpoly_generator_param.num_rings() + 1) + (mpoly_generator_param.num_polygons() + 1) + - (mpoly_generator_param.num_multipolygons + 1) + (mpoint_generator_param.num_multipoints + 1), - "OffsetsDataSize"); + state.add_global_memory_reads((mpoly_generator_param.num_rings() + 1) + + (mpoly_generator_param.num_polygons() + 1) + + (mpoly_generator_param.num_multipolygons + 1) + + (mpoint_generator_param.num_multipoints() + 1), + "OffsetsDataSize"); state.add_global_memory_writes(num_pairs); diff --git a/cpp/include/cuspatial/iterator_factory.cuh b/cpp/include/cuspatial/iterator_factory.cuh index 1f026512c..8fe088461 100644 --- a/cpp/include/cuspatial/iterator_factory.cuh +++ b/cpp/include/cuspatial/iterator_factory.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -424,6 +425,13 @@ auto make_geometry_id_iterator(GeometryIter geometry_offsets_begin, std::distance(geometry_offsets_begin, geometry_offsets_end))); } +template +auto make_element_count_iterator_from_offset(OffsetIter offset_begin) +{ + auto zipped = thrust::make_zip_iterator(offset_begin, thrust::next(offset_begin)); + return thrust::make_transform_iterator(zipped, detail::offset_pair_to_count_functor{}); +} + /** * @} // end of doxygen group */ diff --git a/cpp/include/cuspatial_test/geometry_generator.cuh b/cpp/include/cuspatial_test/geometry_generator.cuh index 73b1d270e..57869b80d 100644 --- a/cpp/include/cuspatial_test/geometry_generator.cuh +++ b/cpp/include/cuspatial_test/geometry_generator.cuh @@ -16,10 +16,12 @@ #pragma once +#include "thrust/random/normal_distribution.h" #include #include #include +#include #include #include #include @@ -28,6 +30,7 @@ #include #include +#include #include #include @@ -54,6 +57,37 @@ struct random_walk_functor { } }; +/** + * @brief Struct to store the parameters of the multipoint aray + * + * @tparam T Type of the coordinates + */ +template +class multipoint_generator_parameter { + public: + using element_t = T; + + std::size_t num_multipoints; + std::size_t num_points_per_multipoints; + vec_2d lower_left; + vec_2d upper_right; + + auto points_generator() + { + auto engine_x = deterministic_engine(0); + auto engine_y = deterministic_engine(1); + + auto x_dist = make_uniform_dist(lower_left.x, upper_right.x); + auto y_dist = make_uniform_dist(lower_left.y, upper_right.y); + + return point_generator(lower_left, upper_right, engine_x, engine_y, x_dist, y_dist); + } +}; + +template +multipoint_generator_parameter(std::size_t, std::size_t, vec_2d, vec_2d) + -> multipoint_generator_parameter; + } // namespace detail /** @@ -356,25 +390,58 @@ auto generate_multilinestring_array(multilinestring_generator_parameter param return make_multilinestring_array( std::move(geometry_offset), std::move(part_offset), std::move(points)); } - -/** - * @brief Struct to store the parameters of the multipoint aray - * - * @tparam T Type of the coordinates - */ template -struct multipoint_generator_parameter { - using element_t = T; +class multipoint_generator_parameter_idendity { + private: + detail::multipoint_generator_parameter _params; + + public: + multipoint_generator_parameter_idendity(std::size_t num_multipoints, + std::size_t num_points_per_multipoints, + vec_2d lower_left, + vec_2d upper_right) + : _params{num_multipoints, num_points_per_multipoints, lower_left, upper_right} + { + } + std::size_t num_multipoints() { return _params.num_multipoints; } + std::size_t num_points_per_multipoints() { return _params.num_points_per_multipoints; } + std::size_t num_points() { return num_multipoints() * num_points_per_multipoints(); } + vec_2d lower_left() { return _params.lower_left; } + vec_2d upper_right() { return _params.upper_right; } - std::size_t num_multipoints; - std::size_t num_points_per_multipoints; - vec_2d lower_left; - vec_2d upper_right; + auto points_generator() { return _params.points_generator(); } +}; + +template +class multipoint_generator_parameter_normal { + private: + detail::multipoint_generator_parameter _params; + T stddev; + + public: + multipoint_generator_parameter_normal(std::size_t num_multipoints, + std::size_t num_points_per_multipoints, + vec_2d lower_left, + vec_2d upper_right, + T stddev) + : _params{num_multipoints, num_points_per_multipoints, lower_left, upper_right}, stddev(stddev) + { + } - CUSPATIAL_HOST_DEVICE std::size_t num_points() + auto multipoint_count_generator() { - return num_multipoints * num_points_per_multipoints; + auto lower = std::max(1, static_cast(_params.num_points_per_multipoints - 6 * stddev)); + auto upper = static_cast(_params.num_points_per_multipoints + 6 * stddev); + auto engine = deterministic_engine(0); + auto normal = make_normal_dist(lower, upper); + return value_generator{lower, upper, engine, normal}; } + + auto points_generator() { return _params.points_generator(); } + + std::size_t num_multipoints() { return _params.num_multipoints; } + vec_2d lower_left() { return _params.lower_left; } + vec_2d upper_right() { return _params.upper_right; } }; /** @@ -386,27 +453,52 @@ struct multipoint_generator_parameter { * @return a cuspatial::test::multipoint_array object */ template -auto generate_multipoint_array(multipoint_generator_parameter params, +auto generate_multipoint_array(multipoint_generator_parameter_idendity params, rmm::cuda_stream_view stream) { rmm::device_uvector> coordinates(params.num_points(), stream); - rmm::device_uvector offsets(params.num_multipoints + 1, stream); + rmm::device_uvector offsets(params.num_multipoints() + 1, stream); thrust::sequence(rmm::exec_policy(stream), offsets.begin(), offsets.end(), std::size_t{0}, - params.num_points_per_multipoints); + params.num_points_per_multipoints()); - auto engine_x = deterministic_engine(params.num_points()); - auto engine_y = deterministic_engine(2 * params.num_points()); + auto point_gen = params.points_generator(); + thrust::tabulate(rmm::exec_policy(stream), coordinates.begin(), coordinates.end(), point_gen); - auto x_dist = make_uniform_dist(params.lower_left.x, params.upper_right.x); - auto y_dist = make_uniform_dist(params.lower_left.y, params.upper_right.y); + return make_multipoint_array(std::move(offsets), std::move(coordinates)); +} + +template +rmm::device_uvector make_offsets(Generator count_generator, + std::size_t size, + rmm::cuda_stream_view stream) +{ + rmm::device_uvector offsets(size, stream); - auto point_gen = - point_generator(params.lower_left, params.upper_right, engine_x, engine_y, x_dist, y_dist); + zero_data_async(offsets.begin(), offsets.end(), stream); + thrust::tabulate( + rmm::exec_policy(stream), thrust::next(offsets.begin()), offsets.end(), count_generator); + thrust::inclusive_scan(rmm::exec_policy(stream), + thrust::next(offsets.begin()), + offsets.end(), + thrust::next(offsets.begin())); + + return offsets; +} + +template +auto generate_multipoint_array(multipoint_generator_parameter_normal params, + rmm::cuda_stream_view stream) +{ + auto offsets = + make_offsets(params.multipoint_count_generator(), params.num_multipoints() + 1, stream); + auto num_points = offsets.element(offsets.size() - 1, stream); + rmm::device_uvector> coordinates(num_points, stream); + auto point_gen = params.points_generator(); thrust::tabulate(rmm::exec_policy(stream), coordinates.begin(), coordinates.end(), point_gen); return make_multipoint_array(std::move(offsets), std::move(coordinates)); diff --git a/cpp/include/cuspatial_test/random.cuh b/cpp/include/cuspatial_test/random.cuh index d2a65af3b..9b0d3316d 100644 --- a/cpp/include/cuspatial_test/random.cuh +++ b/cpp/include/cuspatial_test/random.cuh @@ -62,7 +62,7 @@ using integral_to_realType = std::conditional_t>; /** - * @brief Generates a normal distribution between zero and upper_bound. + * @brief Generates a normal distribution between lower_bound and upper_bound. */ template auto make_normal_dist(T lower_bound, T upper_bound) diff --git a/cpp/tests/utility_test/test_geometry_generators.cu b/cpp/tests/utility_test/test_geometry_generators.cu index d0b35e9e4..c2322660f 100644 --- a/cpp/tests/utility_test/test_geometry_generators.cu +++ b/cpp/tests/utility_test/test_geometry_generators.cu @@ -13,15 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "thrust/detail/advance.inl" #include #include #include #include +#include #include #include +#include + using namespace cuspatial; using namespace cuspatial::test; @@ -320,3 +324,50 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(0, 100), // num_holes_per_polygon ::testing::Values(3, 100) // num_sides_per_ring )); + +struct MultiPointFactoryStatsValidator : public BaseFixtureWithParam { + void run(multipoint_generator_parameter_normal params) + { + auto got = generate_multipoint_array(params, stream()); + + auto [got_geometry_offsets, got_coordinates] = got.release(); + + auto num_geometry_counts_it = + make_element_count_iterator_from_offset(got_geometry_offsets.begin()); + + auto h = cuspatial::test::to_host(got_geometry_offsets); + + std::ofstream ofs("/home/coder/output.txt", std::ios::out); + for (std::size_t i = 0; i < h.size() - 1; ++i) { + ofs << h[i + 1] - h[i] << ", "; + } + ofs.close(); + + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(stream()), + num_geometry_counts_it, + thrust::next(num_geometry_counts_it, params.num_multipoints()), + [] __device__(auto count) { return count >= 1; })); + EXPECT_EQ(got_geometry_offsets.size(), params.num_multipoints() + 1); + } +}; + +TEST_P(MultiPointFactoryStatsValidator, CountsVerification) +{ + // Structured binding unsupported by Gtest + std::size_t num_multipoints = std::get<0>(GetParam()); + std::size_t num_points_per_multipoints = std::get<1>(GetParam()); + + auto params = multipoint_generator_parameter_normal{num_multipoints, + num_points_per_multipoints, + vec_2d{0.0, 0.0}, + vec_2d{1.0, 1.0}, + 5.0}; + CUSPATIAL_RUN_TEST(this->run, params); +} + +INSTANTIATE_TEST_SUITE_P( + MultiPointFactoryStatsValidators, + MultiPointFactoryStatsValidator, + ::testing::Combine(::testing::Values(1, 1000), // num_multipoints + ::testing::Values(1, 30) // num_points_per_multipoints + )); From f0e793c5783ad9c5a1c041603cb748fa4410f69a Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 2 Jun 2023 15:35:38 -0700 Subject: [PATCH 2/4] update design --- .../pairwise_point_polygon_distance.cu | 2 +- .../cuspatial_test/geometry_generator.cuh | 176 ++++++++++-------- cpp/include/cuspatial_test/random.cuh | 15 ++ .../utility_test/test_geometry_generators.cu | 19 +- 4 files changed, 123 insertions(+), 89 deletions(-) diff --git a/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu b/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu index bbb353505..0f10bd8e9 100644 --- a/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu +++ b/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu @@ -49,7 +49,7 @@ void pairwise_point_polygon_distance_benchmark(nvbench::state& state, nvbench::t auto mpoly_generator_param = multipolygon_generator_parameter{ num_pairs, num_polygons_per_multipolygon, num_holes_per_polygon, num_edges_per_ring}; - auto mpoint_generator_param = multipoint_generator_parameter_idendity{ + auto mpoint_generator_param = multipoint_fixed_generator_parameter{ num_pairs, num_points_per_multipoint, vec_2d{-1, -1}, vec_2d{0, 0}}; auto multipolygons = generate_multipolygon_array(mpoly_generator_param, stream); diff --git a/cpp/include/cuspatial_test/geometry_generator.cuh b/cpp/include/cuspatial_test/geometry_generator.cuh index 57869b80d..185e0812f 100644 --- a/cpp/include/cuspatial_test/geometry_generator.cuh +++ b/cpp/include/cuspatial_test/geometry_generator.cuh @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -88,6 +89,33 @@ template multipoint_generator_parameter(std::size_t, std::size_t, vec_2d, vec_2d) -> multipoint_generator_parameter; +template +rmm::device_uvector make_offsets(Param params, + std::size_t size, + rmm::cuda_stream_view stream) +{ + rmm::device_uvector offsets(size, stream); + + if (params.count_has_variance()) { + auto count_generator = params.multipoint_count_generator(); + zero_data_async(offsets.begin(), offsets.end(), stream); + thrust::tabulate( + rmm::exec_policy(stream), thrust::next(offsets.begin()), offsets.end(), count_generator); + thrust::inclusive_scan(rmm::exec_policy(stream), + thrust::next(offsets.begin()), + offsets.end(), + thrust::next(offsets.begin())); + } else { + thrust::sequence(rmm::exec_policy(stream), + offsets.begin(), + offsets.end(), + std::size_t{0}, + static_cast(params.num_points_per_multipoints().mu)); + } + + return offsets; +} + } // namespace detail /** @@ -390,62 +418,90 @@ auto generate_multilinestring_array(multilinestring_generator_parameter param return make_multilinestring_array( std::move(geometry_offset), std::move(part_offset), std::move(points)); } -template -class multipoint_generator_parameter_idendity { - private: - detail::multipoint_generator_parameter _params; - - public: - multipoint_generator_parameter_idendity(std::size_t num_multipoints, - std::size_t num_points_per_multipoints, - vec_2d lower_left, - vec_2d upper_right) - : _params{num_multipoints, num_points_per_multipoints, lower_left, upper_right} - { - } - std::size_t num_multipoints() { return _params.num_multipoints; } - std::size_t num_points_per_multipoints() { return _params.num_points_per_multipoints; } - std::size_t num_points() { return num_multipoints() * num_points_per_multipoints(); } - vec_2d lower_left() { return _params.lower_left; } - vec_2d upper_right() { return _params.upper_right; } - auto points_generator() { return _params.points_generator(); } -}; - -template -class multipoint_generator_parameter_normal { - private: - detail::multipoint_generator_parameter _params; - T stddev; +/** + * @brief Creates a parameter set that configures the multipoint generator + * + * This assumes that the multipoint generator uses a normal distribution on the point_per_multipoint + * parameter. + * + * @tparam CoordType The type of coordinate + */ +template +class multipoint_normal_distribution_generator_parameter { + protected: + std::size_t _num_multipoints; + cuspatial::test::normal_random_variable _num_points_per_multipoints; + vec_2d _lower_left; + vec_2d _upper_right; public: - multipoint_generator_parameter_normal(std::size_t num_multipoints, - std::size_t num_points_per_multipoints, - vec_2d lower_left, - vec_2d upper_right, - T stddev) - : _params{num_multipoints, num_points_per_multipoints, lower_left, upper_right}, stddev(stddev) + multipoint_normal_distribution_generator_parameter( + std::size_t num_multipoints, + cuspatial::test::normal_random_variable num_points_per_multipoints, + vec_2d lower_left, + vec_2d upper_right) + : _num_multipoints(num_multipoints), + _num_points_per_multipoints(num_points_per_multipoints), + _lower_left(lower_left), + _upper_right(upper_right) { } + bool count_has_variance() { return _num_points_per_multipoints.stddev != 0.0; } + auto multipoint_count_generator() { - auto lower = std::max(1, static_cast(_params.num_points_per_multipoints - 6 * stddev)); - auto upper = static_cast(_params.num_points_per_multipoints + 6 * stddev); + auto lower = std::max(1, static_cast(_num_points_per_multipoints.neg_6stddev())); + auto upper = static_cast(_num_points_per_multipoints.plus_6stddev()); auto engine = deterministic_engine(0); auto normal = make_normal_dist(lower, upper); return value_generator{lower, upper, engine, normal}; } - auto points_generator() { return _params.points_generator(); } + auto points_generator() + { + auto engine_x = deterministic_engine(0); + auto engine_y = deterministic_engine(1); + + auto x_dist = make_uniform_dist(_lower_left.x, _upper_right.x); + auto y_dist = make_uniform_dist(_lower_left.y, _upper_right.y); + + return point_generator(_lower_left, _upper_right, engine_x, engine_y, x_dist, y_dist); + } + + std::size_t num_multipoints() { return _num_multipoints; } + auto num_points_per_multipoints() { return _num_points_per_multipoints; } + vec_2d lower_left() { return _lower_left; } + vec_2d upper_right() { return _upper_right; } +}; + +template +class multipoint_fixed_generator_parameter + : public multipoint_normal_distribution_generator_parameter { + public: + multipoint_fixed_generator_parameter(std::size_t num_multipoints, + std::size_t num_points_per_multipoints, + vec_2d lower_left, + vec_2d upper_right) + : multipoint_normal_distribution_generator_parameter( + num_multipoints, + {static_cast(num_points_per_multipoints), 0.0}, + lower_left, + upper_right) + { + } - std::size_t num_multipoints() { return _params.num_multipoints; } - vec_2d lower_left() { return _params.lower_left; } - vec_2d upper_right() { return _params.upper_right; } + std::size_t num_points() + { + return this->num_multipoints() * + static_cast(this->num_points_per_multipoints().mu); + } }; /** - * @brief Helper to generate random multipoints within a range + * @brief Generate a multipoint array, the number of point in each multipoint follows a normal + * distribution * * @tparam T The floating point type for the coordinates * @param params Parameters to specify for the multipoints @@ -453,48 +509,10 @@ class multipoint_generator_parameter_normal { * @return a cuspatial::test::multipoint_array object */ template -auto generate_multipoint_array(multipoint_generator_parameter_idendity params, - rmm::cuda_stream_view stream) -{ - rmm::device_uvector> coordinates(params.num_points(), stream); - rmm::device_uvector offsets(params.num_multipoints() + 1, stream); - - thrust::sequence(rmm::exec_policy(stream), - offsets.begin(), - offsets.end(), - std::size_t{0}, - params.num_points_per_multipoints()); - - auto point_gen = params.points_generator(); - thrust::tabulate(rmm::exec_policy(stream), coordinates.begin(), coordinates.end(), point_gen); - - return make_multipoint_array(std::move(offsets), std::move(coordinates)); -} - -template -rmm::device_uvector make_offsets(Generator count_generator, - std::size_t size, - rmm::cuda_stream_view stream) -{ - rmm::device_uvector offsets(size, stream); - - zero_data_async(offsets.begin(), offsets.end(), stream); - thrust::tabulate( - rmm::exec_policy(stream), thrust::next(offsets.begin()), offsets.end(), count_generator); - thrust::inclusive_scan(rmm::exec_policy(stream), - thrust::next(offsets.begin()), - offsets.end(), - thrust::next(offsets.begin())); - - return offsets; -} - -template -auto generate_multipoint_array(multipoint_generator_parameter_normal params, +auto generate_multipoint_array(multipoint_normal_distribution_generator_parameter params, rmm::cuda_stream_view stream) { - auto offsets = - make_offsets(params.multipoint_count_generator(), params.num_multipoints() + 1, stream); + auto offsets = detail::make_offsets(params, params.num_multipoints() + 1, stream); auto num_points = offsets.element(offsets.size() - 1, stream); rmm::device_uvector> coordinates(num_points, stream); diff --git a/cpp/include/cuspatial_test/random.cuh b/cpp/include/cuspatial_test/random.cuh index 9b0d3316d..8d35b1c46 100644 --- a/cpp/include/cuspatial_test/random.cuh +++ b/cpp/include/cuspatial_test/random.cuh @@ -37,6 +37,21 @@ namespace cuspatial { namespace test { +/** + * @brief Class to represent a random variable that follows normal distribution. + * + * @tparam RealT Type of the random variable + */ +template +class normal_random_variable { + public: + RealT mu; + float stddev; + + RealT neg_6stddev() { return mu - 6 * stddev; } + RealT plus_6stddev() { return mu + 6 * stddev; } +}; + /** * @brief Identifies a probability distribution type. */ diff --git a/cpp/tests/utility_test/test_geometry_generators.cu b/cpp/tests/utility_test/test_geometry_generators.cu index c2322660f..a41bd1257 100644 --- a/cpp/tests/utility_test/test_geometry_generators.cu +++ b/cpp/tests/utility_test/test_geometry_generators.cu @@ -326,7 +326,7 @@ INSTANTIATE_TEST_SUITE_P( )); struct MultiPointFactoryStatsValidator : public BaseFixtureWithParam { - void run(multipoint_generator_parameter_normal params) + void run(multipoint_normal_distribution_generator_parameter params) { auto got = generate_multipoint_array(params, stream()); @@ -354,14 +354,15 @@ struct MultiPointFactoryStatsValidator : public BaseFixtureWithParam(GetParam()); - std::size_t num_points_per_multipoints = std::get<1>(GetParam()); - - auto params = multipoint_generator_parameter_normal{num_multipoints, - num_points_per_multipoints, - vec_2d{0.0, 0.0}, - vec_2d{1.0, 1.0}, - 5.0}; + std::size_t num_multipoints = std::get<0>(GetParam()); + double num_points_per_multipoints = static_cast(std::get<1>(GetParam())); + float stddev = 20.0; + + auto params = + multipoint_normal_distribution_generator_parameter{num_multipoints, + {num_points_per_multipoints, stddev}, + vec_2d{0.0, 0.0}, + vec_2d{1.0, 1.0}}; CUSPATIAL_RUN_TEST(this->run, params); } From 3b0768c66aa9e7a58faf8a8fdad059060ecaf799 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 2 Jun 2023 16:16:12 -0700 Subject: [PATCH 3/4] [skip ci] update docs --- .../distance/pairwise_linestring_distance.cu | 4 ++-- .../pairwise_linestring_polygon_distance.cu | 2 +- .../cuspatial_test/geometry_generator.cuh | 18 ++++++++++++------ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/cpp/benchmarks/distance/pairwise_linestring_distance.cu b/cpp/benchmarks/distance/pairwise_linestring_distance.cu index c17c59b0d..556ca07e6 100644 --- a/cpp/benchmarks/distance/pairwise_linestring_distance.cu +++ b/cpp/benchmarks/distance/pairwise_linestring_distance.cu @@ -38,9 +38,9 @@ void pairwise_linestring_distance_benchmark(nvbench::state& state, nvbench::type auto const num_segments_per_linestring{ static_cast(state.get_int64("NumSegmentsPerLineString"))}; - auto params1 = test::multilinestring_generator_parameter{ + auto params1 = test::multilinestring_normal_distribution_generator_parameter{ num_pairs, num_linestrings_per_multilinestring, num_segments_per_linestring, 1.0, {0., 0.}}; - auto params2 = test::multilinestring_generator_parameter{num_pairs, + auto params2 = test::multilinestring_normal_distribution_generator_parameter{num_pairs, num_linestrings_per_multilinestring, num_segments_per_linestring, 1.0, diff --git a/cpp/benchmarks/distance/pairwise_linestring_polygon_distance.cu b/cpp/benchmarks/distance/pairwise_linestring_polygon_distance.cu index 0d4fdf574..81f3794f1 100644 --- a/cpp/benchmarks/distance/pairwise_linestring_polygon_distance.cu +++ b/cpp/benchmarks/distance/pairwise_linestring_polygon_distance.cu @@ -43,7 +43,7 @@ void pairwise_linestring_polygon_distance_benchmark(nvbench::state& state, nvben auto const num_ring_per_polygon{static_cast(state.get_int64("NumRingsPerPolygon"))}; auto const num_points_per_ring{static_cast(state.get_int64("NumPointsPerRing"))}; - auto params1 = test::multilinestring_generator_parameter{ + auto params1 = test::multilinestring_normal_distribution_generator_parameter{ num_pairs, num_linestrings_per_multilinestring, num_segments_per_linestring, 1.0, {0., 0.}}; auto params2 = test::multipolygon_generator_parameter{num_pairs, num_polygon_per_multipolygon, diff --git a/cpp/include/cuspatial_test/geometry_generator.cuh b/cpp/include/cuspatial_test/geometry_generator.cuh index 185e0812f..692eec836 100644 --- a/cpp/include/cuspatial_test/geometry_generator.cuh +++ b/cpp/include/cuspatial_test/geometry_generator.cuh @@ -342,10 +342,10 @@ auto generate_multipolygon_array(multipolygon_generator_parameter params, * @tparam T Underlying type of the coordinates */ template -struct multilinestring_generator_parameter { +struct multilinestring_normal_distribution_generator_parameter { std::size_t num_multilinestrings; - std::size_t num_linestrings_per_multilinestring; - std::size_t num_segments_per_linestring; + cuspatial::test::normal_random_variable num_linestrings_per_multilinestring; + cuspatial::test::normal_random_variable num_segments_per_linestring; T segment_length; vec_2d origin; @@ -384,7 +384,7 @@ struct multilinestring_generator_parameter { * @return The generated multilinestring array */ template -auto generate_multilinestring_array(multilinestring_generator_parameter params, +auto generate_multilinestring_array(multilinestring_normal_distribution_generator_parameter params, rmm::cuda_stream_view stream) { rmm::device_uvector geometry_offset(params.num_multilinestrings + 1, stream); @@ -422,8 +422,7 @@ auto generate_multilinestring_array(multilinestring_generator_parameter param /** * @brief Creates a parameter set that configures the multipoint generator * - * This assumes that the multipoint generator uses a normal distribution on the point_per_multipoint - * parameter. + * The number of point in each multipoint is sampled from a normal distribution. * * @tparam CoordType The type of coordinate */ @@ -476,6 +475,13 @@ class multipoint_normal_distribution_generator_parameter { vec_2d upper_right() { return _upper_right; } }; +/** + * @brief Parameters to configure a multipoint generator to generate identical multipoint for each element + * + * Idendity function is a special case of normal distribution where deviation is 0. + * + * @tparam CoordType The type of underlying coordinates + */ template class multipoint_fixed_generator_parameter : public multipoint_normal_distribution_generator_parameter { From e7da1751f99346c7e7738087bb502b11b1cb2fce Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 5 Jun 2023 10:37:11 -0700 Subject: [PATCH 4/4] add normal distribution generator for multilinestring generator --- .../distance/pairwise_linestring_distance.cu | 21 +- .../pairwise_linestring_polygon_distance.cu | 10 +- .../pairwise_point_polygon_distance.cu | 7 +- .../cuspatial_test/geometry_generator.cuh | 234 +++++++++--------- cpp/include/cuspatial_test/random.cuh | 27 +- .../utility_test/test_geometry_generators.cu | 19 +- 6 files changed, 175 insertions(+), 143 deletions(-) diff --git a/cpp/benchmarks/distance/pairwise_linestring_distance.cu b/cpp/benchmarks/distance/pairwise_linestring_distance.cu index 556ca07e6..b3f4a63be 100644 --- a/cpp/benchmarks/distance/pairwise_linestring_distance.cu +++ b/cpp/benchmarks/distance/pairwise_linestring_distance.cu @@ -38,13 +38,14 @@ void pairwise_linestring_distance_benchmark(nvbench::state& state, nvbench::type auto const num_segments_per_linestring{ static_cast(state.get_int64("NumSegmentsPerLineString"))}; - auto params1 = test::multilinestring_normal_distribution_generator_parameter{ + auto params1 = test::multilinestring_fixed_generator_parameter{ num_pairs, num_linestrings_per_multilinestring, num_segments_per_linestring, 1.0, {0., 0.}}; - auto params2 = test::multilinestring_normal_distribution_generator_parameter{num_pairs, - num_linestrings_per_multilinestring, - num_segments_per_linestring, - 1.0, - {100000., 100000.}}; + auto params2 = + test::multilinestring_fixed_generator_parameter{num_pairs, + num_linestrings_per_multilinestring, + num_segments_per_linestring, + 1.0, + {100000., 100000.}}; auto ls1 = generate_multilinestring_array(params1, stream); auto ls2 = generate_multilinestring_array(params2, stream); @@ -55,15 +56,15 @@ void pairwise_linestring_distance_benchmark(nvbench::state& state, nvbench::type auto output = rmm::device_uvector(num_pairs, stream); auto out_it = output.begin(); - auto const total_points = params1.num_points() + params2.num_points(); + auto const total_points = ls1range.num_points() + ls2range.num_points(); state.add_element_count(num_pairs, "NumPairs"); state.add_element_count(total_points, "NumPoints"); state.add_global_memory_reads(total_points * 2, "CoordinatesDataSize"); - state.add_global_memory_reads(params1.num_multilinestrings + - params2.num_multilinestrings + - params1.num_linestrings() + params2.num_linestrings(), + state.add_global_memory_reads(ls1range.num_multilinestrings() + + ls2range.num_multilinestrings() + + ls1range.num_linestrings() + ls2range.num_linestrings(), "OffsetsDataSize"); state.add_global_memory_writes(num_pairs); diff --git a/cpp/benchmarks/distance/pairwise_linestring_polygon_distance.cu b/cpp/benchmarks/distance/pairwise_linestring_polygon_distance.cu index 81f3794f1..0c7b92bcb 100644 --- a/cpp/benchmarks/distance/pairwise_linestring_polygon_distance.cu +++ b/cpp/benchmarks/distance/pairwise_linestring_polygon_distance.cu @@ -43,7 +43,7 @@ void pairwise_linestring_polygon_distance_benchmark(nvbench::state& state, nvben auto const num_ring_per_polygon{static_cast(state.get_int64("NumRingsPerPolygon"))}; auto const num_points_per_ring{static_cast(state.get_int64("NumPointsPerRing"))}; - auto params1 = test::multilinestring_normal_distribution_generator_parameter{ + auto params1 = test::multilinestring_fixed_generator_parameter{ num_pairs, num_linestrings_per_multilinestring, num_segments_per_linestring, 1.0, {0., 0.}}; auto params2 = test::multipolygon_generator_parameter{num_pairs, num_polygon_per_multipolygon, @@ -66,10 +66,10 @@ void pairwise_linestring_polygon_distance_benchmark(nvbench::state& state, nvben state.add_element_count(num_pairs, "NumPairs"); state.add_element_count(total_points, "NumPoints"); state.add_global_memory_reads(total_points * 2, "CoordinatesDataSize"); - state.add_global_memory_reads(params1.num_multilinestrings + params1.num_linestrings() + - params2.num_multipolygons + params2.num_polygons() + - params2.num_rings() + 5, - "OffsetsDataSize"); + state.add_global_memory_reads( + lines_range.num_multilinestrings() + lines_range.num_linestrings() + + poly_range.num_multipolygons() + poly_range.num_polygons() + poly_range.num_rings() + 5, + "OffsetsDataSize"); state.add_global_memory_writes(num_pairs); state.exec(nvbench::exec_tag::sync, diff --git a/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu b/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu index 0f10bd8e9..baec497e9 100644 --- a/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu +++ b/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu @@ -69,11 +69,10 @@ void pairwise_point_polygon_distance_benchmark(nvbench::state& state, nvbench::t mpoly_generator_param.num_rings() * mpoly_generator_param.num_polygons()), "Multipolygon Complexity"); - state.add_element_count(mpoint_generator_param.num_points(), "NumPoints (in multipoints)"); + state.add_element_count(mpoint_view.num_points(), "NumPoints (in multipoints)"); - state.add_global_memory_reads( - mpoly_generator_param.num_coords() + mpoint_generator_param.num_points(), - "CoordinatesReadSize"); + state.add_global_memory_reads(mpoly_generator_param.num_coords() + mpoint_view.num_points(), + "CoordinatesReadSize"); state.add_global_memory_reads((mpoly_generator_param.num_rings() + 1) + (mpoly_generator_param.num_polygons() + 1) + (mpoly_generator_param.num_multipolygons + 1) + diff --git a/cpp/include/cuspatial_test/geometry_generator.cuh b/cpp/include/cuspatial_test/geometry_generator.cuh index 692eec836..697eb66d3 100644 --- a/cpp/include/cuspatial_test/geometry_generator.cuh +++ b/cpp/include/cuspatial_test/geometry_generator.cuh @@ -16,7 +16,6 @@ #pragma once -#include "thrust/random/normal_distribution.h" #include #include @@ -26,81 +25,31 @@ #include #include -#include #include #include #include +#include #include #include #include +#include + namespace cuspatial { namespace test { namespace detail { - -template -struct tabulate_direction_functor { - vec_2d __device__ operator()(index_t i) - { - return vec_2d{cos(static_cast(i)), sin(static_cast(i))}; - } -}; - -template -struct random_walk_functor { - T segment_length; - - vec_2d __device__ operator()(vec_2d prev, vec_2d rad) - { - return prev + segment_length * rad; - } -}; - -/** - * @brief Struct to store the parameters of the multipoint aray - * - * @tparam T Type of the coordinates - */ -template -class multipoint_generator_parameter { - public: - using element_t = T; - - std::size_t num_multipoints; - std::size_t num_points_per_multipoints; - vec_2d lower_left; - vec_2d upper_right; - - auto points_generator() - { - auto engine_x = deterministic_engine(0); - auto engine_y = deterministic_engine(1); - - auto x_dist = make_uniform_dist(lower_left.x, upper_right.x); - auto y_dist = make_uniform_dist(lower_left.y, upper_right.y); - - return point_generator(lower_left, upper_right, engine_x, engine_y, x_dist, y_dist); - } -}; - -template -multipoint_generator_parameter(std::size_t, std::size_t, vec_2d, vec_2d) - -> multipoint_generator_parameter; - -template -rmm::device_uvector make_offsets(Param params, +template +rmm::device_uvector make_offsets(Generator gen, std::size_t size, rmm::cuda_stream_view stream) { rmm::device_uvector offsets(size, stream); - if (params.count_has_variance()) { - auto count_generator = params.multipoint_count_generator(); + if (gen.is_random()) { zero_data_async(offsets.begin(), offsets.end(), stream); - thrust::tabulate( - rmm::exec_policy(stream), thrust::next(offsets.begin()), offsets.end(), count_generator); + thrust::tabulate(rmm::exec_policy(stream), thrust::next(offsets.begin()), offsets.end(), gen); thrust::inclusive_scan(rmm::exec_policy(stream), thrust::next(offsets.begin()), offsets.end(), @@ -110,7 +59,7 @@ rmm::device_uvector make_offsets(Param params, offsets.begin(), offsets.end(), std::size_t{0}, - static_cast(params.num_points_per_multipoints().mu)); + static_cast(gen.mean())); } return offsets; @@ -341,23 +290,97 @@ auto generate_multipolygon_array(multipolygon_generator_parameter params, * * @tparam T Underlying type of the coordinates */ -template -struct multilinestring_normal_distribution_generator_parameter { - std::size_t num_multilinestrings; - cuspatial::test::normal_random_variable num_linestrings_per_multilinestring; - cuspatial::test::normal_random_variable num_segments_per_linestring; - T segment_length; - vec_2d origin; - - std::size_t num_linestrings() +template +class multilinestring_normal_distribution_generator_parameter { + private: + static int constexpr NUM_LINESTRING_GEN_SEED = 0; + static int constexpr NUM_SEGMENT_GEN_SEED = 1; + + std::size_t _num_multilinestrings; + cuspatial::test::normal_random_variable _num_linestrings_per_multilinestring; + cuspatial::test::normal_random_variable _num_segments_per_linestring; + CoordType _segment_length; + vec_2d _origin; + + public: + template + struct _direction_functor { + vec_2d __device__ operator()(index_t i) + { + return vec_2d{cos(static_cast(i)), sin(static_cast(i))}; + } + }; + + struct _random_walk_functor { + CoordType segment_length; + + vec_2d __device__ operator()(vec_2d prev, vec_2d rad) + { + return prev + segment_length * rad; + } + }; + + multilinestring_normal_distribution_generator_parameter( + std::size_t num_multilinestrings, + cuspatial::test::normal_random_variable num_linestrings_per_multilinestring, + cuspatial::test::normal_random_variable num_segments_per_linestring, + CoordType segment_length, + vec_2d origin) + : _num_multilinestrings(num_multilinestrings), + _num_linestrings_per_multilinestring(num_linestrings_per_multilinestring), + _num_segments_per_linestring(num_segments_per_linestring), + _segment_length(segment_length), + _origin(origin) { - return num_multilinestrings * num_linestrings_per_multilinestring; } - std::size_t num_points_per_linestring() { return num_segments_per_linestring + 1; } + std::size_t num_multilinestrings() { return _num_multilinestrings; } + auto num_linestrings_per_multilinestring() { return _num_linestrings_per_multilinestring; } + auto num_segments_per_linestring() { return _num_segments_per_linestring; } + CoordType segment_length() { return _segment_length; } + vec_2d origin() { return _origin; } - std::size_t num_segments() { return num_linestrings() * num_segments_per_linestring; } - std::size_t num_points() { return num_linestrings() * num_points_per_linestring(); } + auto num_linestrings_generator() + { + auto lower = std::max( + std::size_t{1}, static_cast(_num_linestrings_per_multilinestring.neg_6stddev())); + auto upper = static_cast(_num_linestrings_per_multilinestring.plus_6stddev()); + return make_clipped_normal_distribution_value_generator(lower, upper, NUM_LINESTRING_GEN_SEED); + } + + auto num_points_generator() + { + auto lower = std::max(std::size_t{1}, + static_cast(_num_segments_per_linestring.neg_6stddev())); + auto upper = static_cast(_num_segments_per_linestring.plus_6stddev()); + return make_clipped_normal_distribution_value_generator(lower, upper, NUM_SEGMENT_GEN_SEED); + } + + auto direction_functor() { return _direction_functor{}; } + auto random_walk_functor() { return _random_walk_functor{}; } +}; + +/** + * @brief + * + * @tparam + */ +template +struct multilinestring_fixed_generator_parameter + : public multilinestring_normal_distribution_generator_parameter { + multilinestring_fixed_generator_parameter(std::size_t num_multilinestrings, + std::size_t num_linestrings_per_multilinestring, + std::size_t num_segments_per_linestring, + CoordType segment_length, + vec_2d origin) + : multilinestring_normal_distribution_generator_parameter( + num_multilinestrings, + {static_cast(num_linestrings_per_multilinestring), 0.0}, + {static_cast(num_segments_per_linestring), 0.0}, + segment_length, + origin) + { + } }; /** @@ -384,36 +407,25 @@ struct multilinestring_normal_distribution_generator_parameter { * @return The generated multilinestring array */ template -auto generate_multilinestring_array(multilinestring_normal_distribution_generator_parameter params, - rmm::cuda_stream_view stream) +auto generate_multilinestring_array( + multilinestring_normal_distribution_generator_parameter params, rmm::cuda_stream_view stream) { - rmm::device_uvector geometry_offset(params.num_multilinestrings + 1, stream); - rmm::device_uvector part_offset(params.num_linestrings() + 1, stream); - rmm::device_uvector> points(params.num_points(), stream); + auto geometry_offset = detail::make_offsets( + params.num_linestrings_generator(), params.num_multilinestrings() + 1, stream); + auto num_linestrings = geometry_offset.element(geometry_offset.size() - 1, stream); + auto part_offset = detail::make_offsets(params.num_points_generator(), num_linestrings, stream); + auto num_points = part_offset.element(part_offset.size() - 1, stream); - thrust::sequence(rmm::exec_policy(stream), - geometry_offset.begin(), - geometry_offset.end(), - static_cast(0), - params.num_linestrings_per_multilinestring); - - thrust::sequence(rmm::exec_policy(stream), - part_offset.begin(), - part_offset.end(), - static_cast(0), - params.num_segments_per_linestring + 1); - - thrust::tabulate(rmm::exec_policy(stream), - points.begin(), - points.end(), - detail::tabulate_direction_functor{}); + rmm::device_uvector> points(num_points, stream); + thrust::tabulate( + rmm::exec_policy(stream), points.begin(), points.end(), params.direction_functor()); thrust::exclusive_scan(rmm::exec_policy(stream), points.begin(), points.end(), points.begin(), - params.origin, - detail::random_walk_functor{params.segment_length}); + params.origin(), + params.random_walk_functor()); return make_multilinestring_array( std::move(geometry_offset), std::move(part_offset), std::move(points)); @@ -437,7 +449,7 @@ class multipoint_normal_distribution_generator_parameter { public: multipoint_normal_distribution_generator_parameter( std::size_t num_multipoints, - cuspatial::test::normal_random_variable num_points_per_multipoints, + normal_random_variable num_points_per_multipoints, vec_2d lower_left, vec_2d upper_right) : _num_multipoints(num_multipoints), @@ -451,11 +463,9 @@ class multipoint_normal_distribution_generator_parameter { auto multipoint_count_generator() { - auto lower = std::max(1, static_cast(_num_points_per_multipoints.neg_6stddev())); - auto upper = static_cast(_num_points_per_multipoints.plus_6stddev()); - auto engine = deterministic_engine(0); - auto normal = make_normal_dist(lower, upper); - return value_generator{lower, upper, engine, normal}; + auto lower = std::max(1, static_cast(_num_points_per_multipoints.neg_6stddev())); + auto upper = static_cast(_num_points_per_multipoints.plus_6stddev()); + return make_clipped_normal_distribution_value_generator(lower, upper); } auto points_generator() @@ -476,7 +486,8 @@ class multipoint_normal_distribution_generator_parameter { }; /** - * @brief Parameters to configure a multipoint generator to generate identical multipoint for each element + * @brief Parameters to configure a multipoint generator to generate identical multipoint for each + * element * * Idendity function is a special case of normal distribution where deviation is 0. * @@ -497,12 +508,6 @@ class multipoint_fixed_generator_parameter upper_right) { } - - std::size_t num_points() - { - return this->num_multipoints() * - static_cast(this->num_points_per_multipoints().mu); - } }; /** @@ -514,16 +519,17 @@ class multipoint_fixed_generator_parameter * @param stream The CUDA stream to use for device memory operations and kernel launches * @return a cuspatial::test::multipoint_array object */ -template -auto generate_multipoint_array(multipoint_normal_distribution_generator_parameter params, +template +auto generate_multipoint_array(multipoint_normal_distribution_generator_parameter params, rmm::cuda_stream_view stream) { - auto offsets = detail::make_offsets(params, params.num_multipoints() + 1, stream); + auto offsets = + detail::make_offsets(params.multipoint_count_generator(), params.num_multipoints() + 1, stream); auto num_points = offsets.element(offsets.size() - 1, stream); - rmm::device_uvector> coordinates(num_points, stream); - auto point_gen = params.points_generator(); - thrust::tabulate(rmm::exec_policy(stream), coordinates.begin(), coordinates.end(), point_gen); + rmm::device_uvector> coordinates(num_points, stream); + thrust::tabulate( + rmm::exec_policy(stream), coordinates.begin(), coordinates.end(), params.points_generator()); return make_multipoint_array(std::move(offsets), std::move(coordinates)); } diff --git a/cpp/include/cuspatial_test/random.cuh b/cpp/include/cuspatial_test/random.cuh index 8d35b1c46..3e557bb81 100644 --- a/cpp/include/cuspatial_test/random.cuh +++ b/cpp/include/cuspatial_test/random.cuh @@ -46,12 +46,15 @@ template class normal_random_variable { public: RealT mu; - float stddev; + RealT stddev; RealT neg_6stddev() { return mu - 6 * stddev; } RealT plus_6stddev() { return mu + 6 * stddev; } }; +template +normal_random_variable(RealT, RealT) -> normal_random_variable; + /** * @brief Identifies a probability distribution type. */ @@ -160,6 +163,9 @@ struct value_generator { } } + bool is_random() { return dist.stddev() != 0; } + T mean() { return dist.mean(); } + T lower_bound; T upper_bound; thrust::minstd_rand engine; @@ -191,6 +197,25 @@ struct point_generator { */ auto deterministic_engine(unsigned seed) { return thrust::minstd_rand{seed}; } +/** + * @brief Make a value generator that samples a value from a clipped normal distribution + * + * @tparam T + * @param lower_bound + * @param upper_bound + * @param seed + * @return auto + */ +template +auto make_clipped_normal_distribution_value_generator(T lower_bound, + T upper_bound, + std::size_t seed = 0) +{ + auto engine = deterministic_engine(seed); + auto normal = make_normal_dist(lower_bound, upper_bound); + return value_generator{lower_bound, upper_bound, engine, normal}; +} + } // namespace test } // namespace cuspatial diff --git a/cpp/tests/utility_test/test_geometry_generators.cu b/cpp/tests/utility_test/test_geometry_generators.cu index a41bd1257..8bdfbcc1b 100644 --- a/cpp/tests/utility_test/test_geometry_generators.cu +++ b/cpp/tests/utility_test/test_geometry_generators.cu @@ -354,15 +354,16 @@ struct MultiPointFactoryStatsValidator : public BaseFixtureWithParam(GetParam()); - double num_points_per_multipoints = static_cast(std::get<1>(GetParam())); - float stddev = 20.0; - - auto params = - multipoint_normal_distribution_generator_parameter{num_multipoints, - {num_points_per_multipoints, stddev}, - vec_2d{0.0, 0.0}, - vec_2d{1.0, 1.0}}; + std::size_t num_multipoints = std::get<0>(GetParam()); + std::size_t num_points_per_multipoints = std::get<1>(GetParam()); + double stddev = 20.0; + + auto params = multipoint_normal_distribution_generator_parameter{ + num_multipoints, + cuspatial::test::normal_random_variable{static_cast(num_points_per_multipoints), + stddev}, + vec_2d{0.0, 0.0}, + vec_2d{1.0, 1.0}}; CUSPATIAL_RUN_TEST(this->run, params); }