diff --git a/cpp/benchmarks/distance/pairwise_linestring_distance.cu b/cpp/benchmarks/distance/pairwise_linestring_distance.cu index c17c59b0d..b3f4a63be 100644 --- a/cpp/benchmarks/distance/pairwise_linestring_distance.cu +++ b/cpp/benchmarks/distance/pairwise_linestring_distance.cu @@ -38,13 +38,14 @@ void pairwise_linestring_distance_benchmark(nvbench::state& state, nvbench::type auto const num_segments_per_linestring{ static_cast(state.get_int64("NumSegmentsPerLineString"))}; - auto params1 = test::multilinestring_generator_parameter{ + auto params1 = test::multilinestring_fixed_generator_parameter{ num_pairs, num_linestrings_per_multilinestring, num_segments_per_linestring, 1.0, {0., 0.}}; - auto params2 = test::multilinestring_generator_parameter{num_pairs, - num_linestrings_per_multilinestring, - num_segments_per_linestring, - 1.0, - {100000., 100000.}}; + auto params2 = + test::multilinestring_fixed_generator_parameter{num_pairs, + num_linestrings_per_multilinestring, + num_segments_per_linestring, + 1.0, + {100000., 100000.}}; auto ls1 = generate_multilinestring_array(params1, stream); auto ls2 = generate_multilinestring_array(params2, stream); @@ -55,15 +56,15 @@ void pairwise_linestring_distance_benchmark(nvbench::state& state, nvbench::type auto output = rmm::device_uvector(num_pairs, stream); auto out_it = output.begin(); - auto const total_points = params1.num_points() + params2.num_points(); + auto const total_points = ls1range.num_points() + ls2range.num_points(); state.add_element_count(num_pairs, "NumPairs"); state.add_element_count(total_points, "NumPoints"); state.add_global_memory_reads(total_points * 2, "CoordinatesDataSize"); - state.add_global_memory_reads(params1.num_multilinestrings + - params2.num_multilinestrings + - params1.num_linestrings() + params2.num_linestrings(), + state.add_global_memory_reads(ls1range.num_multilinestrings() + + ls2range.num_multilinestrings() + + ls1range.num_linestrings() + ls2range.num_linestrings(), 
"OffsetsDataSize"); state.add_global_memory_writes(num_pairs); diff --git a/cpp/benchmarks/distance/pairwise_linestring_polygon_distance.cu b/cpp/benchmarks/distance/pairwise_linestring_polygon_distance.cu index 0d4fdf574..0c7b92bcb 100644 --- a/cpp/benchmarks/distance/pairwise_linestring_polygon_distance.cu +++ b/cpp/benchmarks/distance/pairwise_linestring_polygon_distance.cu @@ -43,7 +43,7 @@ void pairwise_linestring_polygon_distance_benchmark(nvbench::state& state, nvben auto const num_ring_per_polygon{static_cast(state.get_int64("NumRingsPerPolygon"))}; auto const num_points_per_ring{static_cast(state.get_int64("NumPointsPerRing"))}; - auto params1 = test::multilinestring_generator_parameter{ + auto params1 = test::multilinestring_fixed_generator_parameter{ num_pairs, num_linestrings_per_multilinestring, num_segments_per_linestring, 1.0, {0., 0.}}; auto params2 = test::multipolygon_generator_parameter{num_pairs, num_polygon_per_multipolygon, @@ -66,10 +66,10 @@ void pairwise_linestring_polygon_distance_benchmark(nvbench::state& state, nvben state.add_element_count(num_pairs, "NumPairs"); state.add_element_count(total_points, "NumPoints"); state.add_global_memory_reads(total_points * 2, "CoordinatesDataSize"); - state.add_global_memory_reads(params1.num_multilinestrings + params1.num_linestrings() + - params2.num_multipolygons + params2.num_polygons() + - params2.num_rings() + 5, - "OffsetsDataSize"); + state.add_global_memory_reads( + lines_range.num_multilinestrings() + lines_range.num_linestrings() + + poly_range.num_multipolygons() + poly_range.num_polygons() + poly_range.num_rings() + 5, + "OffsetsDataSize"); state.add_global_memory_writes(num_pairs); state.exec(nvbench::exec_tag::sync, diff --git a/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu b/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu index c3f84da70..baec497e9 100644 --- a/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu +++ 
b/cpp/benchmarks/distance/pairwise_point_polygon_distance.cu @@ -49,7 +49,7 @@ void pairwise_point_polygon_distance_benchmark(nvbench::state& state, nvbench::t auto mpoly_generator_param = multipolygon_generator_parameter{ num_pairs, num_polygons_per_multipolygon, num_holes_per_polygon, num_edges_per_ring}; - auto mpoint_generator_param = multipoint_generator_parameter{ + auto mpoint_generator_param = multipoint_fixed_generator_parameter{ num_pairs, num_points_per_multipoint, vec_2d{-1, -1}, vec_2d{0, 0}}; auto multipolygons = generate_multipolygon_array(mpoly_generator_param, stream); @@ -69,15 +69,15 @@ void pairwise_point_polygon_distance_benchmark(nvbench::state& state, nvbench::t mpoly_generator_param.num_rings() * mpoly_generator_param.num_polygons()), "Multipolygon Complexity"); - state.add_element_count(mpoint_generator_param.num_points(), "NumPoints (in multipoints)"); - - state.add_global_memory_reads( - mpoly_generator_param.num_coords() + mpoint_generator_param.num_points(), - "CoordinatesReadSize"); - state.add_global_memory_reads( - (mpoly_generator_param.num_rings() + 1) + (mpoly_generator_param.num_polygons() + 1) + - (mpoly_generator_param.num_multipolygons + 1) + (mpoint_generator_param.num_multipoints + 1), - "OffsetsDataSize"); + state.add_element_count(mpoint_view.num_points(), "NumPoints (in multipoints)"); + + state.add_global_memory_reads(mpoly_generator_param.num_coords() + mpoint_view.num_points(), + "CoordinatesReadSize"); + state.add_global_memory_reads((mpoly_generator_param.num_rings() + 1) + + (mpoly_generator_param.num_polygons() + 1) + + (mpoly_generator_param.num_multipolygons + 1) + + (mpoint_generator_param.num_multipoints() + 1), + "OffsetsDataSize"); state.add_global_memory_writes(num_pairs); diff --git a/cpp/include/cuspatial/iterator_factory.cuh b/cpp/include/cuspatial/iterator_factory.cuh index 1f026512c..8fe088461 100644 --- a/cpp/include/cuspatial/iterator_factory.cuh +++ b/cpp/include/cuspatial/iterator_factory.cuh @@ 
-16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -424,6 +425,13 @@ auto make_geometry_id_iterator(GeometryIter geometry_offsets_begin, std::distance(geometry_offsets_begin, geometry_offsets_end))); } +template +auto make_element_count_iterator_from_offset(OffsetIter offset_begin) +{ + auto zipped = thrust::make_zip_iterator(offset_begin, thrust::next(offset_begin)); + return thrust::make_transform_iterator(zipped, detail::offset_pair_to_count_functor{}); +} + /** * @} // end of doxygen group */ diff --git a/cpp/include/cuspatial_test/geometry_generator.cuh b/cpp/include/cuspatial_test/geometry_generator.cuh index 73b1d270e..697eb66d3 100644 --- a/cpp/include/cuspatial_test/geometry_generator.cuh +++ b/cpp/include/cuspatial_test/geometry_generator.cuh @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -28,31 +29,41 @@ #include #include +#include +#include #include #include +#include + namespace cuspatial { namespace test { namespace detail { - -template -struct tabulate_direction_functor { - vec_2d __device__ operator()(index_t i) - { - return vec_2d{cos(static_cast(i)), sin(static_cast(i))}; +template +rmm::device_uvector make_offsets(Generator gen, + std::size_t size, + rmm::cuda_stream_view stream) +{ + rmm::device_uvector offsets(size, stream); + + if (gen.is_random()) { + zero_data_async(offsets.begin(), offsets.end(), stream); + thrust::tabulate(rmm::exec_policy(stream), thrust::next(offsets.begin()), offsets.end(), gen); + thrust::inclusive_scan(rmm::exec_policy(stream), + thrust::next(offsets.begin()), + offsets.end(), + thrust::next(offsets.begin())); + } else { + thrust::sequence(rmm::exec_policy(stream), + offsets.begin(), + offsets.end(), + std::size_t{0}, + static_cast(gen.mean())); } -}; - -template -struct random_walk_functor { - T segment_length; - vec_2d __device__ operator()(vec_2d prev, vec_2d rad) - { - return prev + segment_length * rad; - } -}; + return offsets; +} } // namespace detail @@ -279,23 
+290,97 @@ auto generate_multipolygon_array(multipolygon_generator_parameter params, * * @tparam T Underlying type of the coordinates */ -template -struct multilinestring_generator_parameter { - std::size_t num_multilinestrings; - std::size_t num_linestrings_per_multilinestring; - std::size_t num_segments_per_linestring; - T segment_length; - vec_2d origin; - - std::size_t num_linestrings() +template +class multilinestring_normal_distribution_generator_parameter { + private: + static int constexpr NUM_LINESTRING_GEN_SEED = 0; + static int constexpr NUM_SEGMENT_GEN_SEED = 1; + + std::size_t _num_multilinestrings; + cuspatial::test::normal_random_variable _num_linestrings_per_multilinestring; + cuspatial::test::normal_random_variable _num_segments_per_linestring; + CoordType _segment_length; + vec_2d _origin; + + public: + template + struct _direction_functor { + vec_2d __device__ operator()(index_t i) + { + return vec_2d{cos(static_cast(i)), sin(static_cast(i))}; + } + }; + + struct _random_walk_functor { + CoordType segment_length; + + vec_2d __device__ operator()(vec_2d prev, vec_2d rad) + { + return prev + segment_length * rad; + } + }; + + multilinestring_normal_distribution_generator_parameter( + std::size_t num_multilinestrings, + cuspatial::test::normal_random_variable num_linestrings_per_multilinestring, + cuspatial::test::normal_random_variable num_segments_per_linestring, + CoordType segment_length, + vec_2d origin) + : _num_multilinestrings(num_multilinestrings), + _num_linestrings_per_multilinestring(num_linestrings_per_multilinestring), + _num_segments_per_linestring(num_segments_per_linestring), + _segment_length(segment_length), + _origin(origin) + { + } + + std::size_t num_multilinestrings() { return _num_multilinestrings; } + auto num_linestrings_per_multilinestring() { return _num_linestrings_per_multilinestring; } + auto num_segments_per_linestring() { return _num_segments_per_linestring; } + CoordType segment_length() { return _segment_length; } 
+ vec_2d origin() { return _origin; } + + auto num_linestrings_generator() { - return num_multilinestrings * num_linestrings_per_multilinestring; + auto lower = std::max( + std::size_t{1}, static_cast(_num_linestrings_per_multilinestring.neg_6stddev())); + auto upper = static_cast(_num_linestrings_per_multilinestring.plus_6stddev()); + return make_clipped_normal_distribution_value_generator(lower, upper, NUM_LINESTRING_GEN_SEED); } - std::size_t num_points_per_linestring() { return num_segments_per_linestring + 1; } + auto num_points_generator() + { + auto lower = std::max(std::size_t{1}, + static_cast(_num_segments_per_linestring.neg_6stddev())); + auto upper = static_cast(_num_segments_per_linestring.plus_6stddev()); + return make_clipped_normal_distribution_value_generator(lower, upper, NUM_SEGMENT_GEN_SEED); + } - std::size_t num_segments() { return num_linestrings() * num_segments_per_linestring; } - std::size_t num_points() { return num_linestrings() * num_points_per_linestring(); } + auto direction_functor() { return _direction_functor{}; } + auto random_walk_functor() { return _random_walk_functor{}; } +}; + +/** + * @brief + * + * @tparam + */ +template +struct multilinestring_fixed_generator_parameter + : public multilinestring_normal_distribution_generator_parameter { + multilinestring_fixed_generator_parameter(std::size_t num_multilinestrings, + std::size_t num_linestrings_per_multilinestring, + std::size_t num_segments_per_linestring, + CoordType segment_length, + vec_2d origin) + : multilinestring_normal_distribution_generator_parameter( + num_multilinestrings, + {static_cast(num_linestrings_per_multilinestring), 0.0}, + {static_cast(num_segments_per_linestring), 0.0}, + segment_length, + origin) + { + } }; /** @@ -322,92 +407,129 @@ struct multilinestring_generator_parameter { * @return The generated multilinestring array */ template -auto generate_multilinestring_array(multilinestring_generator_parameter params, - rmm::cuda_stream_view stream) +auto 
generate_multilinestring_array( + multilinestring_normal_distribution_generator_parameter params, rmm::cuda_stream_view stream) { - rmm::device_uvector geometry_offset(params.num_multilinestrings + 1, stream); - rmm::device_uvector part_offset(params.num_linestrings() + 1, stream); - rmm::device_uvector> points(params.num_points(), stream); - - thrust::sequence(rmm::exec_policy(stream), - geometry_offset.begin(), - geometry_offset.end(), - static_cast(0), - params.num_linestrings_per_multilinestring); - - thrust::sequence(rmm::exec_policy(stream), - part_offset.begin(), - part_offset.end(), - static_cast(0), - params.num_segments_per_linestring + 1); + auto geometry_offset = detail::make_offsets( + params.num_linestrings_generator(), params.num_multilinestrings() + 1, stream); + auto num_linestrings = geometry_offset.element(geometry_offset.size() - 1, stream); + auto part_offset = detail::make_offsets(params.num_points_generator(), num_linestrings, stream); + auto num_points = part_offset.element(part_offset.size() - 1, stream); - thrust::tabulate(rmm::exec_policy(stream), - points.begin(), - points.end(), - detail::tabulate_direction_functor{}); + rmm::device_uvector> points(num_points, stream); + thrust::tabulate( + rmm::exec_policy(stream), points.begin(), points.end(), params.direction_functor()); thrust::exclusive_scan(rmm::exec_policy(stream), points.begin(), points.end(), points.begin(), - params.origin, - detail::random_walk_functor{params.segment_length}); + params.origin(), + params.random_walk_functor()); return make_multilinestring_array( std::move(geometry_offset), std::move(part_offset), std::move(points)); } /** - * @brief Struct to store the parameters of the multipoint aray + * @brief Creates a parameter set that configures the multipoint generator * - * @tparam T Type of the coordinates + * The number of points in each multipoint is sampled from a normal distribution. 
+ * + * @tparam CoordType The type of coordinate */ -template -struct multipoint_generator_parameter { - using element_t = T; +template +class multipoint_normal_distribution_generator_parameter { + protected: + std::size_t _num_multipoints; + cuspatial::test::normal_random_variable _num_points_per_multipoints; + vec_2d _lower_left; + vec_2d _upper_right; + + public: + multipoint_normal_distribution_generator_parameter( + std::size_t num_multipoints, + normal_random_variable num_points_per_multipoints, + vec_2d lower_left, + vec_2d upper_right) + : _num_multipoints(num_multipoints), + _num_points_per_multipoints(num_points_per_multipoints), + _lower_left(lower_left), + _upper_right(upper_right) + { + } + + bool count_has_variance() { return _num_points_per_multipoints.stddev != 0.0; } + + auto multipoint_count_generator() + { + auto lower = std::max(1, static_cast(_num_points_per_multipoints.neg_6stddev())); + auto upper = static_cast(_num_points_per_multipoints.plus_6stddev()); + return make_clipped_normal_distribution_value_generator(lower, upper); + } + + auto points_generator() + { + auto engine_x = deterministic_engine(0); + auto engine_y = deterministic_engine(1); + + auto x_dist = make_uniform_dist(_lower_left.x, _upper_right.x); + auto y_dist = make_uniform_dist(_lower_left.y, _upper_right.y); - std::size_t num_multipoints; - std::size_t num_points_per_multipoints; - vec_2d lower_left; - vec_2d upper_right; + return point_generator(_lower_left, _upper_right, engine_x, engine_y, x_dist, y_dist); + } + + std::size_t num_multipoints() { return _num_multipoints; } + auto num_points_per_multipoints() { return _num_points_per_multipoints; } + vec_2d lower_left() { return _lower_left; } + vec_2d upper_right() { return _upper_right; } +}; - CUSPATIAL_HOST_DEVICE std::size_t num_points() +/** + * @brief Parameters to configure a multipoint generator to generate identical multipoint for each + * element + * + * Identity function is a special case of normal 
distribution where the standard deviation is 0. + * + * @tparam CoordType The type of underlying coordinates + */ +template +class multipoint_fixed_generator_parameter + : public multipoint_normal_distribution_generator_parameter { + public: + multipoint_fixed_generator_parameter(std::size_t num_multipoints, + std::size_t num_points_per_multipoints, + vec_2d lower_left, + vec_2d upper_right) + : multipoint_normal_distribution_generator_parameter( + num_multipoints, + {static_cast(num_points_per_multipoints), 0.0}, + lower_left, + upper_right) { - return num_multipoints * num_points_per_multipoints; } }; /** - * @brief Helper to generate random multipoints within a range + * @brief Generate a multipoint array, the number of points in each multipoint follows a normal + * distribution * * @tparam T The floating point type for the coordinates * @param params Parameters to specify for the multipoints * @param stream The CUDA stream to use for device memory operations and kernel launches * @return a cuspatial::test::multipoint_array object */ -template -auto generate_multipoint_array(multipoint_generator_parameter params, +template +auto generate_multipoint_array(multipoint_normal_distribution_generator_parameter params, rmm::cuda_stream_view stream) { - rmm::device_uvector> coordinates(params.num_points(), stream); - rmm::device_uvector offsets(params.num_multipoints + 1, stream); - - thrust::sequence(rmm::exec_policy(stream), - offsets.begin(), - offsets.end(), - std::size_t{0}, - params.num_points_per_multipoints); - - auto engine_x = deterministic_engine(params.num_points()); - auto engine_y = deterministic_engine(2 * params.num_points()); - - auto x_dist = make_uniform_dist(params.lower_left.x, params.upper_right.x); - auto y_dist = make_uniform_dist(params.lower_left.y, params.upper_right.y); - - auto point_gen = - point_generator(params.lower_left, params.upper_right, engine_x, engine_y, x_dist, y_dist); + auto offsets = + 
detail::make_offsets(params.multipoint_count_generator(), params.num_multipoints() + 1, stream); + auto num_points = offsets.element(offsets.size() - 1, stream); - thrust::tabulate(rmm::exec_policy(stream), coordinates.begin(), coordinates.end(), point_gen); + rmm::device_uvector> coordinates(num_points, stream); + thrust::tabulate( + rmm::exec_policy(stream), coordinates.begin(), coordinates.end(), params.points_generator()); return make_multipoint_array(std::move(offsets), std::move(coordinates)); } diff --git a/cpp/include/cuspatial_test/random.cuh b/cpp/include/cuspatial_test/random.cuh index d2a65af3b..3e557bb81 100644 --- a/cpp/include/cuspatial_test/random.cuh +++ b/cpp/include/cuspatial_test/random.cuh @@ -37,6 +37,24 @@ namespace cuspatial { namespace test { +/** + * @brief Class to represent a random variable that follows normal distribution. + * + * @tparam RealT Type of the random variable + */ +template +class normal_random_variable { + public: + RealT mu; + RealT stddev; + + RealT neg_6stddev() { return mu - 6 * stddev; } + RealT plus_6stddev() { return mu + 6 * stddev; } +}; + +template +normal_random_variable(RealT, RealT) -> normal_random_variable; + /** * @brief Identifies a probability distribution type. */ @@ -62,7 +80,7 @@ using integral_to_realType = std::conditional_t>; /** - * @brief Generates a normal distribution between zero and upper_bound. + * @brief Generates a normal distribution between lower_bound and upper_bound. 
*/ template auto make_normal_dist(T lower_bound, T upper_bound) @@ -145,6 +163,9 @@ struct value_generator { } } + bool is_random() { return dist.stddev() != 0; } + T mean() { return dist.mean(); } + T lower_bound; T upper_bound; thrust::minstd_rand engine; @@ -176,6 +197,25 @@ struct point_generator { */ auto deterministic_engine(unsigned seed) { return thrust::minstd_rand{seed}; } +/** + * @brief Make a value generator that samples a value from a clipped normal distribution + * + * @tparam T + * @param lower_bound + * @param upper_bound + * @param seed + * @return auto + */ +template +auto make_clipped_normal_distribution_value_generator(T lower_bound, + T upper_bound, + std::size_t seed = 0) +{ + auto engine = deterministic_engine(seed); + auto normal = make_normal_dist(lower_bound, upper_bound); + return value_generator{lower_bound, upper_bound, engine, normal}; +} + } // namespace test } // namespace cuspatial diff --git a/cpp/tests/utility_test/test_geometry_generators.cu b/cpp/tests/utility_test/test_geometry_generators.cu index d0b35e9e4..8bdfbcc1b 100644 --- a/cpp/tests/utility_test/test_geometry_generators.cu +++ b/cpp/tests/utility_test/test_geometry_generators.cu @@ -13,15 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "thrust/detail/advance.inl" #include #include #include #include +#include #include #include +#include + using namespace cuspatial; using namespace cuspatial::test; @@ -320,3 +324,52 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(0, 100), // num_holes_per_polygon ::testing::Values(3, 100) // num_sides_per_ring )); + +struct MultiPointFactoryStatsValidator : public BaseFixtureWithParam { + void run(multipoint_normal_distribution_generator_parameter params) + { + auto got = generate_multipoint_array(params, stream()); + + auto [got_geometry_offsets, got_coordinates] = got.release(); + + auto num_geometry_counts_it = + make_element_count_iterator_from_offset(got_geometry_offsets.begin()); + + auto h = cuspatial::test::to_host(got_geometry_offsets); + + std::ofstream ofs("/home/coder/output.txt", std::ios::out); + for (std::size_t i = 0; i < h.size() - 1; ++i) { + ofs << h[i + 1] - h[i] << ", "; + } + ofs.close(); + + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(stream()), + num_geometry_counts_it, + thrust::next(num_geometry_counts_it, params.num_multipoints()), + [] __device__(auto count) { return count >= 1; })); + EXPECT_EQ(got_geometry_offsets.size(), params.num_multipoints() + 1); + } +}; + +TEST_P(MultiPointFactoryStatsValidator, CountsVerification) +{ + // Structured binding unsupported by Gtest + std::size_t num_multipoints = std::get<0>(GetParam()); + std::size_t num_points_per_multipoints = std::get<1>(GetParam()); + double stddev = 20.0; + + auto params = multipoint_normal_distribution_generator_parameter{ + num_multipoints, + cuspatial::test::normal_random_variable{static_cast(num_points_per_multipoints), + stddev}, + vec_2d{0.0, 0.0}, + vec_2d{1.0, 1.0}}; + CUSPATIAL_RUN_TEST(this->run, params); +} + +INSTANTIATE_TEST_SUITE_P( + MultiPointFactoryStatsValidators, + MultiPointFactoryStatsValidator, + ::testing::Combine(::testing::Values(1, 1000), // num_multipoints + ::testing::Values(1, 30) // num_points_per_multipoints + ));