diff --git a/test_plans/root_group.asciidoc b/test_plans/root_group.asciidoc new file mode 100644 index 000000000..c1a8c96a9 --- /dev/null +++ b/test_plans/root_group.asciidoc @@ -0,0 +1,115 @@ +:sectnums: +:xrefstyle: short + += Test plan for sycl_ext_oneapi_root_group + +This is a test plan for the APIs described in +https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_root_group.asciidoc[sycl_ext_oneapi_root_group]. + + +== Testing scope + +=== Device coverage + +All of the tests described below are performed only on the default device that +is selected on the CTS command line. + +=== Feature test macro + +All of the tests should use `#ifdef SYCL_EXT_ONEAPI_ROOT_GROUP` so they can be skipped +if feature is not supported. + +== Tests + +* All following tests run with `Dimensions` = 1, 2, 3 +* Get root group object by +[source,c++] +---- +auto bundle = sycl::get_kernel_bundle(q.get_context()); +auto kernel = bundle.get_kernel(); +auto maxWGs = kernel.ext_oneapi_get_info(q); +REQUIRE(maxWGs >= 1); +auto nd_range = sycl::nd_range{global_range, local_range}; +auto props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync}; +q.parallel_for(nd_range, props, [=](sycl::nd_item it) { + auto root = it.ext_oneapi_get_root_group(); +}); +---- +where `global_range.size()` = `local_range.size() * maxWGs`. + +* Check that `root` is of type `ext::oneapi::experimental::root_group` + +=== The `root_group` class API + +==== Members + +Check the following: + +* `id_type` is same as `id` +* `range_type` is same as `range` +* `linear_id_type` is same as `size_t` +* `dimensions` is `int` and is equal to Dimensions +* `fence_scope` is `memory_scope` and is equal to `memory_scope::device` + +==== get_group_id + +Check that `get_group_id()` return type is `id` and return value is equal to `id()`. + +==== get_local_id + +Check that `get_local_id()` return type is `id` and return value is equal to `nd_item::get_global_id()`. + +==== get_group_range + +Check that `get_group_range()` return type is `range` and return value is equal to `1`. + +==== get_local_range + +Check that `get_local_range()` return type is `range` and return value is equal to `nd_item::get_global_range()`. + +==== get_max_local_range + +Check that `get_max_local_range()` return type is `range` and return value is equal to `get_local_range()`. + +==== get_group_linear_id + +Check that `get_group_linear_id()` return type is `size_t` and return value equals 0. + +==== get_local_linear_id + +Check that `get_local_linear_id()` return type is `size_t` and the return value is equal to `nd_item::get_global_linear_id()`. + +==== get_group_linear_range + +Check that `get_group_linear_range()` return type is `size_t` and return value is equal to `get_group_range.size()`. + +==== get_local_linear_range + +Check that `get_local_linear_range()` return type is `size_t` and return value is equal to `get_local_range.size()`. + +==== leader + +Check that `leader()` return type is `bool` and return value is equal to `get_local_id() == 0`. + +=== get_child_group + +* Get object `g` via `get_child_group(root)` +* Check that `g` is `group` +* Check that `g` equals `nd_item::get_group()` + +* Get object `sb` via `get_child_group(g)` +* Check that `g` is `sub_group` +* Check that `g` equals `nd_item::get_sub_group()` + +=== group_barrier for root_group + +For folowing as `fence_scope` parameter: + +* `sycl::memory_scope::device` +* `sycl::memory_scope::system` +* no argument, expecting to use default `root_group::fence_scope` = `sycl::memory_scope::device` + +if `fence_scope` is supported by the hardware (from query of `info::device::atomic_fence_scope_capabilities`) +check that invocation of `group_barrier(root, fence_scope)` behaves as expected: +when each of workitems writes 1 into zero-initialized global memory array +after barrier all array values read by workitems in reverse order are equal to 1. diff --git a/tests/accessor_legacy/accessor_api_common_buffer_local.h b/tests/accessor_legacy/accessor_api_common_buffer_local.h index 2d8a728ac..4ea6f51a4 100644 --- a/tests/accessor_legacy/accessor_api_common_buffer_local.h +++ b/tests/accessor_legacy/accessor_api_common_buffer_local.h @@ -349,9 +349,7 @@ class buffer_accessor_get_pointer { void operator()() const { check_get_pointer(acc_mode_tag::get()); } - void operator()(sycl_id_t) const { - operator()(); - } + void operator()(common_id) const { operator()(); } private: void check_get_pointer(acc_mode_tag::generic) const { @@ -534,7 +532,7 @@ class buffer_accessor_api_rw { m_range(rng), size(size_) {} - void operator()(sycl_id_t idx) const { + void operator()(common_id idx) const { // We do not need work-item synchronization for atomic mode because of: // - load-store consistency within single work-item // - access to the different elements from different work-items diff --git a/tests/accessor_legacy/accessor_api_utility.h b/tests/accessor_legacy/accessor_api_utility.h index c42581a9a..59dfc13df 100644 --- a/tests/accessor_legacy/accessor_api_utility.h +++ b/tests/accessor_legacy/accessor_api_utility.h @@ -763,6 +763,23 @@ inline std::unique_ptr get_error_data(size_t count) { return get_buffer_input_data(count, dims, useIndexes); } +/** + * @brief Common class for nd_item and item arguments to allow for a common + * kernel definition for different parallel_for. + */ +template +struct common_id { + common_id(sycl_id_t I) : Id{I} {} + common_id(sycl::item::value> I) : Id{I} {} + common_id(sycl::nd_item::value> I) + : Id{I.get_global_id()} {} + + operator sycl_id_t() const { return Id; } + + private: + sycl_id_t Id; +}; + } // namespace #endif // SYCL_1_2_1_TESTS_ACCESSOR_ACCESSOR_API_UTILITY_H diff --git a/tests/group_functions/group_scan.h b/tests/group_functions/group_scan.h index 66bdf9a3a..7fe8766be 100644 --- a/tests/group_functions/group_scan.h +++ b/tests/group_functions/group_scan.h @@ -206,7 +206,7 @@ struct joint_scan_group { sycl::nd_range executionRange(work_group_range, work_group_range); - const size_t sizes[3] = {5, work_group_size / 2, 3 * work_group_size}; + const size_t sizes[2] = {5, 2}; for (size_t size : sizes) { check_scan(queue, size, executionRange, OperatorT(), op_name, false); @@ -261,7 +261,7 @@ struct init_joint_scan_group { size_t work_group_size = work_group_range.size(); - const size_t sizes[3] = {5, work_group_size / 2, 3 * work_group_size}; + const size_t sizes[2] = {5, 2}; for (size_t size : sizes) { check_scan(queue, size, executionRange, OperatorT(), op_name, true); @@ -392,6 +392,9 @@ template void check_scan_over_group(sycl::queue& queue, sycl::range range, OpT op, const std::string& op_name, bool with_init) { auto range_size = range.size(); + REQUIRE(((range_size * (range_size + 1) / 2) + T(init)) <= + std::numeric_limits::max()); + ScanOverGroupDataStruct host_data{range_size}; { auto ref_input_sycl = host_data.create_ref_input_buffer(); diff --git a/tests/kernel/kernel_info.cpp b/tests/kernel/kernel_info.cpp index d336d9fc9..2861c25e3 100644 --- a/tests/kernel/kernel_info.cpp +++ b/tests/kernel/kernel_info.cpp @@ -203,6 +203,10 @@ TEST_CASE("Test kernel info", "[kernel]") { "device_specific::max_sub_group_size>(dev)"); { auto sub_group_sizes = dev.get_info(); + { + INFO("The device must report at least one sub-group size"); + REQUIRE(!sub_group_sizes.empty()); + } uint32_t max = *std::max_element(sub_group_sizes.begin(), sub_group_sizes.end()); INFO( diff --git a/tests/optional_kernel_features/kernel_features_speculative_compilation.cpp b/tests/optional_kernel_features/kernel_features_speculative_compilation.cpp index e8ea75c71..ac4d271ff 100644 --- a/tests/optional_kernel_features/kernel_features_speculative_compilation.cpp +++ b/tests/optional_kernel_features/kernel_features_speculative_compilation.cpp @@ -165,14 +165,15 @@ DISABLED_FOR_TEST_CASE(hipSYCL) if (max_wg_size >= testing_wg_size[0]) { { - const auto separate_lambda_item_arg = [](sycl::item<1>) - [[sycl::reqd_work_group_size(testing_wg_size[0])]]{}; + const auto separate_lambda_nd_item_arg = + [](sycl::nd_item<1>) + [[sycl::reqd_work_group_size(testing_wg_size[0])]] {}; const auto separate_lambda_group_arg = [](sycl::group<1>) [[sycl::reqd_work_group_size(testing_wg_size[0])]]{}; run_separate_lambda_nd_range>( - is_exception_expected, errc_expected, queue, separate_lambda_item_arg, - separate_lambda_group_arg); + is_exception_expected, errc_expected, queue, + separate_lambda_nd_item_arg, separate_lambda_group_arg); } { @@ -191,14 +192,15 @@ DISABLED_FOR_TEST_CASE(hipSYCL) if (max_wg_size >= testing_wg_size[1]) { { - const auto separate_lambda_item_arg = [](sycl::item<1>) - [[sycl::reqd_work_group_size(testing_wg_size[1])]]{}; + const auto separate_lambda_nd_item_arg = + [](sycl::nd_item<1>) + [[sycl::reqd_work_group_size(testing_wg_size[1])]] {}; const auto separate_lambda_group_arg = [](sycl::group<1>) [[sycl::reqd_work_group_size(testing_wg_size[1])]]{}; run_separate_lambda_nd_range>( - is_exception_expected, errc_expected, queue, separate_lambda_item_arg, - separate_lambda_group_arg); + is_exception_expected, errc_expected, queue, + separate_lambda_nd_item_arg, separate_lambda_group_arg); } { @@ -223,14 +225,15 @@ DISABLED_FOR_TEST_CASE(hipSYCL) std::find(sg_sizes_vec.begin(), sg_sizes_vec.end(), testing_sg_size[0]); if (find_res != sg_sizes_vec.end()) { { - const auto separate_lambda_item_arg = - [](sycl::item<1>) [[sycl::reqd_sub_group_size(testing_sg_size[0])]]{}; + const auto separate_lambda_nd_item_arg = + [](sycl::nd_item<1>) + [[sycl::reqd_sub_group_size(testing_sg_size[0])]] {}; const auto separate_lambda_group_arg = [](sycl::group<1>) [[sycl::reqd_sub_group_size(testing_sg_size[0])]]{}; run_separate_lambda_nd_range>( - is_exception_expected, errc_expected, queue, separate_lambda_item_arg, - separate_lambda_group_arg); + is_exception_expected, errc_expected, queue, + separate_lambda_nd_item_arg, separate_lambda_group_arg); } { @@ -251,14 +254,15 @@ DISABLED_FOR_TEST_CASE(hipSYCL) std::find(sg_sizes_vec.begin(), sg_sizes_vec.end(), testing_sg_size[1]); if (find_res != sg_sizes_vec.end()) { { - const auto separate_lambda_item_arg = - [](sycl::item<1>) [[sycl::reqd_sub_group_size(testing_sg_size[1])]]{}; + const auto separate_lambda_nd_item_arg = + [](sycl::nd_item<1>) + [[sycl::reqd_sub_group_size(testing_sg_size[1])]] {}; const auto separate_lambda_group_arg = [](sycl::group<1>) [[sycl::reqd_sub_group_size(testing_sg_size[1])]]{}; run_separate_lambda_nd_range>( - is_exception_expected, errc_expected, queue, separate_lambda_item_arg, - separate_lambda_group_arg); + is_exception_expected, errc_expected, queue, + separate_lambda_nd_item_arg, separate_lambda_group_arg); } {