diff --git a/include/rmm/aligned.hpp b/include/rmm/aligned.hpp index bd39d7949..6e9970ab8 100644 --- a/include/rmm/aligned.hpp +++ b/include/rmm/aligned.hpp @@ -43,9 +43,9 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; /** * @brief Returns whether or not `value` is a power of 2. * - * @param[in] value to check. + * @param[in] value value to check. * - * @return Whether the input a power of two with non-negative exponent + * @return True if the input is a power of two with non-negative integer exponent, false otherwise. */ [[nodiscard]] constexpr bool is_pow2(std::size_t value) noexcept { @@ -57,7 +57,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * * @param[in] alignment to check * - * @return Whether the alignment is valid + * @return True if the alignment is valid, false otherwise. */ [[nodiscard]] constexpr bool is_supported_alignment(std::size_t alignment) noexcept { @@ -70,7 +70,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * - * @return Return the aligned value, as one would expect + * @return the aligned value */ [[nodiscard]] constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept { @@ -84,7 +84,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * - * @return Return the aligned value, as one would expect + * @return the aligned value */ [[nodiscard]] constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept { diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 7e7b42a18..eb31658e9 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -108,36 +108,35 @@ namespace rmm::detail { } /** - * @brief Allocates sufficient memory to satisfy the requested size `bytes` with + * @brief Allocates sufficient host-accessible memory to satisfy the requested size `bytes` with * alignment `alignment` using the unary callable `alloc` to allocate memory. * - * Given a pointer `p` to an allocation of size `n` returned from the unary - * callable `alloc`, the pointer `q` returned from `aligned_alloc` points to a - * location within the `n` bytes with sufficient space for `bytes` that - * satisfies `alignment`. + * Given a pointer `p` to an allocation of size `n` returned from the unary callable `alloc`, the + * pointer `q` returned from `aligned_alloc` points to a location within the `n` bytes with + * sufficient space for `bytes` that satisfies `alignment`. * - * In order to retrieve the original allocation pointer `p`, the offset - * between `p` and `q` is stored at `q - sizeof(std::ptrdiff_t)`. + * In order to retrieve the original allocation pointer `p`, the offset between `p` and `q` is + * stored at `q - sizeof(std::ptrdiff_t)`. * - * Allocations returned from `aligned_allocate` *MUST* be freed by calling - * `aligned_deallocate` with the same arguments for `bytes` and `alignment` with - * a compatible unary `dealloc` callable capable of freeing the memory returned - * from `alloc`. + * Allocations returned from `aligned_host_allocate` *MUST* be freed by calling + * `aligned_host_deallocate` with the same arguments for `bytes` and `alignment` with a compatible + * unary `dealloc` callable capable of freeing the memory returned from `alloc`. * * If `alignment` is not a power of 2, behavior is undefined. + * If `Alloc` does not allocate host-accessible memory, behavior is undefined. * * @param bytes The desired size of the allocation * @param alignment Desired alignment of allocation * @param alloc Unary callable given a size `n` will allocate at least `n` bytes - * of host memory. - * @tparam Alloc a unary callable type that allocates memory. + * of host-accessible memory. + * @tparam Alloc a unary callable type that allocates host-accessible memory. * @return void* Pointer into allocation of at least `bytes` with desired * `alignment`. */ template -void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) +void* aligned_host_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) { - assert(rmm::is_pow2(alignment)); + assert(rmm::is_supported_alignment(alignment)); // allocate memory for bytes, plus potential alignment correction, // plus store of the correction offset @@ -163,25 +162,27 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) } /** - * @brief Frees an allocation returned from `aligned_allocate`. + * @brief Frees an allocation of host-accessible returned from `aligned_host_allocate`. * - * Allocations returned from `aligned_allocate` *MUST* be freed by calling - * `aligned_deallocate` with the same arguments for `bytes` and `alignment` - * with a compatible unary `dealloc` callable capable of freeing the memory - * returned from `alloc`. + * Allocations returned from `aligned_host_allocate` *MUST* be freed by calling + * `aligned_host_deallocate` with the same arguments for `bytes` and `alignment` with a compatible + * unary `dealloc` callable capable of freeing the memory returned from `alloc`. * * @param p The aligned pointer to deallocate - * @param bytes The number of bytes requested from `aligned_allocate` - * @param alignment The alignment required from `aligned_allocate` - * @param dealloc A unary callable capable of freeing memory returned from - * `alloc` in `aligned_allocate`. - * @tparam Dealloc A unary callable type that deallocates memory. + * @param bytes The number of bytes requested from `aligned_host_allocate` + * @param alignment The alignment required from `aligned_host_allocate` + * @param dealloc A unary callable capable of freeing host-accessible memory returned from `alloc` + * in `aligned_host_allocate`. + * @tparam Dealloc A unary callable type that deallocates host-accessible memory. */ template // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc) +void aligned_host_deallocate(void* ptr, + [[maybe_unused]] std::size_t bytes, + [[maybe_unused]] std::size_t alignment, + Dealloc dealloc) noexcept { - (void)alignment; + assert(rmm::is_supported_alignment(alignment)); // Get offset from the location immediately prior to the aligned pointer // NOLINTNEXTLINE diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index 4bb272df3..ccb294d21 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -65,7 +65,7 @@ class new_delete_resource final : public host_memory_resource { alignment = (rmm::is_supported_alignment(alignment)) ? alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; - return rmm::detail::aligned_allocate( + return rmm::detail::aligned_host_allocate( bytes, alignment, [](std::size_t size) { return ::operator new(size); }); } @@ -86,7 +86,7 @@ class new_delete_resource final : public host_memory_resource { std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override { - rmm::detail::aligned_deallocate( + rmm::detail::aligned_host_deallocate( ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); }); } }; diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index b5c273ef5..cb8524999 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -147,7 +147,7 @@ class pinned_memory_resource final : public host_memory_resource { alignment = (rmm::is_supported_alignment(alignment)) ? alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; - return rmm::detail::aligned_allocate(bytes, alignment, [](std::size_t size) { + return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) { void* ptr{nullptr}; auto status = cudaMallocHost(&ptr, size); if (cudaSuccess != status) { throw std::bad_alloc{}; } @@ -173,7 +173,7 @@ class pinned_memory_resource final : public host_memory_resource { std::size_t alignment = alignof(std::max_align_t)) override { if (nullptr == ptr) { return; } - rmm::detail::aligned_deallocate( + rmm::detail::aligned_host_deallocate( ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); }); } }; diff --git a/include/rmm/mr/pinned_host_memory_resource.hpp b/include/rmm/mr/pinned_host_memory_resource.hpp new file mode 100644 index 000000000..c51af4182 --- /dev/null +++ b/include/rmm/mr/pinned_host_memory_resource.hpp @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include +#include + +#include + +#include +#include + +namespace rmm::mr { + +/** + * @brief Memory resource class for allocating pinned host memory. + * + * This class uses CUDA's `cudaHostAlloc` to allocate pinned host memory. It implements the + * `cuda::mr::memory_resource` and `cuda::mr::device_memory_resource` concepts, and + * the `cuda::mr::host_accessible` and `cuda::mr::device_accessible` properties. + */ +class pinned_host_memory_resource { + public: + // Disable clang-tidy complaining about the easily swappable size and alignment parameters + // of allocate and deallocate + // NOLINTBEGIN(bugprone-easily-swappable-parameters) + + /** + * @brief Allocates pinned host memory of size at least \p bytes bytes. + * + * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to to a + * CUDA out of memory error. + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other + * reason. + * + * @param bytes The size, in bytes, of the allocation. + * @param alignment Alignment in bytes. Default alignment is used if unspecified. + * + * @return Pointer to the newly allocated memory. + */ + static void* allocate(std::size_t bytes, + [[maybe_unused]] std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) + { + // don't allocate anything if the user requested zero bytes + if (0 == bytes) { return nullptr; } + + return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) { + void* ptr{nullptr}; + RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, size, cudaHostAllocDefault)); + return ptr; + }); + } + + /** + * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes. + * + * @throws Nothing. + * + * @param ptr Pointer to be deallocated. + * @param bytes Size of the allocation. + * @param alignment Alignment in bytes. Default alignment is used if unspecified. + */ + static void deallocate(void* ptr, + std::size_t bytes, + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept + { + rmm::detail::aligned_host_deallocate( + ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); }); + } + + /** + * @brief Allocates pinned host memory of size at least \p bytes bytes. + * + * @note Stream argument is ignored and behavior is identical to allocate. + * + * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to to a + * CUDA out of memory error. + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other + * error. + * + * @param bytes The size, in bytes, of the allocation. + * @param stream CUDA stream on which to perform the allocation (ignored). + * @return Pointer to the newly allocated memory. + */ + static void* allocate_async(std::size_t bytes, [[maybe_unused]] cuda::stream_ref stream) + { + return allocate(bytes); + } + + /** + * @brief Allocates pinned host memory of size at least \p bytes bytes and alignment \p alignment. + * + * @note Stream argument is ignored and behavior is identical to allocate. + * + * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to to a + * CUDA out of memory error. + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other + * error. + * + * @param bytes The size, in bytes, of the allocation. + * @param alignment Alignment in bytes. + * @param stream CUDA stream on which to perform the allocation (ignored). + * @return Pointer to the newly allocated memory. + */ + static void* allocate_async(std::size_t bytes, + std::size_t alignment, + [[maybe_unused]] cuda::stream_ref stream) + { + return allocate(bytes, alignment); + } + + /** + * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes. + * + * @note Stream argument is ignored and behavior is identical to deallocate. + * + * @throws Nothing. + * + * @param ptr Pointer to be deallocated. + * @param bytes Size of the allocation. + * @param stream CUDA stream on which to perform the deallocation (ignored). + */ + static void deallocate_async(void* ptr, + std::size_t bytes, + [[maybe_unused]] cuda::stream_ref stream) noexcept + { + return deallocate(ptr, bytes); + } + + /** + * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes and alignment \p + * alignment bytes. + * + * @note Stream argument is ignored and behavior is identical to deallocate. + * + * @throws Nothing. + * + * @param ptr Pointer to be deallocated. + * @param bytes Size of the allocation. + * @param alignment Alignment in bytes. + * @param stream CUDA stream on which to perform the deallocation (ignored). + */ + static void deallocate_async(void* ptr, + std::size_t bytes, + std::size_t alignment, + [[maybe_unused]] cuda::stream_ref stream) noexcept + { + return deallocate(ptr, bytes, alignment); + } + // NOLINTEND(bugprone-easily-swappable-parameters) + + /** + * @briefreturn{true if the specified resource is the same type as this resource.} + */ + bool operator==(const pinned_host_memory_resource&) const { return true; } + + /** + * @briefreturn{true if the specified resource is not the same type as this resource, otherwise + * false.} + */ + bool operator!=(const pinned_host_memory_resource&) const { return false; } + + /** + * @brief Query whether the resource supports reporting free and available memory. + * + * @return false + */ + static bool supports_get_mem_info() { return false; } + + /** + * @brief Query the total amount of memory and free memory available for allocation by this + * resource. + * + * @throws nothing + * + * @return std::pair containing 0 for both total and free memory. + */ + [[nodiscard]] static std::pair get_mem_info(cuda::stream_ref) noexcept + { + return {0, 0}; + } + + /** + * @brief Enables the `cuda::mr::device_accessible` property + * + * This property declares that a `pinned_host_memory_resource` provides device accessible memory + */ + friend void get_property(pinned_host_memory_resource const&, cuda::mr::device_accessible) noexcept + { + } + + /** + * @brief Enables the `cuda::mr::host_accessible` property + * + * This property declares that a `pinned_host_memory_resource` provides host accessible memory + */ + friend void get_property(pinned_host_memory_resource const&, cuda::mr::host_accessible) noexcept + { + } +}; + +static_assert(cuda::mr::async_resource_with); +} // namespace rmm::mr diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp index 25ff76891..9826c10be 100644 --- a/tests/mr/device/mr_ref_test.hpp +++ b/tests/mr/device/mr_ref_test.hpp @@ -17,6 +17,7 @@ #pragma once #include "../../byte_literals.hpp" +#include "test_utils.hpp" #include #include @@ -35,8 +36,6 @@ #include -#include - #include #include @@ -50,17 +49,6 @@ using async_resource_ref = cuda::mr::async_resource_ref(index_distribution(generator) % active_allocations); active_allocations--; @@ -317,7 +305,7 @@ inline void test_mixed_random_async_allocation_free(async_resource_ref ref, EXPECT_NO_THROW(allocations.emplace_back(ref.allocate_async(size, stream), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(is_properly_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index ef4b4bc80..3808ec6f3 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -17,6 +17,7 @@ #pragma once #include "../../byte_literals.hpp" +#include "test_utils.hpp" #include #include @@ -32,11 +33,10 @@ #include #include #include +#include #include -#include - #include #include #include @@ -45,17 +45,6 @@ namespace rmm::test { -/** - * @brief Returns if a pointer points to a device memory or managed memory - * allocation. - */ -inline bool is_device_memory(void* ptr) -{ - cudaPointerAttributes attributes{}; - if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } - return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged); -} - enum size_in_bytes : size_t {}; constexpr auto default_num_allocations{100}; @@ -75,8 +64,8 @@ inline void test_get_current_device_resource() EXPECT_NE(nullptr, rmm::mr::get_current_device_resource()); void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB); EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); - EXPECT_TRUE(is_device_memory(ptr)); + EXPECT_TRUE(is_properly_aligned(ptr)); + EXPECT_TRUE(is_device_accessible_memory(ptr)); rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB); } @@ -87,8 +76,8 @@ inline void test_allocate(rmm::mr::device_memory_resource* mr, void* ptr = mr->allocate(bytes); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); - EXPECT_TRUE(is_device_memory(ptr)); + EXPECT_TRUE(is_properly_aligned(ptr)); + EXPECT_TRUE(is_device_accessible_memory(ptr)); mr->deallocate(ptr, bytes); if (not stream.is_default()) { stream.synchronize(); } } @@ -155,7 +144,7 @@ inline void test_random_allocations(rmm::mr::device_memory_resource* mr, EXPECT_NO_THROW(alloc.ptr = mr->allocate(alloc.size, stream)); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(is_properly_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& alloc) { @@ -197,7 +186,7 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m EXPECT_NO_THROW(allocations.emplace_back(mr->allocate(size, stream), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(is_properly_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; @@ -246,6 +235,8 @@ struct mr_allocation_test : public mr_test {}; /// MR factory functions inline auto make_cuda() { return std::make_shared(); } +inline auto make_host_pinned() { return std::make_shared(); } + inline auto make_cuda_async() { if (rmm::detail::async_alloc::is_supported()) { @@ -262,6 +253,12 @@ inline auto make_pool() make_cuda(), rmm::percent_of_free_device_memory(50)); } +inline auto make_host_pinned_pool() +{ + return rmm::mr::make_owning_wrapper( + make_host_pinned(), 2_GiB, 8_GiB); +} + inline auto make_arena() { return rmm::mr::make_owning_wrapper(make_cuda()); diff --git a/tests/mr/device/mr_tests.cpp b/tests/mr/device/mr_tests.cpp index f6141e90f..bf513adda 100644 --- a/tests/mr/device/mr_tests.cpp +++ b/tests/mr/device/mr_tests.cpp @@ -31,6 +31,7 @@ INSTANTIATE_TEST_SUITE_P(ResourceTests, #endif mr_factory{"Managed", &make_managed}, mr_factory{"Pool", &make_pool}, + mr_factory{"HostPinnedPool", &make_host_pinned_pool}, mr_factory{"Arena", &make_arena}, mr_factory{"Binning", &make_binning}, mr_factory{"Fixed_Size", &make_fixed_size}), @@ -45,6 +46,7 @@ INSTANTIATE_TEST_SUITE_P(ResourceAllocationTests, #endif mr_factory{"Managed", &make_managed}, mr_factory{"Pool", &make_pool}, + mr_factory{"HostPinnedPool", &make_host_pinned_pool}, mr_factory{"Arena", &make_arena}, mr_factory{"Binning", &make_binning}), [](auto const& info) { return info.param.name; }); diff --git a/tests/mr/device/test_utils.hpp b/tests/mr/device/test_utils.hpp new file mode 100644 index 000000000..932a72a7e --- /dev/null +++ b/tests/mr/device/test_utils.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace rmm::test { + +/** + * @brief Returns if a pointer points to a device memory or managed memory + * allocation. + */ +inline bool is_device_accessible_memory(void* ptr) +{ + cudaPointerAttributes attributes{}; + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } + return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged) or + ((attributes.type == cudaMemoryTypeHost) and (attributes.devicePointer != nullptr)); +} + +inline bool is_host_memory(void* ptr) +{ + cudaPointerAttributes attributes{}; + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } + return attributes.type == cudaMemoryTypeHost; +} + +inline bool is_properly_aligned(void* ptr) +{ + if (is_host_memory(ptr)) { return rmm::is_pointer_aligned(ptr, rmm::RMM_DEFAULT_HOST_ALIGNMENT); } + return rmm::is_pointer_aligned(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT); +} + +} // namespace rmm::test