Add a host-pinned memory resource that can be used as upstream for `p…

…ool_memory_resource`. (#1392) Depends on #1417 Adds a new `host_pinned_memory_resource` that implements the new `cuda::mr::memory_resource` and `cuda::mr::async_memory_resource` concepts which makes it usable as an upstream MR for `rmm::mr::device_memory_resource`. Also tests a pool made with this new MR as the upstream. Note that the tests explicitly set the initial and maximum pool sizes as using the defaults does not currently work. See #1388 . Closes #618 Authors: - Mark Harris (https://github.com/harrism) - Lawrence Mitchell (https://github.com/wence-) Approvers: - Michael Schellenberger Costa (https://github.com/miscco) - Alessandro Bellina (https://github.com/abellina) - Lawrence Mitchell (https://github.com/wence-) - Jake Hemstad (https://github.com/jrhemstad) - Bradley Dice (https://github.com/bdice) URL: #1392
rapidsai · Jan 18, 2024 · 12f8de3 · 12f8de3
1 parent bb8fdf1
commit 12f8de3
Show file tree

Hide file tree

Showing 9 changed files with 336 additions and 76 deletions.
diff --git a/include/rmm/aligned.hpp b/include/rmm/aligned.hpp
@@ -43,9 +43,9 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
 /**
  * @brief Returns whether or not `value` is a power of 2.
  *
- * @param[in] value to check.
+ * @param[in] value value to check.
  *
- * @return Whether the input a power of two with non-negative exponent
+ * @return True if the input is a power of two with non-negative integer exponent, false otherwise.
  */
 [[nodiscard]] constexpr bool is_pow2(std::size_t value) noexcept
 {
@@ -57,7 +57,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
  *
  * @param[in] alignment to check
  *
- * @return Whether the alignment is valid
+ * @return True if the alignment is valid, false otherwise.
  */
 [[nodiscard]] constexpr bool is_supported_alignment(std::size_t alignment) noexcept
 {
@@ -70,7 +70,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
  * @param[in] value value to align
  * @param[in] alignment amount, in bytes, must be a power of 2
  *
- * @return Return the aligned value, as one would expect
+ * @return the aligned value
  */
 [[nodiscard]] constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
 {
@@ -84,7 +84,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
  * @param[in] value value to align
  * @param[in] alignment amount, in bytes, must be a power of 2
  *
- * @return Return the aligned value, as one would expect
+ * @return the aligned value
  */
 [[nodiscard]] constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept
 {

diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp
@@ -108,36 +108,35 @@ namespace rmm::detail {
 }
 
 /**
- * @brief Allocates sufficient memory to satisfy the requested size `bytes` with
+ * @brief Allocates sufficient host-accessible memory to satisfy the requested size `bytes` with
  * alignment `alignment` using the unary callable `alloc` to allocate memory.
  *
- * Given a pointer `p` to an allocation of size `n` returned from the unary
- * callable `alloc`, the pointer `q` returned from `aligned_alloc` points to a
- * location within the `n` bytes with sufficient space for `bytes` that
- * satisfies `alignment`.
+ * Given a pointer `p` to an allocation of size `n` returned from the unary callable `alloc`, the
+ * pointer `q` returned from `aligned_alloc` points to a location within the `n` bytes with
+ * sufficient space for `bytes` that satisfies `alignment`.
  *
- * In order to retrieve the original allocation pointer `p`, the offset
- * between `p` and `q` is stored at `q - sizeof(std::ptrdiff_t)`.
+ * In order to retrieve the original allocation pointer `p`, the offset between `p` and `q` is
+ * stored at `q - sizeof(std::ptrdiff_t)`.
  *
- * Allocations returned from `aligned_allocate` *MUST* be freed by calling
- * `aligned_deallocate` with the same arguments for `bytes` and `alignment` with
- * a compatible unary `dealloc` callable capable of freeing the memory returned
- * from `alloc`.
+ * Allocations returned from `aligned_host_allocate` *MUST* be freed by calling
+ * `aligned_host_deallocate` with the same arguments for `bytes` and `alignment` with a compatible
+ * unary `dealloc` callable capable of freeing the memory returned from `alloc`.
  *
  * If `alignment` is not a power of 2, behavior is undefined.
+ * If `Alloc` does not allocate host-accessible memory, behavior is undefined.
  *
  * @param bytes The desired size of the allocation
  * @param alignment Desired alignment of allocation
  * @param alloc Unary callable given a size `n` will allocate at least `n` bytes
- * of host memory.
- * @tparam Alloc a unary callable type that allocates memory.
+ * of host-accessible memory.
+ * @tparam Alloc a unary callable type that allocates host-accessible memory.
  * @return void* Pointer into allocation of at least `bytes` with desired
  * `alignment`.
  */
 template <typename Alloc>
-void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc)
+void* aligned_host_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc)
 {
-  assert(rmm::is_pow2(alignment));
+  assert(rmm::is_supported_alignment(alignment));
 
   // allocate memory for bytes, plus potential alignment correction,
   // plus store of the correction offset
@@ -163,25 +162,27 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc)
 }
 
 /**
- * @brief Frees an allocation returned from `aligned_allocate`.
+ * @brief Frees an allocation of host-accessible returned from `aligned_host_allocate`.
  *
- * Allocations returned from `aligned_allocate` *MUST* be freed by calling
- * `aligned_deallocate` with the same arguments for `bytes` and `alignment`
- * with a compatible unary `dealloc` callable capable of freeing the memory
- * returned from `alloc`.
+ * Allocations returned from `aligned_host_allocate` *MUST* be freed by calling
+ * `aligned_host_deallocate` with the same arguments for `bytes` and `alignment` with a compatible
+ * unary `dealloc` callable capable of freeing the memory returned from `alloc`.
  *
  * @param p The aligned pointer to deallocate
- * @param bytes The number of bytes requested from `aligned_allocate`
- * @param alignment The alignment required from `aligned_allocate`
- * @param dealloc A unary callable capable of freeing memory returned from
- * `alloc` in `aligned_allocate`.
- * @tparam Dealloc A unary callable type that deallocates memory.
+ * @param bytes The number of bytes requested from `aligned_host_allocate`
+ * @param alignment The alignment required from `aligned_host_allocate`
+ * @param dealloc A unary callable capable of freeing host-accessible memory returned from `alloc`
+ * in `aligned_host_allocate`.
+ * @tparam Dealloc A unary callable type that deallocates host-accessible memory.
  */
 template <typename Dealloc>
 // NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
-void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc)
+void aligned_host_deallocate(void* ptr,
+                             [[maybe_unused]] std::size_t bytes,
+                             [[maybe_unused]] std::size_t alignment,
+                             Dealloc dealloc) noexcept
 {
-  (void)alignment;
+  assert(rmm::is_supported_alignment(alignment));
 
   // Get offset from the location immediately prior to the aligned pointer
   // NOLINTNEXTLINE

diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp
@@ -65,7 +65,7 @@ class new_delete_resource final : public host_memory_resource {
     alignment =
       (rmm::is_supported_alignment(alignment)) ? alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT;
 
-    return rmm::detail::aligned_allocate(
+    return rmm::detail::aligned_host_allocate(
       bytes, alignment, [](std::size_t size) { return ::operator new(size); });
   }
 
@@ -86,7 +86,7 @@ class new_delete_resource final : public host_memory_resource {
                      std::size_t bytes,
                      std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override
   {
-    rmm::detail::aligned_deallocate(
+    rmm::detail::aligned_host_deallocate(
       ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); });
   }
 };

diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp
@@ -147,7 +147,7 @@ class pinned_memory_resource final : public host_memory_resource {
     alignment =
       (rmm::is_supported_alignment(alignment)) ? alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT;
 
-    return rmm::detail::aligned_allocate(bytes, alignment, [](std::size_t size) {
+    return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) {
       void* ptr{nullptr};
       auto status = cudaMallocHost(&ptr, size);
       if (cudaSuccess != status) { throw std::bad_alloc{}; }
@@ -173,7 +173,7 @@ class pinned_memory_resource final : public host_memory_resource {
                      std::size_t alignment = alignof(std::max_align_t)) override
   {
     if (nullptr == ptr) { return; }
-    rmm::detail::aligned_deallocate(
+    rmm::detail::aligned_host_deallocate(
       ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); });
   }
 };

diff --git a/include/rmm/mr/pinned_host_memory_resource.hpp b/include/rmm/mr/pinned_host_memory_resource.hpp
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <rmm/aligned.hpp>
+#include <rmm/detail/aligned.hpp>
+#include <rmm/detail/error.hpp>
+
+#include <cuda/memory_resource>
+#include <cuda/stream_ref>
+
+#include <cuda_runtime_api.h>
+
+#include <cstddef>
+#include <utility>
+
+namespace rmm::mr {
+
+/**
+ * @brief Memory resource class for allocating pinned host memory.
+ *
+ * This class uses CUDA's `cudaHostAlloc` to allocate pinned host memory. It implements the
+ * `cuda::mr::memory_resource` and `cuda::mr::device_memory_resource` concepts, and
+ * the `cuda::mr::host_accessible` and `cuda::mr::device_accessible` properties.
+ */
+class pinned_host_memory_resource {
+ public:
+  // Disable clang-tidy complaining about the easily swappable size and alignment parameters
+  // of allocate and deallocate
+  // NOLINTBEGIN(bugprone-easily-swappable-parameters)
+
+  /**
+   * @brief Allocates pinned host memory of size at least \p bytes bytes.
+   *
+   * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to to a
+   * CUDA out of memory error.
+   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other
+   * reason.
+   *
+   * @param bytes The size, in bytes, of the allocation.
+   * @param alignment Alignment in bytes. Default alignment is used if unspecified.
+   *
+   * @return Pointer to the newly allocated memory.
+   */
+  static void* allocate(std::size_t bytes,
+                        [[maybe_unused]] std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT)
+  {
+    // don't allocate anything if the user requested zero bytes
+    if (0 == bytes) { return nullptr; }
+
+    return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) {
+      void* ptr{nullptr};
+      RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, size, cudaHostAllocDefault));
+      return ptr;
+    });
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes.
+   *
+   * @throws Nothing.
+   *
+   * @param ptr Pointer to be deallocated.
+   * @param bytes Size of the allocation.
+   * @param alignment Alignment in bytes. Default alignment is used if unspecified.
+   */
+  static void deallocate(void* ptr,
+                         std::size_t bytes,
+                         std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept
+  {
+    rmm::detail::aligned_host_deallocate(
+      ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); });
+  }
+
+  /**
+   * @brief Allocates pinned host memory of size at least \p bytes bytes.
+   *
+   * @note Stream argument is ignored and behavior is identical to allocate.
+   *
+   * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to to a
+   * CUDA out of memory error.
+   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other
+   * error.
+   *
+   * @param bytes The size, in bytes, of the allocation.
+   * @param stream CUDA stream on which to perform the allocation (ignored).
+   * @return Pointer to the newly allocated memory.
+   */
+  static void* allocate_async(std::size_t bytes, [[maybe_unused]] cuda::stream_ref stream)
+  {
+    return allocate(bytes);
+  }
+
+  /**
+   * @brief Allocates pinned host memory of size at least \p bytes bytes and alignment \p alignment.
+   *
+   * @note Stream argument is ignored and behavior is identical to allocate.
+   *
+   * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to to a
+   * CUDA out of memory error.
+   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other
+   * error.
+   *
+   * @param bytes The size, in bytes, of the allocation.
+   * @param alignment Alignment in bytes.
+   * @param stream CUDA stream on which to perform the allocation (ignored).
+   * @return Pointer to the newly allocated memory.
+   */
+  static void* allocate_async(std::size_t bytes,
+                              std::size_t alignment,
+                              [[maybe_unused]] cuda::stream_ref stream)
+  {
+    return allocate(bytes, alignment);
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes.
+   *
+   * @note Stream argument is ignored and behavior is identical to deallocate.
+   *
+   * @throws Nothing.
+   *
+   * @param ptr Pointer to be deallocated.
+   * @param bytes Size of the allocation.
+   * @param stream CUDA stream on which to perform the deallocation (ignored).
+   */
+  static void deallocate_async(void* ptr,
+                               std::size_t bytes,
+                               [[maybe_unused]] cuda::stream_ref stream) noexcept
+  {
+    return deallocate(ptr, bytes);
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes and alignment \p
+   * alignment bytes.
+   *
+   * @note Stream argument is ignored and behavior is identical to deallocate.
+   *
+   * @throws Nothing.
+   *
+   * @param ptr Pointer to be deallocated.
+   * @param bytes Size of the allocation.
+   * @param alignment Alignment in bytes.
+   * @param stream CUDA stream on which to perform the deallocation (ignored).
+   */
+  static void deallocate_async(void* ptr,
+                               std::size_t bytes,
+                               std::size_t alignment,
+                               [[maybe_unused]] cuda::stream_ref stream) noexcept
+  {
+    return deallocate(ptr, bytes, alignment);
+  }
+  // NOLINTEND(bugprone-easily-swappable-parameters)
+
+  /**
+   * @briefreturn{true if the specified resource is the same type as this resource.}
+   */
+  bool operator==(const pinned_host_memory_resource&) const { return true; }
+
+  /**
+   * @briefreturn{true if the specified resource is not the same type as this resource, otherwise
+   * false.}
+   */
+  bool operator!=(const pinned_host_memory_resource&) const { return false; }
+
+  /**
+   * @brief Query whether the resource supports reporting free and available memory.
+   *
+   * @return false
+   */
+  static bool supports_get_mem_info() { return false; }
+
+  /**
+   * @brief Query the total amount of memory and free memory available for allocation by this
+   * resource.
+   *
+   * @throws nothing
+   *
+   * @return std::pair containing 0 for both total and free memory.
+   */
+  [[nodiscard]] static std::pair<std::size_t, std::size_t> get_mem_info(cuda::stream_ref) noexcept
+  {
+    return {0, 0};
+  }
+
+  /**
+   * @brief Enables the `cuda::mr::device_accessible` property
+   *
+   * This property declares that a `pinned_host_memory_resource` provides device accessible memory
+   */
+  friend void get_property(pinned_host_memory_resource const&, cuda::mr::device_accessible) noexcept
+  {
+  }
+
+  /**
+   * @brief Enables the `cuda::mr::host_accessible` property
+   *
+   * This property declares that a `pinned_host_memory_resource` provides host accessible memory
+   */
+  friend void get_property(pinned_host_memory_resource const&, cuda::mr::host_accessible) noexcept
+  {
+  }
+};
+
+static_assert(cuda::mr::async_resource_with<pinned_host_memory_resource,
+                                            cuda::mr::device_accessible,
+                                            cuda::mr::host_accessible>);
+}  // namespace rmm::mr