rapidsai · mfoerste4 · Sep 20, 2024 · Sep 23, 2024 · Sep 23, 2024 · Sep 25, 2024
@@ -36,17 +36,13 @@ struct SvcParams {
   BlobsParams blobs;
   raft::distance::kernels::KernelParams kernel;
   ML::SVM::SvmParameter svm_param;
-  ML::SVM::SvmModel<D> model;
 };
 
 template <typename D>
 class SVC : public BlobsFixture<D, D> {
  public:
   SVC(const std::string& name, const SvcParams<D>& p)
-    : BlobsFixture<D, D>(name, p.data, p.blobs),
-      kernel(p.kernel),
-      model(p.model),
-      svm_param(p.svm_param)
+    : BlobsFixture<D, D>(name, p.data, p.blobs), kernel(p.kernel), svm_param(p.svm_param)
   {
     std::vector<std::string> kernel_names{"linear", "poly", "rbf", "tanh"};
     std::ostringstream oss;
@@ -101,7 +97,6 @@ std::vector<SvcParams<D>> getInputs()
 
   // SvmParameter{C, cache_size, max_iter, nochange_steps, tol, verbosity})
   p.svm_param = ML::SVM::SvmParameter{1, 200, 100, 100, 1e-3, CUML_LEVEL_INFO, 0, ML::SVM::C_SVC};
-  p.model     = ML::SVM::SvmModel<D>{0, 0, 0, nullptr, {}, nullptr, 0, nullptr};
 
   std::vector<Triplets> rowcols = {{50000, 2, 2}, {2048, 100000, 2}, {50000, 1000, 2}};
 

@@ -36,17 +36,13 @@ struct SvrParams {
   RegressionParams regression;
   raft::distance::kernels::KernelParams kernel;
   ML::SVM::SvmParameter svm_param;
-  ML::SVM::SvmModel<D>* model;
 };
 
 template <typename D>
 class SVR : public RegressionFixture<D> {
  public:
   SVR(const std::string& name, const SvrParams<D>& p)
-    : RegressionFixture<D>(name, p.data, p.regression),
-      kernel(p.kernel),
-      model(p.model),
-      svm_param(p.svm_param)
+    : RegressionFixture<D>(name, p.data, p.regression), kernel(p.kernel), svm_param(p.svm_param)
   {
     std::vector<std::string> kernel_names{"linear", "poly", "rbf", "tanh"};
     std::ostringstream oss;
@@ -69,16 +65,16 @@ class SVR : public RegressionFixture<D> {
                       this->data.y.data(),
                       this->svm_param,
                       this->kernel,
-                      *(this->model));
+                      this->model);
       this->handle->sync_stream(this->stream);
-      ML::SVM::svmFreeBuffers(*this->handle, *(this->model));
+      ML::SVM::svmFreeBuffers(*this->handle, this->model);
     });
   }
 
  private:
   raft::distance::kernels::KernelParams kernel;
   ML::SVM::SvmParameter svm_param;
-  ML::SVM::SvmModel<D>* model;
+  ML::SVM::SvmModel<D> model;
 };
 
 template <typename D>
@@ -103,7 +99,6 @@ std::vector<SvrParams<D>> getInputs()
   //              epsilon, svmType})
   p.svm_param =
     ML::SVM::SvmParameter{1, 200, 200, 100, 1e-3, CUML_LEVEL_INFO, 0.1, ML::SVM::EPSILON_SVR};
-  p.model = new ML::SVM::SvmModel<D>{0, 0, 0, 0};
 
   std::vector<Triplets> rowcols = {{50000, 2, 2}, {1024, 10000, 10}, {3000, 200, 200}};
 

@@ -219,7 +219,7 @@ class SVC {
 
   raft::distance::kernels::KernelParams kernel_params;
   SvmParameter param;
-  SvmModel<math_t> model;
+  SvmModelContainer<math_t> model_container;
   /**
    * @brief Constructs a support vector classifier
    * @param handle cuML handle

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,16 +15,17 @@
  */
 #pragma once
 
+#include <rmm/device_buffer.hpp>
+
 namespace ML {
 namespace SVM {
 
 // Contains array(s) for matrix storage
-template <typename math_t>
 struct SupportStorage {
-  int nnz      = -1;
-  int* indptr  = nullptr;
-  int* indices = nullptr;
-  math_t* data = nullptr;
+  // Non-owning pointers to buffers managed elsewhere. They default to null, as the raw pointers
+  // they replace did, so a default-constructed instance never holds indeterminate pointers.
+  int nnz                     = -1;
+  rmm::device_buffer* indptr  = nullptr;
+  rmm::device_buffer* indices = nullptr;
+  rmm::device_buffer* data    = nullptr;
 };
 
 /**
@@ -39,17 +40,50 @@ struct SvmModel {
 
   //! Non-zero dual coefficients ( dual_coef[i] = \f$ y_i \alpha_i \f$).
   //! Size [n_support].
-  math_t* dual_coefs;
+  rmm::device_buffer* dual_coefs;
 
   //! Support vector storage - can contain either CSR or dense
-  SupportStorage<math_t> support_matrix;
+  SupportStorage support_matrix;
 
   //! Indices (from the training set) of the support vectors, size [n_support].
-  int* support_idx;
+  rmm::device_buffer* support_idx;
 
   int n_classes;  //!< Number of classes found in the input labels
   //! Device pointer for the unique classes. Size [n_classes]
-  math_t* unique_labels;
+  rmm::device_buffer* unique_labels;
+};
+
+/**
+ * Helper container that bundles an SvmModel with the device buffers it refers to, so that
+ * the model and its storage can be constructed together (e.g. on the stack).
+ *
+ * The constructor default-constructs the six buffers and initializes `model` with the
+ * addresses of these members; the remaining model fields are set to 0 and the support
+ * matrix nnz to -1.
+ *
+ * NOTE(review): `model` stores pointers into this very object and no copy/move operations
+ * are defined here, so a moved (or copied) container would keep pointing at the buffers of
+ * its source object - confirm that instances are never copied or moved.
+ */
+template <typename math_t>
+struct SvmModelContainer {
+  SvmModelContainer()
+    : dual_coef_bf(),
+      support_idx_bf(),
+      unique_labels_bf(),
+      support_matrix_indptr_bf(),
+      support_matrix_indices_bf(),
+      support_matrix_data_bf(),
+      model({0,
+             0,
+             0,
+             &dual_coef_bf,
+             SupportStorage{
+               -1, &support_matrix_indptr_bf, &support_matrix_indices_bf, &support_matrix_data_bf},
+             &support_idx_bf,
+             0,
+             &unique_labels_bf})
+  {
+  }
+
+  rmm::device_buffer dual_coef_bf;
+  rmm::device_buffer support_idx_bf;
+  rmm::device_buffer unique_labels_bf;
+  rmm::device_buffer support_matrix_indptr_bf;
+  rmm::device_buffer support_matrix_indices_bf;
+  rmm::device_buffer support_matrix_data_bf;
+  SvmModel<math_t> model;
 };
 
 };  // namespace SVM

@@ -117,56 +117,56 @@ class Results {
    */
   void Get(const math_t* alpha,
            const math_t* f,
-           math_t** dual_coefs,
-           int* n_support,
-           int** idx,
-           SupportStorage<math_t>* support_matrix,
-           math_t* b)
+           rmm::device_buffer& dual_coefs,
+           int& n_support,
+           rmm::device_buffer& idx,
+           SupportStorage& support_matrix,
+           math_t& b)
   {
     CombineCoefs(alpha, val_tmp.data());
     GetDualCoefs(val_tmp.data(), dual_coefs, n_support);
-    *b = CalcB(alpha, f, *n_support);
-    if (*n_support > 0) {
-      *idx            = GetSupportVectorIndices(val_tmp.data(), *n_support);
-      *support_matrix = CollectSupportVectorMatrix(*idx, *n_support);
-    } else {
-      *dual_coefs     = nullptr;
-      *idx            = nullptr;
-      *support_matrix = {};
-    }
+    b = CalcB(alpha, f, n_support);
+    GetSupportVectorIndices(idx, val_tmp.data(), n_support);
+    CollectSupportVectorMatrix(support_matrix, idx, n_support);
     // Make sure that all pending GPU calculations finished before we return
     handle.sync_stream(stream);
   }
 
   /**
    * Collect support vectors into a matrix storage
    *
+   * @param [out] support_matrix containing the support vectors, size [n_support*n_cols]
+   *   in the dense case; otherwise indptr/indices/data and nnz are handed to the CSR
+   *   overload of extractRows
    * @param [in] idx indices of support vectors, size [n_support]
    * @param [in] n_support number of support vectors
-   * @return pointer to a newly allocated device buffer that stores the support
-   *   vectors, size [n_suppor*n_cols]
    */
-  SupportStorage<math_t> CollectSupportVectorMatrix(const int* idx, int n_support)
+  void CollectSupportVectorMatrix(SupportStorage& support_matrix,
+                                  rmm::device_buffer& idx,
+                                  int n_support)
   {
-    SupportStorage<math_t> support_matrix;
     // allow ~1GB dense support matrix
     if (isDenseType<MatrixViewType>() ||
         ((size_t)n_support * n_cols * sizeof(math_t) < (1 << 30))) {
-      support_matrix.data = (math_t*)rmm_alloc.allocate_async(
-        n_support * n_cols * sizeof(math_t), rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
-      ML::SVM::extractRows<math_t>(matrix, support_matrix.data, idx, n_support, handle);
+      // Widen before multiplying: for dense inputs this branch is taken regardless of size,
+      // and n_support * n_cols evaluated in int could overflow before reaching size_t.
+      const size_t dense_bytes = static_cast<size_t>(n_support) * n_cols * sizeof(math_t);
+      support_matrix.nnz       = -1;
+      support_matrix.indptr->resize(0, stream);
+      support_matrix.indices->resize(0, stream);
+      support_matrix.data->resize(dense_bytes, stream);
+      if (n_support > 0) {
+        ML::SVM::extractRows<math_t>(matrix,
+                                     static_cast<math_t*>(support_matrix.data->data()),
+                                     static_cast<int*>(idx.data()),
+                                     n_support,
+                                     handle);
+      }
     } else {
       ML::SVM::extractRows<math_t>(matrix,
-                                   &(support_matrix.indptr),
-                                   &(support_matrix.indices),
-                                   &(support_matrix.data),
+                                   *(support_matrix.indptr),
+                                   *(support_matrix.indices),
+                                   *(support_matrix.data),
                                    &(support_matrix.nnz),
-                                   idx,
+                                   static_cast<int*>(idx.data()),
                                    n_support,
                                    handle);
     }
-
-    return support_matrix;
   }
 
   /**
@@ -205,33 +205,34 @@ class Results {
    *   unallocated on entry, on exit size [n_support]
    * @param [out] n_support number of support vectors
    */
-  void GetDualCoefs(const math_t* val_tmp, math_t** dual_coefs, int* n_support)
+  void GetDualCoefs(const math_t* val_tmp, rmm::device_buffer& dual_coefs, int& n_support)
   {
     // Return only the non-zero coefficients
     auto select_op = [] __device__(math_t a) { return 0 != a; };
-    *n_support     = SelectByCoef(val_tmp, n_rows, val_tmp, select_op, val_selected.data());
-    *dual_coefs    = (math_t*)rmm_alloc.allocate_async(
-      *n_support * sizeof(math_t), rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
-    raft::copy(*dual_coefs, val_selected.data(), *n_support, stream);
+    n_support      = SelectByCoef(val_tmp, n_rows, val_tmp, select_op, val_selected.data());
+    // The output buffer is sized in bytes: room for n_support values of math_t.
+    dual_coefs.resize(n_support * sizeof(math_t), stream);
+    raft::copy(static_cast<math_t*>(dual_coefs.data()), val_selected.data(), n_support, stream);
     handle.sync_stream(stream);
   }
 
   /**
    * Flag support vectors and also collect their indices.
    * Support vectors are the vectors where alpha > 0.
    *
+   * @param [out] idx the training set indices of the support vectors, size [n_support];
+   *   resized by this call (to zero bytes when n_support is 0)
    * @param [in] coef dual coefficients, size [n_rows]
    * @param [in] n_support number of support vectors
-   * @return indices of the support vectors, size [n_support]
    */
-  int* GetSupportVectorIndices(const math_t* coef, int n_support)
+  void GetSupportVectorIndices(rmm::device_buffer& idx, const math_t* coef, int n_support)
   {
-    auto select_op = [] __device__(math_t a) -> bool { return 0 != a; };
-    SelectByCoef(coef, n_rows, f_idx.data(), select_op, idx_selected.data());
-    int* idx = (int*)rmm_alloc.allocate_async(
-      n_support * sizeof(int), rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
-    raft::copy(idx, idx_selected.data(), n_support, stream);
-    return idx;
+    if (n_support > 0) {
+      auto select_op = [] __device__(math_t a) -> bool { return 0 != a; };
+      SelectByCoef(coef, n_rows, f_idx.data(), select_op, idx_selected.data());
+      idx.resize(n_support * sizeof(int), stream);
+      raft::copy(static_cast<int*>(idx.data()), idx_selected.data(), n_support, stream);
+    } else {
+      // Nothing was selected: leave the caller with an empty buffer.
+      idx.resize(0, stream);
+    }
   }
 
   /**

@@ -103,11 +103,11 @@ void SmoSolver<math_t>::Solve(MatrixViewType matrix,
                               int n_cols,
                               math_t* y,
                               const math_t* sample_weight,
-                              math_t** dual_coefs,
-                              int* n_support,
-                              SupportStorage<math_t>* support_matrix,
-                              int** idx,
-                              math_t* b,
+                              rmm::device_buffer& dual_coefs,
+                              int& n_support,
+                              SupportStorage& support_matrix,
+                              rmm::device_buffer& idx,
+                              math_t& b,
                               int max_outer_iter,
                               int max_inner_iter)
 {

@@ -124,11 +124,11 @@ class SmoSolver {
              int n_cols,
              math_t* y,
              const math_t* sample_weight,
-             math_t** dual_coefs,
-             int* n_support,
-             SupportStorage<math_t>* support_matrix,
-             int** idx,
-             math_t* b,
+             rmm::device_buffer& dual_coefs,
+             int& n_support,
+             SupportStorage& support_matrix,
+             rmm::device_buffer& idx,
+             math_t& b,
              int max_outer_iter = -1,
              int max_inner_iter = 10000);