rapidsai · jperez999 · Jul 14, 2020 · Jul 16, 2020 · Jun 10, 2021 · Sep 16, 2021
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/core/device_csr_matrix.hpp>
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
+#include <raft/sparse/matrix/detail/preprocessing.cuh>
+
+#include <optional>
+
+namespace raft::sparse::matrix {
+
+/**
+ * @brief Compute BM25 weights for the entries of a sparse matrix stored in COO format.
+ *
+ * Public entry point that forwards every argument unchanged to
+ * raft::sparse::matrix::detail::encode_bm25.
+ *
+ * @tparam T1 type used for the row, column and nnz template arguments of the COO view
+ * @tparam T2 element type of the COO view and of the output vector
+ * @tparam IdxT index type of the output vector view
+ * @param handle: raft resource handle
+ * @param coo_in: Input COO matrix view
+ * @param values_out: Output values array
+ * @param k_param: K value to use for BM25 algorithm (default 1.6)
+ * @param b_param: B value to use for BM25 algorithm (default 0.75)
+ */
+template <typename T1, typename T2, typename IdxT>
+void encode_bm25(raft::resources& handle,
+                 raft::device_coo_matrix_view<T2, T1, T1, T1> coo_in,
+                 raft::device_vector_view<T2, IdxT> values_out,
+                 float k_param = 1.6f,
+                 float b_param = 0.75)
+{
+  // The implementation lives in the detail namespace; this wrapper only fixes the public API.
+  namespace impl = matrix::detail;
+  impl::encode_bm25<T1, T2, IdxT>(handle, coo_in, values_out, k_param, b_param);
+}
+
+/**
+ * @brief Compute BM25 weights for the entries of a sparse matrix stored in CSR format.
+ *
+ * Public entry point that forwards every argument unchanged to
+ * raft::sparse::matrix::detail::encode_bm25.
+ *
+ * @tparam T1 type used for the indptr, indices and nnz template arguments of the CSR view
+ * @tparam T2 element type of the CSR view and of the output vector
+ * @tparam IdxT index type of the output vector view
+ * @param handle: raft resource handle
+ * @param csr_in: Input CSR matrix view
+ * @param values_out: Output values array
+ * @param k_param: K value to use for BM25 algorithm (default 1.6)
+ * @param b_param: B value to use for BM25 algorithm (default 0.75)
+ */
+template <typename T1, typename T2, typename IdxT>
+void encode_bm25(raft::resources& handle,
+                 raft::device_csr_matrix_view<T2, T1, T1, T1> csr_in,
+                 raft::device_vector_view<T2, IdxT> values_out,
+                 float k_param = 1.6f,
+                 float b_param = 0.75)
+{
+  // The implementation lives in the detail namespace; this wrapper only fixes the public API.
+  namespace impl = matrix::detail;
+  impl::encode_bm25<T1, T2, IdxT>(handle, csr_in, values_out, k_param, b_param);
+}
+
+/**
+ * @brief Compute TFIDF weights for the entries of a sparse matrix stored in COO format.
+ *
+ * Public entry point that forwards every argument unchanged to
+ * raft::sparse::matrix::detail::encode_tfidf.
+ *
+ * @tparam T1 type used for the row, column and nnz template arguments of the COO view
+ * @tparam T2 element type of the COO view and of the output vector
+ * @tparam IdxT index type of the output vector view
+ * @param handle: raft resource handle
+ * @param coo_in: Input COO matrix view
+ * @param values_out: Output values array
+ */
+template <typename T1, typename T2, typename IdxT>
+void encode_tfidf(raft::resources& handle,
+                  raft::device_coo_matrix_view<T2, T1, T1, T1> coo_in,
+                  raft::device_vector_view<T2, IdxT> values_out)
+{
+  // The implementation lives in the detail namespace; this wrapper only fixes the public API.
+  namespace impl = matrix::detail;
+  impl::encode_tfidf<T1, T2, IdxT>(handle, coo_in, values_out);
+}
+
+/**
+ * @brief Compute TFIDF weights for the entries of a sparse matrix stored in CSR format.
+ *
+ * Public entry point that forwards every argument unchanged to
+ * raft::sparse::matrix::detail::encode_tfidf.
+ *
+ * @tparam T1 type used for the indptr, indices and nnz template arguments of the CSR view
+ * @tparam T2 element type of the CSR view and of the output vector
+ * @tparam IdxT index type of the output vector view
+ * @param handle: raft resource handle
+ * @param csr_in: Input CSR matrix view
+ * @param values_out: Output values array
+ */
+template <typename T1, typename T2, typename IdxT>
+void encode_tfidf(raft::resources& handle,
+                  raft::device_csr_matrix_view<T2, T1, T1, T1> csr_in,
+                  raft::device_vector_view<T2, IdxT> values_out)
+{
+  // The implementation lives in the detail namespace; this wrapper only fixes the public API.
+  namespace impl = matrix::detail;
+  impl::encode_tfidf<T1, T2, IdxT>(handle, csr_in, values_out);
+}
+
+}  // namespace raft::sparse::matrix
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -25,6 +25,8 @@ namespace raft::sparse::neighbors::brute_force {
 /**
  * Search the sparse kNN for the k-nearest neighbors of a set of sparse query vectors
  * using some distance implementation
+ * template parameter value_idx is the type of the Indptr and Indices arrays.
+ * template parameter value_t is the type of the Data array.
  * @param[in] idxIndptr csr indptr of the index matrix (size n_idx_rows + 1)
  * @param[in] idxIndices csr column indices array of the index matrix (size n_idx_nnz)
  * @param[in] idxData csr data array of the index matrix (size idxNNZ)

@@ -30,8 +30,11 @@
                   " Please use the sparse/spatial version instead.")
 #endif
 
+#include <raft/core/device_coo_matrix.hpp>
 #include <raft/core/resource/cuda_stream.hpp>
+#include <raft/sparse/convert/csr.cuh>
 #include <raft/sparse/neighbors/brute_force.cuh>
+#include <raft/sparse/op/sort.cuh>
 
 namespace raft::sparse::neighbors {
 
@@ -59,7 +62,7 @@ namespace raft::sparse::neighbors {
  * @param[in] metric distance metric/measure to use
  * @param[in] metricArg potential argument for metric (currently unused)
  */
-template <typename value_idx = int, typename value_t = float, int TPB_X = 32>
+template <typename value_idx = int, typename value_t = float>
 void brute_force_knn(const value_idx* idxIndptr,
                      const value_idx* idxIndices,
                      const value_t* idxData,
@@ -103,4 +106,171 @@ void brute_force_knn(const value_idx* idxIndptr,
                                        metricArg);
 }
 
+/**
+ * Search the sparse kNN for the k-nearest neighbors of a set of sparse query vectors
+ * using some distance implementation.
+ *
+ * This overload accepts the index and the query as CSR matrix objects, pulls the
+ * indptr / indices / data arrays out of each one and forwards them to brute_force::knn.
+ *
+ * @tparam value_idx type of the indptr and indices arrays
+ * @tparam value_t type of the data array
+ * @param[in] csr_idx index csr matrix
+ * @param[in] csr_query query csr matrix
+ * @param[out] output_indices output indices (size n_query_rows * k)
+ * @param[out] output_dists output distances (size n_query_rows * k)
+ * @param[in] k the number of neighbors to query
+ * @param[in] handle raft resources handle, forwarded to brute_force::knn
+ * @param[in] batch_size_index maximum number of rows to use from index matrix per batch
+ * @param[in] batch_size_query maximum number of rows to use from query matrix per batch
+ * @param[in] metric distance metric/measure to use
+ * @param[in] metricArg potential argument for metric (currently unused)
+ */
+template <typename value_idx = int, typename value_t = float>
+void brute_force_knn(raft::device_csr_matrix<value_t,
+                                             value_idx,
+                                             value_idx,
+                                             value_idx,
+                                             raft::device_uvector_policy,
+                                             raft::PRESERVING> csr_idx,
+                     raft::device_csr_matrix<value_t,
+                                             value_idx,
+                                             value_idx,
+                                             value_idx,
+                                             raft::device_uvector_policy,
+                                             raft::PRESERVING> csr_query,
+                     device_vector_view<value_idx> output_indices,
+                     device_vector_view<value_t> output_dists,
+                     int k,
+                     raft::resources const& handle,
+                     size_t batch_size_index             = 2 << 14,  // 32768 rows per batch
+                     size_t batch_size_query             = 2 << 14,
+                     raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded,
+                     float metricArg                     = 0)
+{
+  // Structure (indptr / indices) and element views of the index matrix.
+  auto index_structure = csr_idx.structure_view();
+  auto index_indptr    = index_structure.get_indptr();
+  auto index_indices   = index_structure.get_indices();
+  auto index_values    = csr_idx.view().get_elements();
+
+  // Structure (indptr / indices) and element views of the query matrix.
+  auto query_structure = csr_query.structure_view();
+  auto query_indptr    = query_structure.get_indptr();
+  auto query_indices   = query_structure.get_indices();
+  auto query_values    = csr_query.view().get_elements();
+
+  // nnz is the length of the indices array; the row count is one less than the
+  // length of the indptr array.
+  const auto index_nnz    = index_indices.size();
+  const auto index_n_rows = index_indptr.size() - 1;
+  const auto query_nnz    = query_indices.size();
+  const auto query_n_rows = query_indptr.size() - 1;
+
+  brute_force::knn<value_idx, value_t>(index_indptr.data(),
+                                       index_indices.data(),
+                                       index_values.data(),
+                                       index_nnz,
+                                       index_n_rows,
+                                       index_structure.get_n_cols(),
+                                       query_indptr.data(),
+                                       query_indices.data(),
+                                       query_values.data(),
+                                       query_nnz,
+                                       query_n_rows,
+                                       query_structure.get_n_cols(),
+                                       output_indices.data_handle(),
+                                       output_dists.data_handle(),
+                                       k,
+                                       handle,
+                                       batch_size_index,
+                                       batch_size_query,
+                                       metric,
+                                       metricArg);
+}
+
+/**
+ * Search the sparse kNN for the k-nearest neighbors of a set of sparse query vectors
+ * using some distance implementation.
+ *
+ * This overload accepts the index and the query as COO matrix objects. Each matrix is
+ * sorted with raft::sparse::op::coo_sort, its row array is compressed into a CSR row
+ * pointer array with raft::sparse::convert::sorted_coo_to_csr, and the resulting CSR
+ * arrays are forwarded to brute_force::knn.
+ *
+ * @tparam value_idx type of the rows and cols arrays
+ * @tparam value_t type of the data array
+ * @param[in] coo_idx index coo matrix
+ * @param[in] coo_query query coo matrix
+ * @param[out] output_indices output indices (size n_query_rows * k)
+ * @param[out] output_dists output distances (size n_query_rows * k)
+ * @param[in] k the number of neighbors to query
+ * @param[in] handle raft resources handle; its CUDA stream orders the sort and the
+ * COO-to-CSR conversion, and the handle itself is forwarded to brute_force::knn
+ * @param[in] batch_size_index maximum number of rows to use from index matrix per batch
+ * @param[in] batch_size_query maximum number of rows to use from query matrix per batch
+ * @param[in] metric distance metric/measure to use
+ * @param[in] metricArg potential argument for metric (currently unused)
+ */
+template <typename value_idx = int, typename value_t = float>
+void brute_force_knn(raft::device_coo_matrix<value_t,
+                                             value_idx,
+                                             value_idx,
+                                             value_idx,
+                                             raft::device_uvector_policy,
+                                             raft::PRESERVING> coo_idx,
+                     raft::device_coo_matrix<value_t,
+                                             value_idx,
+                                             value_idx,
+                                             value_idx,
+                                             raft::device_uvector_policy,
+                                             raft::PRESERVING> coo_query,
+                     device_vector_view<value_idx> output_indices,
+                     device_vector_view<value_t> output_dists,
+                     int k,
+                     raft::resources const& handle,
+                     size_t batch_size_index             = 2 << 14,  // 32768 rows per batch
+                     size_t batch_size_query             = 2 << 14,
+                     raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded,
+                     float metricArg                     = 0)
+{
+  cudaStream_t stream = raft::resource::get_cuda_stream(handle);
+
+  auto idxRows = coo_idx.structure_view().get_rows();
+  auto idxCols = coo_idx.structure_view().get_cols();
+  auto idxData = coo_idx.view().get_elements();
+
+  auto queryRows = coo_query.structure_view().get_rows();
+  auto queryCols = coo_query.structure_view().get_cols();
+  auto queryData = coo_query.view().get_elements();
+
+  // Sort the index entries. The values array must be the index *data* so that each value
+  // stays attached to its (row, col) pair; passing the row array here would leave the data
+  // unsorted and alias the sort keys with the payload.
+  raft::sparse::op::coo_sort(int(idxRows.size()),
+                             int(idxCols.size()),
+                             int(idxData.size()),
+                             idxRows.data(),
+                             idxCols.data(),
+                             idxData.data(),
+                             stream);
+
+  raft::sparse::op::coo_sort(int(queryRows.size()),
+                             int(queryCols.size()),
+                             int(queryData.size()),
+                             queryRows.data(),
+                             queryCols.data(),
+                             queryData.data(),
+                             stream);
+
+  // + 1 is to account for the 0 at the beginning of the csr representation.
+  // Each row-pointer buffer is sized from its OWN matrix: the index and the query
+  // generally have different row counts.
+  auto idxRowsCsr = raft::make_device_vector<value_idx, int64_t>(
+    handle, coo_idx.structure_view().get_n_rows() + 1);
+  auto queryRowsCsr = raft::make_device_vector<value_idx, int64_t>(
+    handle, coo_query.structure_view().get_n_rows() + 1);
+
+  raft::sparse::convert::sorted_coo_to_csr(idxRows.data(),
+                                           int(idxRows.size()),
+                                           idxRowsCsr.data_handle(),
+                                           coo_idx.structure_view().get_n_rows() + 1,
+                                           stream);
+
+  raft::sparse::convert::sorted_coo_to_csr(queryRows.data(),
+                                           int(queryRows.size()),
+                                           queryRowsCsr.data_handle(),
+                                           coo_query.structure_view().get_n_rows() + 1,
+                                           stream);
+
+  // The row counts handed to knn are the row-pointer lengths minus the trailing entry.
+  brute_force::knn<value_idx, value_t>(idxRowsCsr.data_handle(),
+                                       idxCols.data(),
+                                       idxData.data(),
+                                       idxCols.size(),
+                                       idxRowsCsr.size() - 1,
+                                       coo_idx.structure_view().get_n_cols(),
+                                       queryRowsCsr.data_handle(),
+                                       queryCols.data(),
+                                       queryData.data(),
+                                       queryCols.size(),
+                                       queryRowsCsr.size() - 1,
+                                       coo_query.structure_view().get_n_cols(),
+                                       output_indices.data_handle(),
+                                       output_dists.data_handle(),
+                                       k,
+                                       handle,
+                                       batch_size_index,
+                                       batch_size_query,
+                                       metric,
+                                       metricArg);
+}
+
 };  // namespace raft::sparse::neighbors
@@ -319,6 +319,8 @@ if(BUILD_TESTS)
     sparse/spgemmi.cu
     sparse/spmm.cu
     sparse/symmetrize.cu
+    sparse/preprocess_csr.cu
+    sparse/preprocess_coo.cu
   )
 
   ConfigureTest(
@@ -327,8 +329,16 @@ if(BUILD_TESTS)
   )
 
   ConfigureTest(
-    NAME SPARSE_NEIGHBORS_TEST PATH sparse/neighbors/cross_component_nn.cu
-    sparse/neighbors/brute_force.cu sparse/neighbors/knn_graph.cu LIB EXPLICIT_INSTANTIATE_ONLY
+    NAME
+    SPARSE_NEIGHBORS_TEST
+    PATH
+    sparse/neighbors/cross_component_nn.cu
+    sparse/neighbors/brute_force.cu
+    sparse/neighbors/brute_force_coo.cu
+    sparse/neighbors/brute_force_csr.cu
+    sparse/neighbors/knn_graph.cu
+    LIB
+    EXPLICIT_INSTANTIATE_ONLY
   )
 
   ConfigureTest(