Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-24.12' into rjzamora/refactor-evaluate
Browse files Browse the repository at this point in the history
  • Loading branch information
wence- committed Oct 25, 2024
2 parents 30ee2e0 + e98e6b9 commit 65fe592
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 15 deletions.
4 changes: 2 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -348,8 +348,8 @@ ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binary
ConfigureBench(TEXT_BENCH text/ngrams.cpp text/subword.cpp)

ConfigureNVBench(
TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/minhash.cpp
text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp text/word_minhash.cpp
TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/normalize.cpp
text/replace.cpp text/tokenize.cpp text/vocab.cpp
)

# ##################################################################################################
Expand Down
24 changes: 18 additions & 6 deletions cpp/include/nvtext/minhash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ namespace CUDF_EXPORT nvtext {
*
* This function uses MurmurHash3_x86_32 for the hash algorithm.
*
* @deprecated Deprecated in 24.12
*
* @throw std::invalid_argument if the width < 2
*
* @param input Strings column to compute minhash
Expand All @@ -51,7 +53,7 @@ namespace CUDF_EXPORT nvtext {
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Minhash values for each string in input
*/
std::unique_ptr<cudf::column> minhash(
[[deprecated]] std::unique_ptr<cudf::column> minhash(
cudf::strings_column_view const& input,
cudf::numeric_scalar<uint32_t> seed = 0,
cudf::size_type width = 4,
Expand All @@ -71,6 +73,8 @@ std::unique_ptr<cudf::column> minhash(
*
* Any null row entries result in corresponding null output rows.
*
* @deprecated Deprecated in 24.12 - to be replaced in a future release
*
* @throw std::invalid_argument if the width < 2
* @throw std::invalid_argument if seeds is empty
* @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit
Expand All @@ -83,7 +87,7 @@ std::unique_ptr<cudf::column> minhash(
* @param mr Device memory resource used to allocate the returned column's device memory
* @return List column of minhash values for each string per seed
*/
std::unique_ptr<cudf::column> minhash(
[[deprecated]] std::unique_ptr<cudf::column> minhash(
cudf::strings_column_view const& input,
cudf::device_span<uint32_t const> seeds,
cudf::size_type width = 4,
Expand All @@ -102,6 +106,8 @@ std::unique_ptr<cudf::column> minhash(
* The hash function returns 2 uint64 values but only the first value
* is used with the minhash calculation.
*
* @deprecated Deprecated in 24.12
*
* @throw std::invalid_argument if the width < 2
*
* @param input Strings column to compute minhash
Expand All @@ -112,7 +118,7 @@ std::unique_ptr<cudf::column> minhash(
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Minhash values as UINT64 for each string in input
*/
std::unique_ptr<cudf::column> minhash64(
[[deprecated]] std::unique_ptr<cudf::column> minhash64(
cudf::strings_column_view const& input,
cudf::numeric_scalar<uint64_t> seed = 0,
cudf::size_type width = 4,
Expand All @@ -132,6 +138,8 @@ std::unique_ptr<cudf::column> minhash64(
*
* Any null row entries result in corresponding null output rows.
*
* @deprecated Deprecated in 24.12 - to be replaced in a future release
*
* @throw std::invalid_argument if the width < 2
* @throw std::invalid_argument if seeds is empty
* @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit
Expand All @@ -144,7 +152,7 @@ std::unique_ptr<cudf::column> minhash64(
* @param mr Device memory resource used to allocate the returned column's device memory
* @return List column of minhash values for each string per seed
*/
std::unique_ptr<cudf::column> minhash64(
[[deprecated]] std::unique_ptr<cudf::column> minhash64(
cudf::strings_column_view const& input,
cudf::device_span<uint64_t const> seeds,
cudf::size_type width = 4,
Expand All @@ -164,6 +172,8 @@ std::unique_ptr<cudf::column> minhash64(
*
* Any null row entries result in corresponding null output rows.
*
* @deprecated Deprecated in 24.12
*
* @throw std::invalid_argument if seeds is empty
* @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit
*
Expand All @@ -173,7 +183,7 @@ std::unique_ptr<cudf::column> minhash64(
* @param mr Device memory resource used to allocate the returned column's device memory
* @return List column of minhash values for each string per seed
*/
std::unique_ptr<cudf::column> word_minhash(
[[deprecated]] std::unique_ptr<cudf::column> word_minhash(
cudf::lists_column_view const& input,
cudf::device_span<uint32_t const> seeds,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
Expand All @@ -193,6 +203,8 @@ std::unique_ptr<cudf::column> word_minhash(
*
* Any null row entries result in corresponding null output rows.
*
* @deprecated Deprecated in 24.12
*
* @throw std::invalid_argument if seeds is empty
* @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit
*
Expand All @@ -202,7 +214,7 @@ std::unique_ptr<cudf::column> word_minhash(
* @param mr Device memory resource used to allocate the returned column's device memory
* @return List column of minhash values for each string per seed
*/
std::unique_ptr<cudf::column> word_minhash64(
[[deprecated]] std::unique_ptr<cudf::column> word_minhash64(
cudf::lists_column_view const& input,
cudf::device_span<uint64_t const> seeds,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
Expand Down
9 changes: 3 additions & 6 deletions cpp/src/strings/convert/convert_durations.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/strings/convert/convert_durations.hpp>
#include <cudf/strings/detail/convert/int_to_string.cuh>
#include <cudf/strings/detail/strings_children.cuh>
Expand Down Expand Up @@ -152,12 +153,8 @@ struct format_compiler {
}

// create program in device memory
d_items.resize(items.size(), stream);
CUDF_CUDA_TRY(cudaMemcpyAsync(d_items.data(),
items.data(),
items.size() * sizeof(items[0]),
cudaMemcpyDefault,
stream.value()));
d_items = cudf::detail::make_device_uvector_sync(
items, stream, cudf::get_current_device_resource_ref());
}

format_item const* compiled_format_items() { return d_items.data(); }
Expand Down
1 change: 0 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,6 @@ ConfigureTest(
text/bpe_tests.cpp
text/edit_distance_tests.cpp
text/jaccard_tests.cpp
text/minhash_tests.cpp
text/ngrams_tests.cpp
text/ngrams_tokenize_tests.cpp
text/normalize_tests.cpp
Expand Down

0 comments on commit 65fe592

Please sign in to comment.