Skip to content

Commit

Permalink
Merge branch 'branch-23.10' of https://github.com/rapidsai/cudf into …
Browse files Browse the repository at this point in the history
…perf-read_csv-register-mmap
  • Loading branch information
vuule committed Aug 3, 2023
2 parents f3385f6 + b7994bc commit 1daa664
Show file tree
Hide file tree
Showing 89 changed files with 458 additions and 444 deletions.
2 changes: 1 addition & 1 deletion cpp/benchmarks/copying/contiguous_split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ void BM_contiguous_split_strings(benchmark::State& state, ContiguousSplitImpl& i
}

int64_t const total_bytes =
total_desired_bytes + ((num_rows + 1) * sizeof(cudf::offset_type)) +
total_desired_bytes + ((num_rows + 1) * sizeof(cudf::size_type)) +
(include_validity ? (max(int64_t{1}, (num_rows / 32)) * sizeof(cudf::bitmask_type) * num_cols)
: 0);

Expand Down
12 changes: 6 additions & 6 deletions cpp/benchmarks/lists/copying/scatter_lists.cu
Original file line number Diff line number Diff line change
Expand Up @@ -62,26 +62,26 @@ void BM_lists_scatter(::benchmark::State& state)
target_base_col->mutable_view().end<TypeParam>());

auto source_offsets =
make_fixed_width_column(cudf::data_type{cudf::type_to_id<cudf::offset_type>()},
make_fixed_width_column(cudf::data_type{cudf::type_to_id<cudf::size_type>()},
num_rows + 1,
cudf::mask_state::UNALLOCATED,
stream,
mr);
auto target_offsets =
make_fixed_width_column(cudf::data_type{cudf::type_to_id<cudf::offset_type>()},
make_fixed_width_column(cudf::data_type{cudf::type_to_id<cudf::size_type>()},
num_rows + 1,
cudf::mask_state::UNALLOCATED,
stream,
mr);

thrust::sequence(rmm::exec_policy(stream),
source_offsets->mutable_view().begin<cudf::offset_type>(),
source_offsets->mutable_view().end<cudf::offset_type>(),
source_offsets->mutable_view().begin<cudf::size_type>(),
source_offsets->mutable_view().end<cudf::size_type>(),
0,
num_elements_per_row);
thrust::sequence(rmm::exec_policy(stream),
target_offsets->mutable_view().begin<cudf::offset_type>(),
target_offsets->mutable_view().end<cudf::offset_type>(),
target_offsets->mutable_view().begin<cudf::size_type>(),
target_offsets->mutable_view().end<cudf::size_type>(),
0,
num_elements_per_row);

Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/strings/custom_prealloc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ __global__ void redact_kernel(cudf::column_device_view const d_names,
cudf::column_device_view const d_visibilities,
cudf::string_view redaction,
char* working_memory,
cudf::offset_type const* d_offsets,
cudf::size_type const* d_offsets,
cudf::string_view* d_output)
{
// The row index is resolved from the CUDA thread/block objects
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,9 @@ std::pair<std::unique_ptr<column>, size_type> make_offsets_child_column(
{
auto count = static_cast<size_type>(std::distance(begin, end));
auto offsets_column = make_numeric_column(
data_type{type_to_id<offset_type>()}, count + 1, mask_state::UNALLOCATED, stream, mr);
data_type{type_to_id<size_type>()}, count + 1, mask_state::UNALLOCATED, stream, mr);
auto offsets_view = offsets_column->mutable_view();
auto d_offsets = offsets_view.template data<offset_type>();
auto d_offsets = offsets_view.template data<size_type>();

// The number of offsets is count+1 so to build the offsets from the sizes
// using exclusive-scan technically requires count+1 input values even though
Expand Down
8 changes: 4 additions & 4 deletions cpp/include/cudf/lists/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,11 @@ std::unique_ptr<column> scatter(scalar const& slr,
rmm::device_buffer null_mask =
slr_valid ? cudf::detail::create_null_mask(1, mask_state::UNALLOCATED, stream, mr)
: cudf::detail::create_null_mask(1, mask_state::ALL_NULL, stream, mr);
auto offset_column = make_numeric_column(
data_type{type_to_id<offset_type>()}, 2, mask_state::UNALLOCATED, stream, mr);
auto offset_column =
make_numeric_column(data_type{type_to_id<size_type>()}, 2, mask_state::UNALLOCATED, stream, mr);
thrust::sequence(rmm::exec_policy_nosync(stream),
offset_column->mutable_view().begin<offset_type>(),
offset_column->mutable_view().end<offset_type>(),
offset_column->mutable_view().begin<size_type>(),
offset_column->mutable_view().end<size_type>(),
0,
lv->view().size());
auto wrapped = column_view(data_type{type_id::LIST},
Expand Down
6 changes: 2 additions & 4 deletions cpp/include/cudf/lists/lists_column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,7 @@ class lists_column_view : private column_view {
using column_view::null_mask;
using column_view::offset;
using column_view::size;
static_assert(std::is_same_v<offset_type, size_type>,
"offset_type is expected to be the same as size_type.");
using offset_iterator = offset_type const*; ///< Iterator type for offsets
using offset_iterator = size_type const*; ///< Iterator type for offsets

/**
* @brief Returns the parent column.
Expand Down Expand Up @@ -119,7 +117,7 @@ class lists_column_view : private column_view {
*/
[[nodiscard]] offset_iterator offsets_begin() const noexcept
{
return offsets().begin<offset_type>() + offset();
return offsets().begin<size_type>() + offset();
}

/**
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/cudf/strings/strings_column_view.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -67,8 +67,8 @@ class strings_column_view : private column_view {
using column_view::offset;
using column_view::size;

using offset_iterator = offset_type const*; ///< offsets iterator type
using chars_iterator = char const*; ///< character iterator type
using offset_iterator = size_type const*; ///< offsets iterator type
using chars_iterator = char const*; ///< character iterator type

/**
* @brief Returns the parent column.
Expand Down
4 changes: 1 addition & 3 deletions cpp/include/cudf/tdigest/tdigest_column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,7 @@ class tdigest_column_view : private column_view {
tdigest_column_view& operator=(tdigest_column_view&&) = default;

using column_view::size;
static_assert(std::is_same_v<offset_type, size_type>,
"offset_type is expected to be the same as size_type.");
using offset_iterator = offset_type const*; ///< Iterator over offsets
using offset_iterator = size_type const*; ///< Iterator over offsets

// mean and weight column indices within tdigest inner struct columns
static constexpr size_type mean_column_index{0}; ///< Mean column index
Expand Down
1 change: 0 additions & 1 deletion cpp/include/cudf/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ class mutable_table_view;
using size_type = int32_t; ///< Row index type for columns and tables
using bitmask_type = uint32_t; ///< Bitmask type stored as 32-bit unsigned integer
using valid_type = uint8_t; ///< Valid type in host memory
using offset_type = int32_t; ///< Offset type for column offsets
using thread_index_type = int64_t; ///< Thread index type in kernels

/**
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf_test/column_utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,8 @@ inline std::pair<thrust::host_vector<std::string>, std::vector<bitmask_type>> to
cudf::device_span<char const>(scv.chars().data<char>(), scv.chars().size()),
cudf::get_default_stream());
auto const h_offsets = cudf::detail::make_std_vector_sync(
cudf::device_span<cudf::offset_type const>(
scv.offsets().data<cudf::offset_type>() + scv.offset(), scv.size() + 1),
cudf::device_span<cudf::size_type const>(scv.offsets().data<cudf::size_type>() + scv.offset(),
scv.size() + 1),
cudf::get_default_stream());

// build std::string vector from chars and offsets
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf_test/column_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1507,7 +1507,7 @@ class lists_column_wrapper : public detail::column_wrapper {
*/
static lists_column_wrapper<T> make_one_empty_row_column(bool valid = true)
{
cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets{0, 0};
cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets{0, 0};
cudf::test::fixed_width_column_wrapper<int> values{};
return lists_column_wrapper<T>(
1,
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -408,15 +408,15 @@ void traverse_children::operator()<cudf::string_view>(host_span<column_view cons
return a + (scv.is_empty() ? 0
// if the column is unsliced, skip the offset retrieval.
: scv.offset() > 0
? cudf::detail::get_value<offset_type>(
? cudf::detail::get_value<size_type>(
scv.offsets(), scv.offset() + scv.size(), stream) -
cudf::detail::get_value<offset_type>(scv.offsets(), scv.offset(), stream)
cudf::detail::get_value<size_type>(scv.offsets(), scv.offset(), stream)
// if the offset() is 0, it can still be sliced to a shorter length. in this case
// we only need to read a single offset. otherwise just return the full length
// (chars_size())
: scv.size() + 1 == scv.offsets().size()
? scv.chars_size()
: cudf::detail::get_value<offset_type>(scv.offsets(), scv.size(), stream));
: cudf::detail::get_value<size_type>(scv.offsets(), scv.size(), stream));
});
CUDF_EXPECTS(total_char_count <= static_cast<size_t>(std::numeric_limits<size_type>::max()),
"Total number of concatenated chars exceeds the column size limit",
Expand Down
15 changes: 7 additions & 8 deletions cpp/src/copying/contiguous_split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -939,7 +939,7 @@ struct batch_byte_size_function {
* @brief Get the input buffer index given the output buffer index.
*/
struct out_to_in_index_function {
offset_type const* const batch_offsets;
size_type const* const batch_offsets;
int const num_bufs;
__device__ int operator()(size_type i) const
{
Expand Down Expand Up @@ -1312,7 +1312,7 @@ std::unique_ptr<packed_partition_buf_size_and_dst_buf_info> compute_splits(
*/
struct chunk_iteration_state {
chunk_iteration_state(rmm::device_uvector<dst_buf_info> _d_batched_dst_buf_info,
rmm::device_uvector<offset_type> _d_batch_offsets,
rmm::device_uvector<size_type> _d_batch_offsets,
std::vector<std::size_t>&& _h_num_buffs_per_iteration,
std::vector<std::size_t>&& _h_size_of_buffs_per_iteration,
std::size_t total_size)
Expand Down Expand Up @@ -1375,11 +1375,10 @@ struct chunk_iteration_state {
bool has_more_copies() const { return current_iteration < num_iterations; }

rmm::device_uvector<dst_buf_info> d_batched_dst_buf_info; ///< dst_buf_info per 1MB batch
rmm::device_uvector<offset_type> const
d_batch_offsets; ///< Offset within a batch per dst_buf_info
std::size_t const total_size; ///< The aggregate size of all iterations
int const num_iterations; ///< The total number of iterations
int current_iteration; ///< Marks the current iteration being worked on
rmm::device_uvector<size_type> const d_batch_offsets; ///< Offset within a batch per dst_buf_info
std::size_t const total_size; ///< The aggregate size of all iterations
int const num_iterations; ///< The total number of iterations
int current_iteration; ///< Marks the current iteration being worked on

private:
std::size_t starting_batch; ///< Starting batch index for the current iteration
Expand All @@ -1398,7 +1397,7 @@ std::unique_ptr<chunk_iteration_state> chunk_iteration_state::create(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* temp_mr)
{
rmm::device_uvector<offset_type> d_batch_offsets(num_bufs + 1, stream, temp_mr);
rmm::device_uvector<size_type> d_batch_offsets(num_bufs + 1, stream, temp_mr);

auto const buf_count_iter = cudf::detail::make_counting_transform_iterator(
0, [num_bufs, num_batches = num_batches_func{batches.begin()}] __device__(size_type i) {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/groupby/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ struct empty_column_constructor {

if constexpr (k == aggregation::Kind::COLLECT_LIST || k == aggregation::Kind::COLLECT_SET) {
return make_lists_column(
0, make_empty_column(type_to_id<offset_type>()), empty_like(values), 0, {});
0, make_empty_column(type_to_id<size_type>()), empty_like(values), 0, {});
}

if constexpr (k == aggregation::Kind::RANK) {
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/groupby/sort/group_collect.cu
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,12 @@ std::unique_ptr<column> group_collect(column_view const& values,
auto [child_column,
offsets_column] = [null_handling, num_groups, &values, &group_offsets, stream, mr] {
auto offsets_column = make_numeric_column(
data_type(type_to_id<offset_type>()), num_groups + 1, mask_state::UNALLOCATED, stream, mr);
data_type(type_to_id<size_type>()), num_groups + 1, mask_state::UNALLOCATED, stream, mr);

thrust::copy(rmm::exec_policy(stream),
group_offsets.begin(),
group_offsets.end(),
offsets_column->mutable_view().template begin<offset_type>());
offsets_column->mutable_view().template begin<size_type>());

// If column of grouped values contains null elements, and null_policy == EXCLUDE,
// those elements must be filtered out, and offsets recomputed.
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/groupby/sort/group_merge_lists.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -38,7 +38,7 @@ std::unique_ptr<column> group_merge_lists(column_view const& values,
"Input to `group_merge_lists` must be a non-nullable lists column.");

auto offsets_column = make_numeric_column(
data_type(type_to_id<offset_type>()), num_groups + 1, mask_state::UNALLOCATED, stream, mr);
data_type(type_to_id<size_type>()), num_groups + 1, mask_state::UNALLOCATED, stream, mr);

// Generate offsets of the output lists column by gathering from the provided group offsets and
// the input list offsets.
Expand All @@ -54,7 +54,7 @@ std::unique_ptr<column> group_merge_lists(column_view const& values,
group_offsets.begin(),
group_offsets.end(),
lists_column_view(values).offsets_begin(),
offsets_column->mutable_view().template begin<offset_type>());
offsets_column->mutable_view().template begin<size_type>());

// The child column of the output lists column is just copied from the input column.
auto child_column =
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/io/csv/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ namespace {
struct escape_strings_fn {
column_device_view const d_column;
string_view const d_delimiter; // check for column delimiter
offset_type* d_offsets{};
size_type* d_offsets{};
char* d_chars{};

__device__ void write_char(char_utf8 chr, char*& d_buffer, offset_type& bytes)
__device__ void write_char(char_utf8 chr, char*& d_buffer, size_type& bytes)
{
if (d_buffer)
d_buffer += cudf::strings::detail::from_char_utf8(chr, d_buffer);
Expand All @@ -105,8 +105,8 @@ struct escape_strings_fn {
return chr == quote || chr == new_line || chr == d_delimiter[0];
});

char* d_buffer = d_chars ? d_chars + d_offsets[idx] : nullptr;
offset_type bytes = 0;
char* d_buffer = d_chars ? d_chars + d_offsets[idx] : nullptr;
size_type bytes = 0;

if (quote_row) write_char(quote, d_buffer, bytes);
for (auto chr : d_str) {
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/json/json_column.cu
Original file line number Diff line number Diff line change
Expand Up @@ -363,8 +363,8 @@ std::vector<std::string> copy_strings_to_host(device_span<SymbolT const> input,
cudf::device_span<char const>(scv.chars().data<char>(), scv.chars().size()),
cudf::get_default_stream());
auto const h_offsets = cudf::detail::make_std_vector_sync(
cudf::device_span<cudf::offset_type const>(
scv.offsets().data<cudf::offset_type>() + scv.offset(), scv.size() + 1),
cudf::device_span<cudf::size_type const>(scv.offsets().data<cudf::size_type>() + scv.offset(),
scv.size() + 1),
cudf::get_default_stream());

// build std::string vector from chars and offsets
Expand Down
12 changes: 6 additions & 6 deletions cpp/src/io/json/write_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ namespace {
struct escape_strings_fn {
column_device_view const d_column;
bool const append_colon{false};
offset_type* d_offsets{};
size_type* d_offsets{};
char* d_chars{};

__device__ void write_char(char_utf8 chr, char*& d_buffer, offset_type& bytes)
__device__ void write_char(char_utf8 chr, char*& d_buffer, size_type& bytes)
{
if (d_buffer)
d_buffer += cudf::strings::detail::from_char_utf8(chr, d_buffer);
Expand All @@ -91,7 +91,7 @@ struct escape_strings_fn {
return nibble < 10 ? '0' + nibble : 'a' + nibble - 10;
}

__device__ void write_utf8_codepoint(uint16_t codepoint, char*& d_buffer, offset_type& bytes)
__device__ void write_utf8_codepoint(uint16_t codepoint, char*& d_buffer, size_type& bytes)
{
if (d_buffer) {
d_buffer[0] = '\\';
Expand All @@ -106,7 +106,7 @@ struct escape_strings_fn {
}
}

__device__ void write_utf16_codepoint(uint32_t codepoint, char*& d_buffer, offset_type& bytes)
__device__ void write_utf16_codepoint(uint32_t codepoint, char*& d_buffer, size_type& bytes)
{
constexpr uint16_t UTF16_HIGH_SURROGATE_BEGIN = 0xD800;
constexpr uint16_t UTF16_LOW_SURROGATE_BEGIN = 0xDC00;
Expand All @@ -130,8 +130,8 @@ struct escape_strings_fn {
constexpr char_utf8 const quote = '\"'; // wrap quotes
bool constexpr quote_row = true;

char* d_buffer = d_chars ? d_chars + d_offsets[idx] : nullptr;
offset_type bytes = 0;
char* d_buffer = d_chars ? d_chars + d_offsets[idx] : nullptr;
size_type bytes = 0;

if (quote_row) write_char(quote, d_buffer, bytes);
for (auto utf8_char : d_str) {
Expand Down
12 changes: 9 additions & 3 deletions cpp/src/io/parquet/page_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -432,9 +432,15 @@ __global__ void __launch_bounds__(128)
max_RLE_page_size(col_g.num_def_level_bits(), num_vals) +
max_RLE_page_size(col_g.num_rep_level_bits(), num_vals));

if (num_rows >= ck_g.num_rows ||
(values_in_page > 0 && (page_size + fragment_data_size > this_max_page_size)) ||
rows_in_page + frag_g.num_rows > max_page_size_rows) {
// checks to see when we need to close the current page and start a new one
auto const is_last_chunk = num_rows >= ck_g.num_rows;
auto const is_page_bytes_exceeded = page_size + fragment_data_size > this_max_page_size;
auto const is_page_rows_exceeded = rows_in_page + frag_g.num_rows > max_page_size_rows;
// only check for limit overflow if there's already at least one fragment for this page
auto const is_page_too_big =
values_in_page > 0 && (is_page_bytes_exceeded || is_page_rows_exceeded);

if (is_last_chunk || is_page_too_big) {
if (ck_g.use_dictionary) {
// Additional byte to store entry bit width
page_size = 1 + max_RLE_page_size(ck_g.dict_rle_bits, values_in_page);
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/parquet/reader_impl_preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1016,7 +1016,7 @@ struct row_size_functor {
template <>
__device__ size_t row_size_functor::operator()<list_view>(size_t num_rows, bool nullable)
{
auto const offset_size = sizeof(offset_type);
auto const offset_size = sizeof(size_type);
// NOTE: Adding the + 1 offset here isn't strictly correct. There will only be 1 extra offset
// for the entire column, whereas this is adding an extra offset per page. So we will get a
// small over-estimate of the real size of the order : # of pages * 4 bytes. It seems better
Expand All @@ -1036,7 +1036,7 @@ __device__ size_t row_size_functor::operator()<string_view>(size_t num_rows, boo
{
// only returns the size of offsets and validity. the size of the actual string chars
// is tracked separately.
auto const offset_size = sizeof(offset_type);
auto const offset_size = sizeof(size_type);
// see note about offsets in the list_view template.
return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable);
}
Expand Down
Loading

0 comments on commit 1daa664

Please sign in to comment.