From a97020f9c7e4e2be86788b5f7d83608839d3207b Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 27 Sep 2023 13:33:48 -0400 Subject: [PATCH] Correct numerous 20054-D: dynamic initialization errors found on arm+12.2 (#14108) Compile issues found by compiling libcudf with the `rapidsai/devcontainers:23.10-cpp-gcc9-cuda12.2-ubuntu20.04` docker container. Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Mark Harris (https://github.com/harrism) - David Wendt (https://github.com/davidwendt) - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) - Mike Wilson (https://github.com/hyperbolic2346) URL: https://github.com/rapidsai/cudf/pull/14108 --- cpp/src/io/avro/avro_common.hpp | 3 +- cpp/src/io/comp/unsnap.cu | 18 +++--- cpp/src/io/orc/orc_gpu.hpp | 39 +++++------- cpp/src/io/orc/stats_enc.cu | 10 +-- cpp/src/io/orc/stripe_init.cu | 29 ++++----- cpp/src/io/parquet/page_decode.cuh | 67 +++++++++++---------- cpp/src/io/parquet/page_hdr.cu | 12 ++-- cpp/src/io/parquet/parquet_gpu.hpp | 56 ++++++++--------- cpp/src/io/statistics/column_statistics.cuh | 12 ++-- cpp/src/io/statistics/statistics.cuh | 30 ++++----- 10 files changed, 138 insertions(+), 138 deletions(-) diff --git a/cpp/src/io/avro/avro_common.hpp b/cpp/src/io/avro/avro_common.hpp index ff8ee206dd4..0058d236d8c 100644 --- a/cpp/src/io/avro/avro_common.hpp +++ b/cpp/src/io/avro/avro_common.hpp @@ -25,7 +25,8 @@ namespace cudf { namespace io { namespace avro { struct block_desc_s { - block_desc_s() {} + block_desc_s() = default; // required to compile on ctk-12.2 + aarch64 + explicit constexpr block_desc_s( size_t offset_, uint32_t size_, uint32_t row_offset_, uint32_t first_row_, uint32_t num_rows_) : offset(offset_), diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index c699502317f..504a2fe377c 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -52,6 +52,8 @@ struct unsnap_batch_s { * @brief Queue structure used to exchange data between warps */ struct unsnap_queue_s { + unsnap_queue_s() = default; // required to compile on ctk-12.2 + aarch64 + uint32_t prefetch_wrpos; ///< Prefetcher write position uint32_t prefetch_rdpos; ///< Prefetch consumer read position int32_t prefetch_end; ///< Prefetch enable flag (nonzero stops prefetcher) @@ -64,13 +66,15 @@ struct unsnap_queue_s { * @brief snappy decompression state */ struct unsnap_state_s { - uint8_t const* base; ///< base ptr of compressed stream - uint8_t const* end; ///< end of compressed stream - uint32_t uncompressed_size; ///< uncompressed stream size - uint32_t bytes_left; ///< remaining bytes to decompress - int32_t error; ///< current error status - uint32_t tstart; ///< start time for perf logging - volatile unsnap_queue_s q; ///< queue for cross-warp communication + constexpr unsnap_state_s() noexcept {} // required to compile on ctk-12.2 + aarch64 + + uint8_t const* base{}; ///< base ptr of compressed stream + uint8_t const* end{}; ///< end of compressed stream + uint32_t uncompressed_size{}; ///< uncompressed stream size + uint32_t bytes_left{}; ///< remaining bytes to decompress + int32_t error{}; ///< current error status + uint32_t tstart{}; ///< start time for perf logging + volatile unsnap_queue_s q{}; ///< queue for cross-warp communication device_span src; ///< input for current block device_span dst; ///< output for current block }; diff --git a/cpp/src/io/orc/orc_gpu.hpp b/cpp/src/io/orc/orc_gpu.hpp index 9b8df50a22a..dba7a9ffda5 100644 --- a/cpp/src/io/orc/orc_gpu.hpp +++ b/cpp/src/io/orc/orc_gpu.hpp @@ -59,31 +59,24 @@ struct CompressedStreamInfo { explicit constexpr CompressedStreamInfo(uint8_t const* compressed_data_, size_t compressed_size_) : compressed_data(compressed_data_), uncompressed_data(nullptr), - compressed_data_size(compressed_size_), - dec_in_ctl(nullptr), - dec_out_ctl(nullptr), - copy_in_ctl(nullptr), - copy_out_ctl(nullptr), - num_compressed_blocks(0), - num_uncompressed_blocks(0), - max_uncompressed_size(0), - max_uncompressed_block_size(0) + compressed_data_size(compressed_size_) { } - uint8_t const* compressed_data; // [in] base ptr to compressed stream data - uint8_t* uncompressed_data; // [in] base ptr to uncompressed stream data or NULL if not known yet - size_t compressed_data_size; // [in] compressed data size for this stream - device_span* dec_in_ctl; // [in] input buffer to decompress - device_span* dec_out_ctl; // [in] output buffer to decompress into - device_span dec_res; // [in] results of decompression - device_span* copy_in_ctl; // [out] input buffer to copy - device_span* copy_out_ctl; // [out] output buffer to copy to - uint32_t num_compressed_blocks; // [in,out] number of entries in decctl(in), number of compressed - // blocks(out) - uint32_t num_uncompressed_blocks; // [in,out] number of entries in dec_in_ctl(in), number of - // uncompressed blocks(out) - uint64_t max_uncompressed_size; // [out] maximum uncompressed data size of stream - uint32_t max_uncompressed_block_size; // [out] maximum uncompressed size of any block in stream + uint8_t const* compressed_data{}; // [in] base ptr to compressed stream data + uint8_t* + uncompressed_data{}; // [in] base ptr to uncompressed stream data or NULL if not known yet + size_t compressed_data_size{}; // [in] compressed data size for this stream + device_span* dec_in_ctl{}; // [in] input buffer to decompress + device_span* dec_out_ctl{}; // [in] output buffer to decompress into + device_span dec_res{}; // [in] results of decompression + device_span* copy_in_ctl{}; // [out] input buffer to copy + device_span* copy_out_ctl{}; // [out] output buffer to copy to + uint32_t num_compressed_blocks{}; // [in,out] number of entries in decctl(in), number of + // compressed blocks(out) + uint32_t num_uncompressed_blocks{}; // [in,out] number of entries in dec_in_ctl(in), number of + // uncompressed blocks(out) + uint64_t max_uncompressed_size{}; // [out] maximum uncompressed data size of stream + uint32_t max_uncompressed_block_size{}; // [out] maximum uncompressed size of any block in stream }; enum StreamIndexType { diff --git a/cpp/src/io/orc/stats_enc.cu b/cpp/src/io/orc/stats_enc.cu index 69d7ec95acd..95f1db5bfd1 100644 --- a/cpp/src/io/orc/stats_enc.cu +++ b/cpp/src/io/orc/stats_enc.cu @@ -134,11 +134,11 @@ __global__ void __launch_bounds__(block_size, 1) } struct stats_state_s { - uint8_t* base; ///< Output buffer start - uint8_t* end; ///< Output buffer end - statistics_chunk chunk; - statistics_merge_group group; - statistics_dtype stats_dtype; //!< Statistics data type for this column + uint8_t* base{}; ///< Output buffer start + uint8_t* end{}; ///< Output buffer end + statistics_chunk chunk{}; + statistics_merge_group group{}; + statistics_dtype stats_dtype{}; //!< Statistics data type for this column }; /* diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index d8a60350356..8eeca504121 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -30,14 +30,14 @@ namespace orc { namespace gpu { struct comp_in_out { - uint8_t const* in_ptr; - size_t in_size; - uint8_t* out_ptr; - size_t out_size; + uint8_t const* in_ptr{}; + size_t in_size{}; + uint8_t* out_ptr{}; + size_t out_size{}; }; struct compressed_stream_s { - CompressedStreamInfo info; - comp_in_out ctl; + CompressedStreamInfo info{}; + comp_in_out ctl{}; }; // blockDim {128,1,1} @@ -208,14 +208,15 @@ __global__ void __launch_bounds__(128, 8) * @brief Shared mem state for gpuParseRowGroupIndex */ struct rowindex_state_s { - ColumnDesc chunk; - uint32_t rowgroup_start; - uint32_t rowgroup_end; - int is_compressed; - uint32_t row_index_entry[3][CI_PRESENT]; // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2 - CompressedStreamInfo strm_info[2]; - RowGroup rowgroups[128]; - uint32_t compressed_offset[128][2]; + ColumnDesc chunk{}; + uint32_t rowgroup_start{}; + uint32_t rowgroup_end{}; + int is_compressed{}; + uint32_t row_index_entry[3] + [CI_PRESENT]{}; // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2 + CompressedStreamInfo strm_info[2]{}; + RowGroup rowgroups[128]{}; + uint32_t compressed_offset[128][2]{}; }; enum row_entry_state_e { diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index 26e3c951b2e..5e66885d746 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -26,48 +26,49 @@ namespace cudf::io::parquet::gpu { struct page_state_s { - uint8_t const* data_start; - uint8_t const* data_end; - uint8_t const* lvl_end; - uint8_t const* dict_base; // ptr to dictionary page data - int32_t dict_size; // size of dictionary data - int32_t first_row; // First row in page to output - int32_t num_rows; // Rows in page to decode (including rows to be skipped) - int32_t first_output_value; // First value in page to output - int32_t num_input_values; // total # of input/level values in the page - int32_t dtype_len; // Output data type length - int32_t dtype_len_in; // Can be larger than dtype_len if truncating 32-bit into 8-bit - int32_t dict_bits; // # of bits to store dictionary indices - uint32_t dict_run; - int32_t dict_val; - uint32_t initial_rle_run[NUM_LEVEL_TYPES]; // [def,rep] - int32_t initial_rle_value[NUM_LEVEL_TYPES]; // [def,rep] - int32_t error; - PageInfo page; - ColumnChunkDesc col; + constexpr page_state_s() noexcept {} + uint8_t const* data_start{}; + uint8_t const* data_end{}; + uint8_t const* lvl_end{}; + uint8_t const* dict_base{}; // ptr to dictionary page data + int32_t dict_size{}; // size of dictionary data + int32_t first_row{}; // First row in page to output + int32_t num_rows{}; // Rows in page to decode (including rows to be skipped) + int32_t first_output_value{}; // First value in page to output + int32_t num_input_values{}; // total # of input/level values in the page + int32_t dtype_len{}; // Output data type length + int32_t dtype_len_in{}; // Can be larger than dtype_len if truncating 32-bit into 8-bit + int32_t dict_bits{}; // # of bits to store dictionary indices + uint32_t dict_run{}; + int32_t dict_val{}; + uint32_t initial_rle_run[NUM_LEVEL_TYPES]{}; // [def,rep] + int32_t initial_rle_value[NUM_LEVEL_TYPES]{}; // [def,rep] + int32_t error{}; + PageInfo page{}; + ColumnChunkDesc col{}; // (leaf) value decoding - int32_t nz_count; // number of valid entries in nz_idx (write position in circular buffer) - int32_t dict_pos; // write position of dictionary indices - int32_t src_pos; // input read position of final output value - int32_t ts_scale; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale + int32_t nz_count{}; // number of valid entries in nz_idx (write position in circular buffer) + int32_t dict_pos{}; // write position of dictionary indices + int32_t src_pos{}; // input read position of final output value + int32_t ts_scale{}; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale // repetition/definition level decoding - int32_t input_value_count; // how many values of the input we've processed - int32_t input_row_count; // how many rows of the input we've processed - int32_t input_leaf_count; // how many leaf values of the input we've processed - uint8_t const* lvl_start[NUM_LEVEL_TYPES]; // [def,rep] - uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]; // [def,rep] - uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]; // [def,rep] - int32_t lvl_count[NUM_LEVEL_TYPES]; // how many of each of the streams we've decoded - int32_t row_index_lower_bound; // lower bound of row indices we should process + int32_t input_value_count{}; // how many values of the input we've processed + int32_t input_row_count{}; // how many rows of the input we've processed + int32_t input_leaf_count{}; // how many leaf values of the input we've processed + uint8_t const* lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep] + uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep] + uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]{}; // [def,rep] + int32_t lvl_count[NUM_LEVEL_TYPES]{}; // how many of each of the streams we've decoded + int32_t row_index_lower_bound{}; // lower bound of row indices we should process // a shared-memory cache of frequently used data when decoding. The source of this data is // normally stored in global memory which can yield poor performance. So, when possible // we copy that info here prior to decoding - PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info]; + PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info]{}; // points to either nesting_decode_cache above when possible, or to the global source otherwise - PageNestingDecodeInfo* nesting_info; + PageNestingDecodeInfo* nesting_info{}; }; // buffers only used in the decode kernel. separated from page_state_s to keep diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index 0d611643b46..6f8b2f50443 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -45,13 +45,13 @@ static const __device__ __constant__ uint8_t g_list2struct[16] = {0, ST_FLD_LIST}; struct byte_stream_s { - uint8_t const* cur; - uint8_t const* end; - uint8_t const* base; + uint8_t const* cur{}; + uint8_t const* end{}; + uint8_t const* base{}; // Parsed symbols - PageType page_type; - PageInfo page; - ColumnChunkDesc ck; + PageType page_type{}; + PageInfo page{}; + ColumnChunkDesc ck{}; }; /** diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index a3cc37dee4f..a760c2448dc 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -228,7 +228,7 @@ struct PageInfo { * @brief Struct describing a particular chunk of column data */ struct ColumnChunkDesc { - ColumnChunkDesc() = default; + constexpr ColumnChunkDesc() noexcept {}; explicit ColumnChunkDesc(size_t compressed_size_, uint8_t* compressed_data_, size_t num_values_, @@ -275,34 +275,34 @@ struct ColumnChunkDesc { { } - uint8_t const* compressed_data; // pointer to compressed column chunk data - size_t compressed_size; // total compressed data size for this chunk - size_t num_values; // total number of values in this column - size_t start_row; // starting row of this chunk - uint32_t num_rows; // number of rows in this chunk - int16_t max_level[level_type::NUM_LEVEL_TYPES]; // max definition/repetition level - int16_t max_nesting_depth; // max nesting depth of the output - uint16_t data_type; // basic column data type, ((type_length << 3) | - // parquet::Type) + uint8_t const* compressed_data{}; // pointer to compressed column chunk data + size_t compressed_size{}; // total compressed data size for this chunk + size_t num_values{}; // total number of values in this column + size_t start_row{}; // starting row of this chunk + uint32_t num_rows{}; // number of rows in this chunk + int16_t max_level[level_type::NUM_LEVEL_TYPES]{}; // max definition/repetition level + int16_t max_nesting_depth{}; // max nesting depth of the output + uint16_t data_type{}; // basic column data type, ((type_length << 3) | + // parquet::Type) uint8_t - level_bits[level_type::NUM_LEVEL_TYPES]; // bits to encode max definition/repetition levels - int32_t num_data_pages; // number of data pages - int32_t num_dict_pages; // number of dictionary pages - int32_t max_num_pages; // size of page_info array - PageInfo* page_info; // output page info for up to num_dict_pages + - // num_data_pages (dictionary pages first) - string_index_pair* str_dict_index; // index for string dictionary - bitmask_type** valid_map_base; // base pointers of valid bit map for this column - void** column_data_base; // base pointers of column data - void** column_string_base; // base pointers of column string data - int8_t codec; // compressed codec enum - int8_t converted_type; // converted type enum - LogicalType logical_type; // logical type - int8_t decimal_precision; // Decimal precision - int32_t ts_clock_rate; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) - - int32_t src_col_index; // my input column index - int32_t src_col_schema; // my schema index in the file + level_bits[level_type::NUM_LEVEL_TYPES]{}; // bits to encode max definition/repetition levels + int32_t num_data_pages{}; // number of data pages + int32_t num_dict_pages{}; // number of dictionary pages + int32_t max_num_pages{}; // size of page_info array + PageInfo* page_info{}; // output page info for up to num_dict_pages + + // num_data_pages (dictionary pages first) + string_index_pair* str_dict_index{}; // index for string dictionary + bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column + void** column_data_base{}; // base pointers of column data + void** column_string_base{}; // base pointers of column string data + int8_t codec{}; // compressed codec enum + int8_t converted_type{}; // converted type enum + LogicalType logical_type{}; // logical type + int8_t decimal_precision{}; // Decimal precision + int32_t ts_clock_rate{}; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) + + int32_t src_col_index{}; // my input column index + int32_t src_col_schema{}; // my schema index in the file }; /** diff --git a/cpp/src/io/statistics/column_statistics.cuh b/cpp/src/io/statistics/column_statistics.cuh index 28e77f62a43..f71fb95949f 100644 --- a/cpp/src/io/statistics/column_statistics.cuh +++ b/cpp/src/io/statistics/column_statistics.cuh @@ -34,18 +34,18 @@ namespace io { * @brief shared state for statistics calculation kernel */ struct stats_state_s { - stats_column_desc col; ///< Column information - statistics_group group; ///< Group description - statistics_chunk ck; ///< Output statistics chunk + stats_column_desc col{}; ///< Column information + statistics_group group{}; ///< Group description + statistics_chunk ck{}; ///< Output statistics chunk }; /** * @brief shared state for statistics merge kernel */ struct merge_state_s { - stats_column_desc col; ///< Column information - statistics_merge_group group; ///< Group description - statistics_chunk ck; ///< Resulting statistics chunk + stats_column_desc col{}; ///< Column information + statistics_merge_group group{}; ///< Group description + statistics_chunk ck{}; ///< Resulting statistics chunk }; template diff --git a/cpp/src/io/statistics/statistics.cuh b/cpp/src/io/statistics/statistics.cuh index 805ca43553e..b6e698fee11 100644 --- a/cpp/src/io/statistics/statistics.cuh +++ b/cpp/src/io/statistics/statistics.cuh @@ -98,27 +98,27 @@ union statistics_val { }; struct statistics_chunk { - uint32_t non_nulls; //!< number of non-null values in chunk - uint32_t null_count; //!< number of null values in chunk - statistics_val min_value; //!< minimum value in chunk - statistics_val max_value; //!< maximum value in chunk - statistics_val sum; //!< sum of chunk - uint8_t has_minmax; //!< Nonzero if min_value and max_values are valid - uint8_t has_sum; //!< Nonzero if sum is valid + uint32_t non_nulls{}; //!< number of non-null values in chunk + uint32_t null_count{}; //!< number of null values in chunk + statistics_val min_value{}; //!< minimum value in chunk + statistics_val max_value{}; //!< maximum value in chunk + statistics_val sum{}; //!< sum of chunk + uint8_t has_minmax{}; //!< Nonzero if min_value and max_values are valid + uint8_t has_sum{}; //!< Nonzero if sum is valid }; struct statistics_group { - stats_column_desc const* col; //!< Column information - uint32_t start_row; //!< Start row of this group - uint32_t num_rows; //!< Number of rows in group - uint32_t non_leaf_nulls; //!< Number of null non-leaf values in the group + stats_column_desc const* col{}; //!< Column information + uint32_t start_row{}; //!< Start row of this group + uint32_t num_rows{}; //!< Number of rows in group + uint32_t non_leaf_nulls{}; //!< Number of null non-leaf values in the group }; struct statistics_merge_group { - data_type col_dtype; //!< Column data type - statistics_dtype stats_dtype; //!< Statistics data type for this column - uint32_t start_chunk; //!< Start chunk of this group - uint32_t num_chunks; //!< Number of chunks in group + data_type col_dtype; //!< Column data type + statistics_dtype stats_dtype{dtype_none}; //!< Statistics data type for this column + uint32_t start_chunk{}; //!< Start chunk of this group + uint32_t num_chunks{}; //!< Number of chunks in group }; template >* = nullptr>