From 2656de143e049d5d0eda6b3a535e8f75f7801ca3 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Thu, 20 Jul 2023 05:33:17 -0700 Subject: [PATCH 01/22] fprintf --- cpp/CMakeLists.txt | 5 +++++ cpp/src/io/parquet/writer_impl.cu | 4 ++++ java/src/main/native/src/TableJni.cpp | 2 ++ 3 files changed, 11 insertions(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0742d039092..8ba117277f6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -643,6 +643,11 @@ set_source_files_properties( PROPERTIES COMPILE_DEFINITIONS "_FILE_OFFSET_BITS=64" ) +set_source_files_properties( + src/io/parquet/writer_impl.cu + PROPERTIES COMPILE_FLAGS "-g" +) + set_target_properties( cudf PROPERTIES BUILD_RPATH "\$ORIGIN" diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 17a0a903a47..e25ae34cc75 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1451,6 +1451,8 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, rmm::cuda_stream_view stream) { auto vec = table_to_linked_columns(input); + fprintf(stderr, "GERA_DEBUG (L%d): %d\n", __LINE__, int(input.column(0).type().id())); + auto schema_tree = construct_schema_tree(vec, table_meta, write_mode, int96_timestamps); // Construct parquet_column_views from the schema tree leaf nodes. std::vector parquet_columns; @@ -2045,6 +2047,7 @@ void writer::impl::update_compression_statistics( void writer::impl::write(table_view const& input, std::vector const& partitions) { + fprintf(stderr, "GERA_DEBUG io::detail:parquet::write\n"); _last_write_successful = false; CUDF_EXPECTS(not _closed, "Data has already been flushed to out and closed"); @@ -2303,6 +2306,7 @@ writer::writer(std::vector> sinks, rmm::cuda_stream_view stream) : _impl(std::make_unique(std::move(sinks), options, mode, stream)) { + fprintf(stderr, "GERA_DEBUG (L%d) writer::writer int96_ts=%d\n", __LINE__, options.is_enabled_int96_timestamps()); } // Destructor within this translation unit diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 6360ae6093a..ae581a90c4b 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1673,6 +1673,8 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Table_writeParquetChunk(JNIEnv *env, JNI_NULL_CHECK(env, j_table, "null table", ); JNI_NULL_CHECK(env, j_state, "null state", ); + fprintf(stderr, "GERA_DEBUG (L%d) Java_ai_rapids_cudf_Table_writeParquetChunk\n", __LINE__); + using namespace cudf::io; cudf::table_view *tview_with_empty_nullmask = reinterpret_cast(j_table); cudf::table_view tview = cudf::jni::remove_validity_if_needed(tview_with_empty_nullmask); From 0e3981186d6f568db9df083e456e0eddd0e0fc72 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Mon, 24 Jul 2023 16:47:09 -0700 Subject: [PATCH 02/22] printf debugging --- cpp/src/io/parquet/page_data.cu | 2 ++ cpp/src/io/parquet/page_enc.cu | 8 ++++++++ cpp/src/io/parquet/writer_impl.cu | 6 +++--- java/src/main/native/src/TableJni.cpp | 2 +- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index e49378485fc..50b5ab45b59 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -147,6 +147,8 @@ inline __device__ void gpuOutputInt96Timestamp(volatile page_state_s* s, uint8_t const* src8; uint32_t dict_pos, dict_size = s->dict_size, ofs; + printf("\nGERA_DEBUG __device__ void gpuOutputInt96Timestamp\n"); + if (s->dict_base) { // Dictionary dict_pos = (s->dict_bits > 0) ? sb->dict_idx[rolling_index(src_pos)] : 0; diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 5136cba3ac0..60ae7032bdb 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -16,6 +16,7 @@ #include "parquet_gpu.cuh" +#include #include #include @@ -948,6 +949,9 @@ static __device__ std::pair convert_nanoseconds(timesta auto const julian_days = gregorian_days + ceil(julian_calendar_epoch_diff()); auto const last_day_ticks = nanosecond_ticks - gregorian_days; + printf("\nGERA_DEBUG static convert_nanoseconds last_day_ticks=%lld juli_days=%lld\n", + static_cast(last_day_ticks.count()), + static_cast(julian_days.count())); return {last_day_ticks, julian_days}; } @@ -1226,6 +1230,7 @@ __global__ void __launch_bounds__(128, 8) dst[pos + 7] = v >> 56; } break; case INT96: { + printf("\nGERA_DEBUG INT96 gpuEncodePages val_idx=%d\n", val_idx ); int64_t v = s->col.leaf_column->element(val_idx); int32_t ts_scale = s->col.ts_scale; if (ts_scale != 0) { @@ -1240,13 +1245,16 @@ __global__ void __launch_bounds__(128, 8) switch (s->col.leaf_column->type().id()) { case type_id::TIMESTAMP_SECONDS: case type_id::TIMESTAMP_MILLISECONDS: { + printf("\nGERA_DEBUG INT96 gpuEncodePages convert_nanoseconds TIMESTAMP_(MILLI)SECONDS\n"); return timestamp_ns{duration_ms{v}}; } break; case type_id::TIMESTAMP_MICROSECONDS: case type_id::TIMESTAMP_NANOSECONDS: { + printf("\nGERA_DEBUG INT96 gpuEncodePages convert_nanoseconds TIMESTAMP_(MICRO/NANO)SECONDS\n"); return timestamp_ns{duration_us{v}}; } break; } + printf("\nGERA_DEBUG INT96 gpuEncodePages convert_nanoseconds return\n"); return timestamp_ns{duration_ns{0}}; }()); diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index e25ae34cc75..51e1046ce9a 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1451,7 +1451,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, rmm::cuda_stream_view stream) { auto vec = table_to_linked_columns(input); - fprintf(stderr, "GERA_DEBUG (L%d): %d\n", __LINE__, int(input.column(0).type().id())); + fprintf(stderr, "\nGERA_DEBUG (L%d): %d\n", __LINE__, int(input.column(0).type().id())); auto schema_tree = construct_schema_tree(vec, table_meta, write_mode, int96_timestamps); // Construct parquet_column_views from the schema tree leaf nodes. @@ -2047,7 +2047,7 @@ void writer::impl::update_compression_statistics( void writer::impl::write(table_view const& input, std::vector const& partitions) { - fprintf(stderr, "GERA_DEBUG io::detail:parquet::write\n"); + fprintf(stderr, "\nGERA_DEBUG io::detail:parquet::write\n"); _last_write_successful = false; CUDF_EXPECTS(not _closed, "Data has already been flushed to out and closed"); @@ -2306,7 +2306,7 @@ writer::writer(std::vector> sinks, rmm::cuda_stream_view stream) : _impl(std::make_unique(std::move(sinks), options, mode, stream)) { - fprintf(stderr, "GERA_DEBUG (L%d) writer::writer int96_ts=%d\n", __LINE__, options.is_enabled_int96_timestamps()); + fprintf(stderr, "\nGERA_DEBUG (L%d) writer::writer int96_ts=%d\n", __LINE__, options.is_enabled_int96_timestamps()); } // Destructor within this translation unit diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index ae581a90c4b..23c77c5f1d0 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1673,7 +1673,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Table_writeParquetChunk(JNIEnv *env, JNI_NULL_CHECK(env, j_table, "null table", ); JNI_NULL_CHECK(env, j_state, "null state", ); - fprintf(stderr, "GERA_DEBUG (L%d) Java_ai_rapids_cudf_Table_writeParquetChunk\n", __LINE__); + fprintf(stderr, "\nGERA_DEBUG (L%d) Java_ai_rapids_cudf_Table_writeParquetChunk\n", __LINE__); using namespace cudf::io; cudf::table_view *tview_with_empty_nullmask = reinterpret_cast(j_table); From 9e54fd50e6ded25791df4880aa6dc0c17c753df3 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 25 Jul 2023 00:21:29 -0700 Subject: [PATCH 03/22] printf again Signed-off-by: Gera Shegalov --- cpp/src/io/parquet/page_enc.cu | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 60ae7032bdb..91c7adeaa7e 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -16,7 +16,6 @@ #include "parquet_gpu.cuh" -#include #include #include @@ -949,7 +948,8 @@ static __device__ std::pair convert_nanoseconds(timesta auto const julian_days = gregorian_days + ceil(julian_calendar_epoch_diff()); auto const last_day_ticks = nanosecond_ticks - gregorian_days; - printf("\nGERA_DEBUG static convert_nanoseconds last_day_ticks=%lld juli_days=%lld\n", + printf("\nGERA_DEBUG static convert_nanoseconds sizeof(ns)=%lu last_day_ticks=%lu juli_days=%lu\n", + sizeof(ns), static_cast(last_day_ticks.count()), static_cast(julian_days.count())); return {last_day_ticks, julian_days}; @@ -1230,8 +1230,10 @@ __global__ void __launch_bounds__(128, 8) dst[pos + 7] = v >> 56; } break; case INT96: { - printf("\nGERA_DEBUG INT96 gpuEncodePages val_idx=%d\n", val_idx ); int64_t v = s->col.leaf_column->element(val_idx); + uint64_t gera_uv = s->col.leaf_column->element(val_idx); + printf("\nGERA_DEBUG INT96 gpuEncodePages v=%ld uv=%lu\n", v, gera_uv); + int32_t ts_scale = s->col.ts_scale; if (ts_scale != 0) { if (ts_scale < 0) { From 0edee2c8283137e7e72f56dc605cea015bb51df3 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 25 Jul 2023 09:48:16 -0700 Subject: [PATCH 04/22] redo --- cpp/src/io/parquet/page_enc.cu | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 91c7adeaa7e..fc597f20e36 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -922,14 +922,14 @@ static __device__ void PlainBoolEncode(page_enc_state_s* s, * @brief Determines the difference between the Proleptic Gregorian Calendar epoch (1970-01-01 * 00:00:00 UTC) and the Julian date epoch (-4713-11-24 12:00:00 UTC). * - * @return The difference between two epochs in `cuda::std::chrono::duration` format with a period - * of hours. + * @return The difference between two epochs in `cuda::std::chrono::duration` in the whole number + * of days. */ -constexpr auto julian_calendar_epoch_diff() +constexpr auto julian_calendar_epoch_diff_in_days() { using namespace cuda::std::chrono; using namespace cuda::std::chrono_literals; - return sys_days{January / 1 / 1970} - (sys_days{November / 24 / -4713} + 12h); + return ceil(sys_days{January / 1 / 1970} - (sys_days{November / 24 / -4713} + 12h)); } /** @@ -945,16 +945,17 @@ static __device__ std::pair convert_nanoseconds(timesta using namespace cuda::std::chrono; auto const nanosecond_ticks = ns.time_since_epoch(); auto const gregorian_days = floor(nanosecond_ticks); - auto const julian_days = gregorian_days + ceil(julian_calendar_epoch_diff()); + auto const julian_days = gregorian_days + julian_calendar_epoch_diff_in_days(); auto const last_day_ticks = nanosecond_ticks - gregorian_days; - printf("\nGERA_DEBUG static convert_nanoseconds sizeof(ns)=%lu last_day_ticks=%lu juli_days=%lu\n", - sizeof(ns), + printf("\nGERA_DEBUG static convert_nanoseconds sizeof(nanosecond_ticks)=%lu last_day_ticks=%lu juli_days=%lu\n", + sizeof(nanosecond_ticks), static_cast(last_day_ticks.count()), static_cast(julian_days.count())); return {last_day_ticks, julian_days}; } + // blockDim(128, 1, 1) template __global__ void __launch_bounds__(128, 8) From c86d8f7a12011e1a509533113b97f07314c662ea Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 25 Jul 2023 17:47:59 -0700 Subject: [PATCH 05/22] wip --- cpp/src/io/parquet/page_enc.cu | 54 +++++++++++++++------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index fc597f20e36..7b324d2fce8 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -940,19 +940,28 @@ constexpr auto julian_calendar_epoch_diff_in_days() * @return std::pair where nanoseconds is the number of nanoseconds * elapsed in the day and days is the number of days from Julian epoch. */ -static __device__ std::pair convert_nanoseconds(timestamp_ns const ns) +static __device__ std::pair convert_nanoseconds(type_id tid, int64_t const v) { using namespace cuda::std::chrono; - auto const nanosecond_ticks = ns.time_since_epoch(); - auto const gregorian_days = floor(nanosecond_ticks); - auto const julian_days = gregorian_days + julian_calendar_epoch_diff_in_days(); - - auto const last_day_ticks = nanosecond_ticks - gregorian_days; - printf("\nGERA_DEBUG static convert_nanoseconds sizeof(nanosecond_ticks)=%lu last_day_ticks=%lu juli_days=%lu\n", - sizeof(nanosecond_ticks), - static_cast(last_day_ticks.count()), - static_cast(julian_days.count())); - return {last_day_ticks, julian_days}; + duration_D gregorian_days [[maybe_unused]]; + duration_ns time_of_day [[maybe_unused]]; + switch (tid) { + case type_id::TIMESTAMP_SECONDS: + case type_id::TIMESTAMP_MILLISECONDS: { + printf("\nGERA_DEBUG INT96 gpuEncodePages convert_nanoseconds TIMESTAMP_(MILLI)SECONDS\n"); + gregorian_days = floor(duration_ms{v}); + time_of_day = duration_cast(duration_ms(v) - gregorian_days); + } break; + case type_id::TIMESTAMP_MICROSECONDS: + case type_id::TIMESTAMP_NANOSECONDS: { + printf("\nGERA_DEBUG INT96 gpuEncodePages convert_nanoseconds TIMESTAMP_(MICRO/NANO)SECONDS\n"); + gregorian_days = floor(duration_us{v}); + time_of_day = duration_cast(duration_us(v) - gregorian_days); + } break; + } + + // auto const last_day_ticks = nanosecond_ticks - gregorian_days; + return {time_of_day, gregorian_days + julian_calendar_epoch_diff_in_days()}; } @@ -1232,9 +1241,7 @@ __global__ void __launch_bounds__(128, 8) } break; case INT96: { int64_t v = s->col.leaf_column->element(val_idx); - uint64_t gera_uv = s->col.leaf_column->element(val_idx); - printf("\nGERA_DEBUG INT96 gpuEncodePages v=%ld uv=%lu\n", v, gera_uv); - + uint64_t gera_uv = s->col.leaf_column->element(val_idx); int32_t ts_scale = s->col.ts_scale; if (ts_scale != 0) { if (ts_scale < 0) { @@ -1243,23 +1250,10 @@ __global__ void __launch_bounds__(128, 8) v *= ts_scale; } } + printf("\nGERA_DEBUG INT96 gpuEncodePages scale=%d v=%ld orig_uv=%lu\n", + ts_scale, v, gera_uv); - auto const ret = convert_nanoseconds([&]() { - switch (s->col.leaf_column->type().id()) { - case type_id::TIMESTAMP_SECONDS: - case type_id::TIMESTAMP_MILLISECONDS: { - printf("\nGERA_DEBUG INT96 gpuEncodePages convert_nanoseconds TIMESTAMP_(MILLI)SECONDS\n"); - return timestamp_ns{duration_ms{v}}; - } break; - case type_id::TIMESTAMP_MICROSECONDS: - case type_id::TIMESTAMP_NANOSECONDS: { - printf("\nGERA_DEBUG INT96 gpuEncodePages convert_nanoseconds TIMESTAMP_(MICRO/NANO)SECONDS\n"); - return timestamp_ns{duration_us{v}}; - } break; - } - printf("\nGERA_DEBUG INT96 gpuEncodePages convert_nanoseconds return\n"); - return timestamp_ns{duration_ns{0}}; - }()); + auto const ret = convert_nanoseconds(s->col.leaf_column->type().id(), v); // the 12 bytes of fixed length data. v = ret.first.count(); From a34239cdc31d3c566af4b488296da1e304c9696b Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 26 Jul 2023 08:02:50 -0700 Subject: [PATCH 06/22] revert gera --- cpp/src/io/parquet/page_data.cu | 2 -- cpp/src/io/parquet/page_enc.cu | 5 ----- cpp/src/io/parquet/writer_impl.cu | 4 ---- java/src/main/native/src/TableJni.cpp | 2 -- 4 files changed, 13 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 4217da44560..a870d973dc1 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -148,8 +148,6 @@ inline __device__ void gpuOutputInt96Timestamp(volatile page_state_s* s, uint8_t const* src8; uint32_t dict_pos, dict_size = s->dict_size, ofs; - printf("\nGERA_DEBUG __device__ void gpuOutputInt96Timestamp\n"); - if (s->dict_base) { // Dictionary dict_pos = (s->dict_bits > 0) ? sb->dict_idx[rolling_index(src_pos)] : 0; diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 7b324d2fce8..b92d9f7b68a 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -948,13 +948,11 @@ static __device__ std::pair convert_nanoseconds(type_id switch (tid) { case type_id::TIMESTAMP_SECONDS: case type_id::TIMESTAMP_MILLISECONDS: { - printf("\nGERA_DEBUG INT96 gpuEncodePages convert_nanoseconds TIMESTAMP_(MILLI)SECONDS\n"); gregorian_days = floor(duration_ms{v}); time_of_day = duration_cast(duration_ms(v) - gregorian_days); } break; case type_id::TIMESTAMP_MICROSECONDS: case type_id::TIMESTAMP_NANOSECONDS: { - printf("\nGERA_DEBUG INT96 gpuEncodePages convert_nanoseconds TIMESTAMP_(MICRO/NANO)SECONDS\n"); gregorian_days = floor(duration_us{v}); time_of_day = duration_cast(duration_us(v) - gregorian_days); } break; @@ -1241,7 +1239,6 @@ __global__ void __launch_bounds__(128, 8) } break; case INT96: { int64_t v = s->col.leaf_column->element(val_idx); - uint64_t gera_uv = s->col.leaf_column->element(val_idx); int32_t ts_scale = s->col.ts_scale; if (ts_scale != 0) { if (ts_scale < 0) { @@ -1250,8 +1247,6 @@ __global__ void __launch_bounds__(128, 8) v *= ts_scale; } } - printf("\nGERA_DEBUG INT96 gpuEncodePages scale=%d v=%ld orig_uv=%lu\n", - ts_scale, v, gera_uv); auto const ret = convert_nanoseconds(s->col.leaf_column->type().id(), v); diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 51e1046ce9a..17a0a903a47 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1451,8 +1451,6 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, rmm::cuda_stream_view stream) { auto vec = table_to_linked_columns(input); - fprintf(stderr, "\nGERA_DEBUG (L%d): %d\n", __LINE__, int(input.column(0).type().id())); - auto schema_tree = construct_schema_tree(vec, table_meta, write_mode, int96_timestamps); // Construct parquet_column_views from the schema tree leaf nodes. std::vector parquet_columns; @@ -2047,7 +2045,6 @@ void writer::impl::update_compression_statistics( void writer::impl::write(table_view const& input, std::vector const& partitions) { - fprintf(stderr, "\nGERA_DEBUG io::detail:parquet::write\n"); _last_write_successful = false; CUDF_EXPECTS(not _closed, "Data has already been flushed to out and closed"); @@ -2306,7 +2303,6 @@ writer::writer(std::vector> sinks, rmm::cuda_stream_view stream) : _impl(std::make_unique(std::move(sinks), options, mode, stream)) { - fprintf(stderr, "\nGERA_DEBUG (L%d) writer::writer int96_ts=%d\n", __LINE__, options.is_enabled_int96_timestamps()); } // Destructor within this translation unit diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 68907f597b7..d6ef2a1e26c 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1673,8 +1673,6 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Table_writeParquetChunk(JNIEnv *env, JNI_NULL_CHECK(env, j_table, "null table", ); JNI_NULL_CHECK(env, j_state, "null state", ); - fprintf(stderr, "\nGERA_DEBUG (L%d) Java_ai_rapids_cudf_Table_writeParquetChunk\n", __LINE__); - using namespace cudf::io; cudf::table_view *tview_with_empty_nullmask = reinterpret_cast(j_table); cudf::table_view tview = cudf::jni::remove_validity_if_needed(tview_with_empty_nullmask); From 19555fa6920f8eccaf2996a177459513d33b320f Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 26 Jul 2023 08:05:01 -0700 Subject: [PATCH 07/22] revert CMake --- cpp/CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 48a177cdd38..d6e4fbeade5 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -647,11 +647,6 @@ set_source_files_properties( PROPERTIES COMPILE_DEFINITIONS "_FILE_OFFSET_BITS=64" ) -set_source_files_properties( - src/io/parquet/writer_impl.cu - PROPERTIES COMPILE_FLAGS "-g" -) - set_target_properties( cudf PROPERTIES BUILD_RPATH "\$ORIGIN" From 1cc14c93bf74b60712e48b840d392c7ab2af40ea Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 26 Jul 2023 13:34:43 -0700 Subject: [PATCH 08/22] Cast last day's time to nano avoiding int64 overflow Signed-off-by: Gera Shegalov --- cpp/src/io/parquet/page_enc.cu | 58 ++++++++++++++-------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index b92d9f7b68a..312f50539f6 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -932,37 +932,6 @@ constexpr auto julian_calendar_epoch_diff_in_days() return ceil(sys_days{January / 1 / 1970} - (sys_days{November / 24 / -4713} + 12h)); } -/** - * @brief Converts a timestamp_ns into a pair with nanoseconds since midnight and number of Julian - * days. Does not deal with time zones. Used by INT96 code. - * - * @param ns number of nanoseconds since epoch - * @return std::pair where nanoseconds is the number of nanoseconds - * elapsed in the day and days is the number of days from Julian epoch. - */ -static __device__ std::pair convert_nanoseconds(type_id tid, int64_t const v) -{ - using namespace cuda::std::chrono; - duration_D gregorian_days [[maybe_unused]]; - duration_ns time_of_day [[maybe_unused]]; - switch (tid) { - case type_id::TIMESTAMP_SECONDS: - case type_id::TIMESTAMP_MILLISECONDS: { - gregorian_days = floor(duration_ms{v}); - time_of_day = duration_cast(duration_ms(v) - gregorian_days); - } break; - case type_id::TIMESTAMP_MICROSECONDS: - case type_id::TIMESTAMP_NANOSECONDS: { - gregorian_days = floor(duration_us{v}); - time_of_day = duration_cast(duration_us(v) - gregorian_days); - } break; - } - - // auto const last_day_ticks = nanosecond_ticks - gregorian_days; - return {time_of_day, gregorian_days + julian_calendar_epoch_diff_in_days()}; -} - - // blockDim(128, 1, 1) template __global__ void __launch_bounds__(128, 8) @@ -1248,10 +1217,31 @@ __global__ void __launch_bounds__(128, 8) } } - auto const ret = convert_nanoseconds(s->col.leaf_column->type().id(), v); + auto const [gregorian_days, last_day_nanos] = ([&]() { + using namespace cuda::std::chrono; + switch (s->col.leaf_column->type().id()) { + case type_id::TIMESTAMP_SECONDS: + case type_id::TIMESTAMP_MILLISECONDS: { + auto const tmp_millis = duration_ms{v}; + auto const tmp_days = floor(tmp_millis); + auto const tmp_nanos = duration_cast(tmp_millis - tmp_days); + return std::pair{tmp_days, tmp_nanos}; + } break; + case type_id::TIMESTAMP_MICROSECONDS: + case type_id::TIMESTAMP_NANOSECONDS: { + auto const tmp_micros = duration_us{v}; + auto const tmp_days = floor(tmp_micros); + auto const tmp_nanos = duration_cast(tmp_micros - tmp_days); + return std::pair{tmp_days, tmp_nanos}; + } break; + } + return std::pair{duration_D::zero(), duration_ns::zero()}; + }()); + + auto const julian_days = gregorian_days + julian_calendar_epoch_diff_in_days(); // the 12 bytes of fixed length data. - v = ret.first.count(); + v = last_day_nanos.count(); dst[pos + 0] = v; dst[pos + 1] = v >> 8; dst[pos + 2] = v >> 16; @@ -1260,7 +1250,7 @@ __global__ void __launch_bounds__(128, 8) dst[pos + 5] = v >> 40; dst[pos + 6] = v >> 48; dst[pos + 7] = v >> 56; - uint32_t w = ret.second.count(); + uint32_t w = julian_days.count(); dst[pos + 8] = w; dst[pos + 9] = w >> 8; dst[pos + 10] = w >> 16; From edb0ede01329661a513e45bc1cb72e1ab4bf7f97 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 26 Jul 2023 14:06:06 -0700 Subject: [PATCH 09/22] clang-format Signed-off-by: Gera Shegalov --- cpp/src/io/parquet/page_enc.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 312f50539f6..bf89d433dfb 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -1223,15 +1223,15 @@ __global__ void __launch_bounds__(128, 8) case type_id::TIMESTAMP_SECONDS: case type_id::TIMESTAMP_MILLISECONDS: { auto const tmp_millis = duration_ms{v}; - auto const tmp_days = floor(tmp_millis); - auto const tmp_nanos = duration_cast(tmp_millis - tmp_days); + auto const tmp_days = floor(tmp_millis); + auto const tmp_nanos = duration_cast(tmp_millis - tmp_days); return std::pair{tmp_days, tmp_nanos}; } break; case type_id::TIMESTAMP_MICROSECONDS: case type_id::TIMESTAMP_NANOSECONDS: { auto const tmp_micros = duration_us{v}; - auto const tmp_days = floor(tmp_micros); - auto const tmp_nanos = duration_cast(tmp_micros - tmp_days); + auto const tmp_days = floor(tmp_micros); + auto const tmp_nanos = duration_cast(tmp_micros - tmp_days); return std::pair{tmp_days, tmp_nanos}; } break; } From 336ac921e85b0479e98450602792234e0ec26626 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 26 Jul 2023 22:27:50 -0700 Subject: [PATCH 10/22] reduce bloat --- cpp/src/io/parquet/page_enc.cu | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index bf89d433dfb..688ecacdbe8 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -932,6 +932,17 @@ constexpr auto julian_calendar_epoch_diff_in_days() return ceil(sys_days{January / 1 / 1970} - (sys_days{November / 24 / -4713} + 12h)); } +template __device__ +auto juilian_days_with_time(int64_t v) { + using namespace cuda::std::chrono; + auto const dur_total = duration{v}; + auto const dur_days = floor(dur_total); + auto const dur_time_of_day = dur_total - dur_days; + auto const dur_time_of_day_nanos = duration_cast(dur_time_of_day); + auto const julian_days = dur_days + julian_calendar_epoch_diff_in_days(); + return std::pair{dur_days, dur_time_of_day_nanos}; +} + // blockDim(128, 1, 1) template __global__ void __launch_bounds__(128, 8) @@ -1217,28 +1228,23 @@ __global__ void __launch_bounds__(128, 8) } } - auto const [gregorian_days, last_day_nanos] = ([&]() { + auto const [julian_days, last_day_nanos] = ([&]() { using namespace cuda::std::chrono; switch (s->col.leaf_column->type().id()) { case type_id::TIMESTAMP_SECONDS: case type_id::TIMESTAMP_MILLISECONDS: { - auto const tmp_millis = duration_ms{v}; - auto const tmp_days = floor(tmp_millis); - auto const tmp_nanos = duration_cast(tmp_millis - tmp_days); - return std::pair{tmp_days, tmp_nanos}; + return juilian_days_with_time(v); } break; case type_id::TIMESTAMP_MICROSECONDS: case type_id::TIMESTAMP_NANOSECONDS: { - auto const tmp_micros = duration_us{v}; - auto const tmp_days = floor(tmp_micros); - auto const tmp_nanos = duration_cast(tmp_micros - tmp_days); - return std::pair{tmp_days, tmp_nanos}; + return juilian_days_with_time(v); } break; } return std::pair{duration_D::zero(), duration_ns::zero()}; }()); - auto const julian_days = gregorian_days + julian_calendar_epoch_diff_in_days(); + printf("\nGERA_DEBUG julian_days=%d last_day_nanos=%lld\n", + julian_days.count(), last_day_nanos.count()); // the 12 bytes of fixed length data. v = last_day_nanos.count(); From f00e065aae744d6fbf3ea1998b937f124453289f Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 26 Jul 2023 23:45:28 -0700 Subject: [PATCH 11/22] refactor --- cpp/src/io/parquet/page_enc.cu | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 688ecacdbe8..234e75b5777 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -16,6 +16,7 @@ #include "parquet_gpu.cuh" +#include #include #include @@ -23,6 +24,7 @@ #include #include +#include #include #include @@ -932,15 +934,20 @@ constexpr auto julian_calendar_epoch_diff_in_days() return ceil(sys_days{January / 1 / 1970} - (sys_days{November / 24 / -4713} + 12h)); } -template __device__ -auto juilian_days_with_time(int64_t v) { - using namespace cuda::std::chrono; - auto const dur_total = duration{v}; - auto const dur_days = floor(dur_total); - auto const dur_time_of_day = dur_total - dur_days; - auto const dur_time_of_day_nanos = duration_cast(dur_time_of_day); - auto const julian_days = dur_days + julian_calendar_epoch_diff_in_days(); - return std::pair{dur_days, dur_time_of_day_nanos}; +template +__device__ std::pair juilian_days_with_time( + int64_t v) +{ + using namespace cuda::std::chrono; + auto const dur_total = duration{v}; + auto const dur_days = floor(dur_total); + auto const dur_time_of_day = dur_total - dur_days; + auto const dur_time_of_day_nanos = duration_cast(dur_time_of_day); + auto const julian_days = dur_days + julian_calendar_epoch_diff_in_days(); + printf("\n######\nGERA_DEBUG julian_days=%d last_day_nanos=%lld\n######\n", + julian_days.count(), + dur_time_of_day_nanos.count()); + return std::pair{dur_days, dur_time_of_day_nanos}; } // blockDim(128, 1, 1) @@ -1228,7 +1235,7 @@ __global__ void __launch_bounds__(128, 8) } } - auto const [julian_days, last_day_nanos] = ([&]() { + auto const& [julian_days, last_day_nanos] = ([&]() { using namespace cuda::std::chrono; switch (s->col.leaf_column->type().id()) { case type_id::TIMESTAMP_SECONDS: @@ -1240,12 +1247,9 @@ __global__ void __launch_bounds__(128, 8) return juilian_days_with_time(v); } break; } - return std::pair{duration_D::zero(), duration_ns::zero()}; + return juilian_days_with_time(0); }()); - printf("\nGERA_DEBUG julian_days=%d last_day_nanos=%lld\n", - julian_days.count(), last_day_nanos.count()); - // the 12 bytes of fixed length data. v = last_day_nanos.count(); dst[pos + 0] = v; From 7c9abbc3c6fbb911a4f0b84e82f6fd0b8d26a09d Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Thu, 27 Jul 2023 01:30:41 -0700 Subject: [PATCH 12/22] bug fix --- cpp/src/io/parquet/page_enc.cu | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 234e75b5777..6296c2dd473 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -16,7 +16,6 @@ #include "parquet_gpu.cuh" -#include #include #include @@ -24,7 +23,6 @@ #include #include -#include #include #include @@ -924,30 +922,34 @@ static __device__ void PlainBoolEncode(page_enc_state_s* s, * @brief Determines the difference between the Proleptic Gregorian Calendar epoch (1970-01-01 * 00:00:00 UTC) and the Julian date epoch (-4713-11-24 12:00:00 UTC). * - * @return The difference between two epochs in `cuda::std::chrono::duration` in the whole number - * of days. + * @return The difference between two epochs in `cuda::std::chrono::duration` format with a period + * of hours. */ -constexpr auto julian_calendar_epoch_diff_in_days() +constexpr auto julian_calendar_epoch_diff() { using namespace cuda::std::chrono; using namespace cuda::std::chrono_literals; - return ceil(sys_days{January / 1 / 1970} - (sys_days{November / 24 / -4713} + 12h)); + return sys_days{January / 1 / 1970} - (sys_days{November / 24 / -4713} + 12h); } +/** + * @brief Converts number of periods Per into a pair with nanoseconds since midnight + * and number of Julian days. Does not deal with time zones. Used by INT96 code. + * + * @param v int64_t count of ticks since epoch + * @return std::pair where nanoseconds is the number of nanoseconds + * elapsed in the day and days is the number of days from Julian epoch. + */ template -__device__ std::pair juilian_days_with_time( - int64_t v) +__device__ auto juilian_days_with_time(int64_t v) { using namespace cuda::std::chrono; auto const dur_total = duration{v}; auto const dur_days = floor(dur_total); auto const dur_time_of_day = dur_total - dur_days; auto const dur_time_of_day_nanos = duration_cast(dur_time_of_day); - auto const julian_days = dur_days + julian_calendar_epoch_diff_in_days(); - printf("\n######\nGERA_DEBUG julian_days=%d last_day_nanos=%lld\n######\n", - julian_days.count(), - dur_time_of_day_nanos.count()); - return std::pair{dur_days, dur_time_of_day_nanos}; + auto const julian_days = dur_days + ceil(julian_calendar_epoch_diff()); + return std::make_pair(dur_time_of_day_nanos, julian_days); } // blockDim(128, 1, 1) @@ -1235,7 +1237,7 @@ __global__ void __launch_bounds__(128, 8) } } - auto const& [julian_days, last_day_nanos] = ([&]() { + auto const& [last_day_nanos, julian_days] = ([&]() { using namespace cuda::std::chrono; switch (s->col.leaf_column->type().id()) { case type_id::TIMESTAMP_SECONDS: From 857158184a470952d658d017e0f6a5beb25a5d5b Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Sat, 29 Jul 2023 13:31:35 -0700 Subject: [PATCH 13/22] gtest --- cpp/tests/io/parquet_test.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index ff8d308318a..59b3e5d4147 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -5618,4 +5619,26 @@ TEST_F(ParquetWriterTest, NoNullsAsNonNullable) EXPECT_NO_THROW(cudf::io::write_parquet(out_opts)); } +TEST_F(ParquetWriterTest, TimestampMicrosINT96NoOverflow) +{ + using namespace cudf::io; + + // used to be corrupted by round-tripping via INT96 + // 3023-07-14T07:38:45.418688Z + column_wrapper big_ts_col{33246229125418688}; + table_view expected({big_ts_col}); + auto filepath = temp_env->get_temp_filepath("BigINT96Timestamp.parquet"); + + auto const out_opts = parquet_writer_options::builder(sink_info{filepath}, expected) + .int96_timestamps(true).build(); + write_parquet(out_opts); + + auto const in_opts = parquet_reader_options::builder(source_info(filepath)) + .timestamp_type(cudf::data_type(cudf::type_id::TIMESTAMP_MICROSECONDS)) + .build(); + auto const result = read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + CUDF_TEST_PROGRAM_MAIN() From 2c15c0eab853b66e604f3f7a7c43050c3ab51ae4 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Sat, 29 Jul 2023 13:35:38 -0700 Subject: [PATCH 14/22] clang-format --- cpp/tests/io/parquet_test.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 59b3e5d4147..76125bcf041 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -5621,24 +5621,24 @@ TEST_F(ParquetWriterTest, NoNullsAsNonNullable) TEST_F(ParquetWriterTest, TimestampMicrosINT96NoOverflow) { - using namespace cudf::io; + using namespace cudf::io; - // used to be corrupted by round-tripping via INT96 - // 3023-07-14T07:38:45.418688Z - column_wrapper big_ts_col{33246229125418688}; - table_view expected({big_ts_col}); - auto filepath = temp_env->get_temp_filepath("BigINT96Timestamp.parquet"); + // used to be corrupted by round-tripping via INT96 + // 3023-07-14T07:38:45.418688Z + column_wrapper big_ts_col{33246229125418688}; + table_view expected({big_ts_col}); + auto filepath = temp_env->get_temp_filepath("BigINT96Timestamp.parquet"); - auto const out_opts = parquet_writer_options::builder(sink_info{filepath}, expected) - .int96_timestamps(true).build(); - write_parquet(out_opts); + auto const out_opts = + parquet_writer_options::builder(sink_info{filepath}, expected).int96_timestamps(true).build(); + write_parquet(out_opts); - auto const in_opts = parquet_reader_options::builder(source_info(filepath)) - .timestamp_type(cudf::data_type(cudf::type_id::TIMESTAMP_MICROSECONDS)) - .build(); - auto const result = read_parquet(in_opts); + auto const in_opts = parquet_reader_options::builder(source_info(filepath)) + .timestamp_type(cudf::data_type(cudf::type_id::TIMESTAMP_MICROSECONDS)) + .build(); + auto const result = read_parquet(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } CUDF_TEST_PROGRAM_MAIN() From b6b65baf05c0978b4650c21f8609a103e6d83a82 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 1 Aug 2023 01:33:09 -0700 Subject: [PATCH 15/22] literal dates in test --- cpp/tests/io/parquet_test.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 72429510500..3615f1c28a9 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -6414,11 +6415,14 @@ TEST_F(ParquetReaderTest, FilterFloatNAN) TEST_F(ParquetWriterTest, TimestampMicrosINT96NoOverflow) { + using namespace cuda::std::chrono; using namespace cudf::io; - // used to be corrupted by round-tripping via INT96 - // 3023-07-14T07:38:45.418688Z - column_wrapper big_ts_col{33246229125418688}; + column_wrapper big_ts_col{ + sys_days{year{3023} / month{7} / day{14}} + 7h + 38min + 45s + 418688us, + sys_days{year{723} / month{3} / day{21}} + 14h + 20min + 13s + microseconds{781ms} + }; + table_view expected({big_ts_col}); auto filepath = temp_env->get_temp_filepath("BigINT96Timestamp.parquet"); From 5c8579087038cddccd03a727a1f9399086aaf3db Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 1 Aug 2023 02:12:36 -0700 Subject: [PATCH 16/22] clang-format --- cpp/tests/io/parquet_test.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 3615f1c28a9..4e28f536728 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include @@ -6420,8 +6419,7 @@ TEST_F(ParquetWriterTest, TimestampMicrosINT96NoOverflow) column_wrapper big_ts_col{ sys_days{year{3023} / month{7} / day{14}} + 7h + 38min + 45s + 418688us, - sys_days{year{723} / month{3} / day{21}} + 14h + 20min + 13s + microseconds{781ms} - }; + sys_days{year{723} / month{3} / day{21}} + 14h + 20min + 13s + microseconds{781ms}}; table_view expected({big_ts_col}); auto filepath = temp_env->get_temp_filepath("BigINT96Timestamp.parquet"); From 22d00bacc915df734997b3322a8eda56658887c5 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 1 Aug 2023 09:27:06 -0700 Subject: [PATCH 17/22] Apply suggestions from code review Co-authored-by: Bradley Dice --- cpp/src/io/parquet/page_enc.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 6296c2dd473..70567b2e55e 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -936,12 +936,12 @@ constexpr auto julian_calendar_epoch_diff() * @brief Converts number of periods Per into a pair with nanoseconds since midnight * and number of Julian days. Does not deal with time zones. Used by INT96 code. * - * @param v int64_t count of ticks since epoch - * @return std::pair where nanoseconds is the number of nanoseconds + * @param v count of ticks since epoch + * @return A pair of (nanoseconds, days) where nanoseconds is the number of nanoseconds * elapsed in the day and days is the number of days from Julian epoch. */ template -__device__ auto juilian_days_with_time(int64_t v) +__device__ auto julian_days_with_time(int64_t v) { using namespace cuda::std::chrono; auto const dur_total = duration{v}; @@ -1242,7 +1242,7 @@ __global__ void __launch_bounds__(128, 8) switch (s->col.leaf_column->type().id()) { case type_id::TIMESTAMP_SECONDS: case type_id::TIMESTAMP_MILLISECONDS: { - return juilian_days_with_time(v); + return julian_days_with_time(v); } break; case type_id::TIMESTAMP_MICROSECONDS: case type_id::TIMESTAMP_NANOSECONDS: { From 613112ad1125fc6f96761d17abc8aad1293ba570 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 1 Aug 2023 11:05:56 -0700 Subject: [PATCH 18/22] reviews --- cpp/src/io/parquet/page_enc.cu | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 70567b2e55e..aada23d229b 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -936,15 +936,16 @@ constexpr auto julian_calendar_epoch_diff() * @brief Converts number of periods Per into a pair with nanoseconds since midnight * and number of Julian days. Does not deal with time zones. Used by INT96 code. * + * @tparam Period_t a ratio representing the tick period in duration * @param v count of ticks since epoch * @return A pair of (nanoseconds, days) where nanoseconds is the number of nanoseconds * elapsed in the day and days is the number of days from Julian epoch. */ -template +template __device__ auto julian_days_with_time(int64_t v) { using namespace cuda::std::chrono; - auto const dur_total = duration{v}; + auto const dur_total = duration{v}; auto const dur_days = floor(dur_total); auto const dur_time_of_day = dur_total - dur_days; auto const dur_time_of_day_nanos = duration_cast(dur_time_of_day); @@ -1246,10 +1247,10 @@ __global__ void __launch_bounds__(128, 8) } break; case type_id::TIMESTAMP_MICROSECONDS: case type_id::TIMESTAMP_NANOSECONDS: { - return juilian_days_with_time(v); + return julian_days_with_time(v); } break; } - return juilian_days_with_time(0); + return julian_days_with_time(0); }()); // the 12 bytes of fixed length data. From 11973d17ffdedc6e71c187baa4681cc8eed23635 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 1 Aug 2023 11:14:59 -0700 Subject: [PATCH 19/22] Review --- cpp/src/io/parquet/page_enc.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index aada23d229b..060e2a50bbb 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -936,16 +936,16 @@ constexpr auto julian_calendar_epoch_diff() * @brief Converts number of periods Per into a pair with nanoseconds since midnight * and number of Julian days. Does not deal with time zones. Used by INT96 code. * - * @tparam Period_t a ratio representing the tick period in duration + * @tparam PeriodT a ratio representing the tick period in duration * @param v count of ticks since epoch * @return A pair of (nanoseconds, days) where nanoseconds is the number of nanoseconds * elapsed in the day and days is the number of days from Julian epoch. */ -template +template __device__ auto julian_days_with_time(int64_t v) { using namespace cuda::std::chrono; - auto const dur_total = duration{v}; + auto const dur_total = duration{v}; auto const dur_days = floor(dur_total); auto const dur_time_of_day = dur_total - dur_days; auto const dur_time_of_day_nanos = duration_cast(dur_time_of_day); From 6a046049fe43eb83646a6c44b27cd8f913068b0c Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 1 Aug 2023 11:49:17 -0700 Subject: [PATCH 20/22] Empty commit to retrigger CI. From 618b26a35976c490b463f78287dd74d149f790e2 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 1 Aug 2023 14:12:26 -0700 Subject: [PATCH 21/22] Apply suggestions from code review Co-authored-by: MithunR --- cpp/src/io/parquet/page_enc.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 060e2a50bbb..8a731d2706d 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -933,7 +933,7 @@ constexpr auto julian_calendar_epoch_diff() } /** - * @brief Converts number of periods Per into a pair with nanoseconds since midnight + * @brief Converts number `v` of periods of type `PeriodT` into a pair with nanoseconds since midnight * and number of Julian days. Does not deal with time zones. Used by INT96 code. * * @tparam PeriodT a ratio representing the tick period in duration From 9c783cd969f331195d6249773663c137fe2394dd Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 1 Aug 2023 15:09:04 -0700 Subject: [PATCH 22/22] review --- cpp/src/io/parquet/page_enc.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 8a731d2706d..05f8bba7477 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -933,8 +933,8 @@ constexpr auto julian_calendar_epoch_diff() } /** - * @brief Converts number `v` of periods of type `PeriodT` into a pair with nanoseconds since midnight - * and number of Julian days. Does not deal with time zones. Used by INT96 code. + * @brief Converts number `v` of periods of type `PeriodT` into a pair with nanoseconds since + * midnight and number of Julian days. Does not deal with time zones. Used by INT96 code. * * @tparam PeriodT a ratio representing the tick period in duration * @param v count of ticks since epoch @@ -1238,7 +1238,7 @@ __global__ void __launch_bounds__(128, 8) } } - auto const& [last_day_nanos, julian_days] = ([&]() { + auto const [last_day_nanos, julian_days] = [&] { using namespace cuda::std::chrono; switch (s->col.leaf_column->type().id()) { case type_id::TIMESTAMP_SECONDS: @@ -1251,7 +1251,7 @@ __global__ void __launch_bounds__(128, 8) } break; } return julian_days_with_time(0); - }()); + }(); // the 12 bytes of fixed length data. v = last_day_nanos.count();