From c8c8c36b3cbd11f9a8af4f15429382254ff83169 Mon Sep 17 00:00:00 2001 From: Pedro Eugenio Rocha Pedreira Date: Thu, 1 Aug 2024 16:39:42 -0700 Subject: [PATCH] Remove old time zone ID based API from Timestamp (#10632) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/10632 With the recent refactor, all conversions can now be done through the TimeZone pointer, so there is not need to do multiple hops of conversion from ID to name, from name to time zone object. Removing the legacy APIs and cleaning up callsites. Differential Revision: D60477527 --- velox/expression/PrestoCastHooks.cpp | 4 +- velox/functions/prestosql/DateTimeFunctions.h | 46 ++++++------ .../prestosql/tests/DateTimeFunctionsTest.cpp | 6 +- .../types/TimestampWithTimeZoneType.cpp | 35 ++++----- velox/functions/sparksql/DateTimeFunctions.h | 75 ++++++++----------- velox/functions/sparksql/MakeTimestamp.cpp | 28 +++---- velox/type/Timestamp.cpp | 36 --------- velox/type/Timestamp.h | 6 -- velox/type/TimestampConversion.cpp | 13 ++-- velox/type/TimestampConversion.h | 16 ++-- velox/type/tests/TimestampConversionTest.cpp | 64 ++++++++-------- velox/type/tz/TimeZoneMap.cpp | 26 ++++--- velox/type/tz/TimeZoneMap.h | 8 +- 13 files changed, 165 insertions(+), 198 deletions(-) diff --git a/velox/expression/PrestoCastHooks.cpp b/velox/expression/PrestoCastHooks.cpp index 6585be5338e0..4876ef31aa3c 100644 --- a/velox/expression/PrestoCastHooks.cpp +++ b/velox/expression/PrestoCastHooks.cpp @@ -54,8 +54,8 @@ Expected PrestoCastHooks::castStringToTimestamp( // If the parsed string has timezone information, convert the timestamp at // GMT at that time. For example, "1970-01-01 00:00:00 -00:01" is 60 seconds // at GMT. - if (result.second != -1) { - result.first.toGMT(result.second); + if (result.second != nullptr) { + result.first.toGMT(*result.second); } // If no timezone information is available in the input string, check if we diff --git a/velox/functions/prestosql/DateTimeFunctions.h b/velox/functions/prestosql/DateTimeFunctions.h index 51eb5f03d89f..bb09e7ebd3a4 100644 --- a/velox/functions/prestosql/DateTimeFunctions.h +++ b/velox/functions/prestosql/DateTimeFunctions.h @@ -116,7 +116,8 @@ struct TimestampWithTimezoneSupport { bool asGMT = false) { auto timestamp = unpackTimestampUtc(timestampWithTimezone); if (!asGMT) { - timestamp.toTimezone(unpackZoneKeyId(timestampWithTimezone)); + timestamp.toTimezone( + *tz::locateZone(unpackZoneKeyId(timestampWithTimezone))); } return timestamp; @@ -129,7 +130,7 @@ struct TimestampWithTimezoneSupport { Timestamp inputTimeStamp = this->toTimestamp(timestampWithTimezone); // Create a copy of inputTimeStamp and convert it to GMT auto gmtTimeStamp = inputTimeStamp; - gmtTimeStamp.toGMT(unpackZoneKeyId(timestampWithTimezone)); + gmtTimeStamp.toGMT(*tz::locateZone(unpackZoneKeyId(timestampWithTimezone))); // Get offset in seconds with GMT and convert to hour return (inputTimeStamp.getSeconds() - gmtTimeStamp.getSeconds()); @@ -1166,7 +1167,7 @@ struct DateTruncFunction : public TimestampWithTimezoneSupport { adjustDateTime(dateTime, unit); timestamp = Timestamp::fromMillis(Timestamp::calendarUtcToEpoch(dateTime) * 1000); - timestamp.toGMT(unpackZoneKeyId(timestampWithTimezone)); + timestamp.toGMT(*tz::locateZone(unpackZoneKeyId(timestampWithTimezone))); result = pack(timestamp, unpackZoneKeyId(timestampWithTimezone)); } @@ -1472,7 +1473,7 @@ struct FromIso8601Timestamp { const arg_type* /*input*/) { auto sessionTzName = config.sessionTimezone(); if (!sessionTzName.empty()) { - sessionTzID_ = tz::getTimeZoneID(sessionTzName); + sessionTimeZone_ = tz::locateZone(sessionTzName); } } @@ -1485,19 +1486,19 @@ struct FromIso8601Timestamp { return castResult.error(); } - auto [ts, tzID] = castResult.value(); + auto [ts, timeZone] = castResult.value(); // Input string may not contain a timezone - if so, it is interpreted in // session timezone. - if (tzID == -1) { - tzID = sessionTzID_; + if (timeZone == nullptr) { + timeZone = sessionTimeZone_; } - ts.toGMT(tzID); - result = pack(ts, tzID); + ts.toGMT(*timeZone); + result = pack(ts, timeZone->id()); return Status::OK(); } private: - int16_t sessionTzID_{0}; + const tz::TimeZone* sessionTimeZone_{tz::locateZone(0)}; // default to GMT. }; template @@ -1505,7 +1506,9 @@ struct DateParseFunction { VELOX_DEFINE_FUNCTION_TYPES(T); std::shared_ptr format_; - std::optional sessionTzID_; + + // By default, assume 0 (GMT). + const tz::TimeZone* sessionTimeZone_{tz::locateZone(0)}; bool isConstFormat_ = false; FOLLY_ALWAYS_INLINE void initialize( @@ -1521,7 +1524,7 @@ struct DateParseFunction { auto sessionTzName = config.sessionTimezone(); if (!sessionTzName.empty()) { - sessionTzID_ = tz::getTimeZoneID(sessionTzName); + sessionTimeZone_ = tz::locateZone(sessionTzName); } } @@ -1539,10 +1542,7 @@ struct DateParseFunction { return dateTimeResult.error(); } - // Since MySql format has no timezone specifier, simply check if session - // timezone was provided. If not, fallback to 0 (GMT). - int16_t timezoneId = sessionTzID_.value_or(0); - dateTimeResult->timestamp.toGMT(timezoneId); + dateTimeResult->timestamp.toGMT(*sessionTimeZone_); result = dateTimeResult->timestamp; return Status::OK(); } @@ -1623,7 +1623,7 @@ struct ParseDateTimeFunction { VELOX_DEFINE_FUNCTION_TYPES(T); std::shared_ptr format_; - std::optional sessionTzID_; + const tz::TimeZone* sessionTimeZone_{tz::locateZone(0)}; // GMT bool isConstFormat_ = false; FOLLY_ALWAYS_INLINE void initialize( @@ -1639,7 +1639,7 @@ struct ParseDateTimeFunction { auto sessionTzName = config.sessionTimezone(); if (!sessionTzName.empty()) { - sessionTzID_ = tz::getTimeZoneID(sessionTzName); + sessionTimeZone_ = tz::locateZone(sessionTzName); } } @@ -1659,11 +1659,11 @@ struct ParseDateTimeFunction { // If timezone was not parsed, fallback to the session timezone. If there's // no session timezone, fallback to 0 (GMT). - int16_t timezoneId = dateTimeResult->timezoneId != -1 - ? dateTimeResult->timezoneId - : sessionTzID_.value_or(0); - dateTimeResult->timestamp.toGMT(timezoneId); - result = pack(dateTimeResult->timestamp, timezoneId); + const auto* timeZone = dateTimeResult->timezoneId != -1 + ? tz::locateZone(dateTimeResult->timezoneId) + : sessionTimeZone_; + dateTimeResult->timestamp.toGMT(*timeZone); + result = pack(dateTimeResult->timestamp, timeZone->id()); return Status::OK(); } }; diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index 474ced3ae20d..b93b0a8fab11 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -4461,14 +4461,14 @@ TEST_F(DateTimeFunctionsTest, toISO8601Timestamp) { TEST_F(DateTimeFunctionsTest, toISO8601TimestampWithTimezone) { const auto toIso = [&](const char* timestamp, const char* timezone) { - const auto tzId = tz::getTimeZoneID(timezone); + const auto* timeZone = tz::locateZone(timezone); auto ts = parseTimestamp(timestamp); - ts.toGMT(tzId); + ts.toGMT(*timeZone); return evaluateOnce( "to_iso8601(c0)", TIMESTAMP_WITH_TIME_ZONE(), - std::make_optional(pack(ts.toMillis(), tzId))); + std::make_optional(pack(ts.toMillis(), timeZone->id()))); }; EXPECT_EQ( diff --git a/velox/functions/prestosql/types/TimestampWithTimeZoneType.cpp b/velox/functions/prestosql/types/TimestampWithTimeZoneType.cpp index 038e10c8a8cf..bf8bd4a0cbd4 100644 --- a/velox/functions/prestosql/types/TimestampWithTimeZoneType.cpp +++ b/velox/functions/prestosql/types/TimestampWithTimeZoneType.cpp @@ -23,14 +23,14 @@ namespace facebook::velox { namespace { -int64_t getTimeZoneIDFromConfig(const core::QueryConfig& config) { +const tz::TimeZone* getTimeZoneFromConfig(const core::QueryConfig& config) { const auto sessionTzName = config.sessionTimezone(); if (!sessionTzName.empty()) { - return tz::getTimeZoneID(sessionTzName); + return tz::locateZone(sessionTzName); } - return 0; + return tz::locateZone(0); // GMT } void castFromTimestamp( @@ -39,7 +39,7 @@ void castFromTimestamp( const SelectivityVector& rows, int64_t* rawResults) { const auto& config = context.execCtx()->queryCtx()->queryConfig(); - int64_t sessionTzID = getTimeZoneIDFromConfig(config); + const auto* sessionTimeZone = getTimeZoneFromConfig(config); const auto adjustTimestampToTimezone = config.adjustTimestampToTimezone(); @@ -49,9 +49,9 @@ void castFromTimestamp( // Treat TIMESTAMP as wall time in session time zone. This means that in // order to get its UTC representation we need to shift the value by the // offset of the time zone. - ts.toGMT(sessionTzID); + ts.toGMT(*sessionTimeZone); } - rawResults[row] = pack(ts.toMillis(), sessionTzID); + rawResults[row] = pack(ts.toMillis(), sessionTimeZone->id()); }); } @@ -61,17 +61,17 @@ void castFromDate( const SelectivityVector& rows, int64_t* rawResults) { const auto& config = context.execCtx()->queryCtx()->queryConfig(); - int64_t sessionTzID = getTimeZoneIDFromConfig(config); + const auto* sessionTimeZone = getTimeZoneFromConfig(config); static const int64_t kSecondsInDay = 86400; context.applyToSelectedNoThrow(rows, [&](auto row) { const auto days = inputVector.valueAt(row); Timestamp ts{days * kSecondsInDay, 0}; - if (sessionTzID != 0) { - ts.toGMT(sessionTzID); + if (sessionTimeZone != nullptr) { + ts.toGMT(*sessionTimeZone); } - rawResults[row] = pack(ts.toMillis(), sessionTzID); + rawResults[row] = pack(ts.toMillis(), sessionTimeZone->id()); }); } @@ -88,15 +88,15 @@ void castFromString( if (castResult.hasError()) { context.setStatus(row, castResult.error()); } else { - auto [ts, tzID] = castResult.value(); + auto [ts, timeZone] = castResult.value(); // Input string may not contain a timezone - if so, it is interpreted in // session timezone. - if (tzID == -1) { + if (timeZone == nullptr) { const auto& config = context.execCtx()->queryCtx()->queryConfig(); - tzID = getTimeZoneIDFromConfig(config); + timeZone = getTimeZoneFromConfig(config); } - ts.toGMT(tzID); - rawResults[row] = pack(ts.toMillis(), tzID); + ts.toGMT(*timeZone); + rawResults[row] = pack(ts.toMillis(), timeZone->id()); } }); } @@ -146,7 +146,7 @@ void castToTimestamp( auto ts = unpackTimestampUtc(timestampWithTimezone); if (!adjustTimestampToTimezone) { // Convert UTC to the given time zone. - ts.toTimezone(unpackZoneKeyId(timestampWithTimezone)); + ts.toTimezone(*tz::locateZone(unpackZoneKeyId(timestampWithTimezone))); } flatResult->set(row, ts); }); @@ -163,7 +163,8 @@ void castToDate( context.applyToSelectedNoThrow(rows, [&](auto row) { auto timestampWithTimezone = timestampVector->valueAt(row); auto timestamp = unpackTimestampUtc(timestampWithTimezone); - timestamp.toTimezone(unpackZoneKeyId(timestampWithTimezone)); + timestamp.toTimezone( + *tz::locateZone(unpackZoneKeyId(timestampWithTimezone))); const auto days = util::toDate(timestamp, nullptr); flatResult->set(row, days); diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 7b9652e586b9..a5a70e0b568b 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -163,13 +163,12 @@ struct UnixTimestampParseFunction { FOLLY_ALWAYS_INLINE bool call( int64_t& result, const arg_type& input) { - auto dateTimeResult = - format_->parse(std::string_view(input.data(), input.size())); + auto dateTimeResult = format_->parse(std::string_view(input)); // Return null if could not parse. if (dateTimeResult.hasError()) { return false; } - (*dateTimeResult).timestamp.toGMT(getTimezoneId(*dateTimeResult)); + (*dateTimeResult).timestamp.toGMT(*getTimeZone(*dateTimeResult)); result = (*dateTimeResult).timestamp.getSeconds(); return true; } @@ -178,21 +177,20 @@ struct UnixTimestampParseFunction { void setTimezone(const core::QueryConfig& config) { auto sessionTzName = config.sessionTimezone(); if (!sessionTzName.empty()) { - sessionTzID_ = tz::getTimeZoneID(sessionTzName); + sessionTimeZone_ = tz::locateZone(sessionTzName); } } - int16_t getTimezoneId(const DateTimeResult& result) { - // If timezone was not parsed, fallback to the session timezone. If there's - // no session timezone, fallback to 0 (GMT). - return result.timezoneId != -1 ? result.timezoneId - : sessionTzID_.value_or(0); + const tz::TimeZone* getTimeZone(const DateTimeResult& result) { + // If timezone was not parsed, fallback to the session timezone. + return result.timezoneId != -1 ? tz::locateZone(result.timezoneId) + : sessionTimeZone_; } // Default if format is not specified, as per Spark documentation. constexpr static std::string_view kDefaultFormat_{"yyyy-MM-dd HH:mm:ss"}; std::shared_ptr format_; - std::optional sessionTzID_; + const tz::TimeZone* sessionTimeZone_{tz::locateZone(0)}; // fallback to GMT. }; template @@ -242,7 +240,7 @@ struct UnixTimestampParseWithFormatFunction if (dateTimeResult.hasError()) { return false; } - (*dateTimeResult).timestamp.toGMT(this->getTimezoneId(*dateTimeResult)); + (*dateTimeResult).timestamp.toGMT(*this->getTimeZone(*dateTimeResult)); result = (*dateTimeResult).timestamp.getSeconds(); return true; } @@ -307,8 +305,7 @@ struct ToUtcTimestampFunction { const arg_type* /*input*/, const arg_type* timezone) { if (timezone) { - tzID_ = tz::getTimeZoneID( - std::string_view((*timezone).data(), (*timezone).size()), false); + timeZone_ = tz::locateZone(std::string_view(*timezone), false); } } @@ -317,17 +314,16 @@ struct ToUtcTimestampFunction { const arg_type& timestamp, const arg_type& timezone) { result = timestamp; - auto fromTimezoneID = tzID_ - ? tzID_.value() - : tz::getTimeZoneID( - std::string_view(timezone.data(), timezone.size()), false); - VELOX_USER_CHECK_NE( - fromTimezoneID, -1, "Unknown time zone: '{}'", timezone); - result.toGMT(fromTimezoneID); + const auto* fromTimezone = timeZone_ != nullptr + ? timeZone_ + : tz::locateZone(std::string_view(timezone), false); + VELOX_USER_CHECK_NOT_NULL( + fromTimezone, "Unknown time zone: '{}'", timezone); + result.toGMT(*fromTimezone); } private: - std::optional tzID_{std::nullopt}; + const tz::TimeZone* timeZone_{nullptr}; }; template @@ -340,8 +336,7 @@ struct FromUtcTimestampFunction { const arg_type* /*input*/, const arg_type* timezone) { if (timezone) { - tzID_ = tz::getTimeZoneID( - std::string_view((*timezone).data(), (*timezone).size()), false); + timeZone_ = tz::locateZone(std::string_view(*timezone), false); } } @@ -350,16 +345,15 @@ struct FromUtcTimestampFunction { const arg_type& timestamp, const arg_type& timezone) { result = timestamp; - auto toTimezoneID = tzID_ - ? tzID_.value() - : tz::getTimeZoneID( - std::string_view(timezone.data(), timezone.size()), false); - VELOX_USER_CHECK_NE(toTimezoneID, -1, "Unknown time zone: '{}'", timezone); - result.toTimezone(toTimezoneID); + const auto* toTimeZone = timeZone_ != nullptr + ? timeZone_ + : tz::locateZone(std::string_view(timezone), false); + VELOX_USER_CHECK_NOT_NULL(toTimeZone, "Unknown time zone: '{}'", timezone); + result.toTimezone(*toTimeZone); } private: - std::optional tzID_{std::nullopt}; + const tz::TimeZone* timeZone_{nullptr}; }; /// Converts date string to Timestmap type. @@ -374,11 +368,10 @@ struct GetTimestampFunction { const arg_type* format) { auto sessionTimezoneName = config.sessionTimezone(); if (!sessionTimezoneName.empty()) { - sessionTimezoneId_ = tz::getTimeZoneID(sessionTimezoneName); + sessionTimeZone_ = tz::locateZone(sessionTimezoneName); } if (format != nullptr) { - formatter_ = buildJodaDateTimeFormatter( - std::string_view(format->data(), format->size())); + formatter_ = buildJodaDateTimeFormatter(std::string_view(*format)); isConstantTimeFormat_ = true; } } @@ -388,31 +381,29 @@ struct GetTimestampFunction { const arg_type& input, const arg_type& format) { if (!isConstantTimeFormat_) { - formatter_ = buildJodaDateTimeFormatter( - std::string_view(format.data(), format.size())); + formatter_ = buildJodaDateTimeFormatter(std::string_view(format)); } - auto dateTimeResult = - formatter_->parse(std::string_view(input.data(), input.size())); + auto dateTimeResult = formatter_->parse(std::string_view(input)); // Null as result for parsing error. if (dateTimeResult.hasError()) { return false; } - (*dateTimeResult).timestamp.toGMT(getTimezoneId(*dateTimeResult)); + (*dateTimeResult).timestamp.toGMT(*getTimeZone(*dateTimeResult)); result = (*dateTimeResult).timestamp; return true; } private: - int16_t getTimezoneId(const DateTimeResult& result) const { + const tz::TimeZone* getTimeZone(const DateTimeResult& result) const { // If timezone was not parsed, fallback to the session timezone. If there's // no session timezone, fallback to 0 (GMT). - return result.timezoneId != -1 ? result.timezoneId - : sessionTimezoneId_.value_or(0); + return result.timezoneId != -1 ? tz::locateZone(result.timezoneId) + : sessionTimeZone_; } std::shared_ptr formatter_{nullptr}; bool isConstantTimeFormat_{false}; - std::optional sessionTimezoneId_; + const tz::TimeZone* sessionTimeZone_{tz::locateZone(0)}; // default to GMT. }; template diff --git a/velox/functions/sparksql/MakeTimestamp.cpp b/velox/functions/sparksql/MakeTimestamp.cpp index ddfbe8df782a..b2904d25540e 100644 --- a/velox/functions/sparksql/MakeTimestamp.cpp +++ b/velox/functions/sparksql/MakeTimestamp.cpp @@ -72,10 +72,10 @@ void setTimestampOrNull( std::optional timestamp, DecodedVector* timeZoneVector, FlatVector* result) { - const auto timeZone = timeZoneVector->valueAt(row); - const auto tzID = tz::getTimeZoneID(std::string_view(timeZone)); + const auto timeZoneName = timeZoneVector->valueAt(row); + const auto* timeZone = tz::locateZone(std::string_view(timeZoneName)); if (timestamp.has_value()) { - (*timestamp).toGMT(tzID); + timestamp->toGMT(*timeZone); result->set(row, *timestamp); } else { result->setNull(row, true); @@ -85,10 +85,10 @@ void setTimestampOrNull( void setTimestampOrNull( int32_t row, std::optional timestamp, - int64_t tzID, + const tz::TimeZone* timeZone, FlatVector* result) { if (timestamp.has_value()) { - (*timestamp).toGMT(tzID); + timestamp->toGMT(*timeZone); result->set(row, *timestamp); } else { result->setNull(row, true); @@ -97,7 +97,8 @@ void setTimestampOrNull( class MakeTimestampFunction : public exec::VectorFunction { public: - MakeTimestampFunction(int64_t sessionTzID) : sessionTzID_(sessionTzID) {} + MakeTimestampFunction(const tz::TimeZone* sessionTimeZone) + : sessionTimeZone_(sessionTimeZone) {} void apply( const SelectivityVector& rows, @@ -122,9 +123,9 @@ class MakeTimestampFunction : public exec::VectorFunction { if (args[6]->isConstantEncoding()) { auto tz = args[6]->asUnchecked>()->valueAt(0); - int64_t constantTzID; + const tz::TimeZone* constantTimeZone = nullptr; try { - constantTzID = tz::getTimeZoneID(std::string_view(tz)); + constantTimeZone = tz::locateZone(std::string_view(tz)); } catch (const VeloxException&) { context.setErrors(rows, std::current_exception()); return; @@ -132,7 +133,8 @@ class MakeTimestampFunction : public exec::VectorFunction { rows.applyToSelected([&](vector_size_t row) { auto timestamp = makeTimeStampFromDecodedArgs( row, year, month, day, hour, minute, micros); - setTimestampOrNull(row, timestamp, constantTzID, resultFlatVector); + setTimestampOrNull( + row, timestamp, constantTimeZone, resultFlatVector); }); } else { auto* timeZone = decodedArgs.at(6); @@ -147,7 +149,7 @@ class MakeTimestampFunction : public exec::VectorFunction { rows.applyToSelected([&](vector_size_t row) { auto timestamp = makeTimeStampFromDecodedArgs( row, year, month, day, hour, minute, micros); - setTimestampOrNull(row, timestamp, sessionTzID_, resultFlatVector); + setTimestampOrNull(row, timestamp, sessionTimeZone_, resultFlatVector); }); } } @@ -179,7 +181,7 @@ class MakeTimestampFunction : public exec::VectorFunction { } private: - const int64_t sessionTzID_; + const tz::TimeZone* sessionTimeZone_; }; std::shared_ptr createMakeTimestampFunction( @@ -190,7 +192,7 @@ std::shared_ptr createMakeTimestampFunction( VELOX_USER_CHECK( !sessionTzName.empty(), "make_timestamp requires session time zone to be set.") - const auto sessionTzID = tz::getTimeZoneID(sessionTzName); + const auto* sessionTimeZone = tz::locateZone(sessionTzName); const auto& secondsType = inputArgs[5].type; VELOX_USER_CHECK( @@ -204,7 +206,7 @@ std::shared_ptr createMakeTimestampFunction( "Seconds fraction must have 6 digits for microseconds but got {}", secondsScale); - return std::make_shared(sessionTzID); + return std::make_shared(sessionTimeZone); } } // namespace diff --git a/velox/type/Timestamp.cpp b/velox/type/Timestamp.cpp index 6bcf2e41114a..deb229fbd73e 100644 --- a/velox/type/Timestamp.cpp +++ b/velox/type/Timestamp.cpp @@ -21,20 +21,6 @@ #include "velox/type/tz/TimeZoneMap.h" namespace facebook::velox { -namespace { - -// Assuming tzID is in [1, 1680] range. -// tzID - PrestoDB time zone ID. -inline int64_t getPrestoTZOffsetInSeconds(int16_t tzID) { - // TODO(spershin): Maybe we need something better if we can (we could use - // precomputed vector for PrestoDB timezones, for instance). - - // PrestoDb time zone ids require some custom code. - // Mapping is 1-based and covers [-14:00, +14:00] range without 00:00. - return ((tzID <= 840) ? (tzID - 841) : (tzID - 840)) * 60; -} - -} // namespace // static Timestamp Timestamp::fromDaysAndNanos(int32_t days, int64_t nanos) { @@ -74,17 +60,6 @@ void Timestamp::toGMT(const tz::TimeZone& zone) { seconds_ = sysSeconds.count(); } -void Timestamp::toGMT(int16_t tzID) { - if (tzID == 0) { - // No conversion required for time zone id 0, as it is '+00:00'. - } else if (tzID <= 1680) { - seconds_ -= getPrestoTZOffsetInSeconds(tzID); - } else { - // Other ids go this path. - toGMT(*tz::locateZone(tz::getTimeZoneName(tzID))); - } -} - std::chrono::time_point Timestamp::toTimePointMs(bool allowOverflow) const { using namespace std::chrono; @@ -105,17 +80,6 @@ void Timestamp::toTimezone(const tz::TimeZone& zone) { } } -void Timestamp::toTimezone(int16_t tzID) { - if (tzID == 0) { - // No conversion required for time zone id 0, as it is '+00:00'. - } else if (tzID <= 1680) { - seconds_ += getPrestoTZOffsetInSeconds(tzID); - } else { - // Other ids go this path. - toTimezone(*tz::locateZone(tz::getTimeZoneName(tzID))); - } -} - const tz::TimeZone& Timestamp::defaultTimezone() { static const tz::TimeZone* kDefault = ({ // TODO: We are hard-coding PST/PDT here to be aligned with the current diff --git a/velox/type/Timestamp.h b/velox/type/Timestamp.h index 2e6c330c6c5b..44117bc7886b 100644 --- a/velox/type/Timestamp.h +++ b/velox/type/Timestamp.h @@ -345,9 +345,6 @@ struct Timestamp { // ts.toString(); // returns January 1, 1970 08:00:00 void toGMT(const tz::TimeZone& zone); - // Same as above, but accepts PrestoDB time zone ID. - void toGMT(int16_t tzID); - /// Assuming the timestamp represents a GMT time, converts it to the time at /// the same moment at zone. For example: /// @@ -356,9 +353,6 @@ struct Timestamp { /// ts.toString(); // returns December 31, 1969 16:00:00 void toTimezone(const tz::TimeZone& zone); - // Same as above, but accepts PrestoDB time zone ID. - void toTimezone(int16_t tzID); - /// A default time zone that is same across the process. static const tz::TimeZone& defaultTimezone(); diff --git a/velox/type/TimestampConversion.cpp b/velox/type/TimestampConversion.cpp index 05a21d4bb1d3..5f14dc6120d8 100644 --- a/velox/type/TimestampConversion.cpp +++ b/velox/type/TimestampConversion.cpp @@ -720,7 +720,8 @@ fromTimestampString(const char* str, size_t len, TimestampParseMode parseMode) { return resultTimestamp; } -Expected> fromTimestampWithTimezoneString( +Expected> +fromTimestampWithTimezoneString( const char* str, size_t len, TimestampParseMode parseMode) { @@ -731,7 +732,7 @@ Expected> fromTimestampWithTimezoneString( return folly::makeUnexpected(parserError(str, len)); } - int16_t timezoneID = -1; + const tz::TimeZone* timeZone = nullptr; if (pos < len && characterIsSpace(str[pos])) { pos++; @@ -752,11 +753,11 @@ Expected> fromTimestampWithTimezoneString( timezonePos++; } - std::string_view timezone(str + pos, timezonePos - pos); + std::string_view timeZoneName(str + pos, timezonePos - pos); - if ((timezoneID = tz::getTimeZoneID(timezone, false)) == -1) { + if ((timeZone = tz::locateZone(timeZoneName, false)) == nullptr) { return folly::makeUnexpected( - Status::UserError("Unknown timezone value: \"{}\"", timezone)); + Status::UserError("Unknown timezone value: \"{}\"", timeZoneName)); } // Skip any spaces at the end. @@ -769,7 +770,7 @@ Expected> fromTimestampWithTimezoneString( return folly::makeUnexpected(parserError(str, len)); } } - return std::make_pair(resultTimestamp, timezoneID); + return std::make_pair(resultTimestamp, timeZone); } int32_t toDate(const Timestamp& timestamp, const tz::TimeZone* timeZone_) { diff --git a/velox/type/TimestampConversion.h b/velox/type/TimestampConversion.h index bd17f42127df..85088c2a4758 100644 --- a/velox/type/TimestampConversion.h +++ b/velox/type/TimestampConversion.h @@ -20,6 +20,10 @@ #include "velox/common/base/Status.h" #include "velox/type/Timestamp.h" +namespace facebook::velox::tz { +class TimeZone; +} + namespace facebook::velox::util { constexpr const int32_t kHoursPerDay{24}; @@ -195,22 +199,24 @@ inline Expected fromTimestampString( /// /// This is a timezone-aware version of the function above /// `fromTimestampString()` which returns both the parsed timestamp and the -/// timezone ID. It is up to the client to do the expected conversion based on -/// these two values. +/// TimeZone pointer. It is up to the client to do the expected conversion based +/// on these two values. /// /// The timezone information at the end of the string may contain a timezone /// name (as defined in velox/type/tz/*), such as "UTC" or /// "America/Los_Angeles", or a timezone offset, like "+06:00" or "-09:30". The /// white space between the hour definition and timestamp is optional. /// -/// -1 means no timezone information was found. Returns Unexpected with +/// `nullptr` means no timezone information was found. Returns Unexpected with /// UserError status in case of parsing errors. -Expected> fromTimestampWithTimezoneString( +Expected> +fromTimestampWithTimezoneString( const char* buf, size_t len, TimestampParseMode parseMode); -inline Expected> fromTimestampWithTimezoneString( +inline Expected> +fromTimestampWithTimezoneString( const StringView& str, TimestampParseMode parseMode) { return fromTimestampWithTimezoneString(str.data(), str.size(), parseMode); diff --git a/velox/type/tests/TimestampConversionTest.cpp b/velox/type/tests/TimestampConversionTest.cpp index 61dfa5007440..93d30ca43234 100644 --- a/velox/type/tests/TimestampConversionTest.cpp +++ b/velox/type/tests/TimestampConversionTest.cpp @@ -40,7 +40,7 @@ int32_t parseDate(const StringView& str, ParseMode mode) { }); } -std::pair parseTimestampWithTimezone( +std::pair parseTimestampWithTimezone( const StringView& str, TimestampParseMode parseMode = TimestampParseMode::kPrestoCast) { return fromTimestampWithTimezoneString(str.data(), str.size(), parseMode) @@ -248,35 +248,35 @@ TEST(DateTimeUtilTest, fromTimestampStringInvalid) { TEST(DateTimeUtilTest, fromTimestampWithTimezoneString) { // -1 means no timezone information. - auto expected = std::make_pair(Timestamp(0, 0), -1); + auto expected = + std::make_pair(Timestamp(0, 0), nullptr); EXPECT_EQ(parseTimestampWithTimezone("1970-01-01 00:00:00"), expected); // Test timezone offsets. EXPECT_EQ( parseTimestampWithTimezone("1970-01-01 00:00:00 -02:00"), - std::make_pair(Timestamp(0, 0), tz::getTimeZoneID("-02:00"))); + std::make_pair(Timestamp(0, 0), tz::locateZone("-02:00"))); EXPECT_EQ( parseTimestampWithTimezone("1970-01-01 00:00:00+13:36"), - std::make_pair(Timestamp(0, 0), tz::getTimeZoneID("+13:36"))); + std::make_pair(Timestamp(0, 0), tz::locateZone("+13:36"))); EXPECT_EQ( parseTimestampWithTimezone("1970-01-01 00:00:00 -11"), - std::make_pair(Timestamp(0, 0), tz::getTimeZoneID("-11:00"))); + std::make_pair(Timestamp(0, 0), tz::locateZone("-11:00"))); EXPECT_EQ( parseTimestampWithTimezone("1970-01-01 00:00:00 +0000"), - std::make_pair(Timestamp(0, 0), tz::getTimeZoneID("+00:00"))); + std::make_pair(Timestamp(0, 0), tz::locateZone("+00:00"))); EXPECT_EQ( parseTimestampWithTimezone("1970-01-01 00:00:00Z"), - std::make_pair(Timestamp(0, 0), tz::getTimeZoneID("UTC"))); + std::make_pair(Timestamp(0, 0), tz::locateZone("UTC"))); EXPECT_EQ( parseTimestampWithTimezone("1970-01-01 00:01:00 UTC"), - std::make_pair(Timestamp(60, 0), tz::getTimeZoneID("UTC"))); + std::make_pair(Timestamp(60, 0), tz::locateZone("UTC"))); EXPECT_EQ( parseTimestampWithTimezone("1970-01-01 00:00:01 America/Los_Angeles"), - std::make_pair( - Timestamp(1, 0), tz::getTimeZoneID("America/Los_Angeles"))); + std::make_pair(Timestamp(1, 0), tz::locateZone("America/Los_Angeles"))); } TEST(DateTimeUtilTest, toGMT) { @@ -371,7 +371,7 @@ TEST(DateTimeUtilTest, toTimezone) { TEST(DateTimeUtilTest, toGMTFromID) { // The GMT time when LA gets to "1970-01-01 00:00:00" (8h ahead). auto ts = parseTimestamp("1970-01-01 00:00:00"); - ts.toGMT(tz::getTimeZoneID("America/Los_Angeles")); + ts.toGMT(*tz::locateZone("America/Los_Angeles")); EXPECT_EQ(ts, parseTimestamp("1970-01-01 08:00:00")); // Set on a random date/time and try some variations. @@ -379,56 +379,56 @@ TEST(DateTimeUtilTest, toGMTFromID) { // To LA: auto tsCopy = ts; - tsCopy.toGMT(tz::getTimeZoneID("America/Los_Angeles")); + tsCopy.toGMT(*tz::locateZone("America/Los_Angeles")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 11:23:37")); // To Sao Paulo: tsCopy = ts; - tsCopy.toGMT(tz::getTimeZoneID("America/Sao_Paulo")); + tsCopy.toGMT(*tz::locateZone("America/Sao_Paulo")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 07:23:37")); // Moscow: tsCopy = ts; - tsCopy.toGMT(tz::getTimeZoneID("Europe/Moscow")); + tsCopy.toGMT(*tz::locateZone("Europe/Moscow")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 01:23:37")); // Numerical time zones: +HH:MM and -HH:MM tsCopy = ts; - tsCopy.toGMT(tz::getTimeZoneID("+14:00")); + tsCopy.toGMT(*tz::locateZone("+14:00")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-22 14:23:37")); tsCopy = ts; - tsCopy.toGMT(tz::getTimeZoneID("-14:00")); + tsCopy.toGMT(*tz::locateZone("-14:00")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 18:23:37")); tsCopy = ts; - tsCopy.toGMT(0); // "+00:00" is not in the time zone id map + tsCopy.toGMT(*tz::locateZone("+00:00")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 04:23:37")); tsCopy = ts; - tsCopy.toGMT(tz::getTimeZoneID("-00:01")); + tsCopy.toGMT(*tz::locateZone("-00:01")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 04:24:37")); tsCopy = ts; - tsCopy.toGMT(tz::getTimeZoneID("+00:01")); + tsCopy.toGMT(*tz::locateZone("+00:01")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 04:22:37")); // Probe LA's daylight savings boundary (starts at 2021-13-14 02:00am). // Before it starts, 8h offset: ts = parseTimestamp("2021-03-14 00:00:00"); - ts.toGMT(tz::getTimeZoneID("America/Los_Angeles")); + ts.toGMT(*tz::locateZone("America/Los_Angeles")); EXPECT_EQ(ts, parseTimestamp("2021-03-14 08:00:00")); // After it starts, 7h offset: ts = parseTimestamp("2021-03-15 00:00:00"); - ts.toGMT(tz::getTimeZoneID("America/Los_Angeles")); + ts.toGMT(*tz::locateZone("America/Los_Angeles")); EXPECT_EQ(ts, parseTimestamp("2021-03-15 07:00:00")); } TEST(DateTimeUtilTest, toTimezoneFromID) { // The LA time when GMT gets to "1970-01-01 00:00:00" (8h behind). auto ts = parseTimestamp("1970-01-01 00:00:00"); - ts.toTimezone(tz::getTimeZoneID("America/Los_Angeles")); + ts.toTimezone(*tz::locateZone("America/Los_Angeles")); EXPECT_EQ(ts, parseTimestamp("1969-12-31 16:00:00")); // Set on a random date/time and try some variations. @@ -436,49 +436,49 @@ TEST(DateTimeUtilTest, toTimezoneFromID) { // To LA: auto tsCopy = ts; - tsCopy.toTimezone(tz::getTimeZoneID("America/Los_Angeles")); + tsCopy.toTimezone(*tz::locateZone("America/Los_Angeles")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-22 21:23:37")); // To Sao Paulo: tsCopy = ts; - tsCopy.toTimezone(tz::getTimeZoneID("America/Sao_Paulo")); + tsCopy.toTimezone(*tz::locateZone("America/Sao_Paulo")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 01:23:37")); // Moscow: tsCopy = ts; - tsCopy.toTimezone(tz::getTimeZoneID("Europe/Moscow")); + tsCopy.toTimezone(*tz::locateZone("Europe/Moscow")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 07:23:37")); // Numerical time zones: +HH:MM and -HH:MM tsCopy = ts; - tsCopy.toTimezone(tz::getTimeZoneID("+14:00")); + tsCopy.toTimezone(*tz::locateZone("+14:00")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 18:23:37")); tsCopy = ts; - tsCopy.toTimezone(tz::getTimeZoneID("-14:00")); + tsCopy.toTimezone(*tz::locateZone("-14:00")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-22 14:23:37")); tsCopy = ts; - tsCopy.toTimezone(0); // "+00:00" is not in the time zone id map + tsCopy.toTimezone(*tz::locateZone("+00:00")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 04:23:37")); tsCopy = ts; - tsCopy.toTimezone(tz::getTimeZoneID("-00:01")); + tsCopy.toTimezone(*tz::locateZone("-00:01")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 04:22:37")); tsCopy = ts; - tsCopy.toTimezone(tz::getTimeZoneID("+00:01")); + tsCopy.toTimezone(*tz::locateZone("+00:01")); EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 04:24:37")); // Probe LA's daylight savings boundary (starts at 2021-13-14 02:00am). // Before it starts, 8h offset: ts = parseTimestamp("2021-03-14 00:00:00"); - ts.toTimezone(tz::getTimeZoneID("America/Los_Angeles")); + ts.toTimezone(*tz::locateZone("America/Los_Angeles")); EXPECT_EQ(ts, parseTimestamp("2021-03-13 16:00:00")); // After it starts, 7h offset: ts = parseTimestamp("2021-03-15 00:00:00"); - ts.toTimezone(tz::getTimeZoneID("America/Los_Angeles")); + ts.toTimezone(*tz::locateZone("America/Los_Angeles")); EXPECT_EQ(ts, parseTimestamp("2021-03-14 17:00:00")); } diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp index 995c50a1f801..8f1ee5ff68e3 100644 --- a/velox/type/tz/TimeZoneMap.cpp +++ b/velox/type/tz/TimeZoneMap.cpp @@ -228,20 +228,22 @@ void validateRange(time_point timePoint) { } std::string getTimeZoneName(int64_t timeZoneID) { + return locateZone(timeZoneID, true)->name(); +} + +const TimeZone* locateZone(int16_t timeZoneID, bool failOnError) { const auto& timeZoneDatabase = getTimeZoneDatabase(); - VELOX_CHECK_LT( - timeZoneID, - timeZoneDatabase.size(), - "Unable to resolve timeZoneID '{}'", - timeZoneID); - - // Check if timeZoneID is not one of the "holes". - VELOX_CHECK_NOT_NULL( - timeZoneDatabase[timeZoneID], - "Unable to resolve timeZoneID '{}'", - timeZoneID); - return timeZoneDatabase[timeZoneID]->name(); + // Check if timeZoneID does not exceed the vector size or is one of the + // "holes". + if (timeZoneID > timeZoneDatabase.size() || + timeZoneDatabase[timeZoneID] == nullptr) { + if (failOnError) { + VELOX_FAIL("Unable to resolve timeZoneID '{}'", timeZoneID); + } + return nullptr; + } + return timeZoneDatabase[timeZoneID].get(); } const TimeZone* locateZone(std::string_view timeZone, bool failOnError) { diff --git a/velox/type/tz/TimeZoneMap.h b/velox/type/tz/TimeZoneMap.h index 591019569e6b..4a90e75b7e82 100644 --- a/velox/type/tz/TimeZoneMap.h +++ b/velox/type/tz/TimeZoneMap.h @@ -39,12 +39,18 @@ namespace facebook::velox::tz { class TimeZone; -/// Looks up a TimeZone pointer based on a time zone name. This makes an hash +/// Returns a TimeZone pointer based on a time zone name. This makes an hash /// map access, and will construct the index on the first access. `failOnError` /// controls whether to throw or return nullptr in case the time zone was not /// found. const TimeZone* locateZone(std::string_view timeZone, bool failOnError = true); +/// Returns a TimeZone pointer based on a time zone ID. This makes a simple +/// vector access, and will construct the index on the first access. +/// `failOnError` controls whether to throw or return nullptr in case the time +/// zone ID was not valid. +const TimeZone* locateZone(int16_t timeZoneID, bool failOnError = true); + /// Returns the timezone name associated with timeZoneID. std::string getTimeZoneName(int64_t timeZoneID);