Skip to content

Commit

Permalink
Add normalization rules for time zone offsets
Browse files Browse the repository at this point in the history
Summary:
When resolving time zone offsets, minutes or the ':' separator may be
omitted (check inline code comments).

Reviewed By: amitkdutta

Differential Revision: D60432366
  • Loading branch information
pedroerp authored and facebook-github-bot committed Jul 30, 2024
1 parent c8ac8f7 commit 7213b74
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 0 deletions.
4 changes: 4 additions & 0 deletions velox/expression/tests/CastExprTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,8 @@ TEST_F(CastExprTest, stringToTimestamp) {
"1970-01-01 00:00:00",
"2000-01-01 12:21:56",
"1970-01-01 00:00:00-02:00",
"1970-01-01 00:00:00 +02",
"1970-01-01 00:00:00 -0101",
std::nullopt,
};
std::vector<std::optional<Timestamp>> expected{
Expand All @@ -574,6 +576,8 @@ TEST_F(CastExprTest, stringToTimestamp) {
Timestamp(0, 0),
Timestamp(946729316, 0),
Timestamp(7200, 0),
Timestamp(-7200, 0),
Timestamp(3660, 0),
std::nullopt,
};
testCast<std::string, Timestamp>("timestamp", input, expected);
Expand Down
6 changes: 6 additions & 0 deletions velox/type/tests/TimestampConversionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,12 @@ TEST(DateTimeUtilTest, fromTimestampWithTimezoneString) {
EXPECT_EQ(
parseTimestampWithTimezone("1970-01-01 00:00:00+13:36"),
std::make_pair(Timestamp(0, 0), tz::getTimeZoneID("+13:36")));
EXPECT_EQ(
parseTimestampWithTimezone("1970-01-01 00:00:00 -11"),
std::make_pair(Timestamp(0, 0), tz::getTimeZoneID("-11:00")));
EXPECT_EQ(
parseTimestampWithTimezone("1970-01-01 00:00:00 +0000"),
std::make_pair(Timestamp(0, 0), tz::getTimeZoneID("+00:00")));

EXPECT_EQ(
parseTimestampWithTimezone("1970-01-01 00:00:00Z"),
Expand Down
33 changes: 33 additions & 0 deletions velox/type/tz/TimeZoneMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ inline bool startsWith(std::string_view str, const char* prefix) {
return str.rfind(prefix, 0) == 0;
}

// Cheap shape check for offset-style time zone strings: returns true when
// `str` holds at least three characters and begins with a sign ('+' or '-').
// The characters after the sign are not inspected here.
inline bool isTimeZoneOffset(std::string_view str) {
  if (str.size() < 3) {
    return false;
  }
  const char sign = str.front();
  return sign == '+' || sign == '-';
}

// The timezone parsing logic follows what is defined here:
// https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
inline bool isUtcEquivalentName(std::string_view zone) {
Expand All @@ -118,7 +122,36 @@ inline bool isUtcEquivalentName(std::string_view zone) {
return utcSet.find(zone) != utcSet.end();
}

// Rewrites an abbreviated time zone offset into the "(+/-)HH:MM" form. Two
// shapes are recognized:
//
// 1. Hours only: the minutes are taken to be zero
//    (e.g. "+00" -> "+00:00").
//
// 2. Hours and minutes with the ':' separator missing: the separator is
//    inserted (e.g. "+0000" -> "+00:00").
//
// Any other input is returned unchanged. The first character is not
// inspected; the caller is expected to pass a string that starts with '+' or
// '-'.
std::string normalizeTimeZoneOffset(const std::string& zoneOffset) {
  const auto length = zoneOffset.size();

  // Only "(+/-)HH" (3 chars) and "(+/-)HHMM" (5 chars) can be normalized.
  if (length != 3 && length != 5) {
    return zoneOffset;
  }

  // Everything after the sign position must be a digit.
  for (auto pos = decltype(length){1}; pos < length; ++pos) {
    if (!isDigit(zoneOffset[pos])) {
      return zoneOffset;
    }
  }

  std::string normalized = zoneOffset;
  if (length == 3) {
    normalized.append(":00");
  } else {
    // Place the separator between the hours and minutes digits.
    normalized.insert(3, 1, ':');
  }
  return normalized;
}

std::string normalizeTimeZone(const std::string& originalZoneId) {
// If this is an offset that hasn't matched, check if this is an incomplete
// offset.
if (isTimeZoneOffset(originalZoneId)) {
return normalizeTimeZoneOffset(originalZoneId);
}

// Otherwise, try other time zone name normalizations.
std::string_view zoneId = originalZoneId;
const bool startsWithEtc = startsWith(zoneId, "etc/");

Expand Down
9 changes: 9 additions & 0 deletions velox/type/tz/tests/TimeZoneMapTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,19 @@ TEST(TimeZoneMapTest, getTimeZoneID) {
// (+/-)XX:MM format.
EXPECT_EQ(840, getTimeZoneID("-00:01"));
EXPECT_EQ(0, getTimeZoneID("+00:00"));
EXPECT_EQ(0, getTimeZoneID("-00:00"));
EXPECT_EQ(454, getTimeZoneID("-06:27"));
EXPECT_EQ(541, getTimeZoneID("-05:00"));
EXPECT_EQ(1140, getTimeZoneID("+05:00"));

// Incomplete time zone offsets.
EXPECT_EQ(1140, getTimeZoneID("+05"));
EXPECT_EQ(1140, getTimeZoneID("+0500"));
EXPECT_EQ(1150, getTimeZoneID("+0510"));
EXPECT_EQ(181, getTimeZoneID("-1100"));
EXPECT_EQ(181, getTimeZoneID("-11"));
EXPECT_EQ(0, getTimeZoneID("+0000"));

EXPECT_EQ(0, getTimeZoneID("etc/GMT+0"));
EXPECT_EQ(0, getTimeZoneID("etc/GMT-0"));
EXPECT_EQ(1020, getTimeZoneID("etc/GMT-3"));
Expand Down

0 comments on commit 7213b74

Please sign in to comment.