Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICU-22763 MF2: Handle time zones correctly in :datetime #3012

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
4bc0354
ICU-22763 MF2: Handle time zones correctly in :datetime
catamorphism Apr 29, 2024
fa6278e
Tests pass with new DateInfo struct
catamorphism Jun 18, 2024
8451a30
Eliminate GregorianCalendar usage
catamorphism Jun 18, 2024
b4cc119
Docs
catamorphism Jun 18, 2024
eeb534b
Remove obsolete comment
catamorphism Jun 18, 2024
8a1638b
Fix comment
catamorphism Jun 18, 2024
60a66d9
Add comments
catamorphism Jun 18, 2024
a65362f
Avoid creating time zone where not necessary
catamorphism Jun 18, 2024
3d78033
Fix doc comment
catamorphism Jun 18, 2024
804ab9e
Fix MSVC compilation errors
catamorphism Jun 18, 2024
1c68d51
Make createTimeZone() a static method instead of a method on DateInfo
catamorphism Jun 24, 2024
5d15835
Update icu4c/source/test/testdata/message2/more-functions.json
catamorphism Jul 17, 2024
82f9ddc
Add comment about meaning of calendarName
catamorphism Jul 17, 2024
34bcb51
Address review feedback
catamorphism Aug 8, 2024
197caef
Add clarifying comments to DateInfo struct; rename zoneName field to …
catamorphism Aug 8, 2024
9800253
Removed calendarName field from DateInfo
catamorphism Aug 8, 2024
2ecf406
Remove calendar name harder
catamorphism Aug 8, 2024
c63c6b5
Detect 'Z' literal correctly; add test case
catamorphism Aug 8, 2024
8d4da42
Cache DateFormat objects in the DateTimeFactory object and lazily ini…
catamorphism Aug 13, 2024
4a7da29
Add comment on hasTzOffset()
catamorphism Aug 13, 2024
da9307c
Share DateFormat objects across threads
catamorphism Sep 11, 2024
1b8e43f
Fix use-after-free in TimeZoneDelegate destructor
catamorphism Sep 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions icu4c/source/i18n/messageformat2_formattable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

#include "unicode/messageformat2_formattable.h"
#include "unicode/smpdtfmt.h"
#include "messageformat2_allocation.h"
#include "messageformat2_function_registry_internal.h"
#include "messageformat2_macros.h"

#include "limits.h"
Expand All @@ -35,7 +37,6 @@ namespace message2 {

Formattable::Formattable(const Formattable& other) {
contents = other.contents;
holdsDate = other.holdsDate;
}

Formattable Formattable::forDecimal(std::string_view number, UErrorCode &status) {
Expand All @@ -53,7 +54,7 @@ namespace message2 {

UFormattableType Formattable::getType() const {
if (std::holds_alternative<double>(contents)) {
return holdsDate ? UFMT_DATE : UFMT_DOUBLE;
return UFMT_DOUBLE;
}
if (std::holds_alternative<int64_t>(contents)) {
return UFMT_INT64;
Expand All @@ -74,6 +75,9 @@ namespace message2 {
}
}
}
if (isDate()) {
return UFMT_DATE;
}
if (std::holds_alternative<const FormattableObject*>(contents)) {
return UFMT_OBJECT;
}
Expand Down Expand Up @@ -223,14 +227,6 @@ namespace message2 {
return df.orphan();
}

void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString& result, UErrorCode& errorCode) {
CHECK_ERROR(errorCode);

LocalPointer<DateFormat> df(defaultDateTimeInstance(locale, errorCode));
CHECK_ERROR(errorCode);
df->format(date, result, 0, errorCode);
}

// Called when output is required and the contents are an unevaluated `Formattable`;
// formats the source `Formattable` to a string with defaults, if it can be
// formatted with a default formatter
Expand Down Expand Up @@ -259,9 +255,9 @@ namespace message2 {
switch (type) {
case UFMT_DATE: {
UnicodeString result;
UDate d = toFormat.getDate(status);
const DateInfo* dateInfo = toFormat.getDate(status);
U_ASSERT(U_SUCCESS(status));
formatDateWithDefaults(locale, d, result, status);
formatDateWithDefaults(locale, *dateInfo, result, status);
catamorphism marked this conversation as resolved.
Show resolved Hide resolved
return FormattedPlaceholder(input, FormattedValue(std::move(result)));
}
case UFMT_DOUBLE: {
Expand Down
251 changes: 220 additions & 31 deletions icu4c/source/i18n/messageformat2_function_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,16 @@
#include "unicode/dtptngen.h"
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/messageformat2_function_registry.h"
#include "unicode/simpletz.h"
#include "unicode/smpdtfmt.h"
#include "charstr.h"
#include "messageformat2_allocation.h"
#include "messageformat2_function_registry_internal.h"
#include "messageformat2_macros.h"
#include "hash.h"
#include "mutex.h"
#include "number_types.h"
#include "ucln_in.h"
#include "uvector.h" // U_ASSERT

#include <inttypes.h>
Expand Down Expand Up @@ -905,6 +908,195 @@ Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& loc
return result;
}

// DateFormat parsers that are shared across threads
static DateFormat* dateParser = nullptr;
static DateFormat* dateTimeParser = nullptr;
static DateFormat* dateTimeUTCParser = nullptr;
static DateFormat* dateTimeZoneParser = nullptr;
static icu::UInitOnce gMF2DateParsersInitOnce {};

// Clean up shared DateFormat objects
static UBool mf2_date_parsers_cleanup() {
if (dateParser != nullptr) {
delete dateParser;
dateParser = nullptr;
}
if (dateTimeParser != nullptr) {
delete dateTimeParser;
dateTimeParser = nullptr;
}
if (dateTimeUTCParser != nullptr) {
delete dateTimeUTCParser;
dateTimeUTCParser = nullptr;
}
if (dateTimeZoneParser != nullptr) {
delete dateTimeZoneParser;
dateTimeZoneParser = nullptr;
}
return true;
}

// Initialize DateFormat objects used for parsing date literals
static void initDateParsersOnce(UErrorCode& errorCode) {
U_ASSERT(dateParser == nullptr);
U_ASSERT(dateTimeParser == nullptr);
U_ASSERT(dateTimeUTCParser == nullptr);
U_ASSERT(dateTimeZoneParser == nullptr);

// Handles ISO 8601 date
dateParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd"), errorCode);
// Handles ISO 8601 datetime without time zone
dateTimeParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:ss"), errorCode);
// Handles ISO 8601 datetime with 'Z' to denote UTC
dateTimeUTCParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:ssZ"), errorCode);
// Handles ISO 8601 datetime with timezone offset; 'zzzz' denotes timezone offset
dateTimeZoneParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:sszzzz"), errorCode);
ucln_i18n_registerCleanup(UCLN_I18N_MF2_DATE_PARSERS, mf2_date_parsers_cleanup);
}

// Lazily initialize DateFormat objects used for parsing date literals
static void initDateParsers(UErrorCode& errorCode) {
CHECK_ERROR(errorCode);

umtx_initOnce(gMF2DateParsersInitOnce, &initDateParsersOnce, errorCode);
}

// From https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#date-and-time-operands :
// "A date/time literal value is a non-empty string consisting of an ISO 8601 date, or
// an ISO 8601 datetime optionally followed by a timezone offset."
UDate StandardFunctions::DateTime::tryPatterns(const UnicodeString& sourceStr,
UErrorCode& errorCode) const {
if (U_FAILURE(errorCode)) {
return 0;
}
// Handle ISO 8601 datetime (tryTimeZonePatterns() handles the case
// where a timezone offset follows)
if (sourceStr.length() > 10) {
return dateTimeParser->parse(sourceStr, errorCode);
}
// Handle ISO 8601 date
return dateParser->parse(sourceStr, errorCode);
}

// See comment on tryPatterns() for spec reference
UDate StandardFunctions::DateTime::tryTimeZonePatterns(const UnicodeString& sourceStr,
UErrorCode& errorCode) const {
if (U_FAILURE(errorCode)) {
return 0;
}
int32_t len = sourceStr.length();
if (len > 0 && sourceStr[len] == 'Z') {
return dateTimeUTCParser->parse(sourceStr, errorCode);
}
return dateTimeZoneParser->parse(sourceStr, errorCode);
}

static TimeZone* createTimeZone(const DateInfo& dateInfo, UErrorCode& errorCode) {
NULL_ON_ERROR(errorCode);

TimeZone* tz;
if (dateInfo.zoneId.isEmpty()) {
// Floating time value -- use default time zone
tz = TimeZone::createDefault();
} else {
tz = TimeZone::createTimeZone(dateInfo.zoneId);
}
if (tz == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
}
return tz;
}

// Returns true iff `sourceStr` ends in an offset like +03:30 or -06:00
// (This function is just used to determine whether to call tryPatterns()
// or tryTimeZonePatterns(); tryTimeZonePatterns() checks fully that the
// string matches the expected format)
static bool hasTzOffset(const UnicodeString& sourceStr) {
int32_t len = sourceStr.length();

if (len <= 6) {
return false;
}
return ((sourceStr[len - 6] == PLUS || sourceStr[len - 6] == HYPHEN)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment in the beginning of this function to explain what is the expected input value and the output

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in 4a7da29

&& sourceStr[len - 3] == COLON);
}

// Note: `calendar` option to :datetime not implemented yet;
// Gregorian calendar is assumed
DateInfo StandardFunctions::DateTime::createDateInfoFromString(const UnicodeString& sourceStr,
UErrorCode& errorCode) const {
if (U_FAILURE(errorCode)) {
return {};
}

UDate absoluteDate;

// Check if the string has a time zone part
int32_t indexOfZ = sourceStr.indexOf('Z');
int32_t indexOfPlus = sourceStr.lastIndexOf('+');
int32_t indexOfMinus = sourceStr.lastIndexOf('-');
int32_t indexOfSign = indexOfPlus > -1 ? indexOfPlus : indexOfMinus;
bool isTzOffset = hasTzOffset(sourceStr);
bool isGMT = indexOfZ > 0;
UnicodeString offsetPart;
bool hasTimeZone = isTzOffset || isGMT;

if (!hasTimeZone) {
// No time zone; parse the date and time
absoluteDate = tryPatterns(sourceStr, errorCode);
} else {
// Try to split into time zone and non-time-zone parts
UnicodeString dateTimePart;
if (isGMT) {
dateTimePart = sourceStr.tempSubString(0, indexOfZ);
} else {
dateTimePart = sourceStr.tempSubString(0, indexOfSign);
}
// Parse the date from the date/time part
tryPatterns(dateTimePart, errorCode);
// Failure -- can't parse this string
if (U_FAILURE(errorCode)) {
return {};
}
// Success -- now parse the time zone part
if (isGMT) {
dateTimePart += UnicodeString("GMT");
absoluteDate = tryTimeZonePatterns(dateTimePart, errorCode);
} else {
// Try to parse time zone in offset format: [+-]nn:nn
absoluteDate = tryTimeZonePatterns(sourceStr, errorCode);
offsetPart = sourceStr.tempSubString(indexOfSign, sourceStr.length());
}
}

// If the time zone was provided, get its canonical ID,
// in order to return it in the DateInfo
UnicodeString canonicalID;
if (hasTimeZone) {
UnicodeString tzID("GMT");
if (!isGMT) {
tzID += offsetPart;
}
TimeZone::getCanonicalID(tzID, canonicalID, errorCode);
}

return { absoluteDate, canonicalID };
}

void formatDateWithDefaults(const Locale& locale,
const DateInfo& dateInfo,
UnicodeString& result,
UErrorCode& errorCode) {
CHECK_ERROR(errorCode);

LocalPointer<DateFormat> df(defaultDateTimeInstance(locale, errorCode));
CHECK_ERROR(errorCode);

df->adoptTimeZone(createTimeZone(dateInfo, errorCode));
CHECK_ERROR(errorCode);
df->format(dateInfo.date, result, nullptr, errorCode);
}

FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat,
FunctionOptions&& opts,
UErrorCode& errorCode) const {
Expand Down Expand Up @@ -1091,44 +1283,41 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&&
const Formattable& source = toFormat.asFormattable();
switch (source.getType()) {
case UFMT_STRING: {
// Lazily initialize date parsers used for parsing date literals
initDateParsers(errorCode);
if (U_FAILURE(errorCode)) {
return {};
}

const UnicodeString& sourceStr = source.getString(errorCode);
U_ASSERT(U_SUCCESS(errorCode));
// Pattern for ISO 8601 format - datetime
UnicodeString pattern("YYYY-MM-dd'T'HH:mm:ss");
LocalPointer<DateFormat> dateParser(new SimpleDateFormat(pattern, errorCode));

DateInfo dateInfo = createDateInfoFromString(sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
errorCode = U_MF_FORMATTING_ERROR;
} else {
// Parse the date
UDate d = dateParser->parse(sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
// Pattern for ISO 8601 format - date
UnicodeString pattern("YYYY-MM-dd");
errorCode = U_ZERO_ERROR;
dateParser.adoptInstead(new SimpleDateFormat(pattern, errorCode));
if (U_FAILURE(errorCode)) {
errorCode = U_MF_FORMATTING_ERROR;
} else {
d = dateParser->parse(sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
}
}
}
// Use the parsed date as the source value
// in the returned FormattedPlaceholder; this is necessary
// so the date can be re-formatted
toFormat = FormattedPlaceholder(message2::Formattable::forDate(d),
toFormat.getFallback());
df->format(d, result, 0, errorCode);
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
return {};
}
df->adoptTimeZone(createTimeZone(dateInfo, errorCode));

// Use the parsed date as the source value
// in the returned FormattedPlaceholder; this is necessary
// so the date can be re-formatted
df->format(dateInfo.date, result, 0, errorCode);
toFormat = FormattedPlaceholder(message2::Formattable(std::move(dateInfo)),
toFormat.getFallback());
break;
}
case UFMT_DATE: {
df->format(source.asICUFormattable(errorCode), result, 0, errorCode);
if (U_FAILURE(errorCode)) {
if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
const DateInfo* dateInfo = source.getDate(errorCode);
if (U_SUCCESS(errorCode)) {
// If U_SUCCESS(errorCode), then source.getDate() returned
// a non-null pointer
df->adoptTimeZone(createTimeZone(*dateInfo, errorCode));
df->format(dateInfo->date, result, 0, errorCode);
if (U_FAILURE(errorCode)) {
if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
}
}
}
break;
Expand Down
10 changes: 8 additions & 2 deletions icu4c/source/i18n/messageformat2_function_registry_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,15 @@ namespace message2 {
const Locale& locale;
const DateTimeFactory::DateTimeType type;
friend class DateTimeFactory;
DateTime(const Locale& l, DateTimeFactory::DateTimeType t) : locale(l), type(t) {}
DateTime(const Locale& l, DateTimeFactory::DateTimeType t)
: locale(l), type(t) {}
const LocalPointer<icu::DateFormat> icuFormatter;

// Methods for parsing date literals
UDate tryPatterns(const UnicodeString&, UErrorCode&) const;
UDate tryTimeZonePatterns(const UnicodeString&, UErrorCode&) const;
DateInfo createDateInfoFromString(const UnicodeString&, UErrorCode&) const;

/*
Looks up an option by name, first checking `opts`, then the cached options
in `toFormat` if applicable, and finally using a default
Expand Down Expand Up @@ -211,7 +217,7 @@ namespace message2 {
};
};

extern void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString&, UErrorCode& errorCode);
extern void formatDateWithDefaults(const Locale& locale, const DateInfo& date, UnicodeString&, UErrorCode& errorCode);
extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, double toFormat, UErrorCode& errorCode);
extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, int32_t toFormat, UErrorCode& errorCode);
extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, int64_t toFormat, UErrorCode& errorCode);
Expand Down
Loading