Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICU-22908 MF2: Update spec tests and update implementation for recent spec changes #3198

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions icu4c/source/i18n/messageformat2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,13 +336,13 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme
CHECK_ERROR(status);
U_ASSERT(!dataModel.hasPattern());

const Expression* selectors = dataModel.getSelectorsInternal();
const VariableName* selectors = dataModel.getSelectorsInternal();
// 1. Let res be a new empty list of resolved values that support selection.
// (Implicit, since `res` is an out-parameter)
// 2. For each expression exp of the message's selectors
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
// 2i. Let rv be the resolved value of exp.
ResolvedSelector rv = formatSelectorExpression(env, selectors[i], context, status);
ResolvedSelector rv = formatSelector(env, selectors[i], context, status);
if (rv.hasSelector()) {
// 2ii. If selection is supported for rv:
// (True if this code has been reached)
Expand Down Expand Up @@ -614,7 +614,10 @@ void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCo


// Evaluate the operand
ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, const Operand& rand, MessageContext& context, UErrorCode &status) const {
ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
const Operand& rand,
MessageContext& context,
UErrorCode &status) const {
if (U_FAILURE(status)) {
return {};
}
Expand All @@ -628,7 +631,13 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, cons
}

// Must be variable
const VariableName& var = rand.asVariable();
return resolveVariables(env, rand.asVariable(), context, status);
}

ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
const VariableName& var,
MessageContext& context,
UErrorCode &status) const {
// Resolve the variable
if (env.has(var)) {
const Closure& referent = env.lookup(var);
Expand Down Expand Up @@ -691,13 +700,16 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
}
}

ResolvedSelector MessageFormatter::formatSelectorExpression(const Environment& globalEnv, const Expression& expr, MessageContext& context, UErrorCode &status) const {
ResolvedSelector MessageFormatter::formatSelector(const Environment& globalEnv,
const VariableName& var,
MessageContext& context,
UErrorCode &status) const {
if (U_FAILURE(status)) {
return {};
}

// Resolve expression to determine if it's a function call
ResolvedSelector exprResult = resolveVariables(globalEnv, expr, context, status);
ResolvedSelector exprResult = resolveVariables(globalEnv, var, context, status);

DynamicErrors& err = context.getErrors();

Expand Down
21 changes: 9 additions & 12 deletions icu4c/source/i18n/messageformat2_checker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,12 @@ TypeEnvironment::~TypeEnvironment() {}

// ---------------------

UnicodeString Checker::normalizeNFC(const Key& k) const {
Key Checker::normalizeNFC(const Key& k) const {
if (k.isWildcard()) {
return UnicodeString("*");
return k;
}
return context.normalizeNFC(k.asLiteral().unquoted());
return Key(Literal(k.asLiteral().isQuoted(),
context.normalizeNFC(k.asLiteral().unquoted())));
}

static bool areDefaultKeys(const Key* keys, int32_t len) {
Expand Down Expand Up @@ -216,18 +217,14 @@ void Checker::checkVariants(UErrorCode& status) {
}
}

void Checker::requireAnnotated(const TypeEnvironment& t, const Expression& selectorExpr, UErrorCode& status) {
void Checker::requireAnnotated(const TypeEnvironment& t,
const VariableName& selectorVar,
UErrorCode& status) {
CHECK_ERROR(status);

if (selectorExpr.isFunctionCall()) {
if (t.get(selectorVar) == TypeEnvironment::Type::Annotated) {
return; // No error
}
const Operand& rand = selectorExpr.getOperand();
if (rand.isVariable()) {
if (t.get(rand.asVariable()) == TypeEnvironment::Type::Annotated) {
return; // No error
}
}
// If this code is reached, an error was detected
errors.addError(StaticErrorType::MissingSelectorAnnotation, status);
}
Expand All @@ -237,7 +234,7 @@ void Checker::checkSelectors(const TypeEnvironment& t, UErrorCode& status) {

// Check each selector; if it's not annotated, emit a
// "missing selector annotation" error
const Expression* selectors = dataModel.getSelectorsInternal();
const VariableName* selectors = dataModel.getSelectorsInternal();
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
requireAnnotated(t, selectors[i], status);
}
Expand Down
4 changes: 2 additions & 2 deletions icu4c/source/i18n/messageformat2_checker.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ namespace message2 {
: dataModel(d), errors(e), context(mf) {}
private:

UnicodeString normalizeNFC(const Key&) const;
Key normalizeNFC(const Key&) const;

void requireAnnotated(const TypeEnvironment&, const Expression&, UErrorCode&);
void requireAnnotated(const TypeEnvironment&, const VariableName&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const Operand&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const Operator&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const OptionMap&, UErrorCode&);
Expand Down
26 changes: 13 additions & 13 deletions icu4c/source/i18n/messageformat2_data_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -693,9 +693,9 @@ Matcher::Matcher(const Matcher& other) {
numSelectors = other.numSelectors;
numVariants = other.numVariants;
UErrorCode localErrorCode = U_ZERO_ERROR;
selectors.adoptInstead(copyArray<Expression>(other.selectors.getAlias(),
numSelectors,
localErrorCode));
selectors.adoptInstead(copyArray<VariableName>(other.selectors.getAlias(),
numSelectors,
localErrorCode));
variants.adoptInstead(copyArray<Variant>(other.variants.getAlias(),
numVariants,
localErrorCode));
Expand All @@ -704,7 +704,7 @@ Matcher::Matcher(const Matcher& other) {
}
}

Matcher::Matcher(Expression* ss, int32_t ns, Variant* vs, int32_t nv)
Matcher::Matcher(VariableName* ss, int32_t ns, Variant* vs, int32_t nv)
: selectors(ss), numSelectors(ns), variants(vs), numVariants(nv) {}

Matcher::~Matcher() {}
Expand All @@ -726,7 +726,7 @@ const Binding* MFDataModel::getLocalVariablesInternal() const {
return bindings.getAlias();
}

const Expression* MFDataModel::getSelectorsInternal() const {
const VariableName* MFDataModel::getSelectorsInternal() const {
U_ASSERT(!bogus);
U_ASSERT(!hasPattern());
return std::get_if<Matcher>(&body)->selectors.getAlias();
Expand Down Expand Up @@ -788,15 +788,13 @@ MFDataModel::Builder& MFDataModel::Builder::addBinding(Binding&& b, UErrorCode&
return *this;
}

/*
selector must be non-null
*/
MFDataModel::Builder& MFDataModel::Builder::addSelector(Expression&& selector, UErrorCode& status) noexcept {
MFDataModel::Builder& MFDataModel::Builder::addSelector(VariableName&& selector,
UErrorCode& status) {
THIS_ON_ERROR(status);

buildSelectorsMessage(status);
U_ASSERT(selectors != nullptr);
selectors->adoptElement(create<Expression>(std::move(selector), status), status);
selectors->adoptElement(create<VariableName>(std::move(selector), status), status);

return *this;
}
Expand Down Expand Up @@ -832,11 +830,11 @@ MFDataModel::MFDataModel(const MFDataModel& other) : body(Pattern()) {
if (other.hasPattern()) {
body = *std::get_if<Pattern>(&other.body);
} else {
const Expression* otherSelectors = other.getSelectorsInternal();
const VariableName* otherSelectors = other.getSelectorsInternal();
const Variant* otherVariants = other.getVariantsInternal();
int32_t numSelectors = other.numSelectors();
int32_t numVariants = other.numVariants();
Expression* copiedSelectors = copyArray(otherSelectors, numSelectors, localErrorCode);
VariableName* copiedSelectors = copyArray(otherSelectors, numSelectors, localErrorCode);
Variant* copiedVariants = copyArray(otherVariants, numVariants, localErrorCode);
if (U_FAILURE(localErrorCode)) {
bogus = true;
Expand Down Expand Up @@ -865,7 +863,9 @@ MFDataModel::MFDataModel(const MFDataModel::Builder& builder, UErrorCode& errorC
int32_t numVariants = builder.variants->size();
int32_t numSelectors = builder.selectors->size();
LocalArray<Variant> variants(copyVectorToArray<Variant>(*builder.variants, errorCode), errorCode);
LocalArray<Expression> selectors(copyVectorToArray<Expression>(*builder.selectors, errorCode), errorCode);
LocalArray<VariableName> selectors(copyVectorToArray<VariableName>(*builder.selectors,
errorCode),
errorCode);
if (U_FAILURE(errorCode)) {
bogus = true;
return;
Expand Down
95 changes: 49 additions & 46 deletions icu4c/source/i18n/messageformat2_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,21 +512,13 @@ VariableName Parser::parseVariableName(UErrorCode& errorCode) {
VariableName result;

U_ASSERT(inBounds());
// If the '$' is missing, we don't want a binding
// for this variable to be created.
bool valid = peek() == DOLLAR;

parseToken(DOLLAR, errorCode);
if (!inBounds()) {
ERROR(errorCode);
return result;
}
UnicodeString varName = parseName(errorCode);
// Set the name to "" if the variable wasn't
// declared correctly
if (!valid) {
varName.remove();
}
return VariableName(varName);
return VariableName(parseName(errorCode));
}

/*
Expand Down Expand Up @@ -863,27 +855,17 @@ void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode)
parseTokenWithWhitespace(EQUALS, errorCode);

UnicodeString rhsStr;
// Parse RHS, which is either a literal or variable
switch (peek()) {
case DOLLAR: {
rand = Operand(parseVariableName(errorCode));
break;
}
default: {
// Must be a literal
rand = Operand(parseLiteral(errorCode));
break;
}
}
U_ASSERT(!rand.isNull());
// Parse RHS, which must be a literal
// attribute = "@" identifier [o "=" o literal]
rand = Operand(parseLiteral(errorCode));
} else {
// attribute -> "@" identifier [[s] "=" [s]]
// Use null operand, which `rand` is already set to
// "Backtrack" by restoring the whitespace (if there was any)
index = savedIndex;
}

attrAdder.addAttribute(lhs, std::move(rand), errorCode);
attrAdder.addAttribute(lhs, std::move(Operand(rand)), errorCode);
}

/*
Expand Down Expand Up @@ -1722,6 +1704,22 @@ Pattern Parser::parseSimpleMessage(UErrorCode& status) {
return result.build(status);
}

void Parser::parseVariant(UErrorCode& status) {
CHECK_ERROR(status);

// At least one key is required
SelectorKeys keyList(parseNonEmptyKeys(status));

// parseNonEmptyKeys() consumes any trailing whitespace,
// so the pattern can be consumed next.

// Restore precondition before calling parsePattern()
// (which must return a non-null value)
CHECK_BOUNDS(status);
Pattern rhs = parseQuotedPattern(status);

dataModel.addVariant(std::move(keyList), std::move(rhs), status);
}

/*
Consume a `selectors` (matching the nonterminal in the grammar),
Expand All @@ -1741,22 +1739,25 @@ void Parser::parseSelectors(UErrorCode& status) {
// Parse selectors
// "Backtracking" is required here. It's not clear if whitespace is
// (`[s]` selector) or (`[s]` variant)
while (isWhitespace(peek()) || peek() == LEFT_CURLY_BRACE) {
parseOptionalWhitespace(status);
while (isWhitespace(peek()) || peek() == DOLLAR) {
int32_t whitespaceStart = index;
parseRequiredWhitespace(status);
// Restore precondition
CHECK_BOUNDS(status);
if (peek() != LEFT_CURLY_BRACE) {
if (peek() != DOLLAR) {
// This is not necessarily an error, but rather,
// means the whitespace we parsed was the optional
// whitespace preceding the first variant, not the
// optional whitespace preceding a subsequent expression.
// required whitespace preceding a subsequent variable.
// In that case, "push back" the whitespace.
normalizedInput.truncate(normalizedInput.length() - 1);
index = whitespaceStart;
break;
}
Expression expression;
expression = parseExpression(status);
VariableName var = parseVariableName(status);
empty = false;

dataModel.addSelector(std::move(expression), status);
dataModel.addSelector(std::move(var), status);
CHECK_ERROR(status);
}

Expand All @@ -1772,27 +1773,29 @@ void Parser::parseSelectors(UErrorCode& status) {
} \

// Parse variants
// matcher = match-statement s variant *(o variant)

// Parse first variant
parseRequiredWhitespace(status);
if (!inBounds()) {
ERROR(status);
return;
}
parseVariant(status);
if (!inBounds()) {
// Not an error; there might be only one variant
return;
}

while (isWhitespace(peek()) || isKeyStart(peek())) {
// Trailing whitespace is allowed
parseOptionalWhitespace(status);
// Restore the precondition.
// Trailing whitespace is allowed.
if (!inBounds()) {
return;
}

// At least one key is required
SelectorKeys keyList(parseNonEmptyKeys(status));

CHECK_ERROR(status);

// parseNonEmptyKeys() consumes any trailing whitespace,
// so the pattern can be consumed next.

// Restore precondition before calling parsePattern()
// (which must return a non-null value)
CHECK_BOUNDS(status);
Pattern rhs = parseQuotedPattern(status);

dataModel.addVariant(std::move(keyList), std::move(rhs), status);
parseVariant(status);

// Restore the precondition, *without* erroring out if we've
// reached the end of input. That's because it's valid for the
Expand Down
3 changes: 2 additions & 1 deletion icu4c/source/i18n/messageformat2_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ namespace message2 {
void parseUnsupportedStatement(UErrorCode&);
void parseLocalDeclaration(UErrorCode&);
void parseInputDeclaration(UErrorCode&);
void parseSelectors(UErrorCode&);
void parseSelectors(UErrorCode&);
void parseVariant(UErrorCode&);

void parseWhitespaceMaybeRequired(bool, UErrorCode&);
void parseRequiredWhitespace(UErrorCode&);
Expand Down
6 changes: 4 additions & 2 deletions icu4c/source/i18n/messageformat2_serializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,18 +246,20 @@ void Serializer::serializeDeclarations() {

void Serializer::serializeSelectors() {
U_ASSERT(!dataModel.hasPattern());
const Expression* selectors = dataModel.getSelectorsInternal();
const VariableName* selectors = dataModel.getSelectorsInternal();

emit(ID_MATCH);
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
// No whitespace needed here -- see `selectors` in the grammar
whitespace();
emit(DOLLAR);
emit(selectors[i]);
}
}

void Serializer::serializeVariants() {
U_ASSERT(!dataModel.hasPattern());
const Variant* variants = dataModel.getVariantsInternal();
whitespace();
for (int32_t i = 0; i < dataModel.numVariants(); i++) {
const Variant& v = variants[i];
emit(v.getKeys());
Expand Down
Loading