Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] Array Support POC #282

Draft
wants to merge 5 commits into
base: integ-array-support-poc
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,8 @@ public static String format(final String format, Object... args) {
private static boolean isQuoted(String text, String mark) {
return !Strings.isNullOrEmpty(text) && text.startsWith(mark) && text.endsWith(mark);
}

public static String removeParenthesis(String qualifier) {
return qualifier.replaceAll("\\[\\d+\\]", "");

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it a regex? Do you want to match : or \d+:\d+ (or even \d*:\d*)?
Add a comment for this function please.

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.stream.Collectors;
import lombok.Getter;
import org.apache.commons.lang3.tuple.Pair;
import org.opensearch.sql.analysis.symbol.Namespace;
import org.opensearch.sql.analysis.symbol.Symbol;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.AggregateFunction;
import org.opensearch.sql.ast.expression.AllFields;
import org.opensearch.sql.ast.expression.And;
import org.opensearch.sql.ast.expression.ArrayQualifiedName;
import org.opensearch.sql.ast.expression.Between;
import org.opensearch.sql.ast.expression.Case;
import org.opensearch.sql.ast.expression.Cast;
Expand All @@ -49,14 +52,15 @@
import org.opensearch.sql.ast.expression.When;
import org.opensearch.sql.ast.expression.WindowFunction;
import org.opensearch.sql.ast.expression.Xor;
import org.opensearch.sql.common.antlr.SyntaxCheckException;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.DSL;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.HighlightExpression;
import org.opensearch.sql.expression.ArrayReferenceExpression;
import org.opensearch.sql.expression.LiteralExpression;
import org.opensearch.sql.expression.NamedArgumentExpression;
import org.opensearch.sql.expression.NamedExpression;
Expand Down Expand Up @@ -368,8 +372,28 @@ public Expression visitAllFields(AllFields node, AnalysisContext context) {
public Expression visitQualifiedName(QualifiedName node, AnalysisContext context) {
QualifierAnalyzer qualifierAnalyzer = new QualifierAnalyzer(context);

// check for reserved words in the identifier
for (String part : node.getParts()) {
Expression reserved = checkForReservedIdentifier(node.getParts(), context, qualifierAnalyzer, node);
if (reserved != null) {
return reserved;
}

return visitIdentifier(qualifierAnalyzer.unqualified(node), context);
}

@Override
public Expression visitArrayQualifiedName(ArrayQualifiedName node, AnalysisContext context) {
QualifierAnalyzer qualifierAnalyzer = new QualifierAnalyzer(context);

Expression reserved = checkForReservedIdentifier(node.getParts(), context, qualifierAnalyzer, node);
if (reserved != null) {
return reserved;
}

return visitArrayIdentifier(qualifierAnalyzer.unqualified(node), context, node.getPartsAndIndexes());
}

private Expression checkForReservedIdentifier(List<String> parts, AnalysisContext context, QualifierAnalyzer qualifierAnalyzer, QualifiedName node) {
for (String part : parts) {
for (TypeEnvironment typeEnv = context.peek();
typeEnv != null;
typeEnv = typeEnv.getParent()) {
Expand All @@ -384,7 +408,7 @@ public Expression visitQualifiedName(QualifiedName node, AnalysisContext context
}
}
}
return visitIdentifier(qualifierAnalyzer.unqualified(node), context);
return null;
}

@Override
Expand Down Expand Up @@ -422,9 +446,27 @@ private Expression visitIdentifier(String ident, AnalysisContext context) {
}

TypeEnvironment typeEnv = context.peek();
var type = typeEnv.resolve(new Symbol(Namespace.FIELD_NAME, ident));
ReferenceExpression ref = DSL.ref(ident,
typeEnv.resolve(new Symbol(Namespace.FIELD_NAME, ident)));
typeEnv.resolve(new Symbol(Namespace.FIELD_NAME, ident)));

if (type.equals(ExprCoreType.ARRAY)) {
return new ArrayReferenceExpression(ref);
}
return ref;
}

private Expression visitArrayIdentifier(String ident, AnalysisContext context,
List<Pair<String, OptionalInt>> partsAndIndexes) {
// ParseExpression will always override ReferenceExpression when ident conflicts
for (NamedExpression expr : context.getNamedParseExpressions()) {
if (expr.getNameOrAlias().equals(ident) && expr.getDelegated() instanceof ParseExpression) {
return expr.getDelegated();
}
}

TypeEnvironment typeEnv = context.peek();
return new ArrayReferenceExpression(ident,
typeEnv.resolve(new Symbol(Namespace.FIELD_NAME, StringUtils.removeParenthesis(ident))), partsAndIndexes);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
import lombok.RequiredArgsConstructor;
import org.opensearch.sql.analysis.symbol.Namespace;
import org.opensearch.sql.analysis.symbol.Symbol;
import org.opensearch.sql.ast.expression.ArrayQualifiedName;
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.common.antlr.SyntaxCheckException;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.exception.SemanticCheckException;

/**
Expand All @@ -38,6 +40,10 @@ public String unqualified(QualifiedName fullName) {
return isQualifierIndexOrAlias(fullName) ? fullName.rest().toString() : fullName.toString();
}

public String unqualified(ArrayQualifiedName fullName) {
return isQualifierIndexOrAlias(fullName) ? fullName.rest().toString() : fullName.toString();
}

private boolean isQualifierIndexOrAlias(QualifiedName fullName) {
Optional<String> qualifier = fullName.first();
if (qualifier.isPresent()) {
Expand All @@ -50,10 +56,22 @@ private boolean isQualifierIndexOrAlias(QualifiedName fullName) {
return false;
}

private boolean isQualifierIndexOrAlias(ArrayQualifiedName fullName) {
Optional<String> qualifier = fullName.first();
if (qualifier.isPresent()) {
if (isFieldName(qualifier.get())) {
return false;
}
resolveQualifierSymbol(fullName, qualifier.get());
return true;
}
return false;
}

private boolean isFieldName(String qualifier) {
try {
// Resolve the qualifier in Namespace.FIELD_NAME
context.peek().resolve(new Symbol(Namespace.FIELD_NAME, qualifier));
context.peek().resolve(new Symbol(Namespace.FIELD_NAME, StringUtils.removeParenthesis(qualifier)));
return true;
} catch (SemanticCheckException e2) {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
import org.opensearch.sql.ast.expression.NestedAllTupleFields;
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.ast.expression.UnresolvedExpression;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.expression.DSL;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.ArrayReferenceExpression;
import org.opensearch.sql.expression.NamedExpression;
import org.opensearch.sql.expression.ReferenceExpression;

Expand Down Expand Up @@ -105,8 +107,17 @@ public List<NamedExpression> visitAllFields(AllFields node,
AnalysisContext context) {
TypeEnvironment environment = context.peek();
Map<String, ExprType> lookupAllFields = environment.lookupAllFields(Namespace.FIELD_NAME);
return lookupAllFields.entrySet().stream().map(entry -> DSL.named(entry.getKey(),
new ReferenceExpression(entry.getKey(), entry.getValue()))).collect(Collectors.toList());
return lookupAllFields.entrySet().stream().map(entry ->
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add an example in the PR description for SELECT *? Will this return the full array for a column when SELECT * is queried? Since SELECT array still returns just the first value, is this still the case for SELECT * as well?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes the functionality for SELECT * will stay the same, otherwise would be a breaking change.

{
ReferenceExpression ref = new ReferenceExpression(entry.getKey(), entry.getValue());
if (entry.getValue().equals(ExprCoreType.ARRAY)) {
return DSL.named(entry.getKey(),
new ArrayReferenceExpression(ref));
} else {
return DSL.named(entry.getKey(), ref);
}
}
).collect(Collectors.toList());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.opensearch.sql.ast.expression.AllFields;
import org.opensearch.sql.ast.expression.And;
import org.opensearch.sql.ast.expression.Argument;
import org.opensearch.sql.ast.expression.ArrayQualifiedName;
import org.opensearch.sql.ast.expression.AttributeList;
import org.opensearch.sql.ast.expression.Between;
import org.opensearch.sql.ast.expression.Case;
Expand Down Expand Up @@ -194,6 +195,9 @@ public T visitField(Field node, C context) {
public T visitQualifiedName(QualifiedName node, C context) {
return visitChildren(node, context);
}
public T visitArrayQualifiedName(ArrayQualifiedName node, C context) {
return visitChildren(node, context);
}

public T visitRename(Rename node, C context) {
return visitChildren(node, context);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/


package org.opensearch.sql.ast.expression;


import lombok.EqualsAndHashCode;
import lombok.Getter;
import org.apache.commons.lang3.tuple.Pair;
import org.opensearch.sql.ast.AbstractNodeVisitor;

import java.util.List;
import java.util.OptionalInt;
import java.util.stream.Collectors;

@Getter
@EqualsAndHashCode(callSuper = false)
public class ArrayQualifiedName extends QualifiedName {

private final List<Pair<String, OptionalInt>> partsAndIndexes;

public ArrayQualifiedName(List<Pair<String, OptionalInt>> parts) {
super(parts.stream().map(p -> p.getLeft()).collect(Collectors.toList()));
this.partsAndIndexes = parts;
}

@Override
public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
return nodeVisitor.visitArrayQualifiedName(this, context);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/


package org.opensearch.sql.expression;

import static org.opensearch.sql.utils.ExpressionUtils.PATH_SEP;

import java.util.Arrays;
import java.util.List;
import java.util.OptionalInt;
import java.util.stream.Collectors;

import lombok.EqualsAndHashCode;
import lombok.Getter;
import org.apache.commons.lang3.tuple.Pair;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.data.model.ExprCollectionValue;
import org.opensearch.sql.data.model.ExprMissingValue;
import org.opensearch.sql.data.model.ExprTupleValue;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.expression.env.Environment;

@EqualsAndHashCode
public class ArrayReferenceExpression extends ReferenceExpression {
@Getter
private final List<Pair<String, OptionalInt>> partsAndIndexes;
@Getter
private final ExprType type;
public ArrayReferenceExpression(String ref, ExprType type, List<Pair<String, OptionalInt>> partsAndIndexes) {
super(StringUtils.removeParenthesis(ref), type);
this.partsAndIndexes = partsAndIndexes;
this.type = type;
}

public ArrayReferenceExpression(ReferenceExpression ref) {
super(StringUtils.removeParenthesis(ref.toString()), ref.type());
this.partsAndIndexes = Arrays.stream(ref.toString().split("\\.")).map(e -> Pair.of(e, OptionalInt.empty())).collect(
Collectors.toList());
this.type = ref.type();
}

@Override
public ExprValue valueOf(Environment<Expression, ExprValue> env) {
return env.resolve(this);
}

@Override
public ExprType type() {
return type;
}

@Override
public <T, C> T accept(ExpressionNodeVisitor<T, C> visitor, C context) {
return visitor.visitArrayReference(this, context);
}

public ExprValue resolve(ExprTupleValue value) {
return resolve(value, partsAndIndexes);
}

private ExprValue resolve(ExprValue value, List<Pair<String, OptionalInt>> paths) {
List<String> pathsWithoutParenthesis =
paths.stream().map(p -> StringUtils.removeParenthesis(p.getLeft())).collect(Collectors.toList());
ExprValue wholePathValue = value.keyValue(String.join(PATH_SEP, pathsWithoutParenthesis));

if (!paths.get(0).getRight().isEmpty()) {
if (value.keyValue(pathsWithoutParenthesis.get(0)) instanceof ExprCollectionValue) { // TODO check array size
wholePathValue = value
.keyValue(pathsWithoutParenthesis.get(0))
.collectionValue()
.get(paths.get(0).getRight().getAsInt());
if (paths.size() != 1) {
return resolve(wholePathValue, paths.subList(1, paths.size()));
}
} else {
return ExprValueUtils.missingValue();
}
} else if (wholePathValue.isMissing()) {
return resolve(value.keyValue(pathsWithoutParenthesis.get(0)), paths.subList(1, paths.size()));
}

if (!wholePathValue.isMissing() || paths.size() == 1) {
return wholePathValue;
} else {
return resolve(value.keyValue(pathsWithoutParenthesis.get(0)), paths.subList(1, paths.size()));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ public T visitReference(ReferenceExpression node, C context) {
return visitNode(node, context);
}

public T visitArrayReference(ArrayReferenceExpression node, C context) {
return visitNode(node, context);
}

public T visitParse(ParseExpression node, C context) {
return visitNode(node, context);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public ExprValue valueOf(Environment<Expression, ExprValue> valueEnv) {
if (this.type == ExprCoreType.ARRAY) {
refName += "." + StringUtils.unquoteText(getHighlightField().toString());
}
ExprValue value = valueEnv.resolve(DSL.ref(refName, ExprCoreType.STRING));
ExprValue value = valueEnv.resolve(new ArrayReferenceExpression(DSL.ref(refName, ExprCoreType.STRING)));

// In the event of multiple returned highlights and wildcard being
// used in conjunction with other highlight calls, we need to ensure
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,22 @@ private ExprValue resolve(ExprValue value, List<String> paths) {
if (value.type().equals(ExprCoreType.ARRAY)) {
wholePathValue = value.collectionValue().get(0).keyValue(paths.get(0));
}
if (wholePathValue.type().equals(ExprCoreType.ARRAY)) {
return getFirstValueOfCollection(wholePathValue);
}

if (!wholePathValue.isMissing() || paths.size() == 1) {
return wholePathValue;
} else {
return resolve(value.keyValue(paths.get(0)), paths.subList(1, paths.size()));
}
}

private ExprValue getFirstValueOfCollection(ExprValue value) {
ExprValue collectionVal = value;
while(collectionVal.type().equals(ExprCoreType.ARRAY)) {
collectionVal = collectionVal.collectionValue().get(0);
}
return collectionVal;
}
}
Loading