Skip to content

Commit

Permalink
Exclude spatial types from sortability (mv_min/max)
Browse files Browse the repository at this point in the history
Even though geo_point and cartesian_point are backed by LONG, making them implicitly sortable, we exclude them from sorting because the sort order is not particularly useful.
  • Loading branch information
craigtaverner committed Nov 29, 2023
1 parent fb8bf75 commit d07e0ad
Show file tree
Hide file tree
Showing 11 changed files with 55 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ mv_avg |? mv_avg(arg1:?)
mv_concat |"keyword mv_concat(v:text|keyword, delim:text|keyword)" |[v, delim] |["text|keyword", "text|keyword"] |["values to join", "delimiter"] |keyword | "Reduce a multivalued string field to a single valued field by concatenating all values." | [false, false] | false
mv_count |"integer mv_count(v:unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long|geo_point|cartesian_point)" |v | "unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long|geo_point|cartesian_point" | "" | integer | "Reduce a multivalued field to a single valued field containing the count of values." | false | false
mv_dedupe |"? mv_dedupe(v:boolean|date|double|ip|text|integer|keyword|version|long)" |v | "boolean|date|double|ip|text|integer|keyword|version|long" | "" |? | "Remove duplicate values from a multivalued field." | false | false
mv_max |"? mv_max(v:unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long|geo_point|cartesian_point)" |v | "unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long|geo_point|cartesian_point" | "" |? | "Reduce a multivalued field to a single valued field containing the maximum value." | false | false
mv_max |"? mv_max(v:unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long)" |v | "unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long" | "" |? | "Reduce a multivalued field to a single valued field containing the maximum value." | false | false
mv_median |? mv_median(arg1:?) |arg1 |? | "" |? | "" | false | false
mv_min |"? mv_min(v:unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long|geo_point|cartesian_point)" |v | "unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long|geo_point|cartesian_point" | "" |? | "Reduce a multivalued field to a single valued field containing the minimum value." | false | false
mv_min |"? mv_min(v:unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long)" |v | "unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long" | "" |? | "Reduce a multivalued field to a single valued field containing the minimum value." | false | false
mv_sum |? mv_sum(arg1:?) |arg1 |? | "" |? | "" | false | false
now |? now() | null |null | null |? | "" | null | false
percentile |? percentile(arg1:?, arg2:?) |[arg1, arg2] |[?, ?] |["", ""] |? | "" | [false, false] | false
Expand Down Expand Up @@ -140,9 +140,9 @@ synopsis:keyword
"keyword mv_concat(v:text|keyword, delim:text|keyword)"
"integer mv_count(v:unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long|geo_point|cartesian_point)"
"? mv_dedupe(v:boolean|date|double|ip|text|integer|keyword|version|long)"
"? mv_max(v:unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long|geo_point|cartesian_point)"
"? mv_max(v:unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long)"
? mv_median(arg1:?)
"? mv_min(v:unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long|geo_point|cartesian_point)"
"? mv_min(v:unsigned_long|date|boolean|double|ip|text|integer|keyword|version|long)"
? mv_sum(arg1:?)
? now()
? percentile(arg1:?, arg2:?)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner;
import org.elasticsearch.xpack.esql.type.EsqlDataTypes;
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.tree.NodeInfo;
import org.elasticsearch.xpack.ql.tree.Source;

import java.util.List;

import static org.elasticsearch.xpack.esql.type.EsqlDataTypes.isRepresentable;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypes.isSpatial;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isType;

/**
Expand All @@ -33,32 +34,20 @@ public MvMax(
Source source,
@Param(
name = "v",
type = {
"unsigned_long",
"date",
"boolean",
"double",
"ip",
"text",
"integer",
"keyword",
"version",
"long",
"geo_point",
"cartesian_point" }
type = { "unsigned_long", "date", "boolean", "double", "ip", "text", "integer", "keyword", "version", "long" }
) Expression v
) {
super(source, v);
}

@Override
protected TypeResolution resolveFieldType() {
return isType(field(), EsqlDataTypes::isRepresentable, sourceText(), null, "representable");
return isType(field(), t -> isSpatial(t) == false && isRepresentable(t), sourceText(), null, "representableNonSpatial");
}

@Override
protected ExpressionEvaluator.Factory evaluator(ExpressionEvaluator.Factory fieldEval) {
return switch (LocalExecutionPlanner.toElementType(field().dataType())) {
return switch (LocalExecutionPlanner.toSortableElementType(field().dataType())) {
case BOOLEAN -> new MvMaxBooleanEvaluator.Factory(fieldEval);
case BYTES_REF -> new MvMaxBytesRefEvaluator.Factory(fieldEval);
case DOUBLE -> new MvMaxDoubleEvaluator.Factory(fieldEval);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner;
import org.elasticsearch.xpack.esql.type.EsqlDataTypes;
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.tree.NodeInfo;
import org.elasticsearch.xpack.ql.tree.Source;

import java.util.List;

import static org.elasticsearch.xpack.esql.type.EsqlDataTypes.isRepresentable;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypes.isSpatial;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isType;

/**
Expand All @@ -33,32 +34,20 @@ public MvMin(
Source source,
@Param(
name = "v",
type = {
"unsigned_long",
"date",
"boolean",
"double",
"ip",
"text",
"integer",
"keyword",
"version",
"long",
"geo_point",
"cartesian_point" }
type = { "unsigned_long", "date", "boolean", "double", "ip", "text", "integer", "keyword", "version", "long" }
) Expression field
) {
super(source, field);
}

@Override
protected TypeResolution resolveFieldType() {
return isType(field(), EsqlDataTypes::isRepresentable, sourceText(), null, "representable");
return isType(field(), t -> isSpatial(t) == false && isRepresentable(t), sourceText(), null, "representableNonSpatial");
}

@Override
protected ExpressionEvaluator.Factory evaluator(ExpressionEvaluator.Factory fieldEval) {
return switch (LocalExecutionPlanner.toElementType(field().dataType())) {
return switch (LocalExecutionPlanner.toSortableElementType(field().dataType())) {
case BOOLEAN -> new MvMinBooleanEvaluator.Factory(fieldEval);
case BYTES_REF -> new MvMinBytesRefEvaluator.Factory(fieldEval);
case DOUBLE -> new MvMinDoubleEvaluator.Factory(fieldEval);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,18 @@ public static ElementType toElementType(DataType dataType) {
throw EsqlIllegalArgumentException.illegalDataType(dataType);
}

/**
* Map QL's {@link DataType} to the compute engine's {@link ElementType}, for sortable types only.
* This specifically excludes GEO_POINT and CARTESIAN_POINT, which are backed by DataType.LONG
* but are not themselves sortable (the long can be sorted, but the sort order is not usually useful).
*/
public static ElementType toSortableElementType(DataType dataType) {
if (dataType == EsqlDataTypes.GEO_POINT || dataType == EsqlDataTypes.CARTESIAN_POINT) {
return ElementType.UNKNOWN;
}
return toElementType(dataType);
}

private PhysicalOperation planOutput(OutputExec outputExec, LocalExecutionPlannerContext context) {
PhysicalOperation source = plan(outputExec.child(), context);
var output = outputExec.output();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ public static boolean isTemporalAmount(DataType t) {
return t == DATE_PERIOD || t == TIME_DURATION;
}

public static boolean isSpatial(DataType t) {
return t == GEO_POINT || t == CARTESIAN_POINT;
}

/**
* Supported types that can be contained in a block.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@

import static org.elasticsearch.compute.data.BlockUtils.toJavaObject;
import static org.elasticsearch.xpack.esql.SerializationTestUtils.assertSerialization;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypes.isSpatial;
import static org.elasticsearch.xpack.ql.util.SpatialCoordinateTypes.Cartesian;
import static org.elasticsearch.xpack.ql.util.SpatialCoordinateTypes.Geo;
import static org.hamcrest.Matchers.either;
Expand Down Expand Up @@ -800,7 +801,9 @@ private static String typeErrorMessage(boolean includeOrdinal, List<Set<DataType
Map.entry(Set.of(DataTypes.LONG, DataTypes.INTEGER, DataTypes.UNSIGNED_LONG, DataTypes.DOUBLE, DataTypes.NULL), "numeric"),
Map.entry(Set.of(DataTypes.KEYWORD, DataTypes.TEXT, DataTypes.VERSION, DataTypes.NULL), "keyword, text or version"),
Map.entry(Set.of(DataTypes.KEYWORD, DataTypes.TEXT, DataTypes.NULL), "string"),
Map.entry(Set.of(DataTypes.IP, DataTypes.KEYWORD, DataTypes.NULL), "ip or keyword")
Map.entry(Set.of(DataTypes.IP, DataTypes.KEYWORD, DataTypes.NULL), "ip or keyword"),
Map.entry(Set.copyOf(Arrays.asList(representableTypes())), "representable"),
Map.entry(Set.copyOf(Arrays.asList(representableNonSpatialTypes())), "representableNonSpatial")
);

private static String expectedType(Set<DataType> validTypes) {
Expand All @@ -818,10 +821,22 @@ private static String expectedType(Set<DataType> validTypes) {
return named;
}

private static Stream<DataType> representable() {
protected static Stream<DataType> representable() {
return EsqlDataTypes.types().stream().filter(EsqlDataTypes::isRepresentable);
}

protected static DataType[] representableTypes() {
return representable().toArray(DataType[]::new);
}

protected static Stream<DataType> representableNonSpatial() {
return representable().filter(t -> isSpatial(t) == false);
}

protected static DataType[] representableNonSpatialTypes() {
return representableNonSpatial().toArray(DataType[]::new);
}

@AfterClass
public static void renderSignature() throws IOException {
if (System.getProperty("generateDocs") == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,6 @@ protected final DataType[] representableNumerics() {
return EsqlDataTypes.types().stream().filter(DataType::isNumeric).filter(EsqlDataTypes::isRepresentable).toArray(DataType[]::new);
}

protected final DataType[] representable() {
return EsqlDataTypes.types().stream().filter(EsqlDataTypes::isRepresentable).toArray(DataType[]::new);
}

protected record ArgumentSpec(boolean optional, Set<DataType> validTypes) {}

public final void testResolveType() {
Expand Down Expand Up @@ -187,9 +183,12 @@ private String expectedTypeName(Set<DataType> validTypes) {
if (withoutNull.equals(negations)) {
return "numeric, date_period or time_duration";
}
if (validTypes.equals(Set.copyOf(Arrays.asList(representable())))) {
if (validTypes.equals(Set.copyOf(Arrays.asList(representableTypes())))) {
return "representable";
}
if (validTypes.equals(Set.copyOf(Arrays.asList(representableNonSpatialTypes())))) {
return "representableNonSpatial";
}
throw new IllegalArgumentException("can't guess expected type for " + validTypes);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ protected Expression build(Source source, Expression field) {

@Override
protected DataType[] supportedTypes() {
return representable();
return representableTypes();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ protected Expression build(Source source, Expression field) {

@Override
protected DataType[] supportedTypes() {
return representable();
return representableTypes();
}

@SuppressWarnings("unchecked")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ public static Iterable<Object[]> parameters() {
(size, values) -> equalTo(NumericUtils.asLongUnsigned(values.reduce(BigInteger::max).get()))
);
dateTimes(cases, "mv_max", "MvMax", (size, values) -> equalTo(values.max().getAsLong()));
geoPoints(cases, "mv_max", "MvMax", (size, values) -> equalTo(values.max().getAsLong()));
cartesianPoints(cases, "mv_max", "MvMax", (size, values) -> equalTo(values.max().getAsLong()));
return parameterSuppliersFromTypedData(errorsForCasesWithoutExamples(anyNullIsNull(false, cases)));
}

Expand All @@ -56,6 +54,6 @@ protected Expression build(Source source, Expression field) {

@Override
protected DataType[] supportedTypes() {
return representable();
return representableNonSpatialTypes();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ public static Iterable<Object[]> parameters() {
(size, values) -> equalTo(NumericUtils.asLongUnsigned(values.reduce(BigInteger::min).get()))
);
dateTimes(cases, "mv_min", "MvMin", (size, values) -> equalTo(values.min().getAsLong()));
geoPoints(cases, "mv_min", "MvMin", (size, values) -> equalTo(values.min().getAsLong()));
cartesianPoints(cases, "mv_min", "MvMin", (size, values) -> equalTo(values.min().getAsLong()));
return parameterSuppliersFromTypedData(errorsForCasesWithoutExamples(anyNullIsNull(false, cases)));
}

Expand All @@ -56,6 +54,6 @@ protected Expression build(Source source, Expression field) {

@Override
protected DataType[] supportedTypes() {
return representable();
return representableNonSpatialTypes();
}
}

0 comments on commit d07e0ad

Please sign in to comment.