Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] Support Percentile in PPL #2710

Merged
merged 2 commits into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ dependencies {
api "com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}"
api "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}"
api group: 'com.google.code.gson', name: 'gson', version: '2.8.9'
api group: 'com.tdunning', name: 't-digest', version: '3.2'
api project(':common')

testImplementation('org.junit.jupiter:junit-jupiter:5.9.3')
Expand Down
12 changes: 12 additions & 0 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,18 @@ public static Aggregator max(Expression... expressions) {
return aggregate(BuiltinFunctionName.MAX, expressions);
}

/**
* OpenSearch uses T-Digest to approximate percentile, so PERCENTILE and PERCENTILE_APPROX are the
* same function.
*/
public static Aggregator percentile(Expression... expressions) {
return percentileApprox(expressions);
}

public static Aggregator percentileApprox(Expression... expressions) {
return aggregate(BuiltinFunctionName.PERCENTILE_APPROX, expressions);
}

private static Aggregator aggregate(BuiltinFunctionName functionName, Expression... expressions) {
return compile(FunctionProperties.None, functionName, expressions);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public static void register(BuiltinFunctionRepository repository) {
repository.register(stddevSamp());
repository.register(stddevPop());
repository.register(take());
repository.register(percentileApprox());
}

private static DefaultFunctionResolver avg() {
Expand Down Expand Up @@ -245,4 +246,46 @@ private static DefaultFunctionResolver take() {
.build());
return functionResolver;
}

private static DefaultFunctionResolver percentileApprox() {
FunctionName functionName = BuiltinFunctionName.PERCENTILE_APPROX.getName();
DefaultFunctionResolver functionResolver =
new DefaultFunctionResolver(
functionName,
new ImmutableMap.Builder<FunctionSignature, FunctionBuilder>()
.put(
new FunctionSignature(functionName, ImmutableList.of(INTEGER, DOUBLE)),
(functionProperties, arguments) ->
PercentileApproximateAggregator.percentileApprox(arguments, INTEGER))
.put(
new FunctionSignature(functionName, ImmutableList.of(INTEGER, DOUBLE, DOUBLE)),
(functionProperties, arguments) ->
PercentileApproximateAggregator.percentileApprox(arguments, INTEGER))
.put(
new FunctionSignature(functionName, ImmutableList.of(LONG, DOUBLE)),
(functionProperties, arguments) ->
PercentileApproximateAggregator.percentileApprox(arguments, LONG))
.put(
new FunctionSignature(functionName, ImmutableList.of(LONG, DOUBLE, DOUBLE)),
(functionProperties, arguments) ->
PercentileApproximateAggregator.percentileApprox(arguments, LONG))
.put(
new FunctionSignature(functionName, ImmutableList.of(FLOAT, DOUBLE)),
(functionProperties, arguments) ->
PercentileApproximateAggregator.percentileApprox(arguments, FLOAT))
.put(
new FunctionSignature(functionName, ImmutableList.of(FLOAT, DOUBLE, DOUBLE)),
(functionProperties, arguments) ->
PercentileApproximateAggregator.percentileApprox(arguments, FLOAT))
.put(
new FunctionSignature(functionName, ImmutableList.of(DOUBLE, DOUBLE)),
(functionProperties, arguments) ->
PercentileApproximateAggregator.percentileApprox(arguments, DOUBLE))
.put(
new FunctionSignature(functionName, ImmutableList.of(DOUBLE, DOUBLE, DOUBLE)),
(functionProperties, arguments) ->
PercentileApproximateAggregator.percentileApprox(arguments, DOUBLE))
.build());
return functionResolver;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.aggregation;

import static org.opensearch.sql.data.model.ExprValueUtils.doubleValue;
import static org.opensearch.sql.utils.ExpressionUtils.format;

import com.tdunning.math.stats.AVLTreeDigest;
import java.util.List;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.data.model.ExprNullValue;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.function.BuiltinFunctionName;

/** Aggregator to calculate approximate percentile. */
public class PercentileApproximateAggregator
extends Aggregator<PercentileApproximateAggregator.PercentileApproximateState> {

public static Aggregator percentileApprox(List<Expression> arguments, ExprCoreType returnType) {
return new PercentileApproximateAggregator(arguments, returnType);
}

public PercentileApproximateAggregator(List<Expression> arguments, ExprCoreType returnType) {
super(BuiltinFunctionName.PERCENTILE_APPROX.getName(), arguments, returnType);
if (!ExprCoreType.numberTypes().contains(returnType)) {
throw new IllegalArgumentException(
String.format("percentile aggregation over %s type is not supported", returnType));
}
}

@Override
public PercentileApproximateState create() {
if (getArguments().size() == 2) {
return new PercentileApproximateState(getArguments().get(1).valueOf().doubleValue());
} else {
return new PercentileApproximateState(
getArguments().get(1).valueOf().doubleValue(),
getArguments().get(2).valueOf().doubleValue());
}
}

@Override
protected PercentileApproximateState iterate(ExprValue value, PercentileApproximateState state) {
state.evaluate(value);
return state;
}

@Override
public String toString() {
return StringUtils.format("%s(%s)", "percentile", format(getArguments()));
}

/**
* PercentileApproximateState is used to store the AVLTreeDigest state for percentile estimation.
*/
protected static class PercentileApproximateState extends AVLTreeDigest
implements AggregationState {
// The compression level for the AVLTreeDigest, keep the same default value as OpenSearch core.
public static final double DEFAULT_COMPRESSION = 100.0;
private final double percent;

PercentileApproximateState(double percent) {
super(DEFAULT_COMPRESSION);
if (percent < 0.0 || percent > 100.0) {
throw new IllegalArgumentException("out of bounds percent value, must be in [0, 100]");
}
this.percent = percent / 100.0;
}

/**
* Constructor for specifying both percent and compression level.
*
* @param percent the percent to compute, must be in [0, 100]
* @param compression the compression factor of the t-digest sketches used
*/
PercentileApproximateState(double percent, double compression) {
super(compression);
if (percent < 0.0 || percent > 100.0) {
throw new IllegalArgumentException("out of bounds percent value, must be in [0, 100]");
}
this.percent = percent / 100.0;
}

public void evaluate(ExprValue value) {
this.add(value.doubleValue());
}

@Override
public ExprValue result() {
return this.size() == 0 ? ExprNullValue.of() : doubleValue(this.quantile(percent));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ public enum BuiltinFunctionName {
STDDEV_POP(FunctionName.of("stddev_pop")),
// take top documents from aggregation bucket.
TAKE(FunctionName.of("take")),
// t-digest percentile which is used in OpenSearch core by default.
PERCENTILE_APPROX(FunctionName.of("percentile_approx")),
// Not always an aggregation query
NESTED(FunctionName.of("nested")),

Expand Down Expand Up @@ -279,6 +281,8 @@ public enum BuiltinFunctionName {
.put("stddev_pop", BuiltinFunctionName.STDDEV_POP)
.put("stddev_samp", BuiltinFunctionName.STDDEV_SAMP)
.put("take", BuiltinFunctionName.TAKE)
.put("percentile", BuiltinFunctionName.PERCENTILE_APPROX)
.put("percentile_approx", BuiltinFunctionName.PERCENTILE_APPROX)
.build();

public static Optional<BuiltinFunctionName> of(String str) {
Expand Down
Loading
Loading