Skip to content

Commit

Permalink
Unmute ValuesTests and reduce strings memory usage (elastic#111455)
Browse files Browse the repository at this point in the history
Strings tests were taking too much memory in the worst cases. And
VALUES() store all the values.

Fixes elastic#111428 Fixes
elastic#111429
  • Loading branch information
ivancea authored Jul 31, 2024
1 parent cd5a823 commit 1837133
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 43 deletions.
6 changes: 0 additions & 6 deletions muted-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,6 @@ tests:
issue: https://github.com/elastic/elasticsearch/issues/111280
- class: org.elasticsearch.xpack.repositories.metering.azure.AzureRepositoriesMeteringIT
issue: https://github.com/elastic/elasticsearch/issues/111307
- class: org.elasticsearch.xpack.esql.expression.function.aggregate.ValuesTests
method: testGroupingAggregate {TestCase=<long unicode TEXTs>}
issue: https://github.com/elastic/elasticsearch/issues/111429
- class: org.elasticsearch.xpack.esql.expression.function.aggregate.ValuesTests
method: testGroupingAggregate {TestCase=<long unicode KEYWORDs>}
issue: https://github.com/elastic/elasticsearch/issues/111428
- class: org.elasticsearch.xpack.restart.FullClusterRestartIT
method: testSingleDoc {cluster=UPGRADED}
issue: https://github.com/elastic/elasticsearch/issues/111434
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -469,36 +469,61 @@ public static List<TypedDataSupplier> cartesianPointCases(int minRows, int maxRo
}

public static List<TypedDataSupplier> stringCases(int minRows, int maxRows, DataType type) {
return List.of(
new TypedDataSupplier("<empty " + type + "s>", () -> randomList(minRows, maxRows, () -> new BytesRef("")), type, false, true),
new TypedDataSupplier(
"<short alpha " + type + "s>",
() -> randomList(minRows, maxRows, () -> new BytesRef(ESTestCase.randomAlphaOfLengthBetween(1, 30))),
type,
false,
true
),
new TypedDataSupplier(
"<long alpha " + type + "s>",
() -> randomList(minRows, maxRows, () -> new BytesRef(ESTestCase.randomAlphaOfLengthBetween(300, 3000))),
type,
false,
true
),
new TypedDataSupplier(
"<short unicode " + type + "s>",
() -> randomList(minRows, maxRows, () -> new BytesRef(ESTestCase.randomRealisticUnicodeOfLengthBetween(1, 30))),
type,
false,
true
),
new TypedDataSupplier(
"<long unicode " + type + "s>",
() -> randomList(minRows, maxRows, () -> new BytesRef(ESTestCase.randomRealisticUnicodeOfLengthBetween(300, 3000))),
type,
false,
true
List<TypedDataSupplier> cases = new ArrayList<>();

cases.addAll(
List.of(
new TypedDataSupplier(
"<empty " + type + "s>",
() -> randomList(minRows, maxRows, () -> new BytesRef("")),
type,
false,
true
),
new TypedDataSupplier(
"<short alpha " + type + "s>",
() -> randomList(minRows, maxRows, () -> new BytesRef(ESTestCase.randomAlphaOfLengthBetween(1, 30))),
type,
false,
true
),
new TypedDataSupplier(
"<short unicode " + type + "s>",
() -> randomList(minRows, maxRows, () -> new BytesRef(ESTestCase.randomRealisticUnicodeOfLengthBetween(1, 30))),
type,
false,
true
)
)
);

if (minRows <= 100) {
var longStringsMaxRows = Math.min(maxRows, 100);

cases.addAll(
List.of(
new TypedDataSupplier(
"<long alpha " + type + "s>",
() -> randomList(minRows, longStringsMaxRows, () -> new BytesRef(ESTestCase.randomAlphaOfLengthBetween(300, 1000))),
type,
false,
true
),
new TypedDataSupplier(
"<long unicode " + type + "s>",
() -> randomList(
minRows,
longStringsMaxRows,
() -> new BytesRef(ESTestCase.randomRealisticUnicodeOfLengthBetween(300, 1000))
),
type,
false,
true
)
)
);
}

return cases;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,8 @@ public static Iterable<Object[]> parameters() {
MultiRowTestCaseSupplier.booleanCases(1, 1000),
MultiRowTestCaseSupplier.ipCases(1, 1000),
MultiRowTestCaseSupplier.versionCases(1, 1000),
// Lower values for strings, as they take more space and may trigger the circuit breaker
MultiRowTestCaseSupplier.stringCases(1, 100, DataType.KEYWORD),
MultiRowTestCaseSupplier.stringCases(1, 100, DataType.TEXT)
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.KEYWORD),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT)
).flatMap(List::stream).forEach(fieldCaseSupplier -> {
// With precision
for (var precisionCaseSupplier : precisionSuppliers) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,8 @@ public static Iterable<Object[]> parameters() {
MultiRowTestCaseSupplier.versionCases(1, 1000),
MultiRowTestCaseSupplier.geoPointCases(1, 1000, true),
MultiRowTestCaseSupplier.cartesianPointCases(1, 1000, true),
// Lower values for strings, as they take more space and may trigger the circuit breaker
MultiRowTestCaseSupplier.stringCases(1, 100, DataType.KEYWORD),
MultiRowTestCaseSupplier.stringCases(1, 100, DataType.TEXT)
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.KEYWORD),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT)
).flatMap(List::stream).map(CountTests::makeSupplier).collect(Collectors.toCollection(() -> suppliers));

// No rows
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ public static Iterable<Object[]> parameters() {
MultiRowTestCaseSupplier.ipCases(1, 1000),
MultiRowTestCaseSupplier.versionCases(1, 1000),
// Lower values for strings, as they take more space and may trigger the circuit breaker
MultiRowTestCaseSupplier.stringCases(1, 100, DataType.KEYWORD),
MultiRowTestCaseSupplier.stringCases(1, 100, DataType.TEXT)
MultiRowTestCaseSupplier.stringCases(1, 20, DataType.KEYWORD),
MultiRowTestCaseSupplier.stringCases(1, 20, DataType.TEXT)
).flatMap(List::stream).map(ValuesTests::makeSupplier).collect(Collectors.toCollection(() -> suppliers));

return parameterSuppliersFromTypedDataWithDefaultChecks(suppliers);
Expand Down

0 comments on commit 1837133

Please sign in to comment.