From 4254ca376fb1948daba730315ff0a988374d1c3c Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 4 Dec 2024 13:00:59 -0500 Subject: [PATCH 1/3] Kderusso/sparse vector ci failure (#117930) (#118014) * Fix CI failure in SparseVectorQueryBuilderTests --- muted-tests.yml | 3 --- .../xpack/core/ml/search/SparseVectorQueryBuilderTests.java | 6 ++++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 7c82b1f0c1b92..a09952e878315 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -411,6 +411,3 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/117805 - class: org.elasticsearch.xpack.security.authc.ldap.UserAttributeGroupsResolverTests issue: https://github.com/elastic/elasticsearch/issues/116537 -- class: org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilderTests - method: testToQuery - issue: https://github.com/elastic/elasticsearch/issues/117998 diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java index b5296bef05b77..7774b29bfc971 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java @@ -232,6 +232,12 @@ public void testToQuery() throws IOException { private void testDoToQuery(SparseVectorQueryBuilder queryBuilder, SearchExecutionContext context) throws IOException { Query query = queryBuilder.doToQuery(context); + + // test query builder can randomly have no vectors, which rewrites to a MatchNoneQuery - nothing more to do in this case. + if (query instanceof MatchNoDocsQuery) { + return; + } + assertTrue(query instanceof SparseVectorQueryWrapper); var sparseQuery = (SparseVectorQueryWrapper) query; if (queryBuilder.shouldPruneTokens()) { From ffea5a65fcffe98acb843db4430562d3166c6760 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Wed, 4 Dec 2024 14:38:01 -0500 Subject: [PATCH 2/3] Indicate that rescore isn't allowed with retrievers, yet (#118019) (#118022) --- docs/reference/search/retriever.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index b90b7e312c790..cb04d4fb6fbf1 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -765,11 +765,11 @@ clauses in a <>. [[retriever-restrictions]] ==== Restrictions on search parameters when specifying a retriever -When a retriever is specified as part of a search, the following elements are not allowed at the top-level. -Instead they are only allowed as elements of specific retrievers: +When a retriever is specified as part of a search, the following elements are not allowed at the top-level: * <> * <> * <> * <> * <> +* <> From 570a8cb9f77216ac6daed626a21b0593096d27de Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 4 Dec 2024 15:35:18 -0500 Subject: [PATCH 3/3] ESQL: Limit size of query (#117898) (#118024) Queries bigger than a mb tend to take a lot of memory. In the worse case it's an astounding amount of memory. --- docs/changelog/117898.yaml | 5 +++++ .../xpack/esql/heap_attack/HeapAttackIT.java | 21 +++++++++++++++++++ .../xpack/esql/parser/EsqlParser.java | 17 ++++++++++++++- .../xpack/esql/analysis/ParsingTests.java | 8 +++++++ 4 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/117898.yaml diff --git a/docs/changelog/117898.yaml b/docs/changelog/117898.yaml new file mode 100644 index 0000000000000..c60061abc49ff --- /dev/null +++ b/docs/changelog/117898.yaml @@ -0,0 +1,5 @@ +pr: 117898 +summary: Limit size of query +area: ES|QL +type: bug +issues: [] diff --git a/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java b/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java index 8b9176a346e30..ace3db377664c 100644 --- a/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java +++ b/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java @@ -194,6 +194,13 @@ private void assertCircuitBreaks(ThrowingRunnable r) throws IOException { ); } + private void assertParseFailure(ThrowingRunnable r) throws IOException { + ResponseException e = expectThrows(ResponseException.class, r); + Map map = responseAsMap(e.getResponse()); + logger.info("expected parse failure {}", map); + assertMap(map, matchesMap().entry("status", 400).entry("error", matchesMap().extraOk().entry("type", "parsing_exception"))); + } + private Response sortByManyLongs(int count) throws IOException { logger.info("sorting by {} longs", count); return query(makeSortByManyLongs(count).toString(), null); @@ -318,6 +325,13 @@ public void testManyConcatFromRow() throws IOException { assertManyStrings(resp, strings); } + /** + * Fails to parse a huge huge query. + */ + public void testHugeHugeManyConcatFromRow() throws IOException { + assertParseFailure(() -> manyConcat("ROW a=9999, b=9999, c=9999, d=9999, e=9999", 50000)); + } + /** * Tests that generate many moderately long strings. */ @@ -378,6 +392,13 @@ public void testManyRepeatFromRow() throws IOException { assertManyStrings(resp, strings); } + /** + * Fails to parse a huge huge query. + */ + public void testHugeHugeManyRepeatFromRow() throws IOException { + assertParseFailure(() -> manyRepeat("ROW a = 99", 100000)); + } + /** * Tests that generate many moderately long strings. */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java index 620a25e0170ea..2e55b4df1e223 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java @@ -33,6 +33,15 @@ public class EsqlParser { private static final Logger log = LogManager.getLogger(EsqlParser.class); + /** + * Maximum number of characters in an ESQL query. Antlr may parse the entire + * query into tokens to make the choices, buffering the world. There's a lot we + * can do in the grammar to prevent that, but let's be paranoid and assume we'll + * fail at preventing antlr from slurping in the world. Instead, let's make sure + * that the world just isn't that big. + */ + public static final int MAX_LENGTH = 1_000_000; + private EsqlConfig config = new EsqlConfig(); public EsqlConfig config() { @@ -60,8 +69,14 @@ private T invokeParser( Function parseFunction, BiFunction result ) { + if (query.length() > MAX_LENGTH) { + throw new org.elasticsearch.xpack.esql.core.ParsingException( + "ESQL statement is too large [{} characters > {}]", + query.length(), + MAX_LENGTH + ); + } try { - // new CaseChangingCharStream() EsqlBaseLexer lexer = new EsqlBaseLexer(CharStreams.fromString(query)); lexer.removeErrorListeners(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/ParsingTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/ParsingTests.java index 3cafd42b731f6..68529e99c6b1b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/ParsingTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/ParsingTests.java @@ -103,6 +103,14 @@ public void testInlineCast() throws IOException { logger.info("Wrote to file: {}", file); } + public void testTooBigQuery() { + StringBuilder query = new StringBuilder("FROM foo | EVAL a = a"); + while (query.length() < EsqlParser.MAX_LENGTH) { + query.append(", a = CONCAT(a, a)"); + } + assertEquals("-1:0: ESQL statement is too large [1000011 characters > 1000000]", error(query.toString())); + } + private String functionName(EsqlFunctionRegistry registry, Expression functionCall) { for (FunctionDefinition def : registry.listFunctions()) { if (functionCall.getClass().equals(def.clazz())) {