Skip to content

Commit

Permalink
ESQL: tests for LOOKUP JOIN with non-unique join keys (elastic#118471)
Browse files Browse the repository at this point in the history
Add a CSV dataset and tests for `LOOKUP JOIN` where the join keys are
not unique. In particular, add tests that include multi-values (MVs) and
nulls to see how `LOOKUP JOIN` treats them.
  • Loading branch information
alex-spies authored Dec 13, 2024
1 parent a765f89 commit ccdea4a
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ public class CsvTestsDataLoader {
// Base "languages" dataset.
private static final TestsDataset LANGUAGES = new TestsDataset("languages");
// LANGUAGES re-registered under the index name "languages_lookup", with index settings
// taken from languages_lookup-settings.json.
private static final TestsDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup")
.withSetting("languages_lookup-settings.json");
// LANGUAGES_LOOKUP re-registered under the index name "languages_lookup_non_unique_key" and
// loaded from languages_non_unique_key.csv instead of the default languages data.
// NOTE(review): presumably withIndex/withData return modified copies that keep the remaining
// configuration (mapping, settings) of LANGUAGES_LOOKUP - confirm in TestsDataset.
private static final TestsDataset LANGUAGES_LOOKUP_NON_UNIQUE_KEY = LANGUAGES_LOOKUP.withIndex("languages_lookup_non_unique_key")
.withData("languages_non_unique_key.csv");
private static final TestsDataset ALERTS = new TestsDataset("alerts");
private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs");
private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data");
Expand Down Expand Up @@ -114,6 +116,7 @@ public class CsvTestsDataLoader {
Map.entry(APPS_SHORT.indexName, APPS_SHORT),
Map.entry(LANGUAGES.indexName, LANGUAGES),
Map.entry(LANGUAGES_LOOKUP.indexName, LANGUAGES_LOOKUP),
Map.entry(LANGUAGES_LOOKUP_NON_UNIQUE_KEY.indexName, LANGUAGES_LOOKUP_NON_UNIQUE_KEY),
Map.entry(UL_LOGS.indexName, UL_LOGS),
Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA),
Map.entry(MV_SAMPLE_DATA.indexName, MV_SAMPLE_DATA),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
language_code:integer,language_name:keyword,country:keyword
1,English,Canada
1,English,
1,,United Kingdom
1,English,United States of America
2,German,[Germany,Austria]
2,German,Switzerland
2,German,
4,Quenya,
5,,Atlantis
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// Reuses the sample dataset and commands from enrich.csv-spec
//

//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order)
basicOnTheDataNode
required_capability: join_lookup_v5

Expand Down Expand Up @@ -102,6 +101,83 @@ emp_no:integer | language_code:integer | language_name:keyword
10003 | 4 | German
;

// Non-unique join key on the left-hand side: language_code is derived as emp_no % 10, so
// several employees share the same key value (0, 1 or 2 after the second WHERE).
// Every left row is expected to be joined on its own; language_code 0 has no match in
// the expected output and therefore yields a null language_name.
// NOTE(review): the test name suggests the join runs on the data node (no SORT/LIMIT
// precedes LOOKUP JOIN) - confirm against the planner.
nonUniqueLeftKeyOnTheDataNode
required_capability: join_lookup_v5

FROM employees
| WHERE emp_no <= 10030
| EVAL language_code = emp_no % 10
| WHERE language_code < 3
| LOOKUP JOIN languages_lookup ON language_code
| SORT emp_no
| KEEP emp_no, language_code, language_name
;

emp_no:integer | language_code:integer | language_name:keyword
10001 |1 | English
10002 |2 | French
10010 |0 | null
10011 |1 | English
10012 |2 | French
10020 |0 | null
10021 |1 | English
10022 |2 | French
10030 |0 | null
;

// Non-unique join key on the right-hand side: languages_lookup_non_unique_key is joined
// on language_code = emp_no % 10, and only employees 10091..10095 are kept afterwards.
// The expected rows show the values of all matching lookup rows combined into one
// multi-value per column, with no null entries inside them; a column is null only when
// no matching row supplies a value (or, for code 3, when nothing matches at all).
// MV_SORT(country) sorts the multi-valued country column before comparison.
// NOTE(review): presumably needed because the order of joined values is not guaranteed,
// and the test name suggests the join runs on the data node - confirm both.
nonUniqueRightKeyOnTheDataNode
required_capability: join_lookup_v5

FROM employees
| EVAL language_code = emp_no % 10
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
| WHERE emp_no > 10090 AND emp_no < 10096
| SORT emp_no
| EVAL country = MV_SORT(country)
| KEEP emp_no, language_code, language_name, country
;

emp_no:integer | language_code:integer | language_name:keyword | country:keyword
10091 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America]
10092 | 2 | [German, German, German] | [Austria, Germany, Switzerland]
10093 | 3 | null | null
10094 | 4 | Quenya | null
10095 | 5 | null | Atlantis
;

// Same non-unique right-hand key scenario as the data-node variant, but SORT and LIMIT 5
// are applied before the join, so only employees 10001..10005 reach LOOKUP JOIN.
// The expected rows combine the values of all matching lookup rows into one multi-value
// per column; MV_SORT(country) sorts the country values before comparison.
// NOTE(review): the test name suggests the preceding SORT/LIMIT moves the join to the
// coordinator - confirm against the planner.
nonUniqueRightKeyOnTheCoordinator
required_capability: join_lookup_v5

FROM employees
| SORT emp_no
| LIMIT 5
| EVAL language_code = emp_no % 10
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
| EVAL country = MV_SORT(country)
| KEEP emp_no, language_code, language_name, country
;

emp_no:integer | language_code:integer | language_name:keyword | country:keyword
10001 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America]
10002 | 2 | [German, German, German] | [Austria, Germany, Switzerland]
10003 | 3 | null | null
10004 | 4 | Quenya | null
10005 | 5 | null | Atlantis
;

// Non-unique right-hand key with a ROW source: a single row with language_code = 2 is
// joined against languages_lookup_non_unique_key, and the expected output is one row
// whose language_name and country are multi-values built from the matching lookup rows.
// country.keyword is dropped so that only the three expected columns remain.
// NOTE(review): presumably country.keyword is a sub-field created by the index mapping
// for the lookup index - confirm.
nonUniqueRightKeyFromRow
required_capability: join_lookup_v5

ROW language_code = 2
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
| DROP country.keyword
| EVAL country = MV_SORT(country)
;

language_code:integer | language_name:keyword | country:keyword
2 | [German, German, German] | [Austria, Germany, Switzerland]
;

lookupIPFromRow
required_capability: join_lookup_v5

Expand Down

0 comments on commit ccdea4a

Please sign in to comment.