Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Union query result should work in JDBC format #2757

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,103 @@ public void selectFunctionAsFieldTest() throws IOException {
Assert.assertEquals(1, headers.size());
}

@Test
public void unionTest() throws IOException {
String query =
String.format(
Locale.ROOT,
"SELECT firstname, lastname FROM %s LIMIT 3 "
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does UNION works for query with aggregation?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, UNION with Agg in legacy engine has bugs by my testing whatever format set. This fixing doesn't cover all existed bugs. You can try below queries.

POST _plugins/_sql?format=csv
{
  "query": "select customer_gender,count(type) from opensearch_dashboards_sample_data_ecommerce group by customer_gender UNION select customer_gender,count(type) from opensearch_dashboards_sample_data_ecommerce group by customer_gender"
}
POST _plugins/_sql?format=csv
{
  "query": "select count(type) from opensearch_dashboards_sample_data_ecommerce group by customer_gender UNION select count(type) from opensearch_dashboards_sample_data_ecommerce group by customer_gender"
}

May be we could open another ticket to fix the problems in Union with Agg? Or migrating to v2?

Copy link
Member Author

@LantaoJin LantaoJin Jun 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And seems current implementation doesn't distinguish UNION and UNION ALL. A better way is to re-implement UNION in v2, PPL also requests multi-search implementation.
Again, this fixing is just for addressing the NPE problem of UNION query in JDBC format (such a common use case).

+ "UNION ALL SELECT firstname, lastname FROM %s LIMIT 3",
TestsConstants.TEST_INDEX_ACCOUNT,
TestsConstants.TEST_INDEX_ACCOUNT);
CSVResult result = executeCsvRequest(query, false, false, false);
List<String> headers = result.getHeaders();
Assert.assertEquals(2, headers.size());
Assert.assertTrue(headers.contains("firstname"));
Assert.assertTrue(headers.contains("lastname"));

List<String> lines = result.getLines();
Assert.assertEquals(6, lines.size());
assertEquals(lines.get(0), "Amber,Duke");
assertEquals(lines.get(1), "Hattie,Bond");
assertEquals(lines.get(2), "Nanette,Bates");
assertEquals(lines.get(0), "Amber,Duke");
assertEquals(lines.get(1), "Hattie,Bond");
assertEquals(lines.get(2), "Nanette,Bates");
}

@Test
public void unionWithAliasLeftTest() throws IOException {
String query =
String.format(
Locale.ROOT,
"SELECT lastname AS firstname FROM %s LIMIT 3 "
+ "UNION ALL SELECT firstname FROM %s LIMIT 3",
TestsConstants.TEST_INDEX_ACCOUNT,
TestsConstants.TEST_INDEX_ACCOUNT);
CSVResult result = executeCsvRequest(query, false, false, false);
List<String> headers = result.getHeaders();
Assert.assertEquals(1, headers.size());
Assert.assertTrue(headers.contains("firstname"));

List<String> lines = result.getLines();
Assert.assertEquals(6, lines.size());
assertEquals(lines.get(0), "Duke");
assertEquals(lines.get(1), "Bond");
assertEquals(lines.get(2), "Bates");
assertEquals(lines.get(3), "Amber");
assertEquals(lines.get(4), "Hattie");
assertEquals(lines.get(5), "Nanette");
}

@Test
public void unionWithAliasRightTest() throws IOException {
String query =
String.format(
Locale.ROOT,
"SELECT firstname FROM %s LIMIT 3 "
+ "UNION ALL SELECT lastname AS firstname FROM %s LIMIT 3",
TestsConstants.TEST_INDEX_ACCOUNT,
TestsConstants.TEST_INDEX_ACCOUNT);
CSVResult result = executeCsvRequest(query, false, false, false);
List<String> headers = result.getHeaders();
Assert.assertEquals(1, headers.size());
Assert.assertTrue(headers.contains("firstname"));

List<String> lines = result.getLines();
Assert.assertEquals(6, lines.size());
assertEquals(lines.get(0), "Amber");
assertEquals(lines.get(1), "Hattie");
assertEquals(lines.get(2), "Nanette");
assertEquals(lines.get(3), "Duke");
assertEquals(lines.get(4), "Bond");
assertEquals(lines.get(5), "Bates");
}

@Test
public void unionWithAliasBothSideTest() throws IOException {
String query =
String.format(
Locale.ROOT,
"SELECT firstname AS name FROM %s LIMIT 3 "
+ "UNION ALL SELECT lastname AS name FROM %s LIMIT 3",
TestsConstants.TEST_INDEX_ACCOUNT,
TestsConstants.TEST_INDEX_ACCOUNT);
CSVResult result = executeCsvRequest(query, false, false, false);
List<String> headers = result.getHeaders();
Assert.assertEquals(1, headers.size());
Assert.assertTrue(headers.contains("name"));

List<String> lines = result.getLines();
Assert.assertEquals(6, lines.size());
assertEquals(lines.get(0), "Amber");
assertEquals(lines.get(1), "Hattie");
assertEquals(lines.get(2), "Nanette");
assertEquals(lines.get(3), "Duke");
assertEquals(lines.get(4), "Bond");
assertEquals(lines.get(5), "Bates");
}

private void verifyFieldOrder(final String[] expectedFields) throws IOException {

final String fields = String.join(", ", expectedFields);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,31 @@ public void unionAllSameRequestOnlyOneRecordTwice() throws IOException {
}

@Test
public void unionAllOnlyOneRecordEachWithAlias() throws IOException {
public void unionAllOnlyOneRecordWithAliasLeft() throws IOException {
String query =
String.format(
"SELECT firstname as dog_name FROM %s WHERE firstname = 'Amber' "
+ "UNION ALL "
+ "SELECT dog_name FROM %s WHERE dog_name = 'rex'",
TestsConstants.TEST_INDEX_ACCOUNT, TestsConstants.TEST_INDEX_DOG);

JSONObject response = executeQuery(query);
assertThat(getHits(response).length(), equalTo(2));

Set<String> names = new HashSet<>();
JSONArray hits = getHits(response);
for (int i = 0; i < hits.length(); i++) {
JSONObject hit = hits.getJSONObject(i);
JSONObject source = getSource(hit);

names.add(source.getString("dog_name"));
}

assertThat(names, hasItems("Amber", "rex"));
}

@Test
public void unionAllOnlyOneRecordWithAliasRight() throws IOException {
String query =
String.format(
"SELECT firstname FROM %s WHERE firstname = 'Amber' "
Expand All @@ -81,6 +105,30 @@ public void unionAllOnlyOneRecordEachWithAlias() throws IOException {
assertThat(names, hasItems("Amber", "rex"));
}

@Test
public void unionAllOnlyOneRecordWithAliasBothSide() throws IOException {
String query =
String.format(
"SELECT firstname AS name FROM %s WHERE firstname = 'Amber' "
+ "UNION ALL "
+ "SELECT dog_name AS name FROM %s WHERE dog_name = 'rex'",
TestsConstants.TEST_INDEX_ACCOUNT, TestsConstants.TEST_INDEX_DOG);

JSONObject response = executeQuery(query);
assertThat(getHits(response).length(), equalTo(2));

Set<String> names = new HashSet<>();
JSONArray hits = getHits(response);
for (int i = 0; i < hits.length(); i++) {
JSONObject hit = hits.getJSONObject(i);
JSONObject source = getSource(hit);

names.add(source.getString("name"));
}

assertThat(names, hasItems("Amber", "rex"));
}

@Test
public void unionAllOnlyOneRecordEachWithComplexAlias() throws IOException {
String query =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,80 @@ public void fieldOrderOther() throws IOException {
testFieldOrder(expectedFields, expectedValues);
}

@Test
public void unionQuery() throws IOException {
JSONObject response =
executeQuery(
String.format(
Locale.ROOT,
"SELECT firstname, lastname FROM %s "
+ "UNION ALL SELECT firstname, lastname FROM %s",
TestsConstants.TEST_INDEX_ACCOUNT,
TestsConstants.TEST_INDEX_ACCOUNT));

List<String> fields = Arrays.asList("firstname", "lastname");
JSONArray dataRows = getDataRows(response);
assertContainsColumns(getSchema(response), fields);
assertContainsData(dataRows, fields);
}

@Test
public void unionQueryWithAliasLeft() throws IOException {
JSONObject response =
executeQuery(
String.format(
Locale.ROOT,
"SELECT lastname AS firstname FROM %s UNION ALL SELECT firstname FROM %s",
TestsConstants.TEST_INDEX_ACCOUNT,
TestsConstants.TEST_INDEX_ACCOUNT));
List<String> fields = List.of("lastname");
Map<String, String> aliases = new HashMap<>();
aliases.put("lastname", "firstname");
JSONArray schema = getSchema(response);
JSONArray dataRows = getDataRows(response);
assertContainsColumns(schema, fields);
assertContainsAliases(schema, aliases);
assertContainsData(dataRows, fields);
}

@Test
public void unionQueryWithAliasRight() throws IOException {
JSONObject response =
executeQuery(
String.format(
Locale.ROOT,
"SELECT firstname FROM %s UNION ALL SELECT lastname AS firstname FROM %s",
TestsConstants.TEST_INDEX_ACCOUNT,
TestsConstants.TEST_INDEX_ACCOUNT));
List<String> fields = List.of("firstname");
JSONArray schema = getSchema(response);
JSONArray dataRows = getDataRows(response);
assertContainsColumns(schema, fields);
// Query schema uses first subquery schema, so alias in second subquery doesn't count in.
assertNoAlias(schema);
assertContainsData(dataRows, fields);
}

@Test
public void unionQueryWithAliasBothSide() throws IOException {
JSONObject response =
executeQuery(
String.format(
Locale.ROOT,
"SELECT firstname AS name FROM %s UNION ALL SELECT lastname AS name FROM %s",
TestsConstants.TEST_INDEX_ACCOUNT,
TestsConstants.TEST_INDEX_ACCOUNT));
List<String> fields = List.of("firstname");
Map<String, String> aliases = new HashMap<>();
aliases.put("firstname", "name");
aliases.put("lastname", "name");
JSONArray schema = getSchema(response);
JSONArray dataRows = getDataRows(response);
assertContainsColumns(schema, fields);
assertContainsAliases(schema, aliases);
assertContainsData(dataRows, fields);
}

private void testFieldOrder(final String[] expectedFields, final Object[] expectedValues)
throws IOException {

Expand Down Expand Up @@ -644,6 +718,13 @@ private void assertContainsAliases(JSONArray schema, Map<String, String> aliases
}
}

private void assertNoAlias(JSONArray schema) {
for (int i = 0; i < schema.length(); i++) {
JSONObject column = schema.getJSONObject(i);
assertFalse(column.has("alias"));
}
}

private void assertContainsData(JSONArray dataRows, Collection<String> fields) {
assertThat(dataRows.length(), greaterThan(0));
JSONArray row = dataRows.getJSONArray(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ public class Select extends Query {
private List<SubQueryExpression> subQueries;
private boolean selectAll = false;
private JoinType nestedJoinType = JoinType.COMMA;
private boolean partOfUnion = false;

public boolean isQuery = false;
public boolean isAggregate = false;
Expand Down Expand Up @@ -187,4 +188,13 @@ public boolean isOrderdSelect() {
public boolean isSelectAll() {
return selectAll;
}

public void setPartOfUnion(boolean partOfUnion) {
this.partOfUnion = partOfUnion;
}

/** Return true is this SELECT is used in UNION */
public boolean isPartOfUnion() {
return partOfUnion;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.legacy.domain;

import lombok.Getter;

@Getter
public class Union extends Query {
private final Select firstTable;
private final Select secondTable;

public Union(Select firstTable, Select secondTable) {
this.firstTable = firstTable;
this.secondTable = secondTable;
this.firstTable.setPartOfUnion(true);
this.secondTable.setPartOfUnion(true);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import org.opensearch.sql.legacy.domain.IndexStatement;

public class Schema implements Iterable<Schema.Column> {

private String indexName;
private List<Column> columns;

Expand Down Expand Up @@ -44,7 +43,7 @@ public String getIndexName() {
}

public List<String> getHeaders() {
return columns.stream().map(column -> column.getName()).collect(Collectors.toList());
return columns.stream().map(Column::getIdentifier).collect(Collectors.toList());
}

public List<Column> getColumns() {
Expand Down Expand Up @@ -166,5 +165,21 @@ public String getIdentifier() {
public Type getEnumType() {
return type;
}

@Override
public String toString() {
return "Column{"
+ "name='"
+ name
+ '\''
+ ", alias='"
+ alias
+ '\''
+ ", type="
+ type
+ ", identifiedByAlias="
+ identifiedByAlias
+ '}';
}
}
}
Loading
Loading