Skip to content

Commit

Permalink
ESQL: Drop null columns in text formats (elastic#117643)
Browse files Browse the repository at this point in the history
This PR resolves the issue where, despite setting `drop_null_columns=true`, columns that are entirely null are still returned when using `format=txt`, `format=csv`, or `format=tsv`.

Closes elastic#116848
  • Loading branch information
kanoshiou authored Dec 16, 2024
1 parent 2b8c494 commit 6d6eac2
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 31 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/117643.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 117643
summary: Drop null columns in text formats
area: ES|QL
type: bug
issues:
- 116848
Original file line number Diff line number Diff line change
Expand Up @@ -1119,7 +1119,7 @@ public void testAsyncGetWithoutContentType() throws IOException {
var json = entityToMap(entity, requestObject.contentType());
checkKeepOnCompletion(requestObject, json, true);
String id = (String) json.get("id");
// results won't be returned since keepOnCompletion is true
// results won't be returned because wait_for_completion is provided a very small interval
assertThat(id, is(not(emptyOrNullString())));

// issue an "async get" request with no Content-Type
Expand Down Expand Up @@ -1274,11 +1274,11 @@ static String runEsqlAsTextWithFormat(RequestObjectBuilder builder, String forma
switch (format) {
case "txt" -> assertThat(initialValue, emptyOrNullString());
case "csv" -> {
assertEquals(initialValue, "\r\n");
assertEquals("\r\n", initialValue);
initialValue = "";
}
case "tsv" -> {
assertEquals(initialValue, "\n");
assertEquals("\n", initialValue);
initialValue = "";
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ public Iterator<? extends ToXContent> toXContentChunked(ToXContent.Params params
});
}

private boolean[] nullColumns() {
public boolean[] nullColumns() {
boolean[] nullColumns = new boolean[columns.size()];
for (int c = 0; c < nullColumns.length; c++) {
nullColumns[c] = allColumnsAreNull(c);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ public enum TextFormat implements MediaType {
PLAIN_TEXT() {
@Override
public Iterator<CheckedConsumer<Writer, IOException>> format(RestRequest request, EsqlQueryResponse esqlResponse) {
return new TextFormatter(esqlResponse).format(hasHeader(request));
boolean dropNullColumns = request.paramAsBoolean(DROP_NULL_COLUMNS_OPTION, false);
return new TextFormatter(esqlResponse, hasHeader(request), dropNullColumns).format();
}

@Override
Expand Down Expand Up @@ -282,15 +283,21 @@ public Set<HeaderValue> headerValues() {
*/
public static final String URL_PARAM_FORMAT = "format";
public static final String URL_PARAM_DELIMITER = "delimiter";
public static final String DROP_NULL_COLUMNS_OPTION = "drop_null_columns";

public Iterator<CheckedConsumer<Writer, IOException>> format(RestRequest request, EsqlQueryResponse esqlResponse) {
final var delimiter = delimiter(request);
boolean dropNullColumns = request.paramAsBoolean(DROP_NULL_COLUMNS_OPTION, false);
boolean[] dropColumns = dropNullColumns ? esqlResponse.nullColumns() : new boolean[esqlResponse.columns().size()];
return Iterators.concat(
// if the header is requested return the info
hasHeader(request) && esqlResponse.columns() != null
? Iterators.single(writer -> row(writer, esqlResponse.columns().iterator(), ColumnInfo::name, delimiter))
? Iterators.single(writer -> row(writer, esqlResponse.columns().iterator(), ColumnInfo::name, delimiter, dropColumns))
: Collections.emptyIterator(),
Iterators.map(esqlResponse.values(), row -> writer -> row(writer, row, f -> Objects.toString(f, StringUtils.EMPTY), delimiter))
Iterators.map(
esqlResponse.values(),
row -> writer -> row(writer, row, f -> Objects.toString(f, StringUtils.EMPTY), delimiter, dropColumns)
)
);
}

Expand All @@ -313,9 +320,14 @@ public String contentType(RestRequest request) {
}

// utility method for consuming a row.
<F> void row(Writer writer, Iterator<F> row, Function<F, String> toString, Character delimiter) throws IOException {
<F> void row(Writer writer, Iterator<F> row, Function<F, String> toString, Character delimiter, boolean[] dropColumns)
throws IOException {
boolean firstColumn = true;
while (row.hasNext()) {
for (int i = 0; row.hasNext(); i++) {
if (dropColumns[i]) {
row.next();
continue;
}
if (firstColumn) {
firstColumn = false;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,17 @@ public class TextFormatter {
private final EsqlQueryResponse response;
private final int[] width;
private final Function<Object, String> FORMATTER = Objects::toString;
private final boolean includeHeader;
private final boolean[] dropColumns;

/**
* Create a new {@linkplain TextFormatter} for formatting responses.
* Create a new {@linkplain TextFormatter} for formatting responses
*/
public TextFormatter(EsqlQueryResponse response) {
public TextFormatter(EsqlQueryResponse response, boolean includeHeader, boolean dropNullColumns) {
this.response = response;
var columns = response.columns();
this.includeHeader = includeHeader;
this.dropColumns = dropNullColumns ? response.nullColumns() : new boolean[columns.size()];
// Figure out the column widths:
// 1. Start with the widths of the column names
width = new int[columns.size()];
Expand All @@ -58,19 +62,22 @@ public TextFormatter(EsqlQueryResponse response) {
}

/**
* Format the provided {@linkplain EsqlQueryResponse} optionally including the header lines.
* Format the provided {@linkplain EsqlQueryResponse}
*/
public Iterator<CheckedConsumer<Writer, IOException>> format(boolean includeHeader) {
public Iterator<CheckedConsumer<Writer, IOException>> format() {
return Iterators.concat(
// The header lines
includeHeader && response.columns().size() > 0 ? Iterators.single(this::formatHeader) : Collections.emptyIterator(),
includeHeader && response.columns().isEmpty() == false ? Iterators.single(this::formatHeader) : Collections.emptyIterator(),
// Now format the results.
formatResults()
);
}

private void formatHeader(Writer writer) throws IOException {
for (int i = 0; i < width.length; i++) {
if (dropColumns[i]) {
continue;
}
if (i > 0) {
writer.append('|');
}
Expand All @@ -86,6 +93,9 @@ private void formatHeader(Writer writer) throws IOException {
writer.append('\n');

for (int i = 0; i < width.length; i++) {
if (dropColumns[i]) {
continue;
}
if (i > 0) {
writer.append('+');
}
Expand All @@ -98,6 +108,10 @@ private Iterator<CheckedConsumer<Writer, IOException>> formatResults() {
return Iterators.map(response.values(), row -> writer -> {
for (int i = 0; i < width.length; i++) {
assert row.hasNext();
if (dropColumns[i]) {
row.next();
continue;
}
if (i > 0) {
writer.append('|');
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,17 +123,17 @@ public void testTsvFormatWithEmptyData() {
public void testCsvFormatWithRegularData() {
String text = format(CSV, req(), regularData());
assertEquals("""
string,number,location,location2\r
Along The River Bank,708,POINT (12.0 56.0),POINT (1234.0 5678.0)\r
Mind Train,280,POINT (-97.0 26.0),POINT (-9753.0 2611.0)\r
string,number,location,location2,null_field\r
Along The River Bank,708,POINT (12.0 56.0),POINT (1234.0 5678.0),\r
Mind Train,280,POINT (-97.0 26.0),POINT (-9753.0 2611.0),\r
""", text);
}

public void testCsvFormatNoHeaderWithRegularData() {
String text = format(CSV, reqWithParam("header", "absent"), regularData());
assertEquals("""
Along The River Bank,708,POINT (12.0 56.0),POINT (1234.0 5678.0)\r
Mind Train,280,POINT (-97.0 26.0),POINT (-9753.0 2611.0)\r
Along The River Bank,708,POINT (12.0 56.0),POINT (1234.0 5678.0),\r
Mind Train,280,POINT (-97.0 26.0),POINT (-9753.0 2611.0),\r
""", text);
}

Expand All @@ -146,14 +146,17 @@ public void testCsvFormatWithCustomDelimiterRegularData() {
"number",
"location",
"location2",
"null_field",
"Along The River Bank",
"708",
"POINT (12.0 56.0)",
"POINT (1234.0 5678.0)",
"",
"Mind Train",
"280",
"POINT (-97.0 26.0)",
"POINT (-9753.0 2611.0)"
"POINT (-9753.0 2611.0)",
""
);
List<String> expectedTerms = terms.stream()
.map(x -> x.contains(String.valueOf(delim)) ? '"' + x + '"' : x)
Expand All @@ -167,6 +170,8 @@ public void testCsvFormatWithCustomDelimiterRegularData() {
sb.append(expectedTerms.remove(0));
sb.append(delim);
sb.append(expectedTerms.remove(0));
sb.append(delim);
sb.append(expectedTerms.remove(0));
sb.append("\r\n");
} while (expectedTerms.size() > 0);
assertEquals(sb.toString(), text);
Expand All @@ -175,9 +180,9 @@ public void testCsvFormatWithCustomDelimiterRegularData() {
public void testTsvFormatWithRegularData() {
String text = format(TSV, req(), regularData());
assertEquals("""
string\tnumber\tlocation\tlocation2
Along The River Bank\t708\tPOINT (12.0 56.0)\tPOINT (1234.0 5678.0)
Mind Train\t280\tPOINT (-97.0 26.0)\tPOINT (-9753.0 2611.0)
string\tnumber\tlocation\tlocation2\tnull_field
Along The River Bank\t708\tPOINT (12.0 56.0)\tPOINT (1234.0 5678.0)\t
Mind Train\t280\tPOINT (-97.0 26.0)\tPOINT (-9753.0 2611.0)\t
""", text);
}

Expand Down Expand Up @@ -245,6 +250,24 @@ public void testPlainTextEmptyCursorWithoutColumns() {
);
}

public void testCsvFormatWithDropNullColumns() {
String text = format(CSV, reqWithParam("drop_null_columns", "true"), regularData());
assertEquals("""
string,number,location,location2\r
Along The River Bank,708,POINT (12.0 56.0),POINT (1234.0 5678.0)\r
Mind Train,280,POINT (-97.0 26.0),POINT (-9753.0 2611.0)\r
""", text);
}

public void testTsvFormatWithDropNullColumns() {
String text = format(TSV, reqWithParam("drop_null_columns", "true"), regularData());
assertEquals("""
string\tnumber\tlocation\tlocation2
Along The River Bank\t708\tPOINT (12.0 56.0)\tPOINT (1234.0 5678.0)
Mind Train\t280\tPOINT (-97.0 26.0)\tPOINT (-9753.0 2611.0)
""", text);
}

private static EsqlQueryResponse emptyData() {
return new EsqlQueryResponse(singletonList(new ColumnInfoImpl("name", "keyword")), emptyList(), null, false, false, null);
}
Expand All @@ -256,7 +279,8 @@ private static EsqlQueryResponse regularData() {
new ColumnInfoImpl("string", "keyword"),
new ColumnInfoImpl("number", "integer"),
new ColumnInfoImpl("location", "geo_point"),
new ColumnInfoImpl("location2", "cartesian_point")
new ColumnInfoImpl("location2", "cartesian_point"),
new ColumnInfoImpl("null_field", "keyword")
);

BytesRefArray geoPoints = new BytesRefArray(2, BigArrays.NON_RECYCLING_INSTANCE);
Expand All @@ -274,7 +298,8 @@ private static EsqlQueryResponse regularData() {
blockFactory.newBytesRefBlockBuilder(2)
.appendBytesRef(CARTESIAN.asWkb(new Point(1234, 5678)))
.appendBytesRef(CARTESIAN.asWkb(new Point(-9753, 2611)))
.build()
.build(),
blockFactory.newConstantNullBlock(2)
)
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,6 @@ public class TextFormatterTests extends ESTestCase {
new EsqlExecutionInfo(randomBoolean())
);

TextFormatter formatter = new TextFormatter(esqlResponse);

/**
* Tests for {@link TextFormatter#format} with header, values
* of exactly the minimum column size, column names of exactly
Expand All @@ -95,7 +93,7 @@ public class TextFormatterTests extends ESTestCase {
* column size.
*/
public void testFormatWithHeader() {
String[] result = getTextBodyContent(formatter.format(true)).split("\n");
String[] result = getTextBodyContent(new TextFormatter(esqlResponse, true, false).format()).split("\n");
assertThat(result, arrayWithSize(4));
assertEquals(
" foo | bar |15charwidename!| null_field1 |superduperwidename!!!| baz |"
Expand All @@ -119,6 +117,35 @@ public void testFormatWithHeader() {
);
}

/**
* Tests for {@link TextFormatter#format} with drop_null_columns and
* truncation of long columns.
*/
public void testFormatWithDropNullColumns() {
String[] result = getTextBodyContent(new TextFormatter(esqlResponse, true, true).format()).split("\n");
assertThat(result, arrayWithSize(4));
assertEquals(
" foo | bar |15charwidename!|superduperwidename!!!| baz |"
+ " date | location | location2 ",
result[0]
);
assertEquals(
"---------------+---------------+---------------+---------------------+---------------+-------"
+ "-----------------+------------------+----------------------",
result[1]
);
assertEquals(
"15charwidedata!|1 |6.888 |12.0 |rabbit |"
+ "1953-09-02T00:00:00.000Z|POINT (12.0 56.0) |POINT (1234.0 5678.0) ",
result[2]
);
assertEquals(
"dog |2 |123124.888 |9912.0 |goat |"
+ "2000-03-15T21:34:37.443Z|POINT (-97.0 26.0)|POINT (-9753.0 2611.0)",
result[3]
);
}

/**
* Tests for {@link TextFormatter#format} without header and
* truncation of long columns.
Expand Down Expand Up @@ -160,7 +187,7 @@ public void testFormatWithoutHeader() {
new EsqlExecutionInfo(randomBoolean())
);

String[] result = getTextBodyContent(new TextFormatter(response).format(false)).split("\n");
String[] result = getTextBodyContent(new TextFormatter(response, false, false).format()).split("\n");
assertThat(result, arrayWithSize(2));
assertEquals(
"doggie |4 |1.0 |null |77.0 |wombat |"
Expand Down Expand Up @@ -199,8 +226,10 @@ public void testVeryLongPadding() {
randomBoolean(),
randomBoolean(),
new EsqlExecutionInfo(randomBoolean())
)
).format(false)
),
false,
false
).format()
)
);
}
Expand Down

0 comments on commit 6d6eac2

Please sign in to comment.