Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

String functions support and more TLP oracles #20

Merged
merged 6 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions src/sqlancer/common/ast/newast/NewOrderingTerm.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package sqlancer.common.ast.newast;

import java.util.Optional;

import sqlancer.Randomly;

public class NewOrderingTerm<T> implements Node<T> {

private final Node<T> expr;
private final Ordering ordering;
private final Optional<OrderingNulls> orderingNullsOptional;

public enum Ordering {
ASC, DESC;
Expand All @@ -15,9 +18,36 @@ public static Ordering getRandom() {
}
}

/**
 * Placement of NULL values within an ordering term. {@link #toString()} yields the SQL keyword pair
 * ({@code NULLS FIRST} or {@code NULLS LAST}).
 */
public enum OrderingNulls {
    NULLS_FIRST("NULLS FIRST"), NULLS_LAST("NULLS LAST");

    // SQL text rendered for this constant.
    private final String sqlText;

    OrderingNulls(String sqlText) {
        this.sqlText = sqlText;
    }

    /** Picks one of the constants through {@code Randomly.fromOptions}. */
    public static OrderingNulls getRandom() {
        OrderingNulls[] candidates = values();
        return Randomly.fromOptions(candidates);
    }

    @Override
    public String toString() {
        return sqlText;
    }
}

/**
 * Creates an ordering term without a NULLS FIRST / NULLS LAST modifier.
 *
 * @param expr
 *            the expression to order by
 * @param ordering
 *            the sort direction
 */
public NewOrderingTerm(Node<T> expr, Ordering ordering) {
    this(expr, ordering, null);
}

/**
 * Creates an ordering term with an optional NULLS FIRST / NULLS LAST modifier.
 *
 * @param expr
 *            the expression to order by
 * @param ordering
 *            the sort direction
 * @param orderingNulls
 *            the NULL placement; {@code null} means "no modifier" and results in an empty Optional instead of a
 *            NullPointerException
 */
public NewOrderingTerm(Node<T> expr, Ordering ordering, OrderingNulls orderingNulls) {
    this.expr = expr;
    this.ordering = ordering;
    // ofNullable keeps a null argument from blowing up with a message-less NPE.
    this.orderingNullsOptional = Optional.ofNullable(orderingNulls);
}

public Node<T> getExpr() {
Expand All @@ -28,4 +58,7 @@ public Ordering getOrdering() {
return ordering;
}

/**
 * Returns the NULLS FIRST / NULLS LAST modifier of this term.
 *
 * @return the modifier, or an empty Optional when the term was constructed without one
 */
public Optional<OrderingNulls> getOrderingNullsOptional() {
return orderingNullsOptional;
}
}
4 changes: 4 additions & 0 deletions src/sqlancer/common/ast/newast/NewToStringVisitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ public void visit(NewOrderingTerm<E> ordering) {
visit(ordering.getExpr());
sb.append(" ");
sb.append(ordering.getOrdering());
if (ordering.getOrderingNullsOptional().isPresent()) {
sb.append(" ");
sb.append(ordering.getOrderingNullsOptional().get());
}
}

public void visit(NewCaseOperatorNode<E> op) {
Expand Down
23 changes: 15 additions & 8 deletions src/sqlancer/datafusion/DataFusionErrors.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import static sqlancer.datafusion.DataFusionUtil.dfAssert;

import java.util.regex.Pattern;

import sqlancer.common.query.ExpectedErrors;

public final class DataFusionErrors {
Expand Down Expand Up @@ -32,23 +30,32 @@ public static void registerExpectedExecutionErrors(ExpectedErrors errors) {
errors.add("Divide by zero");
errors.add("Sort requires at least one column");
errors.add("The data type type Null has no natural order");
errors.add("Regular expression did not compile");
errors.add("Cannot cast value");
errors.add("regex parse error");
errors.add("Invalid string operation: List"); // select [1,2] like null;
errors.add("Unsupported CAST from List"); // not sure

/*
* Known bugs
*/
errors.add("to type Int64"); // https://github.com/apache/datafusion/issues/11249
errors.add("to type Int"); // https://github.com/apache/datafusion/issues/11249
errors.add("bitwise"); // https://github.com/apache/datafusion/issues/11260
errors.add(" Not all InterleaveExec children have a consistent hash partitioning."); // https://github.com/apache/datafusion/issues/11409
Pattern pattern = Pattern.compile("ORDER BY.*LOG", Pattern.CASE_INSENSITIVE);
errors.addRegex(pattern); // https://github.com/apache/datafusion/issues/11549
Pattern patternTriaFunc = Pattern.compile("ORDER BY.*\\b(ACOS|ACOSH|ASIN|ATANH)\\b", Pattern.CASE_INSENSITIVE);
errors.addRegex(patternTriaFunc); // https://github.com/apache/datafusion/issues/11552
errors.add("Sort expressions cannot be empty for streaming merge."); // https://github.com/apache/datafusion/issues/11561
errors.add("compute_utf8_flag_op_scalar failed to cast literal value NULL for operation"); // https://github.com/apache/datafusion/issues/11623
errors.add("Schema error: No field named"); // https://github.com/apache/datafusion/issues/11635

/*
* False positives
*/
errors.add("Cannot cast string"); // ifnull() is passed two non-compattable type and caused execution error
errors.add("Physical plan does not support logical expression AggregateFunction"); // False positive: when aggr
// is generated in where
// clause
/*
* Not critical, report later
*/
errors.add("does not match with the projection expression");
errors.add("invalid operator for nested");
}
}
18 changes: 17 additions & 1 deletion src/sqlancer/datafusion/DataFusionOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import sqlancer.datafusion.DataFusionOptions.DataFusionOracleFactory;
import sqlancer.datafusion.DataFusionProvider.DataFusionGlobalState;
import sqlancer.datafusion.test.DataFusionNoRECOracle;
import sqlancer.datafusion.test.DataFusionQueryPartitioningAggrTester;
import sqlancer.datafusion.test.DataFusionQueryPartitioningHavingTester;
import sqlancer.datafusion.test.DataFusionQueryPartitioningWhereTester;

@Parameters(commandDescription = "DataFusion")
Expand All @@ -22,7 +24,9 @@ public class DataFusionOptions implements DBMSSpecificOptions<DataFusionOracleFa

@Override
public List<DataFusionOracleFactory> getTestOracleFactory() {
return Arrays.asList(DataFusionOracleFactory.NOREC, DataFusionOracleFactory.QUERY_PARTITIONING_WHERE);
return Arrays.asList(DataFusionOracleFactory.NOREC, DataFusionOracleFactory.QUERY_PARTITIONING_WHERE
/* DataFusionOracleFactory.QUERY_PARTITIONING_AGGREGATE */
/* , DataFusionOracleFactory.QUERY_PARTITIONING_HAVING */);
}

public enum DataFusionOracleFactory implements OracleFactory<DataFusionGlobalState> {
Expand All @@ -37,6 +41,18 @@ public TestOracle<DataFusionGlobalState> create(DataFusionGlobalState globalStat
public TestOracle<DataFusionGlobalState> create(DataFusionGlobalState globalState) throws SQLException {
return new DataFusionQueryPartitioningWhereTester(globalState);
}
},
QUERY_PARTITIONING_HAVING {
// Creates a DataFusionQueryPartitioningHavingTester for the given global state.
@Override
public TestOracle<DataFusionGlobalState> create(DataFusionGlobalState globalState) throws SQLException {
return new DataFusionQueryPartitioningHavingTester(globalState);
}
},
QUERY_PARTITIONING_AGGREGATE {
// Creates a DataFusionQueryPartitioningAggrTester for the given global state.
@Override
public TestOracle<DataFusionGlobalState> create(DataFusionGlobalState globalState) throws SQLException {
return new DataFusionQueryPartitioningAggrTester(globalState);
}
}
}

Expand Down
16 changes: 13 additions & 3 deletions src/sqlancer/datafusion/DataFusionSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,14 @@ private static List<DataFusionColumn> getTableColumns(SQLConnection con, String

/*
* When adding a new type: 1. Update all methods inside this enum 2. Update all `DataFusionBaseExpr`'s signature, if
* it can support new type (in `DataFusionBaseExprFactory.java`
* it can support new type (in `DataFusionBaseExprFactory.java`)
*
* Types are 'SQL DataType' in DataFusion's documentation
* https://datafusion.apache.org/user-guide/sql/data_types.html
*/
public enum DataFusionDataType {

BIGINT, DOUBLE, BOOLEAN, NULL;
STRING, BIGINT, DOUBLE, BOOLEAN, NULL;

public static DataFusionDataType getRandomWithoutNull() {
DataFusionDataType dt;
Expand All @@ -102,6 +102,10 @@ public static DataFusionDataType getRandomWithoutNull() {
return dt;
}

/**
 * Tells whether this type is one of the numeric types.
 *
 * @return {@code true} for {@code BIGINT} and {@code DOUBLE}, {@code false} for every other type
 */
public boolean isNumeric() {
    switch (this) {
    case BIGINT:
    case DOUBLE:
        return true;
    default:
        return false;
    }
}

// How to parse type in DataFusion's catalog to `DataFusionDataType`
// As displayed in:
// create table t1(v1 int, v2 bigint);
Expand All @@ -114,6 +118,8 @@ public static DataFusionDataType parseFromDataFusionCatalog(String typeString) {
return DataFusionDataType.DOUBLE;
case "Boolean":
return DataFusionDataType.BOOLEAN;
case "Utf8":
return DataFusionDataType.STRING;
default:
dfAssert(false, "Unreachable. All branches should be eovered");
}
Expand All @@ -129,7 +135,9 @@ public Node<DataFusionExpression> getRandomConstant(DataFusionGlobalState state)
}
switch (this) {
case BIGINT:
return DataFusionConstant.createIntConstant(state.getRandomly().getInteger());
long randInt = Randomly.getBoolean() ? state.getRandomly().getInteger()
: state.getRandomly().getInteger(-5, 5);
return DataFusionConstant.createIntConstant(randInt);
case BOOLEAN:
return new DataFusionConstant.DataFusionBooleanConstant(Randomly.getBoolean());
case DOUBLE:
Expand All @@ -147,6 +155,8 @@ public Node<DataFusionExpression> getRandomConstant(DataFusionGlobalState state)
return new DataFusionConstant.DataFusionDoubleConstant(state.getRandomly().getDouble());
case NULL:
return DataFusionConstant.createNullConstant();
case STRING:
return new DataFusionConstant.DataFusionStringConstant(state.getRandomly().getString());
default:
dfAssert(false, "Unreachable. All branches should be eovered");
}
Expand Down
6 changes: 6 additions & 0 deletions src/sqlancer/datafusion/DataFusionToStringVisitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@ private void visit(DataFusionConstant constant) {

private void visit(DataFusionSelect select) {
sb.append("SELECT ");
if (select.all && !select.distinct) {
sb.append("ALL ");
}
if (select.distinct) {
sb.append("DISTINCT ");
}
if (select.fetchColumnsString.isPresent()) {
sb.append(select.fetchColumnsString.get());
} else {
Expand Down
33 changes: 28 additions & 5 deletions src/sqlancer/datafusion/DataFusionUtil.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package sqlancer.datafusion;

import static java.lang.System.exit;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
Expand Down Expand Up @@ -67,12 +69,12 @@ public static String displayTables(DataFusionGlobalState state, List<String> fro
/**
 * Checks an internal invariant and raises an {@link AssertionError} carrying {@code message} when it does not hold.
 *
 * During development you can opt into fail-fast behavior by starting the JVM with
 * {@code -Dsqlancer.datafusion.failfast=true}: the failure is then printed to stderr together with the calling
 * method's name and the process exits with status 1. Fail-fast is off by default so that a failed check does not
 * terminate the whole JVM.
 *
 * @param condition
 *            the invariant that is expected to be true
 * @param message
 *            description used as the error message when the invariant is violated
 *
 * @throws AssertionError
 *             if {@code condition} is false (and fail-fast mode is not enabled)
 */
public static void dfAssert(boolean condition, String message) {
    if (condition) {
        return;
    }

    if (Boolean.getBoolean("sqlancer.datafusion.failfast")) {
        // Development mode assertion failure; frame [2] is the method that called dfAssert
        String methodName = Thread.currentThread().getStackTrace()[2].getMethodName();
        System.err.println("DataFusion assertion failed in function '" + methodName + "': " + message);
        exit(1);
    }

    throw new AssertionError(message);
}

Expand Down Expand Up @@ -187,4 +189,25 @@ public enum DataFusionLogType {
ERROR, DML, SELECT
}
}

/**
 * Normalizes one result-set cell for comparison. Only used in TLP-Having.
 *
 * Negative zero ({@code "-0.0"}, {@code "-0"}) is mapped to its unsigned form, and any other value is cut down to
 * the longest prefix whose UTF-8 encoding fits in 7 bytes (presumably so that values differing only in trailing
 * digits compare equal -- confirm against the caller). The cut is made on a code-point boundary and does not depend
 * on the platform default charset, so a multi-byte character is never split into a replacement character.
 *
 * @param value
 *            the cell text, may be {@code null}
 *
 * @return the normalized text, or {@code null} when {@code value} is {@code null}
 */
public static String cleanResultSetString(String value) {
    if (value == null) {
        return null;
    }

    switch (value) {
    case "-0.0":
        return "0.0";
    case "-0":
        return "0";
    default:
        break;
    }

    final int maxBytes = 7;
    int usedBytes = 0;
    int end = 0;
    while (end < value.length()) {
        int codePoint = value.codePointAt(end);
        // Width of this code point in UTF-8
        int width;
        if (codePoint < 0x80) {
            width = 1;
        } else if (codePoint < 0x800) {
            width = 2;
        } else if (codePoint < 0x10000) {
            width = 3;
        } else {
            width = 4;
        }
        if (usedBytes + width > maxBytes) {
            // The next character would exceed the budget: stop before it instead of splitting it
            return value.substring(0, end);
        }
        usedBytes += width;
        end += Character.charCount(codePoint);
    }

    return value;
}
}
38 changes: 38 additions & 0 deletions src/sqlancer/datafusion/ast/DataFusionConstant.java
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,42 @@ public String toString() {

}

/**
 * String constant. The stored text has single quotes doubled and unpaired surrogates removed; {@link #toString()}
 * wraps it in single quotes.
 */
public static class DataFusionStringConstant extends DataFusionConstant {
    private final String value;

    /**
     * Removes every unpaired UTF-16 surrogate from {@code input}; valid surrogate pairs and all other characters are
     * kept in order.
     *
     * @param input
     *            the text to sanitize
     *
     * @return the text without lone surrogates
     */
    public static String cleanString(String input) {
        StringBuilder cleaned = new StringBuilder();
        int pos = 0;
        while (pos < input.length()) {
            // codePointAt merges a valid high+low pair into one supplementary code point and
            // returns a lone surrogate unchanged
            int codePoint = input.codePointAt(pos);
            boolean loneSurrogate = codePoint <= Character.MAX_VALUE && Character.isSurrogate((char) codePoint);
            if (!loneSurrogate) {
                cleaned.appendCodePoint(codePoint);
            }
            pos += Character.charCount(codePoint);
        }
        return cleaned.toString();
    }

    /**
     * @param value
     *            raw text; single quotes are doubled first, then invalid UTF-16 is stripped
     */
    public DataFusionStringConstant(String value) {
        String escaped = value.replace("'", "''");
        // cleanup invalid Utf8
        this.value = cleanString(escaped);
    }

    /** Returns the stored text, i.e. already quote-doubled and sanitized, without the enclosing quotes. */
    public String getValue() {
        return value;
    }

    @Override
    public String toString() {
        StringBuilder literal = new StringBuilder(value.length() + 2);
        literal.append('\'').append(value).append('\'');
        return literal.toString();
    }

}
}
1 change: 0 additions & 1 deletion src/sqlancer/datafusion/ast/DataFusionExpression.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
package sqlancer.datafusion.ast;

/**
 * Marker interface for DataFusion AST expressions; it declares no members.
 */
public interface DataFusionExpression {

}
37 changes: 32 additions & 5 deletions src/sqlancer/datafusion/ast/DataFusionSelect.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,27 @@
import sqlancer.datafusion.gen.DataFusionExpressionGenerator;

public class DataFusionSelect extends SelectBase<Node<DataFusionExpression>> implements Node<DataFusionExpression> {
public boolean all; // SELECT ALL
public boolean distinct; // SELECT DISTINCT
public Optional<String> fetchColumnsString = Optional.empty(); // When available, override `fetchColumns` in base
// class's `Node` representation (for display)

// `from` is used to represent from table list and join clause
// `fromList` and `joinList` in base class should always be empty
public DataFusionFrom from;
public DataFusionExpressionGenerator exprGen;
// e.g. let's say all columns are {c1, c2, c3, c4, c5}
// First randomly pick a subset say {c2, c1, c3, c4}
// `exprGenAll` can generate random expr using above 4 columns
//
// Next, randomly take two non-overlapping subset from all columns used by `exprGenAll`
// exprGenGroupBy: {c1} (randomly generate group by exprs using c1 only)
// exprGenAggregate: {c3, c4}
//
// Finally, use all `Gen`s to generate different clauses in a query (`exprGenAll` in where clause, `exprGenGroupBy`
// in group by clause, etc.)
public DataFusionExpressionGenerator exprGenAll;
public DataFusionExpressionGenerator exprGenGroupBy;
public DataFusionExpressionGenerator exprGenAggregate;

public enum JoinType {
INNER, LEFT, RIGHT, FULL, CROSS, NATURAL
Expand Down Expand Up @@ -145,6 +159,9 @@ public static DataFusionFrom generateFromClause(DataFusionGlobalState state,
// - [expr_aggr_cols] SUM(t3.v1 + t2.v1)
public static DataFusionSelect getRandomSelect(DataFusionGlobalState state) {
DataFusionSelect randomSelect = new DataFusionSelect();
if (Randomly.getBooleanWithRatherLowProbability()) {
randomSelect.all = true;
}

/* Setup FROM clause */
DataFusionSchema schema = state.getSchema(); // schema of all tables
Expand All @@ -156,14 +173,24 @@ public static DataFusionSelect getRandomSelect(DataFusionGlobalState state) {
}
DataFusionFrom randomFrom = DataFusionFrom.generateFromClause(state, randomTables);

/* Setup expression generators (to generate different clauses) */
List<DataFusionColumn> randomColumnsAll = DataFusionTable.getRandomColumns(randomTables);
// 0 <= splitPoint1 <= splitPoint2 < randomColumnsAll.size()
int splitPoint1 = state.getRandomly().getInteger(0, randomColumnsAll.size());
int splitPoint2 = state.getRandomly().getInteger(splitPoint1, randomColumnsAll.size());

randomSelect.exprGenAll = new DataFusionExpressionGenerator(state).setColumns(randomColumnsAll);
randomSelect.exprGenGroupBy = new DataFusionExpressionGenerator(state)
.setColumns(randomColumnsAll.subList(0, splitPoint1));
randomSelect.exprGenAggregate = new DataFusionExpressionGenerator(state)
.setColumns(randomColumnsAll.subList(splitPoint1, splitPoint2));

/* Setup WHERE clause */
List<DataFusionColumn> randomColumns = DataFusionTable.getRandomColumns(randomTables);
randomSelect.exprGen = new DataFusionExpressionGenerator(state).setColumns(randomColumns);
Node<DataFusionExpression> whereExpr = randomSelect.exprGen
Node<DataFusionExpression> whereExpr = randomSelect.exprGenAll
.generateExpression(DataFusionSchema.DataFusionDataType.BOOLEAN);

/* Constructing result */
List<Node<DataFusionExpression>> randomColumnNodes = randomColumns.stream()
List<Node<DataFusionExpression>> randomColumnNodes = randomColumnsAll.stream()
.map((c) -> new ColumnReferenceNode<DataFusionExpression, DataFusionColumn>(c))
.collect(Collectors.toList());

Expand Down
Loading
Loading