Skip to content

Commit

Permalink
Legend SQL - assortment of changes (finos#2532)
Browse files Browse the repository at this point in the history
* LegendSQL - assortment of changes
 - support aggregations over multiple columns within a single expression
 - realias relations to ensure uniqueness across the query

* SQLGrammarComposer - fix AllColumns
  • Loading branch information
gs-jp1 authored Jan 10, 2024
1 parent 088fea0 commit 90979aa
Show file tree
Hide file tree
Showing 9 changed files with 904 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public String visit(AliasedRelation val)
@Override
public String visit(AllColumns val)
{
return "*";
return val.prefix != null ? val.prefix + ".*" : "*";
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ public void testEmptyStatement()
public void testSelectStar()
{
// Unqualified wildcard select.
check("SELECT * FROM myTable");
// Table-qualified wildcard select (the "<prefix>.*" form).
check("SELECT myTable.* FROM myTable");
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,13 +311,13 @@ function <<access.private>> meta::external::query::sql::transformation::queryToP
^$final(aliases = $aliases);
}

function <<access.private>> meta::external::query::sql::transformation::queryToPure::extractAggregatesFromExpression(expression:meta::external::query::sql::metamodel::Expression[0..1]):meta::external::query::sql::metamodel::Expression[*]
function <<access.private>> meta::external::query::sql::transformation::queryToPure::extractAggregatesFromExpression(expression:meta::external::query::sql::metamodel::Expression[0..1]):FunctionCall[*]
{

walk($expression,
t | $t,
f:FunctionCall[1] | if (isExpressionAggregate($f, false), | $f, | $f.arguments->map(a | $a->extractAggregatesFromExpression()))
)
)->cast(@FunctionCall);
}

function <<access.private>> meta::external::query::sql::transformation::queryToPure::extractUsedColumnNames(expression:meta::external::query::sql::metamodel::Expression[*]):String[*]
Expand Down Expand Up @@ -493,55 +493,67 @@ function <<access.private>> meta::external::query::sql::transformation::queryToP
!extractNameFromExpression($h.expression, $context)->in($aggregateExpressionNames);
))->removeDuplicatesBy(c | $c->extractNameFromSingleColumn($context));

let aggregations = $allAggregateColumns->map(column |
let name = extractNameFromSingleColumn($column, $context);

let aggregateExpression = extractAggregatesFromExpression($column.expression);

let functionCalls = $aggregateExpression->match([
f:FunctionCall[1] | ensureAggregateFunction($f),
f:FunctionCall[1..*] |
// we can only support multiple aggs being used within a single expression if they use the case arguments
//e.g. case when sum(col) > 10 then max(col) else min(col) end
//to support more complex cases we need to extract the aggregates into separate columns
assertEquals(1, $f.arguments->removeDuplicates()->size(), 'unsupported aggregation');

$f->map(fc | ensureAggregateFunction($fc));,
e:meta::external::query::sql::metamodel::Expression[*] | fail('invalid aggregate expression');
])->cast(@FunctionCall);

// we take the first as the map only cares about the argument and we ensure they are the same above
let aggregationFunctionCall = $functionCalls->at(0);
let processor = aggregateProcessor($aggregationFunctionCall.name);
let adjusted = $processor.preProcess($column.expression, $aggregationFunctionCall);
//for any column whose expression uses multiple aggregations with different arguments, we split the aggregates into individual columns and then extend
let aggregateColumnExpresions = $allAggregateColumns->map(column |
let aggregateExpressions = extractAggregatesFromExpression($column.expression)->removeDuplicates();
pair($column, list($aggregateExpressions));
)->groupBy(x | $x.second.values->map(a | $a.arguments->first())->removeDuplicates()->size() > 1);

let mapExpression = processMapFunctionArgument($adjusted.map, $context);
let mapReturnType = $mapExpression.genericType.rawType->toOne();
let mapFunctionType = functionType('row', TDSRow, PureOne, $mapReturnType, ZeroOne);
let mapLambda = lambda($mapFunctionType, $mapExpression);

let aggLambda = processAggregationLambda($adjusted.aggregate, $mapReturnType, $context);
let aggReturnType = $aggLambda->functionReturnType();
let standardAggregations = $aggregateColumnExpresions->get(false).values->map(p |
let column = $p.first;
let name = extractNameFromSingleColumn($column, $context);
createAggregation($column, $name, $p.second.values->cast(@FunctionCall)->at(0), $context);
);

let typeParameters = [^GenericType(rawType = $mapReturnType), $aggReturnType];
let extractedAggregations = $aggregateColumnExpresions->get(true).values.second.values->removeDuplicates()->map(agg |
createAggregation(^SingleColumn(expression = $agg), extractNameFromExpression($agg, $context), $agg, $context);
);

let genericType = ^GenericType(rawType = meta::pure::tds::AggregateValue, typeArguments = $typeParameters);
let aggExtensions = $aggregateColumnExpresions->get(true).values->map(p |
let aggregateExpressions = $p.second.values;
let column = $p.first;
^$column(expression = $p.first.expression->walk([
f:FunctionCall[1] |
if ($f->in($aggregateExpressions), | ^QualifiedNameReference(name = ^QualifiedName(parts = $f->extractNameFromExpression($context))), | $f);
])->toOne());
);

sfe(agg_String_1__FunctionDefinition_1__FunctionDefinition_1__AggregateValue_1_, $genericType, $typeParameters, [
iv($name), iv($mapLambda), iv($aggLambda)
]);
);
let aggregations = $standardAggregations->concatenate($extractedAggregations);

let aggregationInstanceValues = iv($aggregations, ^GenericType(rawType = meta::pure::tds::AggregateValue, typeArguments = [^GenericType(rawType = Any), ^GenericType(rawType = Any)]));
let aggregationInstanceValues = iv($aggregations, ^GenericType(rawType = meta::pure::tds::AggregateValue, typeArguments = [^GenericType(rawType = Any), ^GenericType(rawType = Any)]));

if ($allGroupByColumns->isEmpty() && $aggregations->isEmpty(),
let groupByExpression = if ($allGroupByColumns->isEmpty() && $aggregations->isEmpty(),
| $rename,
| if ($allGroupByColumns->isNotEmpty() && $aggregations->isEmpty(),
|
//group by with no aggregates is not compilable pure, so we simplify to restrict, distinct
let restrict = processRestrict($allGroupByColumns, ^$context(expression = $rename));
appendTdsFunc($restrict, distinct_TabularDataSet_1__TabularDataSet_1_, []);,
| appendTdsFunc($rename, groupBy_TabularDataSet_1__String_MANY__AggregateValue_MANY__TabularDataSet_1_, [list($allGroupByColumns), list($aggregationInstanceValues)])));

if ($aggExtensions->isNotEmpty(), | processExtend($aggExtensions, [], ^$context(expression = $groupByExpression)), | $groupByExpression);
}

function <<access.private>> meta::external::query::sql::transformation::queryToPure::createAggregation(column:SingleColumn[1], name:String[1], aggregationFunctionCall:FunctionCall[1], context:SqlTransformContext[1]):SimpleFunctionExpression[1]
{
  // Builds the agg(name, mapLambda, aggLambda) expression for a single aggregate column.

  // Look up the processor registered for the aggregate function and let it pre-process
  // the column expression; the result exposes the map part and the aggregate part.
  let aggProcessor = aggregateProcessor($aggregationFunctionCall.name);
  let preProcessed = $aggProcessor.preProcess($column.expression, $aggregationFunctionCall);

  // Map side: a lambda over a 'row' parameter of type TDSRow whose body is the processed map expression.
  let rowExpression = processMapFunctionArgument($preProcessed.map, $context);
  let rowValueType = $rowExpression.genericType.rawType->toOne();
  let rowLambda = lambda(functionType('row', TDSRow, PureOne, $rowValueType, ZeroOne), $rowExpression);

  // Aggregate side: a lambda built from the aggregate part and the map lambda's return type.
  let reduceLambda = processAggregationLambda($preProcessed.aggregate, $rowValueType, $context);

  // AggregateValue is parameterised by the map return type and the aggregate return type.
  let aggTypeArguments = [^GenericType(rawType = $rowValueType), $reduceLambda->functionReturnType()];
  let aggValueType = ^GenericType(rawType = meta::pure::tds::AggregateValue, typeArguments = $aggTypeArguments);

  sfe(agg_String_1__FunctionDefinition_1__FunctionDefinition_1__AggregateValue_1_, $aggValueType, $aggTypeArguments, [
    iv($name), iv($rowLambda), iv($reduceLambda)
  ]);
}

function <<access.private>> meta::external::query::sql::transformation::queryToPure::processAggregateFunctionCall(functionCall:FunctionCall[1]):FunctionCall[1]
Expand Down Expand Up @@ -640,11 +652,12 @@ function <<access.private>> meta::external::query::sql::transformation::queryToP
}

//We currently need to ensure that all the functions used in group by are in fact aggregate functions. e.g. select count(*), pi() would not currently translate well to Pure.
function <<access.private>> meta::external::query::sql::transformation::queryToPure::ensureAggregateFunction(functionCall:FunctionCall[1]):FunctionCall[1]
function <<access.private>> meta::external::query::sql::transformation::queryToPure::ensureAggregateFunction(e:meta::external::query::sql::metamodel::Expression[1]):Boolean[1]
{
assert($e->instanceOf(FunctionCall), 'unsupported aggregation');
let functionCall = $e->cast(@FunctionCall);
let processor = functionProcessor($functionCall.name);
assert($processor.isAggregate, | 'function ' + extractNameFromQualifiedName($functionCall.name, []) + ' is not currently supported as aggregate function');
$functionCall;
}

function <<access.private>> meta::external::query::sql::transformation::queryToPure::processExtend(items: SelectItem[*], columnsToRealias:String[*], context: SqlTransformContext[1]):FunctionExpression[1]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,14 @@ function <<test.Test>> meta::external::query::sql::transformation::queryToPure::
function <<test.Test>> meta::external::query::sql::transformation::queryToPure::tests::testGroupByAggregateExpressions():Boolean[1]
{
test(
'SELECT Integer, sum(1) AS "sum", count(*) AS "count", sum(Integer + Float) AS "exp", EXTRACT(YEAR FROM MAX("StrictDate")) AS "aggFunctionCall", MAX("StrictDate") + INTERVAL \'1 WEEK\' AS "dateAggMath" FROM service."/service/service1" GROUP BY Integer',
'SELECT ' +
'Integer, ' +
'sum(1) AS "sum", ' +
'count(*) AS "count", ' +
'sum(Integer + Float) AS "exp", ' +
'EXTRACT(YEAR FROM MAX("StrictDate")) AS "aggFunctionCall", ' +
'MAX("StrictDate") + INTERVAL \'1 WEEK\' AS "dateAggMath" ' +
'FROM service."/service/service1" GROUP BY Integer',

{| FlatInput.all()->project(
[ x | $x.booleanIn, x | $x.integerIn, x | $x.floatIn, x | $x.decimalIn, x | $x.strictDateIn, x | $x.dateTimeIn, x | $x.stringIn ],
Expand All @@ -815,8 +822,35 @@ function <<test.Test>> meta::external::query::sql::transformation::queryToPure::
agg('count', row | $row, y | $y->count()),
agg('exp', row | $row.getInteger('Integer') + $row.getFloat('Float'), y | $y->sum()),
agg('aggFunctionCall', row | $row.getStrictDate('StrictDate'), y | $y->max()->toOne()->year()),
agg('dateAggMath', row | $row.getStrictDate('StrictDate'), y | $y->max()->toOne()->adjust(1, DurationUnit.WEEKS))
agg('dateAggMath', row | $row.getStrictDate('StrictDate'), y | $y->max()->toOne()->adjust(1, DurationUnit.WEEKS))])
}, false)
}

// Covers select items that combine several aggregates over different arguments within one
// expression ("multiArg": sum(Integer) vs sum(Float); "multiArg2": sum(Integer) vs count(Float)).
// The expected Pure query computes each such aggregate as its own groupBy column named after
// the aggregate expression, derives the CASE results with extend, and finally restricts the
// output to the columns that were selected in the SQL.
function <<test.Test>> meta::external::query::sql::transformation::queryToPure::tests::testGroupByAggregateMixedExpressions():Boolean[1]
{
test(
'SELECT ' +
'Integer, ' +
'sum(Integer) AS "sum", ' +
'count(*) AS "count", ' +
'CASE WHEN sum(Integer) > sum(Float) THEN 1 ELSE 0 END AS "multiArg", ' +
'CASE WHEN sum(Integer) < count(Float) THEN 1 ELSE 0 END AS "multiArg2" ' +
'FROM service."/service/service1" GROUP BY Integer',

{| FlatInput.all()->project(
[ x | $x.booleanIn, x | $x.integerIn, x | $x.floatIn, x | $x.decimalIn, x | $x.strictDateIn, x | $x.dateTimeIn, x | $x.stringIn ],
[ 'Boolean', 'Integer', 'Float', 'Decimal', 'StrictDate', 'DateTime', 'String' ]
)->groupBy(['Integer'], [
// aggregates selected directly keep their SQL alias
agg('sum', row | $row.getInteger('Integer'), y | $y->sum()),
agg('count', row | $row, y | $y->count()),
// aggregates pulled out of the CASE expressions; 'sum(Integer)' is used by both CASEs but appears only once
agg('sum(Integer)', row | $row.getInteger('Integer'), y | $y->sum()),
agg('sum(Float)', row | $row.getFloat('Float'), y | $y->sum()),
agg('count(Float)', row | $row.getFloat('Float'), y | $y->count())
])->extend([
// the CASE expressions are evaluated over the extracted aggregate columns
col(row:TDSRow[1] | if ($row.getInteger('sum(Integer)') > $row.getFloat('sum(Float)'), | 1, | 0), 'multiArg'),
col(row:TDSRow[1] | if ($row.getInteger('sum(Integer)') < $row.getInteger('count(Float)'), | 1, | 0), 'multiArg2')
])
// the intermediate 'sum(Integer)', 'sum(Float)' and 'count(Float)' columns are not part of the final result
->restrict(['Integer', 'sum', 'count', 'multiArg', 'multiArg2'])
}, false)
}

Expand Down
Loading

0 comments on commit 90979aa

Please sign in to comment.