-
Notifications
You must be signed in to change notification settings - Fork 41
Sql4DCompiler
srikalyan chandrashekar edited this page Jul 29, 2014
·
13 revisions
A SQL language for generating druid queries.
Examples
- GroupBy with granularity day
SELECT COUNT(*) AS rows, LONG_SUM(cnt) AS edit_count, DOUBLE_SUM(added) AS chars_added FROM wikipedia WHERE
interval BETWEEN 2010-01-01T00:00 AND 2020-01-01T00 BREAK BY 'day' GROUP BY rows, edit_count
- GroupBy with Having clause.
SELECT COUNT(*) AS rows, UNIQUE(added) AS chars_added FROM wikipedia WHERE interval BETWEEN 2010-01-01T00:00
AND 2020-01-01T00 GROUP BY namespace, type HAVING rows > 30
- GroupBy with complex Having clause.
SELECT COUNT(*) AS rows, LONG_SUM(cnt) AS edit_count, DOUBLE_SUM(added) AS chars_added FROM wikipedia WHERE
interval BETWEEN 2010-01-01T00:00 AND 2020-01-01T00 GROUP BY rows, edit_count HAVING namespace = 10 AND rows > 30
- TopN on the metric 'edit_count' with granularity period 1d and EST
SELECT COUNT(*) AS rows, LONG_SUM(cnt) AS edit_count, DOUBLE_SUM(added) AS chars_added FROM wikipedia WHERE
interval BETWEEN 2010-01-01T00:00 AND 2020-01-01T00 BREAK BY PERIOD('P1D', 'EST5EDT') GROUP BY rows, edit_count
ORDER BY edit_count LIMIT 10 ;
- GroupBy with complex Post Aggregation.
SELECT COUNT(*) AS rows, DOUBLE_SUM(total) AS tot FROM wikipedia WHERE interval BETWEEN 2010-01-01T00:00
AND 2020-01-01T00 GROUP BY dimension1, dimension2 THEN (100 / ((total AS tot / rows AS rows) AS avgRows)) AS
average;
- Another GroupBy with complex Post Aggregation
SELECT COUNT(*) AS rows, DOUBLE_SUM(total) AS tot FROM wikipedia WHERE interval BETWEEN 2010-01-01T00:00
AND 2020-01-01T00 GROUP BY rows, tot THEN ((UNIQUE(unique_users) + (100 * rows)) / 34) AS rows;
- GroupBy with regex filter.
SELECT COUNT(*) AS rows, DOUBLE_SUM(total) AS tot FROM wikipedia WHERE interval BETWEEN 2010-01-01T00:00
AND 2020-01-01T00 AND rows LIKE '%tetete%' GROUP BY rows, tot;
- GroupBy with complex filter
SELECT COUNT(*) AS rows, DOUBLE_SUM(total) AS tot FROM wikipedia WHERE interval BETWEEN 2010-01-01T00:00
AND 2020-01-01T00 AND (dam = 10) AND (rows LIKE '%tetete%') GROUP BY rows, tot;
- GroupBy with more complex filter
SELECT COUNT(*) AS rows, DOUBLE_SUM(total) AS tot FROM wikipedia WHERE interval BETWEEN 2010-01-01T00:00
AND 2020-01-01T00 AND ((dam = 10) AND (rows LIKE '%tetete%')) OR NOT (tot =34) GROUP BY rows, tot;
- GroupBy with javascript based post aggregation
SELECT LONG_SUM(headline_views) AS headline_views, LONG_SUM(content_views) AS fullContentViews,
LONG_SUM(shares) AS shares, UNIQUE(unique_content_count) AS unique_content_count, MAX(ISOTimestamp) AS max
FROM tp042 WHERE interval BETWEEN 2014-03-09T01:00:00.000-05:00 AND 2014-03-10T22:00:01.000-04:00 AND
provider_id='someprovider' AND content_type='cavideo' BREAK BY PERIOD('P1D', 'EST5EDT') GROUP BY
headline_views THEN javascript:'sharesPer1000FullContentViews(fullContentViews, shares) { return (1000 * shares
/ fullContentViews).toFixed(2);}' HINT('timeseries');
- Search with single keyword
SELECT a, b FROM wiki WHERE interval BETWEEN 2010-01-01T00:00 AND 2020-01-01T00 WHICH CONTAINS('somestuff')
- Search with multiple keywords
SELECT a, b FROM wiki WHERE interval BETWEEN 2010-01-01T00:00 AND 2020-01-01T00 WHICH CONTAINS('somestuff',
'anotherstuff')
- Search with keyword and sort by lexical order
SELECT a, b FROM wiki WHERE interval BETWEEN 2010-01-01T00:00 AND 2020-01-01T00 WHICH CONTAINS('somestuff')
SORT('lexicographic')
- Search with keyword and Sort by string length
SELECT a, b FROM wiki WHERE interval BETWEEN 2010-01-01T00:00 AND 2020-01-01T00 WHICH CONTAINS('somestuff')
SORT('strlen')
- Timeseries with micro time ranges
SELECT LONG_SUM(all_content_seen) AS content_seen FROM UniqueCountTable WHERE interval BETWEEN (['2014-06-11T23:00:00.000-04:00','2014-06-11T23:59:59.000-04:00'],['2014-06-12T23:00:00.000-04:00','2014-06-12T23:59:59.000-04:00'],['2014-06-13T23:00:00.000-04:00','2014-06-13T23:59:59.000-04:00'],['2014-06-14T23:00:00.000-04:00','2014-06-14T23:59:59.000-04:00'],['2014-06-15T23:00:00.000-04:00','2014-06-15T23:59:59.000-04:00'],['2014-06-16T23:00:00.000-04:00','2014-06-16T23:59:59.000-04:00'],['2014-06-17T23:00:00.000-04:00','2014-06-17T23:59:59.000-04:00']) AND provider_id = 'superpublisher' AND content_type = 'cavideo' BREAK BY PERIOD('P1D', 'EST5EDT', '2014-06-11T04:00:00.000Z') HINT('timeseries');
- Timeseries with micro time ranges expressed through INCLUDE() function in PERIOD() function
SELECT LONG_SUM(all_content_seen) AS content_seen FROM UniqueContent WHERE interval BETWEEN 2014-06-11T23:00:00.000-04:00 AND 2014-06-17T23:59:59.000-04:00 AND provider_id = 'superpublisher' AND content_type = 'cavideo' BREAK BY PERIOD('P1D', 'EST5EDT', '2014-06-11T04:00:00.000Z', INCLUDE([23,1])) HINT('timeseries');
- Select query(Select type druid query) select clause for this type of query can have metrics/dimensions they are figured out at runtime from the coordinator before firing the select query.
SELECT content_uuid, provider_id, content_views, follows FROM AggsTable WHERE interval BETWEEN '2014-07-01' AND '2014-07-02' LIMIT 5;
- Select query(select type druid query,for all columns)
SELECT * FROM AggsTable WHERE interval BETWEEN '2014-07-01' AND '2014-07-02' LIMIT 5;
- GroupBy With Join(currently JOIN,LEFT_JOIN, RIGHT_JOIN are supported)
SELECT timestamp , LONG_SUM(content_views) AS content_views, LONG_SUM(shares) AS shares FROM AggTable WHERE interval BETWEEN 2014-05-20T00:00:00.000-04:00 AND 2014-05-31T23:00:00.000-04:00 AND provider_id='superpublisher' AND content_type='cavideo' BREAK BY PERIOD('P1D', 'EST5EDT') GROUP BY timestamp HINT('timeseries') JOIN (SELECT timestamp , LONG_SUM(all_content_seen) AS content_seen FROM UniqueCountTable WHERE interval BETWEEN 2014-05-20T00:00:00.000-04:00 AND 2014-05-31T23:00:00.000-04:00 AND provider_id='superpublisher' AND content_type='cavideo' BREAK BY PERIOD('P1D', 'EST5EDT') GROUP BY timestamp HINT('timeseries')) ON (timestamp, content_views);