From 2dd53338cf9994830602ee12fa054f4bac647287 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Tue, 31 Oct 2023 23:36:08 -0700 Subject: [PATCH] add queries, table & results for a generic testing staging framework setup Signed-off-by: YANGDB --- .../src/test/python/http_logs/queries/q1.sql | 1 + .../src/test/python/http_logs/queries/q2.sql | 2 + .../src/test/python/http_logs/queries/q3.sql | 8 ++ .../src/test/python/http_logs/queries/q4.sql | 9 ++ .../src/test/python/http_logs/queries/q5.sql | 8 ++ .../src/test/python/http_logs/queries/q6.sql | 3 + .../src/test/python/http_logs/queries/q7.sql | 6 + .../src/test/python/http_logs/results/q1.json | 96 +++++++++++++ .../src/test/python/http_logs/results/q2.json | 21 +++ .../src/test/python/http_logs/results/q3.json | 31 +++++ .../src/test/python/http_logs/results/q4.json | 126 ++++++++++++++++++ .../src/test/python/http_logs/results/q5.json | 38 ++++++ .../src/test/python/http_logs/results/q6.json | 78 +++++++++++ .../src/test/python/http_logs/results/q7.json | 102 ++++++++++++++ .../http_logs/tables/create_cover_index.sql | 7 + .../python/http_logs/tables/create_mv.sql | 14 ++ .../python/http_logs/tables/create_table.sql | 11 ++ 17 files changed, 561 insertions(+) create mode 100644 integ-test/src/test/python/http_logs/queries/q1.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q2.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q3.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q4.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q5.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q6.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q7.sql create mode 100644 integ-test/src/test/python/http_logs/results/q1.json create mode 100644 integ-test/src/test/python/http_logs/results/q2.json create mode 100644 integ-test/src/test/python/http_logs/results/q3.json create mode 100644 
integ-test/src/test/python/http_logs/results/q4.json create mode 100644 integ-test/src/test/python/http_logs/results/q5.json create mode 100644 integ-test/src/test/python/http_logs/results/q6.json create mode 100644 integ-test/src/test/python/http_logs/results/q7.json create mode 100644 integ-test/src/test/python/http_logs/tables/create_cover_index.sql create mode 100644 integ-test/src/test/python/http_logs/tables/create_mv.sql create mode 100644 integ-test/src/test/python/http_logs/tables/create_table.sql diff --git a/integ-test/src/test/python/http_logs/queries/q1.sql b/integ-test/src/test/python/http_logs/queries/q1.sql new file mode 100644 index 000000000..41ada9746 --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q1.sql @@ -0,0 +1 @@ +SELECT * FROM mys3.default.http_logs ORDER BY `@timestamp` LIMIT 5; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q2.sql b/integ-test/src/test/python/http_logs/queries/q2.sql new file mode 100644 index 000000000..7b91b97cc --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q2.sql @@ -0,0 +1,2 @@ +SELECT COUNT(DISTINCT clientip) as unique_client_ips +FROM mys3.default.http_logs; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q3.sql b/integ-test/src/test/python/http_logs/queries/q3.sql new file mode 100644 index 000000000..585a8a03b --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q3.sql @@ -0,0 +1,8 @@ +SELECT + FIRST(day) AS day, + status, + COUNT(status) AS status_count_by_day +FROM (SELECT * FROM mys3.default.http_logs LIMIT 1000) +GROUP BY day, status +ORDER BY day, status + LIMIT 10; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q4.sql b/integ-test/src/test/python/http_logs/queries/q4.sql new file mode 100644 index 000000000..119fdbf17 --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q4.sql @@ -0,0 +1,9 @@ +SELECT + FIRST(day) AS day, + status, + 
COUNT(status) AS status_count_by_day +FROM mys3.default.http_logs +WHERE status >= 400 +GROUP BY day, status +ORDER BY day, status + LIMIT 20; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q5.sql b/integ-test/src/test/python/http_logs/queries/q5.sql new file mode 100644 index 000000000..936d60e33 --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q5.sql @@ -0,0 +1,8 @@ +SELECT + status, + COUNT(status) AS status_count_by_day +FROM mys3.default.http_logs +WHERE status >= 400 +GROUP BY status +ORDER BY status + LIMIT 20; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q6.sql b/integ-test/src/test/python/http_logs/queries/q6.sql new file mode 100644 index 000000000..a528e354f --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q6.sql @@ -0,0 +1,3 @@ +SELECT day, SUM(size) as total_size FROM mys3.default.http_logs +WHERE year = 1998 AND month =6 +GROUP BY day ORDER BY total_size; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q7.sql b/integ-test/src/test/python/http_logs/queries/q7.sql new file mode 100644 index 000000000..a10df95b7 --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q7.sql @@ -0,0 +1,6 @@ +SELECT count(*) as count, clientip +FROM mys3.default.http_logs +WHERE clientip BETWEEN '208.0.0.0' AND '210.0.0.0' +GROUP BY clientip +ORDER BY count DESC +limit 20; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q1.json b/integ-test/src/test/python/http_logs/results/q1.json new file mode 100644 index 000000000..92a48375d --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q1.json @@ -0,0 +1,96 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "@timestamp", + "type": "date" + }, + { + "name": "clientip", + "type": "string" + }, + { + "name": "request", + "type": "string" + }, + { + "name": "status", + "type": "integer" + }, + { + 
"name": "size", + "type": "integer" + }, + { + "name": "year", + "type": "integer" + }, + { + "name": "month", + "type": "integer" + }, + { + "name": "day", + "type": "integer" + } + ], + "datarows": [ + [ + "1998-06-10T14:37:23.000Z", + "76.112.16.0", + "GET /images/102325.gif HTTP/1.0", + 200, + 1555, + 1998, + 6, + 10 + ], + [ + "1998-06-10T14:37:23.000Z", + "78.109.16.0", + "GET /english/images/comp_bu_stage1n.gif HTTP/1.0", + 200, + 1548, + 1998, + 6, + 10 + ], + [ + "1998-06-10T14:37:23.000Z", + "140.48.14.0", + "GET /images/102321.gif HTTP/1.0", + 200, + 1602, + 1998, + 6, + 10 + ], + [ + "1998-06-10T14:37:23.000Z", + "114.113.16.0", + "GET /english/images/team_bu_roster_on.gif HTTP/1.0", + 200, + 1567, + 1998, + 6, + 10 + ], + [ + "1998-06-10T14:37:24.000Z", + "79.48.14.0", + "GET /english/images/comp_bu_stage1n.gif HTTP/1.0", + 200, + 1548, + 1998, + 6, + 10 + ] + ], + "total": 5, + "size": 5 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q2.json b/integ-test/src/test/python/http_logs/results/q2.json new file mode 100644 index 000000000..4b4214d17 --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q2.json @@ -0,0 +1,21 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "unique_client_ips", + "type": "long" + } + ], + "datarows": [ + [ + 1149519 + ] + ], + "total": 1, + "size": 1 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q3.json b/integ-test/src/test/python/http_logs/results/q3.json new file mode 100644 index 000000000..c650d4aed --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q3.json @@ -0,0 +1,31 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "day", + "type": "integer" + }, + { + "name": "status", + "type": "integer" + }, + { + "name": "status_count_by_day", + "type": "long" + } + ], + "datarows": [ + [ + 12, + 200, + 1000 + ] + ], 
+ "total": 1, + "size": 1 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q4.json b/integ-test/src/test/python/http_logs/results/q4.json new file mode 100644 index 000000000..44a73799c --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q4.json @@ -0,0 +1,126 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "day", + "type": "integer" + }, + { + "name": "status", + "type": "integer" + }, + { + "name": "status_count_by_day", + "type": "long" + } + ], + "datarows": [ + [ + 1, + 400, + 20 + ], + [ + 1, + 404, + 1757 + ], + [ + 1, + 500, + 33 + ], + [ + 2, + 400, + 4 + ], + [ + 2, + 404, + 1743 + ], + [ + 2, + 500, + 36 + ], + [ + 3, + 400, + 13 + ], + [ + 3, + 404, + 12790 + ], + [ + 3, + 500, + 56 + ], + [ + 4, + 400, + 119 + ], + [ + 4, + 403, + 1 + ], + [ + 4, + 404, + 48657 + ], + [ + 4, + 500, + 513 + ], + [ + 5, + 400, + 19 + ], + [ + 5, + 404, + 3190 + ], + [ + 5, + 500, + 47 + ], + [ + 6, + 400, + 35 + ], + [ + 6, + 404, + 4596 + ], + [ + 6, + 500, + 47 + ], + [ + 7, + 400, + 36 + ] + ], + "total": 20, + "size": 20 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q5.json b/integ-test/src/test/python/http_logs/results/q5.json new file mode 100644 index 000000000..e3c0359fe --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q5.json @@ -0,0 +1,38 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "status", + "type": "integer" + }, + { + "name": "status_count_by_day", + "type": "long" + } + ], + "datarows": [ + [ + 400, + 2677 + ], + [ + 403, + 224 + ], + [ + 404, + 1224876 + ], + [ + 500, + 6134 + ] + ], + "total": 4, + "size": 4 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q6.json b/integ-test/src/test/python/http_logs/results/q6.json new file mode 100644 index 000000000..6fac40799 --- /dev/null +++ 
b/integ-test/src/test/python/http_logs/results/q6.json @@ -0,0 +1,78 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "day", + "type": "integer" + }, + { + "name": "total_size", + "type": "long" + } + ], + "datarows": [ + [ + 5, + 768310801 + ], + [ + 7, + 1708351354 + ], + [ + 6, + 2593510018 + ], + [ + 1, + 3281910483 + ], + [ + 2, + 6791010250 + ], + [ + 3, + 10791413411 + ], + [ + 8, + 27479593892 + ], + [ + 4, + 36649541120 + ], + [ + 14, + 58852258890 + ], + [ + 13, + 82015572020 + ], + [ + 9, + 99444676123 + ], + [ + 12, + 141079393326 + ], + [ + 10, + 143799318169 + ], + [ + 11, + 168343767518 + ] + ], + "total": 14, + "size": 14 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q7.json b/integ-test/src/test/python/http_logs/results/q7.json new file mode 100644 index 000000000..9ef60922f --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q7.json @@ -0,0 +1,102 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "count", + "type": "long" + }, + { + "name": "clientip", + "type": "string" + } + ], + "datarows": [ + [ + 104817, + "21.59.3.0" + ], + [ + 46116, + "208.96.4.0" + ], + [ + 33032, + "210.0.0.0" + ], + [ + 32155, + "208.85.0.0" + ], + [ + 31053, + "208.252.0.0" + ], + [ + 28301, + "208.12.0.0" + ], + [ + 24481, + "21.60.1.0" + ], + [ + 23636, + "209.0.0.0" + ], + [ + 19397, + "208.69.0.0" + ], + [ + 19058, + "208.29.0.0" + ], + [ + 18606, + "208.11.0.0" + ], + [ + 18579, + "209.10.0.0" + ], + [ + 14044, + "21.17.0.0" + ], + [ + 12632, + "209.36.0.0" + ], + [ + 12525, + "208.18.0.0" + ], + [ + 12376, + "209.46.0.0" + ], + [ + 12080, + "208.19.0.0" + ], + [ + 11247, + "21.9.1.0" + ], + [ + 9584, + "209.15.3.0" + ], + [ + 9073, + "209.65.1.0" + ] + ], + "total": 20, + "size": 20 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/tables/create_cover_index.sql 
b/integ-test/src/test/python/http_logs/tables/create_cover_index.sql new file mode 100644 index 000000000..786462398 --- /dev/null +++ b/integ-test/src/test/python/http_logs/tables/create_cover_index.sql @@ -0,0 +1,7 @@ +CREATE INDEX status_clientip_and_day + ON mys3.default.http_logs ( status, day, clientip ) + WITH ( + auto_refresh = true, + refresh_interval = '5 minute', + checkpoint_location = 's3://path/data/http_log/checkpoint_status_and_day' +) \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/tables/create_mv.sql b/integ-test/src/test/python/http_logs/tables/create_mv.sql new file mode 100644 index 000000000..0ee49161f --- /dev/null +++ b/integ-test/src/test/python/http_logs/tables/create_mv.sql @@ -0,0 +1,14 @@ +CREATE MATERIALIZED VIEW mys3.default.http_count_view +AS +SELECT + window.start AS `start.time`, + COUNT(*) AS count +FROM mys3.default.http_logs +WHERE status != 200 +GROUP BY TUMBLE(`@timestamp`, '1 Minutes') +WITH ( + auto_refresh = true, + refresh_interval = '1 Minutes', + checkpoint_location = 's3://path/data/http_log/checkpoint_http_count_view', + watermark_delay = '10 Minutes' +); \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/tables/create_table.sql b/integ-test/src/test/python/http_logs/tables/create_table.sql new file mode 100644 index 000000000..33bdb185f --- /dev/null +++ b/integ-test/src/test/python/http_logs/tables/create_table.sql @@ -0,0 +1,11 @@ +CREATE EXTERNAL TABLE mys3.default.http_logs ( + `@timestamp` TIMESTAMP, + clientip STRING, + request STRING, + status INT, + size INT, + year INT, + month INT, + day INT) +USING json PARTITIONED BY(year, month, day) OPTIONS + (path 's3://path/data/http_log/http_logs_partitioned_json_bz2/', compression 'bzip2') \ No newline at end of file