From 2b316e52d6bcf4a54c7cfe124ce5fe11f6ac9433 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Tue, 31 Oct 2023 16:36:30 -0700 Subject: [PATCH 1/9] add sanity.py script for sanity tests capabilities Signed-off-by: YANGDB --- integ-test/src/test/python/requierments.txt | 1 + integ-test/src/test/python/run-sanity.sh | 7 + integ-test/src/test/python/sanity.py | 220 ++++++++++++++++++++ 3 files changed, 228 insertions(+) create mode 100644 integ-test/src/test/python/requierments.txt create mode 100755 integ-test/src/test/python/run-sanity.sh create mode 100644 integ-test/src/test/python/sanity.py diff --git a/integ-test/src/test/python/requierments.txt b/integ-test/src/test/python/requierments.txt new file mode 100644 index 000000000..6ef17738a --- /dev/null +++ b/integ-test/src/test/python/requierments.txt @@ -0,0 +1 @@ +requests==2.26.0 # HTTP library \ No newline at end of file diff --git a/integ-test/src/test/python/run-sanity.sh b/integ-test/src/test/python/run-sanity.sh new file mode 100755 index 000000000..512246874 --- /dev/null +++ b/integ-test/src/test/python/run-sanity.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# Set the environment variable for the OpenSearch URL +export OPENSEARCH_URL="http://your_opensearch_url:9200" + +# Run the Python script +python sanity.py diff --git a/integ-test/src/test/python/sanity.py b/integ-test/src/test/python/sanity.py new file mode 100644 index 000000000..038089e28 --- /dev/null +++ b/integ-test/src/test/python/sanity.py @@ -0,0 +1,220 @@ +import requests +import time +import json +import os +import logging + +url = os.environ.get('OPENSEARCH_URL', "http://opensearch_server:9200") # Modify this line +logging.basicConfig(filename='sanity_report.log', level=logging.INFO) + +test_result = [] +sanity_report = [] + +def log_to_report(query, status, runtime, reason=None): + report_entry = { + "Query": query, + "Status": status, + "Runtime (seconds)": runtime, + "Reason": reason + } + sanity_report.append(report_entry) + +def print_sanity_report(): + logging.info("\n=========== Sanity Report ===========") + for entry in sanity_report: + for key, value in entry.items(): + logging.info(f"{key}: {value}") + logging.info("------------------------------------") + +def enable_repl(): + async_url = url + "/_plugins/_query/settings" + headers = {'Content-Type': 'application/json'} + data = {"transient":{"plugins.query.executionengine.spark.session.enabled":"true"}} + try: + response = requests.put(async_url, headers=headers, json=data) + if response.status_code // 100 == 2: + logging.info(f"http request was successful (2xx status code).") + return response + else: + logging.error(f"http request failed with status code: {response.status_code}") + raise + except requests.exceptions.ConnectTimeout as e: + logging.error(f"ConnectTimeout: {e}") + raise + except Exception as e: + logging.error(f"An error occurred: {e}") + raise +def fetch_result(queryId): + fetch_result_url = f"{url}/_plugins/_async_query/{queryId}" + + response = requests.get(fetch_result_url) + if response.status_code // 100 == 2: + logging.info(f"http request was successful (2xx status code).") + return response + else: + logging.info(f"http request failed with status code: {response.status_code}") + raise Exception("FAILED") + +def asnyc_query(query): + async_url = url + "/_plugins/_async_query" + headers = {'Content-Type': 'application/json'} + data = { + "datasource": "mys3", + "lang": "sql", + "query": f"{query}" + } + response = requests.post(async_url, headers=headers, json=data) + if response.status_code // 100 == 2: + logging.info(f"http request was successful (2xx status code).") + return response + else: + logging.info(f"http request failed with status code: {response.status_code}") + raise Exception("FAILED") + +def create_session(): + query = "select 1" + logging.info(f"\n======================") + logging.info(f"[{query}] START") + logging.info(f"======================") + start_time = time.time() + + response=asnyc_query(query).json() + sessionId = response['sessionId'] + queryId = response['queryId'] + logging.info(f"sessionId: {sessionId}") + while True: + response = fetch_result(queryId).json() + logging.info(f"status: {response['status']}") + if response['status'] == 'SUCCESS': + query_end_time = time.time() + logging.info(f"\n======================") + logging.info(f"[{query}] SUCCESS") + logging.info(f" Runtime {query_end_time - start_time} seconds") + logging.info(f"======================") + return sessionId + elif response['status'] == 'FAILED': + raise Exception("FAILED") + time.sleep(5) + +def asnyc_query_session(query, sessionId): + async_url = url + "/_plugins/_async_query" + headers = {'Content-Type': 'application/json'} + data = { + "datasource": "mys3", + "lang": "sql", + "query": f"{query}", + "sessionId": f"{sessionId}" + } + response = requests.post(async_url, headers=headers, json=data) + if response.status_code // 100 == 2: + logging.info(f"http request was successful (2xx status code).") + return response + else: + logging.info(f"http request failed with status code: {response.status_code}") + raise Exception("FAILED") + +def test_repl(expected, query, sessionId): + logging.info(f"\n========REPL==========") + logging.info(f"[{query}] START") + logging.info(f"======================") + start_time = time.time() + + queryId = asnyc_query_session(query, sessionId).json()['queryId'] + logging.info(f"queryId: {queryId}") + while True: + try: + response = fetch_result(queryId).json() + logging.info(f"status: {response['status']}") + if response['status'] == 'SUCCESS': + query_end_time = time.time() + runtime = query_end_time - start_time + if expected(response): + log_to_report(query, "SUCCESS", runtime) + logging.info(f"\n======================") + logging.info(f"[{query}] SUCCESS") + logging.info(f" Runtime {runtime} seconds") + logging.info(f"======================") + else: + log_to_report(query, "FAILED", runtime, "Unexpected response") + logging.info(json.dumps(response, indent=4)) + break + elif response['status'] == 'FAILED': + query_end_time = time.time() + runtime = query_end_time - start_time + log_to_report(query, "FAILED", runtime, response.get('reason', 'Unknown')) + logging.info(f"{response['status']}") + break + except Exception as e: + query_end_time = time.time() + runtime = query_end_time - start_time + log_to_report(query, "FAILED", runtime, str(e)) + logging.info(f"{e}") + break + time.sleep(2) + + + +def main(): + enable_repl() + sessionId = create_session() + + expected_lambda = lambda response: ( + response['status'] == 'SUCCESS' and + response['total'] == 1 and + response['datarows'][0] == [1998] and + response['schema'][0]['name'] == 'year' and + response['schema'][0]['type'] == 'integer' + ) + test_repl(expected_lambda, "select year from mys3.default.http_logs where year = 1998 limit 1", sessionId) + + + expected_lambda = lambda response: ( + response['size'] == 13 and + response['total'] == 13 and + response['datarows'][0] == [ + "@timestamp", + "timestamp", + "" + ] and + response['schema'] == [ + { + "name": "col_name", + "type": "string" + }, + { + "name": "data_type", + "type": "string" + }, + { + "name": "comment", + "type": "string" + } + ] + ) + test_repl(expected_lambda, "DESC mys3.default.http_logs", sessionId) + + expected_lambda = lambda response: ( + response['size'] == 1 and + response['total'] == 1 and + response['datarows'][0] == [ + "default", + "http_logs", + False + ] and + response['schema'] == [ + {"name": "namespace", "type": "string"}, + {"name": "tableName", "type": "string"}, + {"name": "isTemporary", "type": "boolean"} + ] + ) + test_repl(expected_lambda, "SHOW TABLES IN mys3.default LIKE 'http_logs'", sessionId) + + + expected_lambda = lambda response: ( + response['size'] == 0 + ) + test_repl(expected_lambda, "create skipping index on mys3.default.http_logs (status VALUE_SET)", sessionId) + print_sanity_report() + +if __name__ == '__main__': + main() \ No newline at end of file From 2dd53338cf9994830602ee12fa054f4bac647287 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Tue, 31 Oct 2023 23:36:08 -0700 Subject: [PATCH 2/9] add queries, table & results for a generic testing staging framework setup Signed-off-by: YANGDB --- .../src/test/python/http_logs/queries/q1.sql | 1 + .../src/test/python/http_logs/queries/q2.sql | 2 + .../src/test/python/http_logs/queries/q3.sql | 8 ++ .../src/test/python/http_logs/queries/q4.sql | 9 ++ .../src/test/python/http_logs/queries/q5.sql | 8 ++ .../src/test/python/http_logs/queries/q6.sql | 3 + .../src/test/python/http_logs/queries/q7.sql | 6 + .../src/test/python/http_logs/results/q1.json | 96 +++++++++++++ .../src/test/python/http_logs/results/q2.json | 21 +++ .../src/test/python/http_logs/results/q3.json | 31 +++++ .../src/test/python/http_logs/results/q4.json | 126 ++++++++++++++++++ .../src/test/python/http_logs/results/q5.json | 38 ++++++ .../src/test/python/http_logs/results/q6.json | 78 +++++++++++ .../src/test/python/http_logs/results/q7.json | 102 ++++++++++++++ .../http_logs/tables/create_cover_index.sql | 7 + .../python/http_logs/tables/create_mv.sql | 14 ++ .../python/http_logs/tables/create_table.sql | 11 ++ 17 files changed, 561 insertions(+) create mode 100644 integ-test/src/test/python/http_logs/queries/q1.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q2.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q3.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q4.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q5.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q6.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q7.sql create mode 100644 integ-test/src/test/python/http_logs/results/q1.json create mode 100644 integ-test/src/test/python/http_logs/results/q2.json create mode 100644 integ-test/src/test/python/http_logs/results/q3.json create mode 100644 integ-test/src/test/python/http_logs/results/q4.json create mode 100644 integ-test/src/test/python/http_logs/results/q5.json create mode 100644 integ-test/src/test/python/http_logs/results/q6.json create mode 100644 integ-test/src/test/python/http_logs/results/q7.json create mode 100644 integ-test/src/test/python/http_logs/tables/create_cover_index.sql create mode 100644 integ-test/src/test/python/http_logs/tables/create_mv.sql create mode 100644 integ-test/src/test/python/http_logs/tables/create_table.sql diff --git a/integ-test/src/test/python/http_logs/queries/q1.sql b/integ-test/src/test/python/http_logs/queries/q1.sql new file mode 100644 index 000000000..41ada9746 --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q1.sql @@ -0,0 +1 @@ +SELECT * FROM mys3.default.http_logs ORDER BY "@timestamp" LIMIT 5; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q2.sql b/integ-test/src/test/python/http_logs/queries/q2.sql new file mode 100644 index 000000000..7b91b97cc --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q2.sql @@ -0,0 +1,2 @@ +SELECT COUNT(DISTINCT clientip) as unique_client_ips +FROM mys3.default.http_logs; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q3.sql b/integ-test/src/test/python/http_logs/queries/q3.sql new file mode 100644 index 000000000..585a8a03b --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q3.sql @@ -0,0 +1,8 @@ +SELECT + FIRST(day) AS day, + status, + COUNT(status) AS status_count_by_day +FROM (SELECT * FROM mys3.default.http_logs LIMIT 1000) +GROUP BY day, status +ORDER BY day, status + LIMIT 10; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q4.sql b/integ-test/src/test/python/http_logs/queries/q4.sql new file mode 100644 index 000000000..119fdbf17 --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q4.sql @@ -0,0 +1,9 @@ +SELECT + FIRST(day) AS day, + status, + COUNT(status) AS status_count_by_day +FROM mys3.default.http_logs +WHERE status >= 400 +GROUP BY day, status +ORDER BY day, status + LIMIT 20; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q5.sql b/integ-test/src/test/python/http_logs/queries/q5.sql new file mode 100644 index 000000000..936d60e33 --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q5.sql @@ -0,0 +1,8 @@ +SELECT + status, + COUNT(status) AS status_count_by_day +FROM mys3.default.http_logs +WHERE status >= 400 +GROUP BY status +ORDER BY status + LIMIT 20; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q6.sql b/integ-test/src/test/python/http_logs/queries/q6.sql new file mode 100644 index 000000000..a528e354f --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q6.sql @@ -0,0 +1,3 @@ +SELECT day, SUM(size) as total_size FROM mys3.default.http_logs +WHERE year = 1998 AND month =6 +GROUP BY day; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q7.sql b/integ-test/src/test/python/http_logs/queries/q7.sql new file mode 100644 index 000000000..a10df95b7 --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q7.sql @@ -0,0 +1,6 @@ +SELECT count(*) as count, clientip +FROM mys3.default.http_logs +WHERE clientip BETWEEN '208.0.0.0' AND '210.0.0.0' +GROUP BY clientip +ORDER BY DESC count +limit 20; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q1.json b/integ-test/src/test/python/http_logs/results/q1.json new file mode 100644 index 000000000..92a48375d --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q1.json @@ -0,0 +1,96 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "@timestamp", + "type": "date" + }, + { + "name": "clientip", + "type": "string" + }, + { + "name": "request", + "type": "string" + }, + { + "name": "status", + "type": "integer" + }, + { + "name": "size", + "type": "integer" + }, + { + "name": "year", + "type": "integer" + }, + { + "name": "month", + "type": "integer" + }, + { + "name": "day", + "type": "integer" + } + ], + "datarows": [ + [ + "1998-06-10T14:37:23.000Z", + "76.112.16.0", + "GET /images/102325.gif HTTP/1.0", + 200, + 1555, + 1998, + 6, + 10 + ], + [ + "1998-06-10T14:37:23.000Z", + "78.109.16.0", + "GET /english/images/comp_bu_stage1n.gif HTTP/1.0", + 200, + 1548, + 1998, + 6, + 10 + ], + [ + "1998-06-10T14:37:23.000Z", + "140.48.14.0", + "GET /images/102321.gif HTTP/1.0", + 200, + 1602, + 1998, + 6, + 10 + ], + [ + "1998-06-10T14:37:23.000Z", + "114.113.16.0", + "GET /english/images/team_bu_roster_on.gif HTTP/1.0", + 200, + 1567, + 1998, + 6, + 10 + ], + [ + "1998-06-10T14:37:24.000Z", + "79.48.14.0", + "GET /english/images/comp_bu_stage1n.gif HTTP/1.0", + 200, + 1548, + 1998, + 6, + 10 + ] + ], + "total": 5, + "size": 5 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q2.json b/integ-test/src/test/python/http_logs/results/q2.json new file mode 100644 index 000000000..4b4214d17 --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q2.json @@ -0,0 +1,21 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "unique_client_ips", + "type": "long" + } + ], + "datarows": [ + [ + 1149519 + ] + ], + "total": 1, + "size": 1 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q3.json b/integ-test/src/test/python/http_logs/results/q3.json new file mode 100644 index 000000000..c650d4aed --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q3.json @@ -0,0 +1,31 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "day", + "type": "integer" + }, + { + "name": "status", + "type": "integer" + }, + { + "name": "status_count_by_day", + "type": "long" + } + ], + "datarows": [ + [ + 12, + 200, + 1000 + ] + ], + "total": 1, + "size": 1 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q4.json b/integ-test/src/test/python/http_logs/results/q4.json new file mode 100644 index 000000000..44a73799c --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q4.json @@ -0,0 +1,126 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "day", + "type": "integer" + }, + { + "name": "status", + "type": "integer" + }, + { + "name": "status_count_by_day", + "type": "long" + } + ], + "datarows": [ + [ + 1, + 400, + 20 + ], + [ + 1, + 404, + 1757 + ], + [ + 1, + 500, + 33 + ], + [ + 2, + 400, + 4 + ], + [ + 2, + 404, + 1743 + ], + [ + 2, + 500, + 36 + ], + [ + 3, + 400, + 13 + ], + [ + 3, + 404, + 12790 + ], + [ + 3, + 500, + 56 + ], + [ + 4, + 400, + 119 + ], + [ + 4, + 403, + 1 + ], + [ + 4, + 404, + 48657 + ], + [ + 4, + 500, + 513 + ], + [ + 5, + 400, + 19 + ], + [ + 5, + 404, + 3190 + ], + [ + 5, + 500, + 47 + ], + [ + 6, + 400, + 35 + ], + [ + 6, + 404, + 4596 + ], + [ + 6, + 500, + 47 + ], + [ + 7, + 400, + 36 + ] + ], + "total": 20, + "size": 20 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q5.json b/integ-test/src/test/python/http_logs/results/q5.json new file mode 100644 index 000000000..e3c0359fe --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q5.json @@ -0,0 +1,38 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "status", + "type": "integer" + }, + { + "name": "status_count_by_day", + "type": "long" + } + ], + "datarows": [ + [ + 400, + 2677 + ], + [ + 403, + 224 + ], + [ + 404, + 1224876 + ], + [ + 500, + 6134 + ] + ], + "total": 4, + "size": 4 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q6.json b/integ-test/src/test/python/http_logs/results/q6.json new file mode 100644 index 000000000..6fac40799 --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q6.json @@ -0,0 +1,78 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "day", + "type": "integer" + }, + { + "name": "total_size", + "type": "long" + } + ], + "datarows": [ + [ + 5, + 768310801 + ], + [ + 7, + 1708351354 + ], + [ + 6, + 2593510018 + ], + [ + 1, + 3281910483 + ], + [ + 2, + 6791010250 + ], + [ + 3, + 10791413411 + ], + [ + 8, + 27479593892 + ], + [ + 4, + 36649541120 + ], + [ + 14, + 58852258890 + ], + [ + 13, + 82015572020 + ], + [ + 9, + 99444676123 + ], + [ + 12, + 141079393326 + ], + [ + 10, + 143799318169 + ], + [ + 11, + 168343767518 + ] + ], + "total": 14, + "size": 14 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q7.json b/integ-test/src/test/python/http_logs/results/q7.json new file mode 100644 index 000000000..9ef60922f --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q7.json @@ -0,0 +1,102 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "count", + "type": "long" + }, + { + "name": "clientip", + "type": "string" + } + ], + "datarows": [ + [ + 104817, + "21.59.3.0" + ], + [ + 46116, + "208.96.4.0" + ], + [ + 33032, + "210.0.0.0" + ], + [ + 32155, + "208.85.0.0" + ], + [ + 31053, + "208.252.0.0" + ], + [ + 28301, + "208.12.0.0" + ], + [ + 24481, + "21.60.1.0" + ], + [ + 23636, + "209.0.0.0" + ], + [ + 19397, + "208.69.0.0" + ], + [ + 19058, + "208.29.0.0" + ], + [ + 18606, + "208.11.0.0" + ], + [ + 18579, + "209.10.0.0" + ], + [ + 14044, + "21.17.0.0" + ], + [ + 12632, + "209.36.0.0" + ], + [ + 12525, + "208.18.0.0" + ], + [ + 12376, + "209.46.0.0" + ], + [ + 12080, + "208.19.0.0" + ], + [ + 11247, + "21.9.1.0" + ], + [ + 9584, + "209.15.3.0" + ], + [ + 9073, + "209.65.1.0" + ] + ], + "total": 20, + "size": 20 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/tables/create_cover_index.sql b/integ-test/src/test/python/http_logs/tables/create_cover_index.sql new file mode 100644 index 000000000..786462398 --- /dev/null +++ b/integ-test/src/test/python/http_logs/tables/create_cover_index.sql @@ -0,0 +1,7 @@ +CREATE INDEX status_clientip_and_day + ON mys3.default.http_logs ( status, day, clientip ) + WITH ( + auto_refresh = true, + refresh_interval = '5 minute', + checkpoint_location = 's3://path/data/http_log/checkpoint_status_and_day' +) \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/tables/create_mv.sql b/integ-test/src/test/python/http_logs/tables/create_mv.sql new file mode 100644 index 000000000..0ee49161f --- /dev/null +++ b/integ-test/src/test/python/http_logs/tables/create_mv.sql @@ -0,0 +1,14 @@ +CREATE MATERIALIZED VIEW mys3.default.http_count_view +AS +SELECT + window.start AS `start.time`, + COUNT(*) AS count +FROM mys3.default.http_logs +WHERE status != 200 +GROUP BY TUMBLE(`@timestamp`, '1 Minutes') +WITH ( + auto_refresh = true, + refresh_interval = '1 Minutes', + checkpoint_location = 's3:/path/data/http_log/checkpoint_http_count_view', + watermark_delay = '10 Minutes' +); \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/tables/create_table.sql b/integ-test/src/test/python/http_logs/tables/create_table.sql new file mode 100644 index 000000000..33bdb185f --- /dev/null +++ b/integ-test/src/test/python/http_logs/tables/create_table.sql @@ -0,0 +1,11 @@ +CREATE EXTERNAL TABLE mys3.default.http_logs ( + `@timestamp` TIMESTAMP, + clientip STRING, + request STRING, + status INT, + size INT, + year INT, + month INT, + day INT) +USING json PARTITIONED BY(year, month, day) OPTIONS + (path 's3://path/data/http_log/http_logs_partitioned_json_bz2/', compression 'bzip2')" \ No newline at end of file From 44b8db012c8a43db5d305405415fbf6fe94e7efb Mon Sep 17 00:00:00 2001 From: YANGDB Date: Thu, 2 Nov 2023 14:38:53 -0700 Subject: [PATCH 3/9] add queries, table & results for a generic testing staging framework setup Signed-off-by: YANGDB --- integ-test/src/test/python/README.md | 70 +++++++ .../test/python/http_logs/queries/ppl1.sql | 1 + .../src/test/python/http_logs/queries/q1.sql | 2 +- .../src/test/python/http_logs/queries/q2.sql | 2 +- .../src/test/python/http_logs/queries/q3.sql | 2 +- .../src/test/python/http_logs/queries/q4.sql | 2 +- .../src/test/python/http_logs/queries/q5.sql | 2 +- .../src/test/python/http_logs/queries/q6.sql | 2 +- .../src/test/python/http_logs/queries/q7.sql | 2 +- .../test/python/http_logs/results/ppl1.json | 96 ++++++++++ .../http_logs/tables/create_cover_index.sql | 4 +- .../python/http_logs/tables/create_mv.sql | 4 +- .../python/http_logs/tables/create_table.sql | 4 +- integ-test/src/test/python/run-sanity.sh | 7 - integ-test/src/test/python/run_sanity.sh | 23 +++ integ-test/src/test/python/sanity.py | 175 ++++++++++++------ 16 files changed, 317 insertions(+), 81 deletions(-) create mode 100644 integ-test/src/test/python/README.md create mode 100644 integ-test/src/test/python/http_logs/queries/ppl1.sql create mode 100644 integ-test/src/test/python/http_logs/results/ppl1.json delete mode 100755 integ-test/src/test/python/run-sanity.sh create mode 100755 integ-test/src/test/python/run_sanity.sh diff --git a/integ-test/src/test/python/README.md b/integ-test/src/test/python/README.md new file mode 100644 index 000000000..18c02e94e --- /dev/null +++ b/integ-test/src/test/python/README.md @@ -0,0 +1,70 @@ +# Sanity Script for OpenSearch Queries + +This script is designed to perform sanity checks on OpenSearch queries by executing a series of predefined queries against an OpenSearch cluster and validating the responses. + +## Requirements + +- Python 3.x +- `requests` library (Install using `pip install requests`) + +## Configuration + +Before running the script, ensure that the `OPENSEARCH_URL` environment variable is set to your OpenSearch cluster's URL. + +Example: +```bash +export OPENSEARCH_URL="http://localhost:9200" +``` + +## Running the Script + +The script can be executed with Python 3. To run the script, use the following command: + +```bash +python sanity_script.py +``` + +You can also use the provided bash script `run_sanity.sh` to run the Python script with parameters. + +```bash +./run_sanity.sh --run-tables --run-queries --use-date 20230101 +``` + +Make sure to give execution permission to the bash script: + +```bash +chmod +x run_sanity.sh +``` + +### Parameters + +The script accepts several optional parameters to control its behavior: + +- `--run-tables`: When this flag is set, the script will only execute queries related to table operations. +- `--run-queries`: This flag controls the execution of general queries that are not related to table operations. +- `--date`: A specific date can be provided in `YYYYMMDD` format to replace the `{date}` placeholder in queries. + +### Examples + +1. Run only table queries: + ```bash + python sanity_script.py --run-tables + ``` + +2. Run only non-table queries: + ```bash + python sanity_script.py --run-queries + ``` + +3. Run all queries with a specific date: + ```bash + ./run_sanity.sh --run-tables --run-queries --use-date 20231102 + ``` + +## Output + +The script will generate a log file with a timestamp in its name (e.g., `sanity_report_2023-11-02_12-00-00.log`) that contains the results of the sanity checks, including any errors encountered during execution. + +## Support + +For any queries or issues, please create an issue in the repository or contact the maintainer. diff --git a/integ-test/src/test/python/http_logs/queries/ppl1.sql b/integ-test/src/test/python/http_logs/queries/ppl1.sql new file mode 100644 index 000000000..bf5ff5c31 --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/ppl1.sql @@ -0,0 +1 @@ +SELECT * FROM mys3.default.http_logs_{date} ORDER BY "@timestamp" LIMIT 5; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q1.sql b/integ-test/src/test/python/http_logs/queries/q1.sql index 41ada9746..bf5ff5c31 100644 --- a/integ-test/src/test/python/http_logs/queries/q1.sql +++ b/integ-test/src/test/python/http_logs/queries/q1.sql @@ -1 +1 @@ -SELECT * FROM mys3.default.http_logs ORDER BY "@timestamp" LIMIT 5; \ No newline at end of file +SELECT * FROM mys3.default.http_logs_{date} ORDER BY "@timestamp" LIMIT 5; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q2.sql b/integ-test/src/test/python/http_logs/queries/q2.sql index 7b91b97cc..652da9b55 100644 --- a/integ-test/src/test/python/http_logs/queries/q2.sql +++ b/integ-test/src/test/python/http_logs/queries/q2.sql @@ -1,2 +1,2 @@ SELECT COUNT(DISTINCT clientip) as unique_client_ips -FROM mys3.default.http_logs; \ No newline at end of file +FROM mys3.default.http_logs_{date}; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q3.sql b/integ-test/src/test/python/http_logs/queries/q3.sql index 585a8a03b..6b18be3bd 100644 --- a/integ-test/src/test/python/http_logs/queries/q3.sql +++ b/integ-test/src/test/python/http_logs/queries/q3.sql @@ -2,7 +2,7 @@ SELECT FIRST(day) AS day, status, COUNT(status) AS status_count_by_day -FROM (SELECT * FROM mys3.default.http_logs LIMIT 1000) +FROM (SELECT * FROM mys3.default.http_logs_{date} LIMIT 1000) GROUP BY day, status ORDER BY day, status LIMIT 10; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q4.sql b/integ-test/src/test/python/http_logs/queries/q4.sql index 119fdbf17..32fa82a18 100644 --- a/integ-test/src/test/python/http_logs/queries/q4.sql +++ b/integ-test/src/test/python/http_logs/queries/q4.sql @@ -2,7 +2,7 @@ SELECT FIRST(day) AS day, status, COUNT(status) AS status_count_by_day -FROM mys3.default.http_logs +FROM mys3.default.http_logs_{date} WHERE status >= 400 GROUP BY day, status ORDER BY day, status diff --git a/integ-test/src/test/python/http_logs/queries/q5.sql b/integ-test/src/test/python/http_logs/queries/q5.sql index 936d60e33..75cc222f5 100644 --- a/integ-test/src/test/python/http_logs/queries/q5.sql +++ b/integ-test/src/test/python/http_logs/queries/q5.sql @@ -1,7 +1,7 @@ SELECT status, COUNT(status) AS status_count_by_day -FROM mys3.default.http_logs +FROM mys3.default.http_logs_{date} WHERE status >= 400 GROUP BY status ORDER BY status diff --git a/integ-test/src/test/python/http_logs/queries/q6.sql b/integ-test/src/test/python/http_logs/queries/q6.sql index a528e354f..e2266c6c9 100644 --- a/integ-test/src/test/python/http_logs/queries/q6.sql +++ b/integ-test/src/test/python/http_logs/queries/q6.sql @@ -1,3 +1,3 @@ -SELECT day, SUM(size) as total_size FROM mys3.default.http_logs +SELECT day, SUM(size) as total_size FROM mys3.default.http_logs_{date} WHERE year = 1998 AND month =6 GROUP BY day; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q7.sql b/integ-test/src/test/python/http_logs/queries/q7.sql index a10df95b7..378b4b2ec 100644 --- a/integ-test/src/test/python/http_logs/queries/q7.sql +++ b/integ-test/src/test/python/http_logs/queries/q7.sql @@ -1,5 +1,5 @@ SELECT count(*) as count, clientip -FROM mys3.default.http_logs +FROM mys3.default.http_logs_{date} WHERE clientip BETWEEN '208.0.0.0' AND '210.0.0.0' GROUP BY clientip ORDER BY DESC count diff --git a/integ-test/src/test/python/http_logs/results/ppl1.json b/integ-test/src/test/python/http_logs/results/ppl1.json new file mode 100644 index 000000000..92a48375d --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/ppl1.json @@ -0,0 +1,96 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "@timestamp", + "type": "date" + }, + { + "name": "clientip", + "type": "string" + }, + { + "name": "request", + "type": "string" + }, + { + "name": "status", + "type": "integer" + }, + { + "name": "size", + "type": "integer" + }, + { + "name": "year", + "type": "integer" + }, + { + "name": "month", + "type": "integer" + }, + { + "name": "day", + "type": "integer" + } + ], + "datarows": [ + [ + "1998-06-10T14:37:23.000Z", + "76.112.16.0", + "GET /images/102325.gif HTTP/1.0", + 200, + 1555, + 1998, + 6, + 10 + ], + [ + "1998-06-10T14:37:23.000Z", + "78.109.16.0", + "GET /english/images/comp_bu_stage1n.gif HTTP/1.0", + 200, + 1548, + 1998, + 6, + 10 + ], + [ + "1998-06-10T14:37:23.000Z", + "140.48.14.0", + "GET /images/102321.gif HTTP/1.0", + 200, + 1602, + 1998, + 6, + 10 + ], + [ + "1998-06-10T14:37:23.000Z", + "114.113.16.0", + "GET /english/images/team_bu_roster_on.gif HTTP/1.0", + 200, + 1567, + 1998, + 6, + 10 + ], + [ + "1998-06-10T14:37:24.000Z", + "79.48.14.0", + "GET /english/images/comp_bu_stage1n.gif HTTP/1.0", + 200, + 1548, + 1998, + 6, + 10 + ] + ], + "total": 5, + "size": 5 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/tables/create_cover_index.sql b/integ-test/src/test/python/http_logs/tables/create_cover_index.sql index 786462398..f736844aa 100644 --- a/integ-test/src/test/python/http_logs/tables/create_cover_index.sql +++ b/integ-test/src/test/python/http_logs/tables/create_cover_index.sql @@ -1,5 +1,5 @@ -CREATE INDEX status_clientip_and_day - ON mys3.default.http_logs ( status, day, clientip ) +CREATE INDEX status_clientip_and_day_{date} + ON mys3.default.http_logs_{date} ( status, day, clientip ) WITH ( auto_refresh = true, refresh_interval = '5 minute', diff --git a/integ-test/src/test/python/http_logs/tables/create_mv.sql b/integ-test/src/test/python/http_logs/tables/create_mv.sql index 0ee49161f..955962a98 100644 --- a/integ-test/src/test/python/http_logs/tables/create_mv.sql +++ b/integ-test/src/test/python/http_logs/tables/create_mv.sql @@ -1,9 +1,9 @@ -CREATE MATERIALIZED VIEW mys3.default.http_count_view +CREATE MATERIALIZED VIEW mys3.default.http_count_view_{date} AS SELECT window.start AS `start.time`, COUNT(*) AS count -FROM mys3.default.http_logs +FROM mys3.default.http_logs_{date} WHERE status != 200 GROUP BY TUMBLE(`@timestamp`, '1 Minutes') WITH ( diff --git a/integ-test/src/test/python/http_logs/tables/create_table.sql b/integ-test/src/test/python/http_logs/tables/create_table.sql index 33bdb185f..8f548a442 100644 --- a/integ-test/src/test/python/http_logs/tables/create_table.sql +++ b/integ-test/src/test/python/http_logs/tables/create_table.sql @@ -1,4 +1,4 @@ -CREATE EXTERNAL TABLE mys3.default.http_logs ( +CREATE EXTERNAL TABLE mys3.default.http_logs_{date} ( `@timestamp` TIMESTAMP, clientip STRING, request STRING, @@ -8,4 +8,4 @@ CREATE EXTERNAL TABLE mys3.default.http_logs ( month INT, day INT) USING json PARTITIONED BY(year, month, day) OPTIONS - (path 's3://path/data/http_log/http_logs_partitioned_json_bz2/', compression 'bzip2')" \ No newline at end of file + (path 's3://path/data/http_log/http_logs_partitioned_json_bz2/', compression 'bzip2') \ No newline at end of file diff --git a/integ-test/src/test/python/run-sanity.sh b/integ-test/src/test/python/run-sanity.sh deleted file mode 100755 index 512246874..000000000 --- a/integ-test/src/test/python/run-sanity.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -# Set the environment variable for the OpenSearch URL -export OPENSEARCH_URL="http://your_opensearch_url:9200" - -# Run the Python script -python sanity.py diff --git a/integ-test/src/test/python/run_sanity.sh b/integ-test/src/test/python/run_sanity.sh new file mode 100755 index 000000000..a3b435ec9 --- /dev/null +++ b/integ-test/src/test/python/run_sanity.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# Set the environment variable for the OpenSearch URL +export OPENSEARCH_URL="http://opensearch:9200" + +# Check for command-line arguments +run_tables="" +run_queries="" +date_arg="" + +# Parse command-line arguments +while [[ "$#" -gt 0 ]]; do + case $1 in + --run-tables) run_tables="--run-tables" ;; + --run-queries) run_queries="--run-queries" ;; + --use-date) date_arg="--use-date" ; shift; date_value="$1" ;; + *) echo "Unknown parameter passed: $1"; exit 1 ;; + esac + shift +done + +# Run the Python script with the parsed arguments +python sanity.py $run_tables $run_queries $date_arg $date_value \ No newline at end of file diff --git a/integ-test/src/test/python/sanity.py b/integ-test/src/test/python/sanity.py index 038089e28..076956cff 100644 --- a/integ-test/src/test/python/sanity.py +++ b/integ-test/src/test/python/sanity.py @@ -3,13 +3,29 @@ import json import os import logging +from datetime import datetime +import argparse url = os.environ.get('OPENSEARCH_URL', "http://opensearch_server:9200") # Modify this line -logging.basicConfig(filename='sanity_report.log', level=logging.INFO) +table_name = 'http_logs' +# Generate a timestamp for the file name +timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') +filename = f'sanity_report_{timestamp}.log' +# Configure logging +logging.basicConfig(filename=filename, + level=logging.DEBUG, + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S') test_result = [] sanity_report = [] +def get_current_date_str(): + return datetime.now().strftime('%Y%m%d') + +# default date for the test +current_date_str = get_current_date_str() + def log_to_report(query, status, runtime, reason=None): report_entry = { "Query": query, @@ -94,7 +110,7 @@ def create_session(): return sessionId elif response['status'] == 'FAILED': raise Exception("FAILED") - time.sleep(5) + time.sleep(10) def asnyc_query_session(query, sessionId): async_url = url + "/_plugins/_async_query" @@ -150,71 +166,108 @@ def test_repl(expected, query, sessionId): log_to_report(query, "FAILED", runtime, str(e)) logging.info(f"{e}") break - time.sleep(2) + time.sleep(10) + +# Rest of your imports remain the same + +def read_query(table_name, query_file): + with open(f"{table_name}/queries/{query_file}", 'r') as file: + query = file.read() + query_with_date = query.replace("{date}", current_date_str) + logging.debug(f"read_query {query_file} with resulting query: {query_with_date} ") + return query_with_date +def read_table(table_name, table_file): + with open(f"{table_name}/tables/{table_file}", 'r') as file: + query = file.read() + query_with_date = query.replace("{date}", current_date_str) + logging.debug(f"read_table {table_file} with resulting table: {query_with_date} ") + return query_with_date + +def read_response(table_name, result_file): + # Ensure the file has a .json extension + result_file_json = f"{os.path.splitext(result_file)[0]}.json" + + with open(f"{table_name}/results/{result_file_json}", 'r') as file: + expected_result = json.load(file) + + # Define a lambda that captures expected_result and returns it when called + response_lambda = lambda: { + 'status': expected_result['data']['resp']['status'], + 'schema': expected_result['data']['resp']['schema'], + 'datarows': expected_result['data']['resp']['datarows'], + 'total': expected_result['data']['resp']['total'], + 'size': expected_result['data']['resp']['size'] + } + + # Log the lambda and its result for debugging + logging.debug(f"read_response {response_lambda} with resulting result: {response_lambda()} ") + + # Return the lambda function + return response_lambda +def main(use_date, run_tables, run_queries): # Default to current date if no argument is provided + current_date_str = get_current_date_str() -def main(): + # Check if a date argument has been provided + if use_date: + # Use the provided date instead of the current date + provided_date_str = use_date + try: + # Validate that the provided date is in the correct format + datetime.strptime(provided_date_str, '%Y%m%d') + current_date_str = provided_date_str + except ValueError as e: + logging.error(f"Date argument provided is not in the correct format: {provided_date_str}") + logging.error(f"Error: {e}") + sys.exit(1) # Exit the script if the date format is incorrect + pass + + logging.info(f"Running tests for the date: {current_date_str}") + logging.info(f"Enabling REPLE...") enable_repl() + logging.info(f"Creating Session...") sessionId = create_session() - expected_lambda = lambda response: ( - response['status'] == 'SUCCESS' and - response['total'] == 1 and - response['datarows'][0] == [1998] and - response['schema'][0]['name'] == 'year' and - response['schema'][0]['type'] == 'integer' - ) - test_repl(expected_lambda, "select year from mys3.default.http_logs where year = 1998 limit 1", sessionId) - - - expected_lambda = lambda response: ( - response['size'] == 13 and - response['total'] == 13 and - response['datarows'][0] == [ - "@timestamp", - "timestamp", - "" - ] and - response['schema'] == [ - { - "name": "col_name", - "type": "string" - }, - { - "name": "data_type", - "type": "string" - }, - { - "name": "comment", - "type": "string" - } - ] - ) - test_repl(expected_lambda, "DESC mys3.default.http_logs", sessionId) - - expected_lambda = lambda response: ( - response['size'] == 1 and - response['total'] == 1 and - response['datarows'][0] == [ - "default", - "http_logs", - False - ] and - response['schema'] == [ - {"name": "namespace", "type": "string"}, - {"name": "tableName", "type": "string"}, - {"name": "isTemporary", "type": "boolean"} - ] - ) - test_repl(expected_lambda, "SHOW TABLES IN mys3.default LIKE 'http_logs'", sessionId) - - - expected_lambda = lambda response: ( - response['size'] == 0 - ) - test_repl(expected_lambda, "create skipping index on mys3.default.http_logs (status VALUE_SET)", sessionId) + logging.info(f"Starting Tests ...") + # Iterate over tables files + if run_tables: + logging.info(f"Starting Create Table Tests ...") + tables_files = os.listdir(f"{table_name}/tables") + for table_file in tables_files: + query = read_table(table_name, table_file) + expected_lambda = lambda response: ( + response['size'] == 0 + ) + test_repl(expected_lambda, query, sessionId) + pass + + # Iterate over query files + if run_queries: + logging.info(f"Starting Queries Tests ...") + query_files = os.listdir(f"{table_name}/queries") + for query_file in query_files: + query = read_query(table_name, query_file) + expected_result = read_response(table_name, query_file) + test_repl(expected_result, query, sessionId) + pass + print_sanity_report() if __name__ == '__main__': - main() \ No newline at end of file + # Initialize the argument parser + parser = argparse.ArgumentParser(description='Run table queries and/or other queries.') + + # Add arguments + parser.add_argument('--use-date', type=str, help='Specify a date for the tables in YYYYMMDD format', default='') + parser.add_argument('--run-tables', action='store_true', help='Run table queries') + parser.add_argument('--run-queries', action='store_true', help='Run other queries') + + # Parse the arguments + args = parser.parse_args() + + # Call main with the parsed arguments + main(use_date=args.use_date ,run_tables=args.run_tables, run_queries=args.run_queries) + + + From db943dae87013225cc1ea2eac1e97982a3278fd8 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Thu, 2 Nov 2023 14:45:37 -0700 Subject: [PATCH 4/9] add data-preparation documentation Signed-off-by: YANGDB --- integ-test/src/test/python/README.md | 2 ++ integ-test/src/test/python/data-preparation.md | 3 +++ 2 files changed, 5 insertions(+) create mode 100644 integ-test/src/test/python/data-preparation.md diff --git a/integ-test/src/test/python/README.md b/integ-test/src/test/python/README.md index 18c02e94e..ee05b1380 100644 --- a/integ-test/src/test/python/README.md +++ b/integ-test/src/test/python/README.md @@ -9,6 +9,8 @@ This script is designed to perform sanity checks on OpenSearch queries by execut ## Configuration +Before running the script, ensure that the `s3://path/data/http_log/` is correctly pointing to the http_logs s3 bucket - for additional information see [data-preparation](data-preparation.md). + Before running the script, ensure that the `OPENSEARCH_URL` environment variable is set to your OpenSearch cluster's URL. Example: diff --git a/integ-test/src/test/python/data-preparation.md b/integ-test/src/test/python/data-preparation.md new file mode 100644 index 000000000..a2dec84a4 --- /dev/null +++ b/integ-test/src/test/python/data-preparation.md @@ -0,0 +1,3 @@ +## Data Preparation +This document will explain the data setup for an S3 bucket to perform the sanity test with. +... \ No newline at end of file From ad5829881dff43de9508a3fcb4d649d276f695b9 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Fri, 3 Nov 2023 18:07:37 -0700 Subject: [PATCH 5/9] update additional queries and expected results Signed-off-by: YANGDB --- integ-test/src/test/python/README.md | 13 +- .../test/python/http_logs/queries/ppl1.sql | 2 +- .../test/python/http_logs/queries/ppl2.sql | 2 + .../test/python/http_logs/queries/ppl3.sql | 3 + .../src/test/python/http_logs/queries/q0.sql | 1 + .../src/test/python/http_logs/queries/q1.sql | 2 +- .../src/test/python/http_logs/queries/q2.sql | 2 +- .../src/test/python/http_logs/queries/q3.sql | 2 +- .../src/test/python/http_logs/queries/q4.sql | 2 +- .../src/test/python/http_logs/queries/q5.sql | 2 +- .../src/test/python/http_logs/queries/q6.sql | 2 +- .../src/test/python/http_logs/queries/q7.sql | 6 +- .../src/test/python/http_logs/queries/q8.sql | 8 + .../test/python/http_logs/results/ppl1.json | 60 +++--- .../test/python/http_logs/results/ppl2.json | 96 ++++++++++ .../test/python/http_logs/results/ppl3.json | 76 ++++++++ .../src/test/python/http_logs/results/q0.json | 171 ++++++++++++++++++ .../src/test/python/http_logs/results/q1.json | 62 +++---- .../src/test/python/http_logs/results/q3.json | 18 +- .../src/test/python/http_logs/results/q8.json | 0 .../http_logs/tables/create_cover_index.sql | 6 +- .../python/http_logs/tables/create_mv.sql | 4 +- .../python/http_logs/tables/create_table.sql | 4 +- integ-test/src/test/python/run_sanity.sh | 25 +++ integ-test/src/test/python/sanity.py | 9 +- 25 files changed, 490 insertions(+), 88 deletions(-) create mode 100644 integ-test/src/test/python/http_logs/queries/ppl2.sql create mode 100644 integ-test/src/test/python/http_logs/queries/ppl3.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q0.sql create mode 100644 integ-test/src/test/python/http_logs/queries/q8.sql create mode 100644 integ-test/src/test/python/http_logs/results/ppl2.json create mode 100644 integ-test/src/test/python/http_logs/results/ppl3.json create mode 100644 integ-test/src/test/python/http_logs/results/q0.json create mode 100644 integ-test/src/test/python/http_logs/results/q8.json diff --git a/integ-test/src/test/python/README.md b/integ-test/src/test/python/README.md index ee05b1380..72171bdb2 100644 --- a/integ-test/src/test/python/README.md +++ b/integ-test/src/test/python/README.md @@ -9,10 +9,16 @@ This script is designed to perform sanity checks on OpenSearch queries by execut ## Configuration -Before running the script, ensure that the `s3://path/data/http_log/` is correctly pointing to the http_logs s3 bucket - for additional information see [data-preparation](data-preparation.md). +Before running the script, ensure that the `s3://flint/-data/-dp/-eu/-west/-1/-beta/data/http_log/` is correctly pointing to the http_logs s3 bucket - for additional information see [data-preparation](data-preparation.md). Before running the script, ensure that the `OPENSEARCH_URL` environment variable is set to your OpenSearch cluster's URL. +Before running the script, ensure that the datasource name (in this sample `mys3`) match the correct location of your EMR spark cluster. + +Before running the script, ensure that the catalog name (in this sample `default`) match the correct schema name within the AWS-GLUE catalog. + +Before running the script, ensure that the table name (in this sample `http_logs_plain`) match the correct name of table ([Or create table using the next script](./http_logs/tables/create_table.sql)). + Example: ```bash export OPENSEARCH_URL="http://localhost:9200" @@ -62,11 +68,16 @@ The script accepts several optional parameters to control its behavior: ```bash ./run_sanity.sh --run-tables --run-queries --use-date 20231102 ``` +4. Run both table (creation) queries and data queries: + ```bash + python sanity_script.py --run-tables --run-queries + ``` ## Output The script will generate a log file with a timestamp in its name (e.g., `sanity_report_2023-11-02_12-00-00.log`) that contains the results of the sanity checks, including any errors encountered during execution. + ## Support For any queries or issues, please create an issue in the repository or contact the maintainer. diff --git a/integ-test/src/test/python/http_logs/queries/ppl1.sql b/integ-test/src/test/python/http_logs/queries/ppl1.sql index bf5ff5c31..7b137ec1b 100644 --- a/integ-test/src/test/python/http_logs/queries/ppl1.sql +++ b/integ-test/src/test/python/http_logs/queries/ppl1.sql @@ -1 +1 @@ -SELECT * FROM mys3.default.http_logs_{date} ORDER BY "@timestamp" LIMIT 5; \ No newline at end of file +source = mys3.default.http_logs_plain | sort @timestamp | head 5; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/ppl2.sql b/integ-test/src/test/python/http_logs/queries/ppl2.sql new file mode 100644 index 000000000..ccba61a93 --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/ppl2.sql @@ -0,0 +1,2 @@ +source = mys3.default.http_logs_plain | +where status >= 400 | sort - @timestamp | head 5 \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/ppl3.sql b/integ-test/src/test/python/http_logs/queries/ppl3.sql new file mode 100644 index 000000000..6bf111b6d --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/ppl3.sql @@ -0,0 +1,3 @@ +source = mys3.default.http_logs_plain | +where status = 200 | stats count(status) by clientip, status | +sort - clientip | head 10 \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q0.sql b/integ-test/src/test/python/http_logs/queries/q0.sql new file mode 100644 index 000000000..504a43b6d --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q0.sql @@ -0,0 +1 @@ +DESCRIBE EXTENDED mys3.default.http_logs_plain \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q1.sql b/integ-test/src/test/python/http_logs/queries/q1.sql index bf5ff5c31..38fb16b1c 100644 --- a/integ-test/src/test/python/http_logs/queries/q1.sql +++ b/integ-test/src/test/python/http_logs/queries/q1.sql @@ -1 +1 @@ -SELECT * FROM mys3.default.http_logs_{date} ORDER BY "@timestamp" LIMIT 5; \ No newline at end of file +SELECT * FROM mys3.default.http_logs_plain ORDER BY '@timestamp' LIMIT 5; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q2.sql b/integ-test/src/test/python/http_logs/queries/q2.sql index 652da9b55..7e407e096 100644 --- a/integ-test/src/test/python/http_logs/queries/q2.sql +++ b/integ-test/src/test/python/http_logs/queries/q2.sql @@ -1,2 +1,2 @@ SELECT COUNT(DISTINCT clientip) as unique_client_ips -FROM mys3.default.http_logs_{date}; \ No newline at end of file +FROM mys3.default.http_logs_plain; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q3.sql b/integ-test/src/test/python/http_logs/queries/q3.sql index 6b18be3bd..34ab62715 100644 --- a/integ-test/src/test/python/http_logs/queries/q3.sql +++ b/integ-test/src/test/python/http_logs/queries/q3.sql @@ -2,7 +2,7 @@ SELECT FIRST(day) AS day, status, COUNT(status) AS status_count_by_day -FROM (SELECT * FROM mys3.default.http_logs_{date} LIMIT 1000) +FROM (SELECT * FROM mys3.default.http_logs_plain ORDER BY `@timestamp` LIMIT 1000) GROUP BY day, status ORDER BY day, status LIMIT 10; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q4.sql b/integ-test/src/test/python/http_logs/queries/q4.sql index 32fa82a18..95b8ab3e7 100644 --- a/integ-test/src/test/python/http_logs/queries/q4.sql +++ b/integ-test/src/test/python/http_logs/queries/q4.sql @@ -2,7 +2,7 @@ SELECT FIRST(day) AS day, status, COUNT(status) AS status_count_by_day -FROM mys3.default.http_logs_{date} +FROM mys3.default.http_logs_plain WHERE status >= 400 GROUP BY day, status ORDER BY day, status diff --git a/integ-test/src/test/python/http_logs/queries/q5.sql b/integ-test/src/test/python/http_logs/queries/q5.sql index 75cc222f5..ffab7264a 100644 --- a/integ-test/src/test/python/http_logs/queries/q5.sql +++ b/integ-test/src/test/python/http_logs/queries/q5.sql @@ -1,7 +1,7 @@ SELECT status, COUNT(status) AS status_count_by_day -FROM mys3.default.http_logs_{date} +FROM mys3.default.http_logs_plain WHERE status >= 400 GROUP BY status ORDER BY status diff --git a/integ-test/src/test/python/http_logs/queries/q6.sql b/integ-test/src/test/python/http_logs/queries/q6.sql index e2266c6c9..197c979a5 100644 --- a/integ-test/src/test/python/http_logs/queries/q6.sql +++ b/integ-test/src/test/python/http_logs/queries/q6.sql @@ -1,3 +1,3 @@ -SELECT day, SUM(size) as total_size FROM mys3.default.http_logs_{date} +SELECT day, SUM(size) as total_size FROM mys3.default.http_logs_plain WHERE year = 1998 AND month =6 GROUP BY day; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q7.sql b/integ-test/src/test/python/http_logs/queries/q7.sql index 378b4b2ec..9e13b6ead 100644 --- a/integ-test/src/test/python/http_logs/queries/q7.sql +++ b/integ-test/src/test/python/http_logs/queries/q7.sql @@ -1,6 +1,6 @@ SELECT count(*) as count, clientip -FROM mys3.default.http_logs_{date} +FROM mys3.default.http_logs_plain WHERE clientip BETWEEN '208.0.0.0' AND '210.0.0.0' GROUP BY clientip -ORDER BY DESC count -limit 20; \ No newline at end of file +ORDER BY count DESC + limit 20; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/queries/q8.sql b/integ-test/src/test/python/http_logs/queries/q8.sql new file mode 100644 index 000000000..f69b95ba5 --- /dev/null +++ b/integ-test/src/test/python/http_logs/queries/q8.sql @@ -0,0 +1,8 @@ +Explain +SELECT + day, + status +FROM mys3.default.http_logs_plain +WHERE status >= 400 +GROUP BY day, status + LIMIT 100; \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/ppl1.json b/integ-test/src/test/python/http_logs/results/ppl1.json index 92a48375d..f39b67c58 100644 --- a/integ-test/src/test/python/http_logs/results/ppl1.json +++ b/integ-test/src/test/python/http_logs/results/ppl1.json @@ -39,54 +39,54 @@ ], "datarows": [ [ - "1998-06-10T14:37:23.000Z", - "76.112.16.0", - "GET /images/102325.gif HTTP/1.0", - 200, - 1555, + "1998-06-14T19:59:55.000Z", + "185.163.25.0", + "GET /images/comp_bg2_hm.gif HTTP/1.1", + 404, + 340, 1998, 6, - 10 + 14 ], [ - "1998-06-10T14:37:23.000Z", - "78.109.16.0", - "GET /english/images/comp_bu_stage1n.gif HTTP/1.0", - 200, - 1548, + "1998-06-14T19:59:55.000Z", + "161.62.26.0", + "GET /images/comp_bg2_hm.gif HTTP/1.0", + 404, + 343, 1998, 6, - 10 + 14 ], [ - "1998-06-10T14:37:23.000Z", - "140.48.14.0", - "GET /images/102321.gif HTTP/1.0", - 200, - 1602, + "1998-06-14T19:59:55.000Z", + "63.158.15.0", + "GET /images/comp_bg2_hm.gif HTTP/1.1", + 404, + 335, 1998, 6, - 10 + 14 ], [ - "1998-06-10T14:37:23.000Z", - "114.113.16.0", - "GET /english/images/team_bu_roster_on.gif HTTP/1.0", - 200, - 1567, + "1998-06-14T19:59:55.000Z", + "190.10.13.0", + "GET /images/comp_bg2_hm.gif HTTP/1.1", + 404, + 335, 1998, 6, - 10 + 14 ], [ - "1998-06-10T14:37:24.000Z", - "79.48.14.0", - "GET /english/images/comp_bu_stage1n.gif HTTP/1.0", - 200, - 1548, + "1998-06-14T19:59:53.000Z", + "28.87.6.0", + "GET /images/comp_bg2_hm.gif HTTP/1.0", + 404, + 349, 1998, 6, - 10 + 14 ] ], "total": 5, diff --git a/integ-test/src/test/python/http_logs/results/ppl2.json b/integ-test/src/test/python/http_logs/results/ppl2.json new file mode 100644 index 000000000..f39b67c58 --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/ppl2.json @@ -0,0 +1,96 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "@timestamp", + "type": "date" + }, + { + "name": "clientip", + "type": "string" + }, + { + "name": "request", + "type": "string" + }, + { + "name": "status", + "type": "integer" + }, + { + "name": "size", + "type": "integer" + }, + { + "name": "year", + "type": "integer" + }, + { + "name": "month", + "type": "integer" + }, + { + "name": "day", + "type": "integer" + } + ], + "datarows": [ + [ + "1998-06-14T19:59:55.000Z", + "185.163.25.0", + "GET /images/comp_bg2_hm.gif HTTP/1.1", + 404, + 340, + 1998, + 6, + 14 + ], + [ + "1998-06-14T19:59:55.000Z", + "161.62.26.0", + "GET /images/comp_bg2_hm.gif HTTP/1.0", + 404, + 343, + 1998, + 6, + 14 + ], + [ + "1998-06-14T19:59:55.000Z", + "63.158.15.0", + "GET /images/comp_bg2_hm.gif HTTP/1.1", + 404, + 335, + 1998, + 6, + 14 + ], + [ + "1998-06-14T19:59:55.000Z", + "190.10.13.0", + "GET /images/comp_bg2_hm.gif HTTP/1.1", + 404, + 335, + 1998, + 6, + 14 + ], + [ + "1998-06-14T19:59:53.000Z", + "28.87.6.0", + "GET /images/comp_bg2_hm.gif HTTP/1.0", + 404, + 349, + 1998, + 6, + 14 + ] + ], + "total": 5, + "size": 5 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/ppl3.json b/integ-test/src/test/python/http_logs/results/ppl3.json new file mode 100644 index 000000000..f6ec09f23 --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/ppl3.json @@ -0,0 +1,76 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "count(status)", + "type": "long" + }, + { + "name": "clientip", + "type": "string" + }, + { + "name": "status", + "type": "integer" + } + ], + "datarows": [ + [ + 78, + "99.99.9.0", + 200 + ], + [ + 133, + "99.99.8.0", + 200 + ], + [ + 542, + "99.99.6.0", + 200 + ], + [ + 15, + "99.99.5.0", + 200 + ], + [ + 4, + "99.99.4.0", + 200 + ], + [ + 71, + "99.99.3.0", + 200 + ], + [ + 143, + "99.99.20.0", + 200 + ], + [ + 39, + "99.99.2.0", + 200 + ], + [ + 156, + "99.99.19.0", + 200 + ], + [ + 64, + "99.99.18.0", + 200 + ] + ], + "total": 10, + "size": 10 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q0.json b/integ-test/src/test/python/http_logs/results/q0.json new file mode 100644 index 000000000..be1956450 --- /dev/null +++ b/integ-test/src/test/python/http_logs/results/q0.json @@ -0,0 +1,171 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "col_name", + "type": "string" + }, + { + "name": "data_type", + "type": "string" + }, + { + "name": "comment", + "type": "string" + } + ], + "datarows": [ + [ + "@timestamp", + "timestamp", + "" + ], + [ + "clientip", + "string", + "" + ], + [ + "request", + "string", + "" + ], + [ + "status", + "int", + "" + ], + [ + "size", + "int", + "" + ], + [ + "year", + "int", + "" + ], + [ + "month", + "int", + "" + ], + [ + "day", + "int", + "" + ], + [ + "# Partition Information", + "", + "" + ], + [ + "# col_name", + "data_type", + "comment" + ], + [ + "year", + "int", + "" + ], + [ + "month", + "int", + "" + ], + [ + "day", + "int", + "" + ], + [ + "", + "", + "" + ], + [ + "# Detailed Table Information", + "", + "" + ], + [ + "Database", + "default", + "" + ], + [ + "Table", + "http_logs_20231102", + "" + ], + [ + "Owner", + "hadoop", + "" + ], + [ + "Created Time", + "Thu Nov 02 20:05:27 UTC 2023", + "" + ], + [ + "Last Access", + "UNKNOWN", + "" + ], + [ + "Created By", + "Spark 3.3.2-amzn-0", + "" + ], + [ + "Type", + "EXTERNAL", + "" + ], + [ + "Provider", + "json", + "" + ], + [ + "Location", + "s3://flint-data-dp-eu-west-1-beta/data/http_log/http_logs_partitioned_json_bz2", + "" + ], + [ + "Serde Library", + "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + "" + ], + [ + "InputFormat", + "org.apache.hadoop.mapred.SequenceFileInputFormat", + "" + ], + [ + "OutputFormat", + "org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat", + "" + ], + [ + "Storage Properties", + "[compression=bzip2]", + "" + ], + [ + "Partition Provider", + "Catalog", + "" + ] + ], + "total": 29, + "size": 29 + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q1.json b/integ-test/src/test/python/http_logs/results/q1.json index 92a48375d..2031463fa 100644 --- a/integ-test/src/test/python/http_logs/results/q1.json +++ b/integ-test/src/test/python/http_logs/results/q1.json @@ -39,54 +39,54 @@ ], "datarows": [ [ - "1998-06-10T14:37:23.000Z", - "76.112.16.0", - "GET /images/102325.gif HTTP/1.0", + "1998-04-30T19:30:17.000Z", + "40.135.0.0", + "GET /images/hm_bg.jpg HTTP/1.0", 200, - 1555, + 24736, 1998, - 6, - 10 + 4, + 30 ], [ - "1998-06-10T14:37:23.000Z", - "78.109.16.0", - "GET /english/images/comp_bu_stage1n.gif HTTP/1.0", + "1998-04-30T19:30:53.000Z", + "232.0.0.0", + "GET /images/hm_bg.jpg HTTP/1.0", 200, - 1548, + 24736, 1998, - 6, - 10 + 4, + 30 ], [ - "1998-06-10T14:37:23.000Z", - "140.48.14.0", - "GET /images/102321.gif HTTP/1.0", + "1998-04-30T19:31:12.000Z", + "26.1.0.0", + "GET /images/hm_bg.jpg HTTP/1.0", 200, - 1602, + 24736, 1998, - 6, - 10 + 4, + 30 ], [ - "1998-06-10T14:37:23.000Z", - "114.113.16.0", - "GET /english/images/team_bu_roster_on.gif HTTP/1.0", + "1998-04-30T19:31:19.000Z", + "247.37.0.0", + "GET /french/splash_inet.html HTTP/1.0", 200, - 1567, + 3781, 1998, - 6, - 10 + 4, + 30 ], [ - "1998-06-10T14:37:24.000Z", - "79.48.14.0", - "GET /english/images/comp_bu_stage1n.gif HTTP/1.0", - 200, - 1548, + "1998-04-30T19:31:22.000Z", + "247.37.0.0", + "GET /images/hm_nbg.jpg HTTP/1.0", + 304, + 0, 1998, - 6, - 10 + 4, + 30 ] ], "total": 5, diff --git a/integ-test/src/test/python/http_logs/results/q3.json b/integ-test/src/test/python/http_logs/results/q3.json index c650d4aed..7f8fa08a0 100644 --- a/integ-test/src/test/python/http_logs/results/q3.json +++ b/integ-test/src/test/python/http_logs/results/q3.json @@ -19,13 +19,23 @@ ], "datarows": [ [ - 12, + 30, 200, - 1000 + 844 + ], + [ + 30, + 206, + 4 + ], + [ + 30, + 304, + 152 ] ], - "total": 1, - "size": 1 + "total": 3, + "size": 3 } } } \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/results/q8.json b/integ-test/src/test/python/http_logs/results/q8.json new file mode 100644 index 000000000..e69de29bb diff --git a/integ-test/src/test/python/http_logs/tables/create_cover_index.sql b/integ-test/src/test/python/http_logs/tables/create_cover_index.sql index f736844aa..817a1bf9d 100644 --- a/integ-test/src/test/python/http_logs/tables/create_cover_index.sql +++ b/integ-test/src/test/python/http_logs/tables/create_cover_index.sql @@ -1,7 +1,7 @@ -CREATE INDEX status_clientip_and_day_{date} - ON mys3.default.http_logs_{date} ( status, day, clientip ) +CREATE INDEX status_clientip_and_day + ON mys3.default.http_logs ( status, day, clientip ) WITH ( auto_refresh = true, refresh_interval = '5 minute', - checkpoint_location = 's3://path/data/http_log/checkpoint_status_and_day' + checkpoint_location = 's3://flint/-data/-dp/-eu/-west/-1/-beta/data/http_log/checkpoint_status_and_day' ) \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/tables/create_mv.sql b/integ-test/src/test/python/http_logs/tables/create_mv.sql index 955962a98..0ee49161f 100644 --- a/integ-test/src/test/python/http_logs/tables/create_mv.sql +++ b/integ-test/src/test/python/http_logs/tables/create_mv.sql @@ -1,9 +1,9 @@ -CREATE MATERIALIZED VIEW mys3.default.http_count_view_{date} +CREATE MATERIALIZED VIEW mys3.default.http_count_view AS SELECT window.start AS `start.time`, COUNT(*) AS count -FROM mys3.default.http_logs_{date} +FROM mys3.default.http_logs WHERE status != 200 GROUP BY TUMBLE(`@timestamp`, '1 Minutes') WITH ( diff --git a/integ-test/src/test/python/http_logs/tables/create_table.sql b/integ-test/src/test/python/http_logs/tables/create_table.sql index 8f548a442..ef516ac99 100644 --- a/integ-test/src/test/python/http_logs/tables/create_table.sql +++ b/integ-test/src/test/python/http_logs/tables/create_table.sql @@ -1,4 +1,4 @@ -CREATE EXTERNAL TABLE mys3.default.http_logs_{date} ( +CREATE EXTERNAL TABLE mys3.default.http_logs ( `@timestamp` TIMESTAMP, clientip STRING, request STRING, @@ -8,4 +8,4 @@ CREATE EXTERNAL TABLE mys3.default.http_logs_{date} ( month INT, day INT) USING json PARTITIONED BY(year, month, day) OPTIONS - (path 's3://path/data/http_log/http_logs_partitioned_json_bz2/', compression 'bzip2') \ No newline at end of file + (path 's3://flint/-data/-dp/-eu/-west/-1/-beta/data/http_log/http_logs_partitioned_json_bz2/', compression 'bzip2') \ No newline at end of file diff --git a/integ-test/src/test/python/run_sanity.sh b/integ-test/src/test/python/run_sanity.sh index a3b435ec9..9ebc35d45 100755 --- a/integ-test/src/test/python/run_sanity.sh +++ b/integ-test/src/test/python/run_sanity.sh @@ -3,6 +3,31 @@ # Set the environment variable for the OpenSearch URL export OPENSEARCH_URL="http://opensearch:9200" + +# ASCII Art for OpenSearch Spark +cat << "EOF" + ______ _____ _____ _______ _____ + / __ \ |\ \ /\ \ /::\ \ /\ \ + / \/ /| \\ \ /::\____\ /::::\ \ /::\____\ + / // \\ \ /:::/ / /::::::\ \ /:::/ / + /__/\ \\ \| | __ /:::/ / /::::::::\ \ /:::/ / + \ \ \ \ __ | |/ \ /:::/ / /:::/~~\:::\ \ /:::/ / + \ \ \ / \ / /\ /:::/ / /:::/ \:::\ \ /:::/ / + \ \ \/ / / / /\/:::/ / /:::/ / \:::\ \ /:::/ / + \ \ / / /__ / / /:::/____/ /:::/____/ \:::\____\ /:::/ / + \___\| | |::::\______ / / \:::\ \ |:::| | |:::| | /:::/ / + \____| |::::| | | / \:::\ \ |:::|____| |:::| |/:::/____/ + | ~~ | | | / \:::\ \ \:::\ \ /:::/ / \:::\ \ + | | | | / \:::\ \ \:::\ \ /:::/ / \:::\ \ + |_____| |_|/ \:::\____\ \:::\ /:::/ / \:::\ \ + \::/ / \:::\__/:::/ / \:::\ \ + \/____/ \::::::::/ / \:::\ \ + ~~\::::::/ / \:::\ \ + \::::/ / \:::\____\ + \::/____/ \::/ / + ~~ \/____/ +EOF + # Check for command-line arguments run_tables="" run_queries="" diff --git a/integ-test/src/test/python/sanity.py b/integ-test/src/test/python/sanity.py index 076956cff..1752e9f69 100644 --- a/integ-test/src/test/python/sanity.py +++ b/integ-test/src/test/python/sanity.py @@ -6,7 +6,7 @@ from datetime import datetime import argparse -url = os.environ.get('OPENSEARCH_URL', "http://opensearch_server:9200") # Modify this line +url = os.environ.get('OPENSEARCH_URL', "http://opensearch:9200") # Modify this line table_name = 'http_logs' # Generate a timestamp for the file name timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') @@ -140,6 +140,7 @@ def test_repl(expected, query, sessionId): while True: try: response = fetch_result(queryId).json() + logging.debug(f"actual response {response} ") logging.info(f"status: {response['status']}") if response['status'] == 'SUCCESS': query_end_time = time.time() @@ -191,7 +192,7 @@ def read_response(table_name, result_file): expected_result = json.load(file) # Define a lambda that captures expected_result and returns it when called - response_lambda = lambda: { + response_lambda = lambda response : { 'status': expected_result['data']['resp']['status'], 'schema': expected_result['data']['resp']['schema'], 'datarows': expected_result['data']['resp']['datarows'], @@ -200,7 +201,7 @@ def read_response(table_name, result_file): } # Log the lambda and its result for debugging - logging.debug(f"read_response {response_lambda} with resulting result: {response_lambda()} ") + logging.debug(f"expected response {expected_result} ") # Return the lambda function return response_lambda @@ -214,8 +215,6 @@ def main(use_date, run_tables, run_queries): # Default to current date if no # Use the provided date instead of the current date provided_date_str = use_date try: - # Validate that the provided date is in the correct format - datetime.strptime(provided_date_str, '%Y%m%d') current_date_str = provided_date_str except ValueError as e: logging.error(f"Date argument provided is not in the correct format: {provided_date_str}") From 10d7c4e1c5b8e30d8a6fbe6efc208138548eabc8 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Fri, 3 Nov 2023 18:13:11 -0700 Subject: [PATCH 6/9] update explain query Signed-off-by: YANGDB --- .../src/test/python/http_logs/queries/q8.sql | 15 ++- .../src/test/python/http_logs/results/q8.json | 96 +++++++++++++++++++ 2 files changed, 103 insertions(+), 8 deletions(-) diff --git a/integ-test/src/test/python/http_logs/queries/q8.sql b/integ-test/src/test/python/http_logs/queries/q8.sql index f69b95ba5..5f7f6d53c 100644 --- a/integ-test/src/test/python/http_logs/queries/q8.sql +++ b/integ-test/src/test/python/http_logs/queries/q8.sql @@ -1,8 +1,7 @@ -Explain -SELECT - day, - status -FROM mys3.default.http_logs_plain -WHERE status >= 400 -GROUP BY day, status - LIMIT 100; \ No newline at end of file +Explain SELECT + day, + status + FROM mys3.default.http_logs_plain + WHERE status >= 400 + GROUP BY day, status + LIMIT 100; diff --git a/integ-test/src/test/python/http_logs/results/q8.json b/integ-test/src/test/python/http_logs/results/q8.json index e69de29bb..f39b67c58 100644 --- a/integ-test/src/test/python/http_logs/results/q8.json +++ b/integ-test/src/test/python/http_logs/results/q8.json @@ -0,0 +1,96 @@ +{ + "data": { + "ok": true, + "resp": { + "status": "SUCCESS", + "schema": [ + { + "name": "@timestamp", + "type": "date" + }, + { + "name": "clientip", + "type": "string" + }, + { + "name": "request", + "type": "string" + }, + { + "name": "status", + "type": "integer" + }, + { + "name": "size", + "type": "integer" + }, + { + "name": "year", + "type": "integer" + }, + { + "name": "month", + "type": "integer" + }, + { + "name": "day", + "type": "integer" + } + ], + "datarows": [ + [ + "1998-06-14T19:59:55.000Z", + "185.163.25.0", + "GET /images/comp_bg2_hm.gif HTTP/1.1", + 404, + 340, + 1998, + 6, + 14 + ], + [ + "1998-06-14T19:59:55.000Z", + "161.62.26.0", + "GET /images/comp_bg2_hm.gif HTTP/1.0", + 404, + 343, + 1998, + 6, + 14 + ], + [ + "1998-06-14T19:59:55.000Z", + "63.158.15.0", + "GET /images/comp_bg2_hm.gif HTTP/1.1", + 404, + 335, + 1998, + 6, + 14 + ], + [ + "1998-06-14T19:59:55.000Z", + "190.10.13.0", + "GET /images/comp_bg2_hm.gif HTTP/1.1", + 404, + 335, + 1998, + 6, + 14 + ], + [ + "1998-06-14T19:59:53.000Z", + "28.87.6.0", + "GET /images/comp_bg2_hm.gif HTTP/1.0", + 404, + 349, + 1998, + 6, + 14 + ] + ], + "total": 5, + "size": 5 + } + } +} \ No newline at end of file From 7fcac66c8271beb7da620246496f1c4e2333b3cd Mon Sep 17 00:00:00 2001 From: YANGDB Date: Fri, 3 Nov 2023 19:04:03 -0700 Subject: [PATCH 7/9] move ppl to seperate folder Signed-off-by: YANGDB --- .../python/http_logs/{queries => ppl}/ppl1.sql | 0 .../python/http_logs/{queries => ppl}/ppl2.sql | 0 .../python/http_logs/{queries => ppl}/ppl3.sql | 0 .../python/http_logs/{queries => sql}/q0.sql | 0 .../python/http_logs/{queries => sql}/q1.sql | 0 .../python/http_logs/{queries => sql}/q2.sql | 0 .../python/http_logs/{queries => sql}/q3.sql | 0 .../python/http_logs/{queries => sql}/q4.sql | 0 .../python/http_logs/{queries => sql}/q5.sql | 0 .../python/http_logs/{queries => sql}/q6.sql | 0 .../python/http_logs/{queries => sql}/q7.sql | 0 .../python/http_logs/{queries => sql}/q8.sql | 0 integ-test/src/test/python/run_sanity.sh | 2 +- integ-test/src/test/python/sanity.py | 16 ++++++++++++---- 14 files changed, 13 insertions(+), 5 deletions(-) rename integ-test/src/test/python/http_logs/{queries => ppl}/ppl1.sql (100%) rename integ-test/src/test/python/http_logs/{queries => ppl}/ppl2.sql (100%) rename integ-test/src/test/python/http_logs/{queries => ppl}/ppl3.sql (100%) rename integ-test/src/test/python/http_logs/{queries => sql}/q0.sql (100%) rename integ-test/src/test/python/http_logs/{queries => sql}/q1.sql (100%) rename integ-test/src/test/python/http_logs/{queries => sql}/q2.sql (100%) rename integ-test/src/test/python/http_logs/{queries => sql}/q3.sql (100%) rename integ-test/src/test/python/http_logs/{queries => sql}/q4.sql (100%) rename integ-test/src/test/python/http_logs/{queries => sql}/q5.sql (100%) rename integ-test/src/test/python/http_logs/{queries => sql}/q6.sql (100%) rename integ-test/src/test/python/http_logs/{queries => sql}/q7.sql (100%) rename integ-test/src/test/python/http_logs/{queries => sql}/q8.sql (100%) diff --git a/integ-test/src/test/python/http_logs/queries/ppl1.sql b/integ-test/src/test/python/http_logs/ppl/ppl1.sql similarity index 100% rename from integ-test/src/test/python/http_logs/queries/ppl1.sql rename to integ-test/src/test/python/http_logs/ppl/ppl1.sql diff --git a/integ-test/src/test/python/http_logs/queries/ppl2.sql b/integ-test/src/test/python/http_logs/ppl/ppl2.sql similarity index 100% rename from integ-test/src/test/python/http_logs/queries/ppl2.sql rename to integ-test/src/test/python/http_logs/ppl/ppl2.sql diff --git a/integ-test/src/test/python/http_logs/queries/ppl3.sql b/integ-test/src/test/python/http_logs/ppl/ppl3.sql similarity index 100% rename from integ-test/src/test/python/http_logs/queries/ppl3.sql rename to integ-test/src/test/python/http_logs/ppl/ppl3.sql diff --git a/integ-test/src/test/python/http_logs/queries/q0.sql b/integ-test/src/test/python/http_logs/sql/q0.sql similarity index 100% rename from integ-test/src/test/python/http_logs/queries/q0.sql rename to integ-test/src/test/python/http_logs/sql/q0.sql diff --git a/integ-test/src/test/python/http_logs/queries/q1.sql b/integ-test/src/test/python/http_logs/sql/q1.sql similarity index 100% rename from integ-test/src/test/python/http_logs/queries/q1.sql rename to integ-test/src/test/python/http_logs/sql/q1.sql diff --git a/integ-test/src/test/python/http_logs/queries/q2.sql b/integ-test/src/test/python/http_logs/sql/q2.sql similarity index 100% rename from integ-test/src/test/python/http_logs/queries/q2.sql rename to integ-test/src/test/python/http_logs/sql/q2.sql diff --git a/integ-test/src/test/python/http_logs/queries/q3.sql b/integ-test/src/test/python/http_logs/sql/q3.sql similarity index 100% rename from integ-test/src/test/python/http_logs/queries/q3.sql rename to integ-test/src/test/python/http_logs/sql/q3.sql diff --git a/integ-test/src/test/python/http_logs/queries/q4.sql b/integ-test/src/test/python/http_logs/sql/q4.sql similarity index 100% rename from integ-test/src/test/python/http_logs/queries/q4.sql rename to integ-test/src/test/python/http_logs/sql/q4.sql diff --git a/integ-test/src/test/python/http_logs/queries/q5.sql b/integ-test/src/test/python/http_logs/sql/q5.sql similarity index 100% rename from integ-test/src/test/python/http_logs/queries/q5.sql rename to integ-test/src/test/python/http_logs/sql/q5.sql diff --git a/integ-test/src/test/python/http_logs/queries/q6.sql b/integ-test/src/test/python/http_logs/sql/q6.sql similarity index 100% rename from integ-test/src/test/python/http_logs/queries/q6.sql rename to integ-test/src/test/python/http_logs/sql/q6.sql diff --git a/integ-test/src/test/python/http_logs/queries/q7.sql b/integ-test/src/test/python/http_logs/sql/q7.sql similarity index 100% rename from integ-test/src/test/python/http_logs/queries/q7.sql rename to integ-test/src/test/python/http_logs/sql/q7.sql diff --git a/integ-test/src/test/python/http_logs/queries/q8.sql b/integ-test/src/test/python/http_logs/sql/q8.sql similarity index 100% rename from integ-test/src/test/python/http_logs/queries/q8.sql rename to integ-test/src/test/python/http_logs/sql/q8.sql diff --git a/integ-test/src/test/python/run_sanity.sh b/integ-test/src/test/python/run_sanity.sh index 9ebc35d45..8cad729ea 100755 --- a/integ-test/src/test/python/run_sanity.sh +++ b/integ-test/src/test/python/run_sanity.sh @@ -1,7 +1,7 @@ #!/bin/bash # Set the environment variable for the OpenSearch URL -export OPENSEARCH_URL="http://opensearch:9200" +export OPENSEARCH_URL="http://ec2-52-39-211-201.us-west-2.compute.amazonaws.com:9200" # ASCII Art for OpenSearch Spark diff --git a/integ-test/src/test/python/sanity.py b/integ-test/src/test/python/sanity.py index 1752e9f69..e6125da52 100644 --- a/integ-test/src/test/python/sanity.py +++ b/integ-test/src/test/python/sanity.py @@ -172,7 +172,7 @@ def test_repl(expected, query, sessionId): # Rest of your imports remain the same def read_query(table_name, query_file): - with open(f"{table_name}/queries/{query_file}", 'r') as file: + with open(f"{table_name}/sql/{query_file}", 'r') as file: query = file.read() query_with_date = query.replace("{date}", current_date_str) logging.debug(f"read_query {query_file} with resulting query: {query_with_date} ") @@ -241,14 +241,22 @@ def main(use_date, run_tables, run_queries): # Default to current date if no test_repl(expected_lambda, query, sessionId) pass - # Iterate over query files + # Iterate over SQL query files if run_queries: - logging.info(f"Starting Queries Tests ...") - query_files = os.listdir(f"{table_name}/queries") + logging.info(f"Starting SQL Queries Tests ...") + query_files = os.listdir(f"{table_name}/sql") for query_file in query_files: query = read_query(table_name, query_file) expected_result = read_response(table_name, query_file) test_repl(expected_result, query, sessionId) + + # Iterate over PPL query files + # logging.info(f"Starting PPL Queries Tests ...") + # query_files = os.listdir(f"{table_name}/ppl") + # for query_file in query_files: + # query = read_query(table_name, query_file) + # expected_result = read_response(table_name, query_file) + # test_repl(expected_result, query, sessionId) pass print_sanity_report() From 98a5895d2b0fe415d458093b70250446583d2b69 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Fri, 3 Nov 2023 19:04:23 -0700 Subject: [PATCH 8/9] move ppl to seperate folder Signed-off-by: YANGDB --- integ-test/src/test/python/run_sanity.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integ-test/src/test/python/run_sanity.sh b/integ-test/src/test/python/run_sanity.sh index 8cad729ea..9ebc35d45 100755 --- a/integ-test/src/test/python/run_sanity.sh +++ b/integ-test/src/test/python/run_sanity.sh @@ -1,7 +1,7 @@ #!/bin/bash # Set the environment variable for the OpenSearch URL -export OPENSEARCH_URL="http://ec2-52-39-211-201.us-west-2.compute.amazonaws.com:9200" +export OPENSEARCH_URL="http://opensearch:9200" # ASCII Art for OpenSearch Spark From b7924c086a984c84c2d379373e1fcc35dec6f3ec Mon Sep 17 00:00:00 2001 From: YANGDB Date: Wed, 8 Nov 2023 09:22:36 -0800 Subject: [PATCH 9/9] update scripts Signed-off-by: YANGDB --- .../src/test/python/http_logs/results/q8.json | 96 ------------------- .../src/test/python/http_logs/sql/q8.sql | 7 -- integ-test/src/test/python/run_sanity.sh | 26 +---- integ-test/src/test/python/sanity.py | 9 ++ 4 files changed, 10 insertions(+), 128 deletions(-) delete mode 100644 integ-test/src/test/python/http_logs/results/q8.json delete mode 100644 integ-test/src/test/python/http_logs/sql/q8.sql diff --git a/integ-test/src/test/python/http_logs/results/q8.json b/integ-test/src/test/python/http_logs/results/q8.json deleted file mode 100644 index f39b67c58..000000000 --- a/integ-test/src/test/python/http_logs/results/q8.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "data": { - "ok": true, - "resp": { - "status": "SUCCESS", - "schema": [ - { - "name": "@timestamp", - "type": "date" - }, - { - "name": "clientip", - "type": "string" - }, - { - "name": "request", - "type": "string" - }, - { - "name": "status", - "type": "integer" - }, - { - "name": "size", - "type": "integer" - }, - { - "name": "year", - "type": "integer" - }, - { - "name": "month", - "type": "integer" - }, - { - "name": "day", - "type": "integer" - } - ], - "datarows": [ - [ - "1998-06-14T19:59:55.000Z", - "185.163.25.0", - "GET /images/comp_bg2_hm.gif HTTP/1.1", - 404, - 340, - 1998, - 6, - 14 - ], - [ - "1998-06-14T19:59:55.000Z", - "161.62.26.0", - "GET /images/comp_bg2_hm.gif HTTP/1.0", - 404, - 343, - 1998, - 6, - 14 - ], - [ - "1998-06-14T19:59:55.000Z", - "63.158.15.0", - "GET /images/comp_bg2_hm.gif HTTP/1.1", - 404, - 335, - 1998, - 6, - 14 - ], - [ - "1998-06-14T19:59:55.000Z", - "190.10.13.0", - "GET /images/comp_bg2_hm.gif HTTP/1.1", - 404, - 335, - 1998, - 6, - 14 - ], - [ - "1998-06-14T19:59:53.000Z", - "28.87.6.0", - "GET /images/comp_bg2_hm.gif HTTP/1.0", - 404, - 349, - 1998, - 6, - 14 - ] - ], - "total": 5, - "size": 5 - } - } -} \ No newline at end of file diff --git a/integ-test/src/test/python/http_logs/sql/q8.sql b/integ-test/src/test/python/http_logs/sql/q8.sql deleted file mode 100644 index 5f7f6d53c..000000000 --- a/integ-test/src/test/python/http_logs/sql/q8.sql +++ /dev/null @@ -1,7 +0,0 @@ -Explain SELECT - day, - status - FROM mys3.default.http_logs_plain - WHERE status >= 400 - GROUP BY day, status - LIMIT 100; diff --git a/integ-test/src/test/python/run_sanity.sh b/integ-test/src/test/python/run_sanity.sh index 9ebc35d45..a300fdb02 100755 --- a/integ-test/src/test/python/run_sanity.sh +++ b/integ-test/src/test/python/run_sanity.sh @@ -1,33 +1,9 @@ #!/bin/bash # Set the environment variable for the OpenSearch URL -export OPENSEARCH_URL="http://opensearch:9200" +export OPENSEARCH_URL="http://ec2-52-39-211-201.us-west-2.compute.amazonaws.com:9200" -# ASCII Art for OpenSearch Spark -cat << "EOF" - ______ _____ _____ _______ _____ - / __ \ |\ \ /\ \ /::\ \ /\ \ - / \/ /| \\ \ /::\____\ /::::\ \ /::\____\ - / // \\ \ /:::/ / /::::::\ \ /:::/ / - /__/\ \\ \| | __ /:::/ / /::::::::\ \ /:::/ / - \ \ \ \ __ | |/ \ /:::/ / /:::/~~\:::\ \ /:::/ / - \ \ \ / \ / /\ /:::/ / /:::/ \:::\ \ /:::/ / - \ \ \/ / / / /\/:::/ / /:::/ / \:::\ \ /:::/ / - \ \ / / /__ / / /:::/____/ /:::/____/ \:::\____\ /:::/ / - \___\| | |::::\______ / / \:::\ \ |:::| | |:::| | /:::/ / - \____| |::::| | | / \:::\ \ |:::|____| |:::| |/:::/____/ - | ~~ | | | / \:::\ \ \:::\ \ /:::/ / \:::\ \ - | | | | / \:::\ \ \:::\ \ /:::/ / \:::\ \ - |_____| |_|/ \:::\____\ \:::\ /:::/ / \:::\ \ - \::/ / \:::\__/:::/ / \:::\ \ - \/____/ \::::::::/ / \:::\ \ - ~~\::::::/ / \:::\ \ - \::::/ / \:::\____\ - \::/____/ \::/ / - ~~ \/____/ -EOF - # Check for command-line arguments run_tables="" run_queries="" diff --git a/integ-test/src/test/python/sanity.py b/integ-test/src/test/python/sanity.py index e6125da52..000ce4a21 100644 --- a/integ-test/src/test/python/sanity.py +++ b/integ-test/src/test/python/sanity.py @@ -5,6 +5,8 @@ import logging from datetime import datetime import argparse +import re + url = os.environ.get('OPENSEARCH_URL', "http://opensearch:9200") # Modify this line table_name = 'http_logs' @@ -261,6 +263,13 @@ def main(use_date, run_tables, run_queries): # Default to current date if no print_sanity_report() +def remove_field_identifiers(plan): + # Regular expression to find field identifiers (e.g., "day#8") + pattern = re.compile(r'\b(\w+)#\d+\b') + # Replace field identifiers with just the field name + modified_plan = pattern.sub(r'\1', plan) + return modified_plan + if __name__ == '__main__': # Initialize the argument parser parser = argparse.ArgumentParser(description='Run table queries and/or other queries.')