From 1d1b80788d5ec8c08521f65212e9ed505ef5be15 Mon Sep 17 00:00:00 2001 From: Norman Jordan Date: Thu, 12 Dec 2024 15:39:05 -0800 Subject: [PATCH] Can integration tests against the docker cluster The Python script for integration tests was updated to run queries against the docker cluster. The required indices are created as part of the script. The queries for the Python script were likely out of date. These have been updated when the fix for the query was obvious. There are still 6 tests that fail. Signed-off-by: Norman Jordan --- docker/integ-test/spark-defaults.conf | 2 +- integ-test/script/README.md | 45 +- integ-test/script/SanityTest.py | 202 ++---- integ-test/script/data/customer.mapping.json | 30 + integ-test/script/data/http_logs.json | 12 + integ-test/script/data/http_logs.mapping.json | 30 + integ-test/script/data/lineitem.mapping.json | 54 ++ integ-test/script/data/nation.mapping.json | 18 + integ-test/script/data/nested.json | 10 + integ-test/script/data/nested.mapping.json | 37 + integ-test/script/data/orders.mapping.json | 33 + integ-test/script/data/part.mapping.json | 33 + integ-test/script/data/partsupp.mapping.json | 21 + integ-test/script/data/people.json | 12 + integ-test/script/data/people.mapping.json | 24 + integ-test/script/data/region.mapping.json | 15 + integ-test/script/data/supplier.mapping.json | 27 + integ-test/script/data/work_info.json | 10 + integ-test/script/data/work_info.mapping.json | 18 + integ-test/script/test_cases.csv | 644 +++++++++--------- 20 files changed, 803 insertions(+), 474 deletions(-) create mode 100644 integ-test/script/data/customer.mapping.json create mode 100644 integ-test/script/data/http_logs.json create mode 100644 integ-test/script/data/http_logs.mapping.json create mode 100644 integ-test/script/data/lineitem.mapping.json create mode 100644 integ-test/script/data/nation.mapping.json create mode 100644 integ-test/script/data/nested.json create mode 100644 integ-test/script/data/nested.mapping.json create 
mode 100644 integ-test/script/data/orders.mapping.json create mode 100644 integ-test/script/data/part.mapping.json create mode 100644 integ-test/script/data/partsupp.mapping.json create mode 100644 integ-test/script/data/people.json create mode 100644 integ-test/script/data/people.mapping.json create mode 100644 integ-test/script/data/region.mapping.json create mode 100644 integ-test/script/data/supplier.mapping.json create mode 100644 integ-test/script/data/work_info.json create mode 100644 integ-test/script/data/work_info.mapping.json diff --git a/docker/integ-test/spark-defaults.conf b/docker/integ-test/spark-defaults.conf index 5cfd47c23..19b9e4ec1 100644 --- a/docker/integ-test/spark-defaults.conf +++ b/docker/integ-test/spark-defaults.conf @@ -26,7 +26,7 @@ # spark.driver.memory 5g # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" spark.sql.extensions org.opensearch.flint.spark.FlintPPLSparkExtensions,org.opensearch.flint.spark.FlintSparkExtensions -spark.sql.catalog.myglue_test org.apache.spark.opensearch.catalog.OpenSearchCatalog +spark.sql.catalog.dev org.apache.spark.opensearch.catalog.OpenSearchCatalog spark.datasource.flint.host opensearch spark.datasource.flint.port 9200 spark.datasource.flint.scheme http diff --git a/integ-test/script/README.md b/integ-test/script/README.md index 7ce0c6886..348847f67 100644 --- a/integ-test/script/README.md +++ b/integ-test/script/README.md @@ -17,21 +17,31 @@ Apart from the basic feature, it also has some advanced functionality includes: ### Usage To use this script, you need to have Python **3.6** or higher installed. It also requires the following Python libraries: ```shell -pip install requests pandas openpyxl +pip install requests pandas openpyxl pyspark setuptools pyarrow grpcio grpcio-status protobuf +``` + +Next start the Docker containers that will be used for the tests. 
In the directory `docker/integ-test` +```shell +docker compose up -d +``` + +After the tests are finished, the Docker containers can be stopped from the directory `docker/integ-test` with: +```shell +docker compose down ``` After getting the requisite libraries, you can run the script with the following command line parameters in your shell: ```shell -python SanityTest.py --base-url ${URL_ADDRESS} --username *** --password *** --datasource ${DATASOURCE_NAME} --input-csv test_cases.csv --output-file test_report --max-workers 2 --check-interval 10 --timeout 600 +python SanityTest.py --spark-url ${SPARK_URL} --username *** --password *** --opensearch-url ${OPENSEARCH_URL} --input-csv test_cases.csv --output-file test_report ``` -You need to replace the placeholders with your actual values of URL_ADDRESS, DATASOURCE_NAME and USERNAME, PASSWORD for authentication to your endpoint. +You need to replace the placeholders with your actual values of SPARK_URL, OPENSEARCH_URL and USERNAME, PASSWORD for authentication to your endpoint. For more details of the command line parameters, you can see the help manual via command: ```shell python SanityTest.py --help usage: SanityTest.py [-h] --base-url BASE_URL --username USERNAME --password PASSWORD --datasource DATASOURCE --input-csv INPUT_CSV - --output-file OUTPUT_FILE [--max-workers MAX_WORKERS] [--check-interval CHECK_INTERVAL] [--timeout TIMEOUT] + --opensearch-url OPENSEARCH_URL [--max-workers MAX_WORKERS] [--check-interval CHECK_INTERVAL] [--timeout TIMEOUT] [--start-row START_ROW] [--end-row END_ROW] Run tests from a CSV file and generate a report. 
@@ -41,17 +51,12 @@ options: --base-url BASE_URL Base URL of the service --username USERNAME Username for authentication --password PASSWORD Password for authentication - --datasource DATASOURCE - Datasource name + --output-file OPENSEARCH_URL + URL of the OpenSearch service --input-csv INPUT_CSV Path to the CSV file containing test queries --output-file OUTPUT_FILE Path to the output report file - --max-workers MAX_WORKERS - optional, Maximum number of worker threads (default: 2) - --check-interval CHECK_INTERVAL - optional, Check interval in seconds (default: 10) - --timeout TIMEOUT optional, Timeout in seconds (default: 600) --start-row START_ROW optional, The start row of the query to run, start from 1 --end-row END_ROW optional, The end row of the query to run, not included @@ -78,12 +83,12 @@ It also provides the query_id, session_id and start/end time for each query, whi An example of Excel report: -| query_name | query | expected_status | status | check_status | error | result | Duration (s) | query_id | session_id | Start Time | End Time | -|------------|------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|---------|--------------|------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|-------------------------------|------------------------------|----------------------|---------------------| -| 1 | describe myglue_test.default.http_logs | SUCCESS | SUCCESS | TRUE | | {'status': 'SUCCESS', 'schema': [{...}, ...], 'datarows': [[...], ...], 'total': 31, 'size': 31} | 37.51 | SHFEVWxDNnZjem15Z2x1ZV90ZXN0 | RkgzZm0xNlA5MG15Z2x1ZV90ZXN0 | 2024-11-07 13:34:10 | 2024-11-07 13:34:47 | -| 2 | source = myglue_test.default.http_logs \| dedup status 
CONSECUTIVE=true | SUCCESS | FAILED | FALSE | {"Message":"Fail to run query. Cause: Consecutive deduplication is not supported"} | | 39.53 | dVNlaVVxOFZrZW15Z2x1ZV90ZXN0 | ZGU2MllVYmI4dG15Z2x1ZV90ZXN0 | 2024-11-07 13:34:10 | 2024-11-07 13:34:49 | -| 3 | source = myglue_test.default.http_logs \| eval res = json_keys(json('{"account_number":1,"balance":39225,"age":32,"gender":"M"}')) \| head 1 \| fields res | SUCCESS | SUCCESS | TRUE | | {'status': 'SUCCESS', 'schema': [{'name': 'res', 'type': 'array'}], 'datarows': [[['account_number', 'balance', 'age', 'gender']]], 'total': 1, 'size': 1} | 12.77 | WHQxaXlVSGtGUm15Z2x1ZV90ZXN0 | RkgzZm0xNlA5MG15Z2x1ZV90ZXN0 | 2024-11-07 13:34:47 | 2024-11-07 13:38:45 | -| ... | ... | ... | ... | ... | | | ... | ... | ... | ... | ... | +| query_name | query | expected_status | status | check_status | error | result | duration (s) | Start Time | End Time | +|------------|------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|---------|--------------|------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|----------------------|---------------------| +| 1 | describe myglue_test.default.http_logs | SUCCESS | SUCCESS | TRUE | | {'status': 'SUCCESS', 'schema': [{...}, ...], 'datarows': [[...], ...], 'total': 31, 'size': 31} | 37.51 | 2024-11-07 13:34:10 | 2024-11-07 13:34:47 | +| 2 | source = myglue_test.default.http_logs \| dedup status CONSECUTIVE=true | SUCCESS | FAILED | FALSE | {"Message":"Fail to run query. 
Cause: Consecutive deduplication is not supported"} | | 39.53 | 2024-11-07 13:34:10 | 2024-11-07 13:34:49 | +| 3 | source = myglue_test.default.http_logs \| eval res = json_keys(json('{"account_number":1,"balance":39225,"age":32,"gender":"M"}')) \| head 1 \| fields res | SUCCESS | SUCCESS | TRUE | | {'status': 'SUCCESS', 'schema': [{'name': 'res', 'type': 'array'}], 'datarows': [[['account_number', 'balance', 'age', 'gender']]], 'total': 1, 'size': 1} | 12.77 | 2024-11-07 13:34:47 | 2024-11-07 13:38:45 | +| ... | ... | ... | ... | ... | | | ... | ... | ... | #### JSON Report @@ -103,7 +108,7 @@ An example of JSON report: "detailed_results": [ { "query_name": 1, - "query": "source = myglue_test.default.http_logs | stats avg(size)", + "query": "source = dev.default.http_logs | stats avg(size)", "query_id": "eFZmTlpTa3EyTW15Z2x1ZV90ZXN0", "session_id": "bFJDMWxzb2NVUm15Z2x1ZV90ZXN0", "status": "SUCCESS", @@ -130,7 +135,7 @@ An example of JSON report: }, { "query_name": 2, - "query": "source = myglue_test.default.http_logs | eval res = json_keys(json(\u2018{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}')) | head 1 | fields res", + "query": "source = dev.default.http_logs | eval res = json_keys(json(\u2018{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}')) | head 1 | fields res", "query_id": "bjF4Y1VnbXdFYm15Z2x1ZV90ZXN0", "session_id": "c3pvU1V6OW8xM215Z2x1ZV90ZXN0", "status": "FAILED", @@ -142,7 +147,7 @@ An example of JSON report: }, { "query_name": 2, - "query": "source = myglue_test.default.http_logs | eval col1 = size, col2 = clientip | stats avg(col1) by col2", + "query": "source = dev.default.http_logs | eval col1 = size, col2 = clientip | stats avg(col1) by col2", "query_id": "azVyMFFORnBFRW15Z2x1ZV90ZXN0", "session_id": "VWF0SEtrNWM3bm15Z2x1ZV90ZXN0", "status": "TIMEOUT", diff --git a/integ-test/script/SanityTest.py b/integ-test/script/SanityTest.py 
index eb97752b4..d7944d314 100644 --- a/integ-test/script/SanityTest.py +++ b/integ-test/script/SanityTest.py @@ -3,6 +3,7 @@ SPDX-License-Identifier: Apache-2.0 """ +import glob import signal import sys import requests @@ -11,18 +12,18 @@ import time import logging from datetime import datetime -import pandas as pd import argparse from requests.auth import HTTPBasicAuth -from concurrent.futures import ThreadPoolExecutor, as_completed +from pyspark.sql import SparkSession import threading +import pandas as pd """ Environment: python3 Example to use this script: -python SanityTest.py --base-url ${URL_ADDRESS} --username *** --password *** --datasource ${DATASOURCE_NAME} --input-csv test_queries.csv --output-file test_report --max-workers 2 --check-interval 10 --timeout 600 +python SanityTest.py --spark-url ${SPARK_URL} --username *** --password *** --opensearch-url ${OPENSEARCH_URL} --input-csv test_queries.csv --output-file test_report The input file test_queries.csv should contain column: `query` @@ -33,24 +34,19 @@ """ class FlintTester: - def __init__(self, base_url, username, password, datasource, max_workers, check_interval, timeout, output_file, start_row, end_row, log_level): - self.base_url = base_url + def __init__(self, spark_url, username, password, opensearch_url, output_file, start_row, end_row, log_level): + self.spark_url = spark_url self.auth = HTTPBasicAuth(username, password) - self.datasource = datasource - self.headers = { 'Content-Type': 'application/json' } - self.max_workers = max_workers - self.check_interval = check_interval - self.timeout = timeout + self.opensearch_url = opensearch_url self.output_file = output_file self.start = start_row - 1 if start_row else None self.end = end_row - 1 if end_row else None self.log_level = log_level - self.max_attempts = (int)(timeout / check_interval) self.logger = self._setup_logger() - self.executor = ThreadPoolExecutor(max_workers=self.max_workers) - self.thread_local = threading.local() 
self.test_results = [] + self.spark_client = SparkSession.builder.remote(spark_url).appName("integ-test").getOrCreate() + def _setup_logger(self): logger = logging.getLogger('FlintTester') logger.setLevel(self.log_level) @@ -72,126 +68,80 @@ def _setup_logger(self): return logger + # Create the indices needed for the tests + def create_indices(self): + self.logger.info("Creating indices") - def get_session_id(self): - if not hasattr(self.thread_local, 'session_id'): - self.thread_local.session_id = "empty_session_id" - self.logger.debug(f"get session id {self.thread_local.session_id}") - return self.thread_local.session_id + json_files = glob.glob('data/*.json') + mapping_files = [f for f in json_files if f.endswith('.mapping.json')] + data_files = [f for f in json_files if not f.endswith('.mapping.json')] + existing_indices = set() - def set_session_id(self, session_id): - """Reuse the session id for the same thread""" - self.logger.debug(f"set session id {session_id}") - self.thread_local.session_id = session_id + for mapping_file in mapping_files: + index_name = mapping_file[5 : mapping_file.index('.')] - # Call submit API to submit the query - def submit_query(self, query, session_id="Empty"): - url = f"{self.base_url}/_plugins/_async_query" - payload = { - "datasource": self.datasource, - "lang": "ppl", - "query": query, - "sessionId": session_id - } - self.logger.debug(f"Submit query with payload: {payload}") - response_json = None - try: - response = requests.post(url, auth=self.auth, json=payload, headers=self.headers) - response_json = response.json() - response.raise_for_status() - return response_json - except Exception as e: - return {"error": f"{str(e)}, got response {response_json}"} + self.logger.info(f"Checking if index exists: {index_name}") + response = requests.get(f'{self.opensearch_url}/{index_name}', auth=self.auth) + if response.status_code == 200: + existing_indices.add(index_name) + continue - # Call get API to check the query status - def 
get_query_result(self, query_id): - url = f"{self.base_url}/_plugins/_async_query/{query_id}" - response_json = None - try: - response = requests.get(url, auth=self.auth) - response_json = response.json() - response.raise_for_status() - return response_json - except Exception as e: - return {"status": "FAILED", "error": f"{str(e)}, got response {response_json}"} + self.logger.info(f"Creating index: {index_name}") - # Call delete API to cancel the query - def cancel_query(self, query_id): - url = f"{self.base_url}/_plugins/_async_query/{query_id}" - response_json = None - try: - response = requests.delete(url, auth=self.auth) - response_json = response.json() - response.raise_for_status() - self.logger.info(f"Cancelled query [{query_id}] with info {response.json()}") - return response_json - except Exception as e: - self.logger.warning(f"Cancel query [{query_id}] error: {str(e)}, got response {response_json}") + file_data = open(mapping_file, 'rb').read() + headers = {'Content-Type': 'application/json'} + + response = requests.put(f'{self.opensearch_url}/{index_name}', auth=self.auth, headers=headers, data=file_data) + if response.status_code != 200: + self.logger.error(f'Failed to create index: {index_name}') + response.raise_for_status() + + for data_file in data_files: + index_name = data_file[5 : data_file.index('.')] + if index_name in existing_indices: + continue + + self.logger.info(f"Populating index: {index_name}") + + file_data = open(data_file, 'rb').read() + headers = {'Content-Type': 'application/x-ndjson'} + + response = requests.post(f'{self.opensearch_url}/{index_name}/_bulk', auth=self.auth, headers=headers, data=file_data) + if response.status_code != 200: + response.raise_for_status() # Run the test and return the result def run_test(self, query, seq_id, expected_status): self.logger.info(f"Starting test: {seq_id}, {query}") start_time = datetime.now() - pre_session_id = self.get_session_id() - submit_result = self.submit_query(query, 
pre_session_id) - if "error" in submit_result: - self.logger.warning(f"Submit error: {submit_result}") - return { - "query_name": seq_id, - "query": query, - "expected_status": expected_status, - "status": "SUBMIT_FAILED", - "check_status": "SUBMIT_FAILED" == expected_status if expected_status else None, - "error": submit_result["error"], - "duration": 0, - "start_time": start_time, - "end_time": datetime.now() - } - - query_id = submit_result["queryId"] - session_id = submit_result["sessionId"] - self.logger.info(f"Submit return: {submit_result}") - if (session_id != pre_session_id): - self.logger.info(f"Update session id from {pre_session_id} to {session_id}") - self.set_session_id(session_id) - - test_result = self.check_query_status(query_id) + + query_str = query.replace('\n', ' ') + status = None + result = None + error_str = None + try: + result = self.spark_client.sql(query_str) + status = 'SUCCESS' + except Exception as e: + status = 'FAILED' + error_str = str(e) + end_time = datetime.now() duration = (end_time - start_time).total_seconds() return { "query_name": seq_id, "query": query, - "query_id": query_id, - "session_id": session_id, "expected_status": expected_status, - "status": test_result["status"], - "check_status": test_result["status"] == expected_status if expected_status else None, - "error": test_result.get("error", ""), - "result": test_result if test_result["status"] == "SUCCESS" else None, + "status": status, + "check_status": status == expected_status if expected_status else None, + "error": error_str if error_str else None, + "result": result, "duration": duration, "start_time": start_time, "end_time": end_time } - # Check the status of the query periodically until it is completed or failed or exceeded the timeout - def check_query_status(self, query_id): - query_id = query_id - - for attempt in range(self.max_attempts): - time.sleep(self.check_interval) - result = self.get_query_result(query_id) - - if result["status"] == "FAILED" or 
result["status"] == "SUCCESS": - return result - - # Cancel the query if it exceeds the timeout - self.cancel_query(query_id) - return { - "status": "TIMEOUT", - "error": "Query execution exceeded " + str(self.timeout) + " seconds with last status: " + result["status"], - } - def run_tests_from_csv(self, csv_file): with open(csv_file, 'r') as f: reader = csv.DictReader(f) @@ -200,20 +150,15 @@ def run_tests_from_csv(self, csv_file): # Filtering queries based on start and end queries = queries[self.start:self.end] - # Parallel execution - futures = [self.executor.submit(self.run_test, query, seq_id, expected_status) for query, seq_id, expected_status in queries] - for future in as_completed(futures): - result = future.result() - self.logger.info(f"Completed test: {result["query_name"]}, {result["query"]}, got result status: {result["status"]}") - self.test_results.append(result) + self.test_results = [] + for query in queries: + self.test_results.append(self.run_test(query[0], query[1], query[2])) def generate_report(self): self.logger.info("Generating report...") total_queries = len(self.test_results) successful_queries = sum(1 for r in self.test_results if r['status'] == 'SUCCESS') failed_queries = sum(1 for r in self.test_results if r['status'] == 'FAILED') - submit_failed_queries = sum(1 for r in self.test_results if r['status'] == 'SUBMIT_FAILED') - timeout_queries = sum(1 for r in self.test_results if r['status'] == 'TIMEOUT') # Create report report = { @@ -221,8 +166,6 @@ def generate_report(self): "total_queries": total_queries, "successful_queries": successful_queries, "failed_queries": failed_queries, - "submit_failed_queries": submit_failed_queries, - "timeout_queries": timeout_queries, "execution_time": sum(r['duration'] for r in self.test_results) }, "detailed_results": self.test_results @@ -249,15 +192,12 @@ def signal_handler(sig, frame, tester): def main(): # Parse command line arguments parser = argparse.ArgumentParser(description="Run tests from a 
CSV file and generate a report.") - parser.add_argument("--base-url", required=True, help="Base URL of the service") + parser.add_argument("--spark-url", required=True, help="URL of the Spark service") parser.add_argument("--username", required=True, help="Username for authentication") parser.add_argument("--password", required=True, help="Password for authentication") - parser.add_argument("--datasource", required=True, help="Datasource name") + parser.add_argument("--opensearch-url", required=True, help="URL of the OpenSearch service") parser.add_argument("--input-csv", required=True, help="Path to the CSV file containing test queries") parser.add_argument("--output-file", required=True, help="Path to the output report file") - parser.add_argument("--max-workers", type=int, default=2, help="optional, Maximum number of worker threads (default: 2)") - parser.add_argument("--check-interval", type=int, default=5, help="optional, Check interval in seconds (default: 5)") - parser.add_argument("--timeout", type=int, default=600, help="optional, Timeout in seconds (default: 600)") parser.add_argument("--start-row", type=int, default=None, help="optional, The start row of the query to run, start from 1") parser.add_argument("--end-row", type=int, default=None, help="optional, The end row of the query to run, not included") parser.add_argument("--log-level", default="INFO", help="optional, Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL, default: INFO)") @@ -265,13 +205,10 @@ def main(): args = parser.parse_args() tester = FlintTester( - base_url=args.base_url, + spark_url=args.spark_url, username=args.username, password=args.password, - datasource=args.datasource, - max_workers=args.max_workers, - check_interval=args.check_interval, - timeout=args.timeout, + opensearch_url=args.opensearch_url, output_file=args.output_file, start_row=args.start_row, end_row=args.end_row, @@ -282,6 +219,9 @@ def main(): signal.signal(signal.SIGINT, lambda sig, frame: signal_handler(sig, 
frame, tester)) signal.signal(signal.SIGTERM, lambda sig, frame: signal_handler(sig, frame, tester)) + # Create indices + tester.create_indices() + # Running tests tester.run_tests_from_csv(args.input_csv) diff --git a/integ-test/script/data/customer.mapping.json b/integ-test/script/data/customer.mapping.json new file mode 100644 index 000000000..a98d473a2 --- /dev/null +++ b/integ-test/script/data/customer.mapping.json @@ -0,0 +1,30 @@ +{ + "mappings": { + "properties": { + "c_custkey": { + "type": "integer" + }, + "c_name": { + "type": "text" + }, + "c_address": { + "type": "text" + }, + "c_nationkey": { + "type": "integer" + }, + "c_phone": { + "type": "text" + }, + "c_acctbal": { + "type": "double" + }, + "c_mktsegment": { + "type": "text" + }, + "c_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/http_logs.json b/integ-test/script/data/http_logs.json new file mode 100644 index 000000000..ff2aa2fca --- /dev/null +++ b/integ-test/script/data/http_logs.json @@ -0,0 +1,12 @@ +{"index": {"_index": "http_logs"}} +{"@timestamp": 1696154400000, "year": 2023, "month": 10, "day": 1, "clientip": "40.135.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736} +{"index": {"_index": "http_logs"}} +{"@timestamp": 1696154700000, "year": 2023, "month": 10, "day": 1, "clientip": "232.0.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736} +{"index": {"_index": "http_logs"}} +{"@timestamp": 1696155000000, "year": 2023, "month": 10, "day": 1, "clientip": "26.1.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736} +{"index": {"_index": "http_logs"}} +{"@timestamp": 1696155300000, "year": 2023, "month": 10, "day": 1, "clientip": "247.37.0.0", "request": "GET /french/splash_inet.html HTTP/1.0", "status": 200, "size": 3781} +{"index": {"_index": "http_logs"}} +{"@timestamp": 1696155600000, "year": 2023, "month": 10, "day": 1, "clientip": 
"247.37.0.0", "request": "GET /images/hm_nbg.jpg HTTP/1.0", "status": 304, "size": 0} +{"index": {"_index": "http_logs"}} +{"@timestamp": 1696155900000, "year": 2023, "month": 10, "day": 1, "clientip": "252.0.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736} diff --git a/integ-test/script/data/http_logs.mapping.json b/integ-test/script/data/http_logs.mapping.json new file mode 100644 index 000000000..b944fbd4b --- /dev/null +++ b/integ-test/script/data/http_logs.mapping.json @@ -0,0 +1,30 @@ +{ + "mappings": { + "properties": { + "@timestamp": { + "type": "date" + }, + "year": { + "type": "integer" + }, + "month": { + "type": "integer" + }, + "day": { + "type": "integer" + }, + "clientip": { + "type": "keyword" + }, + "request": { + "type": "text" + }, + "status": { + "type": "integer" + }, + "size": { + "type": "integer" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/lineitem.mapping.json b/integ-test/script/data/lineitem.mapping.json new file mode 100644 index 000000000..2fb1cdb40 --- /dev/null +++ b/integ-test/script/data/lineitem.mapping.json @@ -0,0 +1,54 @@ +{ + "mappings": { + "properties": { + "l_orderkey": { + "type": "integer" + }, + "l_partkey": { + "type": "text" + }, + "l_suppkey": { + "type": "integer" + }, + "l_linenumber": { + "type": "integer" + }, + "l_quantity": { + "type": "double" + }, + "l_extendedprice": { + "type": "double" + }, + "l_discount": { + "type": "double" + }, + "l_tax": { + "type": "double" + }, + "l_returnflag": { + "type": "text" + }, + "l_linestatus": { + "type": "text" + }, + "l_shipdate": { + "type": "date" + }, + "l_commitdate": { + "type": "date" + }, + "l_receiptdate": { + "type": "date" + }, + "l_shipinstruct": { + "type": "text" + }, + "l_shipmode": { + "type": "text" + }, + "l_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/nation.mapping.json b/integ-test/script/data/nation.mapping.json new file 
mode 100644 index 000000000..d0e82e559 --- /dev/null +++ b/integ-test/script/data/nation.mapping.json @@ -0,0 +1,18 @@ +{ + "mappings": { + "properties": { + "n_nationkey": { + "type": "integer" + }, + "n_name": { + "type": "text" + }, + "n_regionkey": { + "type": "integer" + }, + "n_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/nested.json b/integ-test/script/data/nested.json new file mode 100644 index 000000000..eb8af683b --- /dev/null +++ b/integ-test/script/data/nested.json @@ -0,0 +1,10 @@ +{"index": {"_index": "nested"}} +{"int_col": 30, "struct_col": {"field1": {"subfield": "value1"}, "field2": 123}, "struct_col2": {"field1": {"subfield": "valueA"}, "field2": 23}} +{"index": {"_index": "nested"}} +{"int_col": 40, "struct_col": {"field1": {"subfield": "value5"}, "field2": 123}, "struct_col2": {"field1": {"subfield": "valueB"}, "field2": 33}} +{"index": {"_index": "nested"}} +{"int_col": 30, "struct_col": {"field1": {"subfield": "value4"}, "field2": 823}, "struct_col2": {"field1": {"subfield": "valueC"}, "field2": 83}} +{"index": {"_index": "nested"}} +{"int_col": 40, "struct_col": {"field1": {"subfield": "value2"}, "field2": 456}, "struct_col2": {"field1": {"subfield": "valueD"}, "field2": 46}} +{"index": {"_index": "nested"}} +{"int_col": 50, "struct_col": {"field1": {"subfield": "value3"}, "field2": 789}, "struct_col2": {"field1": {"subfield": "valueE"}, "field2": 89}} diff --git a/integ-test/script/data/nested.mapping.json b/integ-test/script/data/nested.mapping.json new file mode 100644 index 000000000..1aa189415 --- /dev/null +++ b/integ-test/script/data/nested.mapping.json @@ -0,0 +1,37 @@ +{ + "mappings": { + "properties": { + "int_col": { + "type": "integer" + }, + "struct_col": { + "properties": { + "field1": { + "properties": { + "subfield": { + "type": "text" + } + } + }, + "field2": { + "type": "integer" + } + } + }, + "struct_col2": { + "properties": { + "field1": { + "properties": { + 
"subfield": { + "type": "text" + } + } + }, + "field2": { + "type": "integer" + } + } + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/orders.mapping.json b/integ-test/script/data/orders.mapping.json new file mode 100644 index 000000000..59b3cecdd --- /dev/null +++ b/integ-test/script/data/orders.mapping.json @@ -0,0 +1,33 @@ +{ + "mappings": { + "properties": { + "o_orderkey": { + "type": "integer" + }, + "o_custkey": { + "type": "integer" + }, + "o_orderstatus": { + "type": "text" + }, + "o_totalprice": { + "type": "double" + }, + "o_orderdate": { + "type": "date" + }, + "o_orderpriority": { + "type": "text" + }, + "o_clerk": { + "type": "text" + }, + "o_shippriority": { + "type": "integer" + }, + "o_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/part.mapping.json b/integ-test/script/data/part.mapping.json new file mode 100644 index 000000000..8be7e9aa0 --- /dev/null +++ b/integ-test/script/data/part.mapping.json @@ -0,0 +1,33 @@ +{ + "mappings": { + "properties": { + "p_partkey": { + "type": "integer" + }, + "p_name": { + "type": "text" + }, + "p_mfgr": { + "type": "text" + }, + "p_brand": { + "type": "text" + }, + "p_type": { + "type": "text" + }, + "p_size": { + "type": "integer" + }, + "p_container": { + "type": "text" + }, + "p_retailprice": { + "type": "double" + }, + "p_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/partsupp.mapping.json b/integ-test/script/data/partsupp.mapping.json new file mode 100644 index 000000000..13509ad46 --- /dev/null +++ b/integ-test/script/data/partsupp.mapping.json @@ -0,0 +1,21 @@ +{ + "mappings": { + "properties": { + "ps_partkey": { + "type": "integer" + }, + "ps_suppkey": { + "type": "integer" + }, + "ps_availqty": { + "type": "integer" + }, + "ps_supplycost": { + "type": "double" + }, + "ps_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git 
a/integ-test/script/data/people.json b/integ-test/script/data/people.json new file mode 100644 index 000000000..4563a2c4b --- /dev/null +++ b/integ-test/script/data/people.json @@ -0,0 +1,12 @@ +{"index": {"_index": "people"}} +{"@timestamp": 1718458823000, "id": 1000, "name": "Jake", "occupation": "Engineer", "country": "England", "salary": 100000} +{"index": {"_index": "people"}} +{"@timestamp": 1718458833000, "id": 1001, "name": "Hello", "occupation": "Artist", "country": "USA", "salary": 70000} +{"index": {"_index": "people"}} +{"@timestamp": 1718458843000, "id": 1002, "name": "John", "occupation": "Doctor", "country": "Canada", "salary": 120000} +{"index": {"_index": "people"}} +{"@timestamp": 1718458853000, "id": 1003, "name": "David", "occupation": "Doctor", "country": null, "salary": 120000} +{"index": {"_index": "people"}} +{"@timestamp": 1718458863000, "id": 1004, "name": "David", "occupation": null, "country": "Canada", "salary": 0} +{"index": {"_index": "people"}} +{"@timestamp": 1718458873000, "id": 1005, "name": "Jane", "occupation": "Scientist", "country": "Canada", "salary": 90000} diff --git a/integ-test/script/data/people.mapping.json b/integ-test/script/data/people.mapping.json new file mode 100644 index 000000000..b5dde8ff6 --- /dev/null +++ b/integ-test/script/data/people.mapping.json @@ -0,0 +1,24 @@ +{ + "mappings": { + "properties": { + "@timestamp": { + "type": "date" + }, + "id": { + "type": "integer" + }, + "name": { + "type": "text" + }, + "occupation": { + "type": "text" + }, + "country": { + "type": "text" + }, + "salary": { + "type": "integer" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/region.mapping.json b/integ-test/script/data/region.mapping.json new file mode 100644 index 000000000..3dddbc580 --- /dev/null +++ b/integ-test/script/data/region.mapping.json @@ -0,0 +1,15 @@ +{ + "mappings": { + "properties": { + "r_regionkey": { + "type": "integer" + }, + "r_name": { + "type": "text" + }, + 
"r_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/supplier.mapping.json b/integ-test/script/data/supplier.mapping.json new file mode 100644 index 000000000..bdcb933b6 --- /dev/null +++ b/integ-test/script/data/supplier.mapping.json @@ -0,0 +1,27 @@ +{ + "mappings": { + "properties": { + "s_suppkey": { + "type": "integer" + }, + "s_name": { + "type": "text" + }, + "s_address": { + "type": "text" + }, + "s_nationkey": { + "type": "integer" + }, + "s_phone": { + "type": "text" + }, + "s_acctbal": { + "type": "double" + }, + "s_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/work_info.json b/integ-test/script/data/work_info.json new file mode 100644 index 000000000..64802bdad --- /dev/null +++ b/integ-test/script/data/work_info.json @@ -0,0 +1,10 @@ +{"index": {"_index": "work_info"}} +{"uid": 1000, "name": "Jake", "department": "IT", "occupation": "Engineer"} +{"index": {"_index": "work_info"}} +{"uid": 1002, "name": "John", "department": "DATA", "occupation": "Scientist"} +{"index": {"_index": "work_info"}} +{"uid": 1003, "name": "David", "department": "HR", "occupation": "Doctor"} +{"index": {"_index": "work_info"}} +{"uid": 1005, "name": "Jane", "department": "DATA", "occupation": "Engineer"} +{"index": {"_index": "work_info"}} +{"uid": 1006, "name": "Tom", "department": "SALES", "occupation": "Artist"} diff --git a/integ-test/script/data/work_info.mapping.json b/integ-test/script/data/work_info.mapping.json new file mode 100644 index 000000000..3fb5e2c28 --- /dev/null +++ b/integ-test/script/data/work_info.mapping.json @@ -0,0 +1,18 @@ +{ + "mappings": { + "properties": { + "uid": { + "type": "integer" + }, + "name": { + "type": "text" + }, + "department": { + "type": "text" + }, + "occupation": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/test_cases.csv b/integ-test/script/test_cases.csv index 
7df05f5a3..91500efea 100644 --- a/integ-test/script/test_cases.csv +++ b/integ-test/script/test_cases.csv @@ -1,116 +1,116 @@ query,expected_status -describe myglue_test.default.http_logs,FAILED -describe `myglue_test`.`default`.`http_logs`,FAILED -"source = myglue_test.default.http_logs | dedup 1 status | fields @timestamp, clientip, status, size | head 10",SUCCESS -"source = myglue_test.default.http_logs | dedup status, size | head 10",SUCCESS -source = myglue_test.default.http_logs | dedup 1 status keepempty=true | head 10,SUCCESS -"source = myglue_test.default.http_logs | dedup status, size keepempty=true | head 10",SUCCESS -source = myglue_test.default.http_logs | dedup 2 status | head 10,SUCCESS -"source = myglue_test.default.http_logs | dedup 2 status, size | head 10",SUCCESS -"source = myglue_test.default.http_logs | dedup 2 status, size keepempty=true | head 10",SUCCESS -source = myglue_test.default.http_logs | dedup status CONSECUTIVE=true | fields status,FAILED -"source = myglue_test.default.http_logs | dedup 2 status, size CONSECUTIVE=true | fields status",FAILED -"source = myglue_test.default.http_logs | sort stat | fields @timestamp, clientip, status | head 10",SUCCESS -"source = myglue_test.default.http_logs | fields @timestamp, notexisted | head 10",FAILED -"source = myglue_test.default.nested | fields int_col, struct_col.field1, struct_col2.field1 | head 10",FAILED -"source = myglue_test.default.nested | where struct_col2.field1.subfield > 'valueA' | sort int_col | fields int_col, struct_col.field1.subfield, struct_col2.field1.subfield",FAILED -"source = myglue_test.default.http_logs | fields - @timestamp, clientip, status | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval new_time = @timestamp, new_clientip = clientip | fields - new_time, new_clientip, status | head 10",SUCCESS -source = myglue_test.default.http_logs | eval new_clientip = lower(clientip) | fields - new_clientip | head 10,SUCCESS -"source = 
myglue_test.default.http_logs | fields + @timestamp, clientip, status | fields - clientip, status | head 10",SUCCESS -"source = myglue_test.default.http_logs | fields - clientip, status | fields + @timestamp, clientip, status| head 10",SUCCESS -source = myglue_test.default.http_logs | where status = 200 | head 10,SUCCESS -source = myglue_test.default.http_logs | where status != 200 | head 10,SUCCESS -source = myglue_test.default.http_logs | where size > 0 | head 10,SUCCESS -source = myglue_test.default.http_logs | where size <= 0 | head 10,SUCCESS -source = myglue_test.default.http_logs | where clientip = '236.14.2.0' | head 10,SUCCESS -source = myglue_test.default.http_logs | where size > 0 AND status = 200 OR clientip = '236.14.2.0' | head 100,SUCCESS -"source = myglue_test.default.http_logs | where size <= 0 AND like(request, 'GET%') | head 10",SUCCESS -source = myglue_test.default.http_logs status = 200 | head 10,SUCCESS -source = myglue_test.default.http_logs size > 0 AND status = 200 OR clientip = '236.14.2.0' | head 100,SUCCESS -"source = myglue_test.default.http_logs size <= 0 AND like(request, 'GET%') | head 10",SUCCESS -"source = myglue_test.default.http_logs substring(clientip, 5, 2) = ""12"" | head 10",SUCCESS -source = myglue_test.default.http_logs | where isempty(size),FAILED -source = myglue_test.default.http_logs | where ispresent(size),FAILED -source = myglue_test.default.http_logs | where isnull(size) | head 10,SUCCESS -source = myglue_test.default.http_logs | where isnotnull(size) | head 10,SUCCESS -"source = myglue_test.default.http_logs | where isnotnull(coalesce(size, status)) | head 10",FAILED -"source = myglue_test.default.http_logs | where like(request, 'GET%') | head 10",SUCCESS -"source = myglue_test.default.http_logs | where like(request, '%bordeaux%') | head 10",SUCCESS -"source = myglue_test.default.http_logs | where substring(clientip, 5, 2) = ""12"" | head 10",SUCCESS -"source = myglue_test.default.http_logs | where lower(request) = 
""get /images/backnews.gif http/1.0"" | head 10",SUCCESS -source = myglue_test.default.http_logs | where length(request) = 38 | head 10,SUCCESS -"source = myglue_test.default.http_logs | where case(status = 200, 'success' else 'failed') = 'success' | head 10",FAILED -"source = myglue_test.default.http_logs | eval h = ""Hello"", w = ""World"" | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval @h = ""Hello"" | eval @w = ""World"" | fields @timestamp, @h, @w",SUCCESS -source = myglue_test.default.http_logs | eval newF = clientip | head 10,SUCCESS -"source = myglue_test.default.http_logs | eval newF = clientip | fields clientip, newF | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval f = size | where f > 1 | sort f | fields size, clientip, status | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval f = status * 2 | eval h = f * 2 | fields status, f, h | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval f = size * 2, h = status | stats sum(f) by h",SUCCESS -"source = myglue_test.default.http_logs | eval f = UPPER(request) | eval h = 40 | fields f, h | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval request = ""test"" | fields request | head 10",FAILED -source = myglue_test.default.http_logs | eval size = abs(size) | where size < 500,FAILED -"source = myglue_test.default.http_logs | eval status_string = case(status = 200, 'success' else 'failed') | head 10",FAILED -"source = myglue_test.default.http_logs | eval n = now() | eval t = unix_timestamp(@timestamp) | fields n, t | head 10",SUCCESS -source = myglue_test.default.http_logs | eval e = isempty(size) | eval p = ispresent(size) | head 10,FAILED -"source = myglue_test.default.http_logs | eval c = coalesce(size, status) | head 10",FAILED -source = myglue_test.default.http_logs | eval c = coalesce(request) | head 10,FAILED -source = myglue_test.default.http_logs | eval col1 = ln(size) | eval col2 = unix_timestamp(@timestamp) | sort - col1 | 
head 10,SUCCESS -"source = myglue_test.default.http_logs | eval col1 = 1 | sort col1 | head 4 | eval col2 = 2 | sort - col2 | sort - size | head 2 | fields @timestamp, clientip, col2",SUCCESS -"source = myglue_test.default.mini_http_logs | eval stat = status | where stat > 300 | sort stat | fields @timestamp,clientip,status | head 5",SUCCESS -"source = myglue_test.default.http_logs | eval col1 = size, col2 = clientip | stats avg(col1) by col2",SUCCESS -source = myglue_test.default.http_logs | stats avg(size) by clientip,SUCCESS -"source = myglue_test.default.http_logs | eval new_request = upper(request) | eval compound_field = concat('Hello ', if(like(new_request, '%bordeaux%'), 'World', clientip)) | fields new_request, compound_field | head 10",SUCCESS -source = myglue_test.default.http_logs | stats avg(size),SUCCESS -source = myglue_test.default.nested | stats max(int_col) by struct_col.field2,SUCCESS -source = myglue_test.default.nested | stats distinct_count(int_col),SUCCESS -source = myglue_test.default.nested | stats stddev_samp(int_col),SUCCESS -source = myglue_test.default.nested | stats stddev_pop(int_col),SUCCESS -source = myglue_test.default.nested | stats percentile(int_col),SUCCESS -source = myglue_test.default.nested | stats percentile_approx(int_col),SUCCESS -source = myglue_test.default.mini_http_logs | stats stddev_samp(status),SUCCESS -"source = myglue_test.default.mini_http_logs | where stats > 200 | stats percentile_approx(status, 99)",SUCCESS -"source = myglue_test.default.nested | stats count(int_col) by span(struct_col.field2, 10) as a_span",SUCCESS -"source = myglue_test.default.nested | stats avg(int_col) by span(struct_col.field2, 10) as a_span, struct_col2.field2",SUCCESS -"source = myglue_test.default.http_logs | stats sum(size) by span(@timestamp, 1d) as age_size_per_day | sort - age_size_per_day | head 10",SUCCESS -"source = myglue_test.default.http_logs | stats distinct_count(clientip) by span(@timestamp, 1d) as age_size_per_day | 
sort - age_size_per_day | head 10",SUCCESS -"source = myglue_test.default.http_logs | stats avg(size) as avg_size by status, year | stats avg(avg_size) as avg_avg_size by year",SUCCESS -"source = myglue_test.default.http_logs | stats avg(size) as avg_size by status, year, month | stats avg(avg_size) as avg_avg_size by year, month | stats avg(avg_avg_size) as avg_avg_avg_size by year",SUCCESS -"source = myglue_test.default.nested | stats avg(int_col) as avg_int by struct_col.field2, struct_col2.field2 | stats avg(avg_int) as avg_avg_int by struct_col2.field2",FAILED -"source = myglue_test.default.nested | stats avg(int_col) as avg_int by struct_col.field2, struct_col2.field2 | eval new_col = avg_int | stats avg(avg_int) as avg_avg_int by new_col",SUCCESS -source = myglue_test.default.nested | rare int_col,SUCCESS -source = myglue_test.default.nested | rare int_col by struct_col.field2,SUCCESS -source = myglue_test.default.http_logs | rare request,SUCCESS -source = myglue_test.default.http_logs | where status > 300 | rare request by status,SUCCESS -source = myglue_test.default.http_logs | rare clientip,SUCCESS -source = myglue_test.default.http_logs | where status > 300 | rare clientip,SUCCESS -source = myglue_test.default.http_logs | where status > 300 | rare clientip by day,SUCCESS -source = myglue_test.default.nested | top int_col by struct_col.field2,SUCCESS -source = myglue_test.default.nested | top 1 int_col by struct_col.field2,SUCCESS -source = myglue_test.default.nested | top 2 int_col by struct_col.field2,SUCCESS -source = myglue_test.default.nested | top int_col,SUCCESS -source = myglue_test.default.http_logs | inner join left=l right=r on l.status = r.int_col myglue_test.default.nested | head 10,FAILED -"source = myglue_test.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | fields request, domain | head 10",SUCCESS -source = myglue_test.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | top 1 domain,SUCCESS -source =
myglue_test.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | stats count() by domain,SUCCESS -"source = myglue_test.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | eval a = 1 | fields a, domain | head 10",SUCCESS -"source = myglue_test.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | where size > 0 | sort - size | fields size, domain | head 10",SUCCESS -"source = myglue_test.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/(?<picName>[a-zA-Z]+)/.*' | where domain = 'english' | sort - picName | fields domain, picName | head 10",SUCCESS -source = myglue_test.default.http_logs | patterns request | fields patterns_field | head 10,SUCCESS -source = myglue_test.default.http_logs | patterns request | where size > 0 | fields patterns_field | head 10,SUCCESS -"source = myglue_test.default.http_logs | patterns new_field='no_letter' pattern='[a-zA-Z]' request | fields request, no_letter | head 10",SUCCESS -source = myglue_test.default.http_logs | patterns new_field='no_letter' pattern='[a-zA-Z]' request | stats count() by no_letter,SUCCESS -"source = myglue_test.default.http_logs | patterns new_field='status' pattern='[a-zA-Z]' request | fields request, status | head 10",FAILED -source = myglue_test.default.http_logs | rename @timestamp as timestamp | head 10,FAILED -source = myglue_test.default.http_logs | sort size | head 10,SUCCESS -source = myglue_test.default.http_logs | sort + size | head 10,SUCCESS -source = myglue_test.default.http_logs | sort - size | head 10,SUCCESS -"source = myglue_test.default.http_logs | sort + size, + @timestamp | head 10",SUCCESS -"source = myglue_test.default.http_logs | sort - size, - @timestamp | head 10",SUCCESS -"source = myglue_test.default.http_logs | sort - size, @timestamp | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval c1 = upper(request) | eval c2 = concat('Hello ', if(like(c1, '%bordeaux%'), 'World', clientip)) | eval c3 = length(request) | eval c4 = ltrim(request) | eval c5 =
rtrim(request) | eval c6 = substring(clientip, 5, 2) | eval c7 = trim(request) | eval c8 = upper(request) | eval c9 = position('bordeaux' IN request) | eval c10 = replace(request, 'GET', 'GGG') | fields c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval c1 = unix_timestamp(@timestamp) | eval c2 = now() | eval c3 = +describe dev.default.http_logs,FAILED +describe `dev`.`default`.`http_logs`,FAILED +"source = dev.default.http_logs | dedup 1 status | fields @timestamp, clientip, status, size | head 10",SUCCESS +"source = dev.default.http_logs | dedup status, size | head 10",SUCCESS +source = dev.default.http_logs | dedup 1 status keepempty=true | head 10,SUCCESS +"source = dev.default.http_logs | dedup status, size keepempty=true | head 10",SUCCESS +source = dev.default.http_logs | dedup 2 status | head 10,SUCCESS +"source = dev.default.http_logs | dedup 2 status, size | head 10",SUCCESS +"source = dev.default.http_logs | dedup 2 status, size keepempty=true | head 10",SUCCESS +source = dev.default.http_logs | dedup status CONSECUTIVE=true | fields status,FAILED +"source = dev.default.http_logs | dedup 2 status, size CONSECUTIVE=true | fields status",FAILED +"source = dev.default.http_logs | sort status | fields @timestamp, clientip, status | head 10",SUCCESS +"source = dev.default.http_logs | fields @timestamp, notexisted | head 10",FAILED +"source = dev.default.nested | fields int_col, struct_col.field1, struct_col2.field1 | head 10",SUCCESS +"source = dev.default.nested | where struct_col2.field1.subfield > 'valueA' | sort int_col | fields int_col, struct_col.field1.subfield, struct_col2.field1.subfield",SUCCESS +"source = dev.default.http_logs | fields - @timestamp, clientip, status | head 10",SUCCESS +"source = dev.default.http_logs | eval new_time = @timestamp, new_clientip = clientip | fields - new_time, new_clientip, status | head 10",SUCCESS +source = dev.default.http_logs | eval new_clientip = 
lower(clientip) | fields - new_clientip | head 10,SUCCESS +"source = dev.default.http_logs | fields + @timestamp, clientip, status | fields - clientip, status | head 10",SUCCESS +"source = dev.default.http_logs | fields - clientip, status | fields + @timestamp, clientip, status| head 10",SUCCESS +source = dev.default.http_logs | where status = 200 | head 10,SUCCESS +source = dev.default.http_logs | where status != 200 | head 10,SUCCESS +source = dev.default.http_logs | where size > 0 | head 10,SUCCESS +source = dev.default.http_logs | where size <= 0 | head 10,SUCCESS +source = dev.default.http_logs | where clientip = '236.14.2.0' | head 10,SUCCESS +source = dev.default.http_logs | where size > 0 AND status = 200 OR clientip = '236.14.2.0' | head 100,SUCCESS +"source = dev.default.http_logs | where size <= 0 AND like(request, 'GET%') | head 10",SUCCESS +source = dev.default.http_logs status = 200 | head 10,SUCCESS +source = dev.default.http_logs size > 0 AND status = 200 OR clientip = '236.14.2.0' | head 100,SUCCESS +"source = dev.default.http_logs size <= 0 AND like(request, 'GET%') | head 10",SUCCESS +"source = dev.default.http_logs substring(clientip, 5, 2) = ""12"" | head 10",SUCCESS +source = dev.default.http_logs | where isempty(size),SUCCESS +source = dev.default.http_logs | where ispresent(size),SUCCESS +source = dev.default.http_logs | where isnull(size) | head 10,SUCCESS +source = dev.default.http_logs | where isnotnull(size) | head 10,SUCCESS +"source = dev.default.http_logs | where isnotnull(coalesce(size, status)) | head 10",SUCCESS +"source = dev.default.http_logs | where like(request, 'GET%') | head 10",SUCCESS +"source = dev.default.http_logs | where like(request, '%bordeaux%') | head 10",SUCCESS +"source = dev.default.http_logs | where substring(clientip, 5, 2) = ""12"" | head 10",SUCCESS +"source = dev.default.http_logs | where lower(request) = ""get /images/backnews.gif http/1.0"" | head 10",SUCCESS +source = dev.default.http_logs | where 
length(request) = 38 | head 10,SUCCESS +"source = dev.default.http_logs | where case(status = 200, 'success' else 'failed') = 'success' | head 10",SUCCESS +"source = dev.default.http_logs | eval h = ""Hello"", w = ""World"" | head 10",SUCCESS +"source = dev.default.http_logs | eval @h = ""Hello"" | eval @w = ""World"" | fields @timestamp, @h, @w",SUCCESS +source = dev.default.http_logs | eval newF = clientip | head 10,SUCCESS +"source = dev.default.http_logs | eval newF = clientip | fields clientip, newF | head 10",SUCCESS +"source = dev.default.http_logs | eval f = size | where f > 1 | sort f | fields size, clientip, status | head 10",SUCCESS +"source = dev.default.http_logs | eval f = status * 2 | eval h = f * 2 | fields status, f, h | head 10",SUCCESS +"source = dev.default.http_logs | eval f = size * 2, h = status | stats sum(f) by h",SUCCESS +"source = dev.default.http_logs | eval f = UPPER(request) | eval h = 40 | fields f, h | head 10",SUCCESS +"source = dev.default.http_logs | eval request = ""test"" | fields request | head 10",FAILED +source = dev.default.http_logs | eval size = abs(size) | where size < 500,FAILED +"source = dev.default.http_logs | eval status_string = case(status = 200, 'success' else 'failed') | head 10",SUCCESS +"source = dev.default.http_logs | eval n = now() | eval t = unix_timestamp(@timestamp) | fields n, t | head 10",SUCCESS +source = dev.default.http_logs | eval e = isempty(size) | eval p = ispresent(size) | head 10,SUCCESS +"source = dev.default.http_logs | eval c = coalesce(size, status) | head 10",SUCCESS +source = dev.default.http_logs | eval c = coalesce(request) | head 10,SUCCESS +source = dev.default.http_logs | eval col1 = ln(size) | eval col2 = unix_timestamp(@timestamp) | sort - col1 | head 10,SUCCESS +"source = dev.default.http_logs | eval col1 = 1 | sort col1 | head 4 | eval col2 = 2 | sort - col2 | sort - size | head 2 | fields @timestamp, clientip, col2",SUCCESS +"source = dev.default.http_logs | eval stat = status | 
where stat > 300 | sort stat | fields @timestamp,clientip,status | head 5",SUCCESS +"source = dev.default.http_logs | eval col1 = size, col2 = clientip | stats avg(col1) by col2",SUCCESS +source = dev.default.http_logs | stats avg(size) by clientip,SUCCESS +"source = dev.default.http_logs | eval new_request = upper(request) | eval compound_field = concat('Hello ', if(like(new_request, '%bordeaux%'), 'World', clientip)) | fields new_request, compound_field | head 10",SUCCESS +source = dev.default.http_logs | stats avg(size),SUCCESS +source = dev.default.nested | stats max(int_col) by struct_col.field2,SUCCESS +source = dev.default.nested | stats distinct_count(int_col),SUCCESS +source = dev.default.nested | stats stddev_samp(int_col),SUCCESS +source = dev.default.nested | stats stddev_pop(int_col),SUCCESS +"source = dev.default.nested | stats percentile(int_col, 90)",SUCCESS +"source = dev.default.nested | stats percentile_approx(int_col, 99)",SUCCESS +source = dev.default.http_logs | stats stddev_samp(status),SUCCESS +"source = dev.default.http_logs | where status > 200 | stats percentile_approx(status, 99)",SUCCESS +"source = dev.default.nested | stats count(int_col) by span(struct_col.field2, 10) as a_span",SUCCESS +"source = dev.default.nested | stats avg(int_col) by span(struct_col.field2, 10) as a_span, struct_col2.field2",SUCCESS +"source = dev.default.http_logs | stats sum(size) by span(@timestamp, 1d) as age_size_per_day | sort - age_size_per_day | head 10",SUCCESS +"source = dev.default.http_logs | stats distinct_count(clientip) by span(@timestamp, 1d) as age_size_per_day | sort - age_size_per_day | head 10",SUCCESS +"source = dev.default.http_logs | stats avg(size) as avg_size by status, year | stats avg(avg_size) as avg_avg_size by year",SUCCESS +"source = dev.default.http_logs | stats avg(size) as avg_size by status, year, month | stats avg(avg_size) as avg_avg_size by year, month | stats avg(avg_avg_size) as avg_avg_avg_size by year",SUCCESS +"source = 
dev.default.nested | stats avg(int_col) as avg_int by struct_col.field2, struct_col2.field2 | stats avg(avg_int) as avg_avg_int by struct_col2.field2",FAILED +"source = dev.default.nested | stats avg(int_col) as avg_int by struct_col.field2, struct_col2.field2 | eval new_col = avg_int | stats avg(avg_int) as avg_avg_int by new_col",SUCCESS +source = dev.default.nested | rare int_col,SUCCESS +source = dev.default.nested | rare int_col by struct_col.field2,SUCCESS +source = dev.default.http_logs | rare request,SUCCESS +source = dev.default.http_logs | where status > 300 | rare request by status,SUCCESS +source = dev.default.http_logs | rare clientip,SUCCESS +source = dev.default.http_logs | where status > 300 | rare clientip,SUCCESS +source = dev.default.http_logs | where status > 300 | rare clientip by day,SUCCESS +source = dev.default.nested | top int_col by struct_col.field2,SUCCESS +source = dev.default.nested | top 1 int_col by struct_col.field2,SUCCESS +source = dev.default.nested | top 2 int_col by struct_col.field2,SUCCESS +source = dev.default.nested | top int_col,SUCCESS +source = dev.default.http_logs | inner join left=l right=r on l.status = r.int_col dev.default.nested | head 10,SUCCESS +"source = dev.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | fields request, domain | head 10",SUCCESS +source = dev.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | top 1 domain,SUCCESS +source = dev.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | stats count() by domain,SUCCESS +"source = dev.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | eval a = 1 | fields a, domain | head 10",SUCCESS +"source = dev.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | where size > 0 | sort - size | fields size, domain | head 10",SUCCESS +"source = dev.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/(?<picName>[a-zA-Z]+)/.*' | where domain = 'english' | sort - picName | fields domain, picName | head 10",SUCCESS +source =
dev.default.http_logs | patterns request | fields patterns_field | head 10,SUCCESS +source = dev.default.http_logs | patterns request | where size > 0 | fields patterns_field | head 10,SUCCESS +"source = dev.default.http_logs | patterns new_field='no_letter' pattern='[a-zA-Z]' request | fields request, no_letter | head 10",SUCCESS +source = dev.default.http_logs | patterns new_field='no_letter' pattern='[a-zA-Z]' request | stats count() by no_letter,SUCCESS +"source = dev.default.http_logs | patterns new_field='status' pattern='[a-zA-Z]' request | fields request, status | head 10",FAILED +source = dev.default.http_logs | rename @timestamp as timestamp | head 10,SUCCESS +source = dev.default.http_logs | sort size | head 10,SUCCESS +source = dev.default.http_logs | sort + size | head 10,SUCCESS +source = dev.default.http_logs | sort - size | head 10,SUCCESS +"source = dev.default.http_logs | sort + size, + @timestamp | head 10",SUCCESS +"source = dev.default.http_logs | sort - size, - @timestamp | head 10",SUCCESS +"source = dev.default.http_logs | sort - size, @timestamp | head 10",SUCCESS +"source = dev.default.http_logs | eval c1 = upper(request) | eval c2 = concat('Hello ', if(like(c1, '%bordeaux%'), 'World', clientip)) | eval c3 = length(request) | eval c4 = ltrim(request) | eval c5 = rtrim(request) | eval c6 = substring(clientip, 5, 2) | eval c7 = trim(request) | eval c8 = upper(request) | eval c9 = position('bordeaux' IN request) | eval c10 = replace(request, 'GET', 'GGG') | fields c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 | head 10",SUCCESS +"source = dev.default.http_logs | eval c1 = unix_timestamp(@timestamp) | eval c2 = now() | eval c3 = DAY_OF_WEEK(@timestamp) | eval c4 = DAY_OF_MONTH(@timestamp) | eval c5 = DAY_OF_YEAR(@timestamp) | eval c6 = @@ -121,151 +121,151 @@ HOUR_OF_DAY(@timestamp) | eval c10 = MINUTE_OF_HOUR(@timestamp) | eval c11 = SECOND_OF_MINUTE(@timestamp) | eval c12 = LOCALTIME() | fields c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12 | 
head 10",SUCCESS -"source=myglue_test.default.people | eval c1 = adddate(@timestamp, 1) | fields c1 | head 10",SUCCESS -"source=myglue_test.default.people | eval c2 = subdate(@timestamp, 1) | fields c2 | head 10",SUCCESS -source=myglue_test.default.people | eval c1 = date_add(@timestamp INTERVAL 1 DAY) | fields c1 | head 10,SUCCESS -source=myglue_test.default.people | eval c1 = date_sub(@timestamp INTERVAL 1 DAY) | fields c1 | head 10,SUCCESS -source=myglue_test.default.people | eval `CURDATE()` = CURDATE() | fields `CURDATE()`,SUCCESS -source=myglue_test.default.people | eval `CURRENT_DATE()` = CURRENT_DATE() | fields `CURRENT_DATE()`,SUCCESS -source=myglue_test.default.people | eval `CURRENT_TIMESTAMP()` = CURRENT_TIMESTAMP() | fields `CURRENT_TIMESTAMP()`,SUCCESS -source=myglue_test.default.people | eval `DATE('2020-08-26')` = DATE('2020-08-26') | fields `DATE('2020-08-26')`,SUCCESS -source=myglue_test.default.people | eval `DATE(TIMESTAMP('2020-08-26 13:49:00'))` = DATE(TIMESTAMP('2020-08-26 13:49:00')) | fields `DATE(TIMESTAMP('2020-08-26 13:49:00'))`,SUCCESS -source=myglue_test.default.people | eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') | fields `DATE('2020-08-26 13:49')`,SUCCESS -"source=myglue_test.default.people | eval `DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS')` = DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS'), `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a')` = DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a') | fields `DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS')`, `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a')`",SUCCESS -"source=myglue_test.default.people | eval `'2000-01-02' - '2000-01-01'` = DATEDIFF(TIMESTAMP('2000-01-02 00:00:00'), TIMESTAMP('2000-01-01 23:59:59')), `'2001-02-01' - '2004-01-01'` = DATEDIFF(DATE('2001-02-01'), TIMESTAMP('2004-01-01 00:00:00')) | fields `'2000-01-02' - 
'2000-01-01'`, `'2001-02-01' - '2004-01-01'`", -source=myglue_test.default.people | eval `DAY(DATE('2020-08-26'))` = DAY(DATE('2020-08-26')) | fields `DAY(DATE('2020-08-26'))`, -source=myglue_test.default.people | eval `DAYNAME(DATE('2020-08-26'))` = DAYNAME(DATE('2020-08-26')) | fields `DAYNAME(DATE('2020-08-26'))`,FAILED -source=myglue_test.default.people | eval `CURRENT_TIMEZONE()` = CURRENT_TIMEZONE() | fields `CURRENT_TIMEZONE()`,SUCCESS -source=myglue_test.default.people | eval `UTC_TIMESTAMP()` = UTC_TIMESTAMP() | fields `UTC_TIMESTAMP()`,SUCCESS -"source=myglue_test.default.people | eval `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')` = TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | eval `TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00'))` = TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00')) | fields `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')`, `TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00'))`",SUCCESS -"source=myglue_test.default.people | eval `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')` = TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | eval `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` = TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | fields `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')`, `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')`",SUCCESS - source = myglue_test.default.http_logs | stats count(),SUCCESS -"source = myglue_test.default.http_logs | stats avg(size) as c1, max(size) as c2, min(size) as c3, sum(size) as c4, percentile(size, 50) as c5, stddev_pop(size) as c6, stddev_samp(size) as c7, distinct_count(size) as c8",SUCCESS -"source = myglue_test.default.http_logs | eval c1 = abs(size) | eval c2 = ceil(size) | eval c3 = floor(size) | eval c4 = sqrt(size) | eval c5 = ln(size) | eval c6 = pow(size, 2) | eval c7 = mod(size, 2) | fields c1, c2, c3, c4, c5, 
c6, c7 | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval c1 = isnull(request) | eval c2 = isnotnull(request) | eval c3 = ifnull(request, +"source=dev.default.people | eval c1 = adddate(@timestamp, 1) | fields c1 | head 10",SUCCESS +"source=dev.default.people | eval c2 = subdate(@timestamp, 1) | fields c2 | head 10",SUCCESS +source=dev.default.people | eval c1 = date_add(@timestamp INTERVAL 1 DAY) | fields c1 | head 10,SUCCESS +source=dev.default.people | eval c1 = date_sub(@timestamp INTERVAL 1 DAY) | fields c1 | head 10,SUCCESS +source=dev.default.people | eval `CURDATE()` = CURDATE() | fields `CURDATE()`,SUCCESS +source=dev.default.people | eval `CURRENT_DATE()` = CURRENT_DATE() | fields `CURRENT_DATE()`,SUCCESS +source=dev.default.people | eval `CURRENT_TIMESTAMP()` = CURRENT_TIMESTAMP() | fields `CURRENT_TIMESTAMP()`,SUCCESS +source=dev.default.people | eval `DATE('2020-08-26')` = DATE('2020-08-26') | fields `DATE('2020-08-26')`,SUCCESS +source=dev.default.people | eval `DATE(TIMESTAMP('2020-08-26 13:49:00'))` = DATE(TIMESTAMP('2020-08-26 13:49:00')) | fields `DATE(TIMESTAMP('2020-08-26 13:49:00'))`,SUCCESS +source=dev.default.people | eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') | fields `DATE('2020-08-26 13:49')`,SUCCESS +"source=dev.default.people | eval `DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS')` = DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS'), `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a')` = DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a') | fields `DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS')`, `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a')`",SUCCESS +"source=dev.default.people | eval `'2000-01-02' - '2000-01-01'` = DATEDIFF(TIMESTAMP('2000-01-02 00:00:00'), TIMESTAMP('2000-01-01 23:59:59')), `'2001-02-01' - '2004-01-01'` = DATEDIFF(DATE('2001-02-01'), TIMESTAMP('2004-01-01 
00:00:00')) | fields `'2000-01-02' - '2000-01-01'`, `'2001-02-01' - '2004-01-01'`",SUCCESS +source=dev.default.people | eval `DAY(DATE('2020-08-26'))` = DAY(DATE('2020-08-26')) | fields `DAY(DATE('2020-08-26'))`,SUCCESS +source=dev.default.people | eval `DAYNAME(DATE('2020-08-26'))` = DAYNAME(DATE('2020-08-26')) | fields `DAYNAME(DATE('2020-08-26'))`,FAILED +source=dev.default.people | eval `CURRENT_TIMEZONE()` = CURRENT_TIMEZONE() | fields `CURRENT_TIMEZONE()`,SUCCESS +source=dev.default.people | eval `UTC_TIMESTAMP()` = UTC_TIMESTAMP() | fields `UTC_TIMESTAMP()`,SUCCESS +"source=dev.default.people | eval `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')` = TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | eval `TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00'))` = TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00')) | fields `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')`, `TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00'))`",SUCCESS +"source=dev.default.people | eval `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')` = TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | eval `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` = TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | fields `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')`, `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')`",SUCCESS + source = dev.default.http_logs | stats count(),SUCCESS +"source = dev.default.http_logs | stats avg(size) as c1, max(size) as c2, min(size) as c3, sum(size) as c4, percentile(size, 50) as c5, stddev_pop(size) as c6, stddev_samp(size) as c7, distinct_count(size) as c8",SUCCESS +"source = dev.default.http_logs | eval c1 = abs(size) | eval c2 = ceil(size) | eval c3 = floor(size) | eval c4 = sqrt(size) | eval c5 = ln(size) | eval c6 = pow(size, 2) | eval c7 = mod(size, 2) | fields c1, c2, c3, c4, c5, c6, c7 | head 
10",SUCCESS +"source = dev.default.http_logs | eval c1 = isnull(request) | eval c2 = isnotnull(request) | eval c3 = ifnull(request, ""Unknown"") | eval c4 = nullif(request, ""Unknown"") | eval c5 = isnull(size) | eval c6 = if(like(request, '%bordeaux%'), 'hello', 'world') | fields c1, c2, c3, c4, c5, c6 | head 10",SUCCESS -/* this is block comment */ source = myglue_test.tpch_csv.orders | head 1 // this is line comment,SUCCESS -"/* test in tpch q16, q18, q20 */ source = myglue_test.tpch_csv.orders | head 1 // add source=xx to avoid failure in automation",SUCCESS -"/* test in tpch q4, q21, q22 */ source = myglue_test.tpch_csv.orders | head 1",SUCCESS -"/* test in tpch q2, q11, q15, q17, q20, q22 */ source = myglue_test.tpch_csv.orders | head 1",SUCCESS -"/* test in tpch q7, q8, q9, q13, q15, q22 */ source = myglue_test.tpch_csv.orders | head 1",SUCCESS -/* lots of inner join tests in tpch */ source = myglue_test.tpch_csv.orders | head 1,SUCCESS -/* left join test in tpch q13 */ source = myglue_test.tpch_csv.orders | head 1,SUCCESS -"source = myglue_test.tpch_csv.orders +/* this is block comment */ source = dev.default.orders | head 1 // this is line comment,SUCCESS +"/* test in tpch q16, q18, q20 */ source = dev.default.orders | head 1 // add source=xx to avoid failure in automation",SUCCESS +"/* test in tpch q4, q21, q22 */ source = dev.default.orders | head 1",SUCCESS +"/* test in tpch q2, q11, q15, q17, q20, q22 */ source = dev.default.orders | head 1",SUCCESS +"/* test in tpch q7, q8, q9, q13, q15, q22 */ source = dev.default.orders | head 1",SUCCESS +/* lots of inner join tests in tpch */ source = dev.default.orders | head 1,SUCCESS +/* left join test in tpch q13 */ source = dev.default.orders | head 1,SUCCESS +"source = dev.default.orders | right outer join ON c_custkey = o_custkey AND not like(o_comment, '%special%requests%') - myglue_test.tpch_csv.customer + dev.default.customer | stats count(o_orderkey) as c_count by c_custkey | sort - c_count",SUCCESS 
-"source = myglue_test.tpch_csv.orders +"source = dev.default.orders | full outer join ON c_custkey = o_custkey AND not like(o_comment, '%special%requests%') - myglue_test.tpch_csv.customer + dev.default.customer | stats count(o_orderkey) as c_count by c_custkey | sort - c_count",SUCCESS -"source = myglue_test.tpch_csv.customer -| semi join ON c_custkey = o_custkey myglue_test.tpch_csv.orders +"source = dev.default.customer +| semi join ON c_custkey = o_custkey dev.default.orders | where c_mktsegment = 'BUILDING' | sort - c_custkey | head 10",SUCCESS -"source = myglue_test.tpch_csv.customer -| anti join ON c_custkey = o_custkey myglue_test.tpch_csv.orders +"source = dev.default.customer +| anti join ON c_custkey = o_custkey dev.default.orders | where c_mktsegment = 'BUILDING' | sort - c_custkey | head 10",SUCCESS -"source = myglue_test.tpch_csv.supplier +"source = dev.default.supplier | where like(s_comment, '%Customer%Complaints%') -| join ON s_nationkey > n_nationkey [ source = myglue_test.tpch_csv.nation | where n_name = 'SAUDI ARABIA' ] +| join ON s_nationkey > n_nationkey [ source = dev.default.nation | where n_name = 'SAUDI ARABIA' ] | sort - s_name | head 10",SUCCESS -"source = myglue_test.tpch_csv.supplier +"source = dev.default.supplier | where like(s_comment, '%Customer%Complaints%') -| join [ source = myglue_test.tpch_csv.nation | where n_name = 'SAUDI ARABIA' ] +| join [ source = dev.default.nation | where n_name = 'SAUDI ARABIA' ] | sort - s_name | head 10",SUCCESS -source=myglue_test.default.people | LOOKUP myglue_test.default.work_info uid AS id REPLACE department | stats distinct_count(department),SUCCESS -source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uid AS id APPEND department | stats distinct_count(department),SUCCESS -source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uid AS id REPLACE department AS country | stats distinct_count(country),SUCCESS -source = myglue_test.default.people| LOOKUP 
myglue_test.default.work_info uid AS id APPEND department AS country | stats distinct_count(country),SUCCESS -"source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uID AS id, name REPLACE department | stats distinct_count(department)",SUCCESS -"source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uid AS ID, name APPEND department | stats distinct_count(department)",SUCCESS -"source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uID AS id, name | head 10",SUCCESS -"source = myglue_test.default.people | eval major = occupation | fields id, name, major, country, salary | LOOKUP myglue_test.default.work_info name REPLACE occupation AS major | stats distinct_count(major)",SUCCESS -"source = myglue_test.default.people | eval major = occupation | fields id, name, major, country, salary | LOOKUP myglue_test.default.work_info name APPEND occupation AS major | stats distinct_count(major)",SUCCESS -"source = myglue_test.default.http_logs | eval res = json('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json('{""f1"":""abc"",""f2"":{""f3"":""a"",""f4"":""b""}}') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json('[1,2,3,{""f1"":1,""f2"":[5,6]},4]') | head 1 | fields res",SUCCESS -source = myglue_test.default.http_logs | eval res = json('[]') | head 1 | fields res,SUCCESS -"source = myglue_test.default.http_logs | eval res = json(‘{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json('{""invalid"": ""json""') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json('[1,2,3]') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json(‘[1,2') | head 1 | fields res",SUCCESS 
-source = myglue_test.default.http_logs | eval res = json('[invalid json]') | head 1 | fields res,SUCCESS -source = myglue_test.default.http_logs | eval res = json('invalid json') | head 1 | fields res,SUCCESS -source = myglue_test.default.http_logs | eval res = json(null) | head 1 | fields res,SUCCESS -"source = myglue_test.default.http_logs | eval res = json_array('this', 'is', 'a', 'string', 'array') | head 1 | fields res",SUCCESS -source = myglue_test.default.http_logs | eval res = json_array() | head 1 | fields res,SUCCESS -"source = myglue_test.default.http_logs | eval res = json_array(1, 2, 0, -1, 1.1, -0.11) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_array('this', 'is', 1.1, -0.11, true, false) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_array(1,2,0,-1,1.1,-0.11)) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = array_length(json_array(1,2,0,-1,1.1,-0.11)) | head 1 | fields res",SUCCESS -source = myglue_test.default.http_logs | eval res = array_length(json_array()) | head 1 | fields res,SUCCESS -source = myglue_test.default.http_logs | eval res = json_array_length('[]') | head 1 | fields res,SUCCESS -"source = myglue_test.default.http_logs | eval res = json_array_length('[1,2,3,{""f1"":1,""f2"":[5,6]},4]') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_array_length('{\""key\"": 1}') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_array_length('[1,2') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', 'string_value')) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', 123.45)) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', true)) 
| head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object(""a"", 1, ""b"", 2, ""c"", 3)) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', array())) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', array(1, 2, 3))) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object('outer', json_object('inner', 123.45))) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object(""array"", json_array(1,2,0,-1,1.1,-0.11))) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | where json_valid(('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}') | head 1",SUCCESS -"source = myglue_test.default.http_logs | where not json_valid(('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}') | head 1",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_keys(json('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}')) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_keys(json('{""f1"":""abc"",""f2"":{""f3"":""a"",""f4"":""b""}}')) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_keys(json('[1,2,3,{""f1"":1,""f2"":[5,6]},4]')) | head 1 | fields res",SUCCESS -source = myglue_test.default.http_logs | eval res = json_keys(json('[]')) | head 1 | fields res,SUCCESS -"source = myglue_test.default.http_logs | eval res = json_keys(json(‘{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}')) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_keys(json('{""invalid"": ""json""')) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs 
| eval res = json_keys(json('[1,2,3]')) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_keys(json('[1,2')) | head 1 | fields res",SUCCESS -source = myglue_test.default.http_logs | eval res = json_keys(json('[invalid json]')) | head 1 | fields res,SUCCESS -source = myglue_test.default.http_logs | eval res = json_keys(json('invalid json')) | head 1 | fields res,SUCCESS -source = myglue_test.default.http_logs | eval res = json_keys(json(null)) | head 1 | fields res,SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.teacher') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[*]') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[0]') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[*].name') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', 
'$.student[1].name') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[0].not_exist_key') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[10]') | head 1 | fields res",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = forall(array, x -> x > 0) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = forall(array, x -> x > -10) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(json_object(""a"",1,""b"",-1),json_object(""a"",-1,""b"",-1)), result = forall(array, x -> x.a > 0) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(json_object(""a"",1,""b"",-1),json_object(""a"",-1,""b"",-1)), result = exists(array, x -> x.b < 0) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = exists(array, x -> x > 0) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = exists(array, x -> x > 10) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = filter(array, x -> x > 0) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = filter(array, x -> x > 10) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,3), result = transform(array, x -> x + 1) | head 1 | fields result",SUCCESS -"source = 
myglue_test.default.people | eval array = json_array(1,2,3), result = transform(array, (x, y) -> x + y) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,3), result = reduce(array, 0, (acc, x) -> acc + x) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | head 1 | fields result",SUCCESS -source=myglue_test.default.people | eval age = salary | eventstats avg(age) | sort id | head 10,SUCCESS -"source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count | sort id | head 10",SUCCESS -source=myglue_test.default.people | eventstats avg(salary) by country | sort id | head 10,SUCCESS -"source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count by country | sort id | head 10",SUCCESS -"source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count +source=dev.default.people | LOOKUP dev.default.work_info uid AS id REPLACE department | stats distinct_count(department),SUCCESS +source = dev.default.people| LOOKUP dev.default.work_info uid AS id APPEND department | stats distinct_count(department),SUCCESS +source = dev.default.people| LOOKUP dev.default.work_info uid AS id REPLACE department AS country | stats distinct_count(country),SUCCESS +source = dev.default.people| LOOKUP dev.default.work_info uid AS id APPEND department AS country | stats distinct_count(country),SUCCESS +"source = dev.default.people| LOOKUP dev.default.work_info uID AS id, name REPLACE department | stats distinct_count(department)",SUCCESS +"source = dev.default.people| LOOKUP dev.default.work_info uid AS ID, name APPEND department | stats 
distinct_count(department)",SUCCESS +"source = dev.default.people| LOOKUP dev.default.work_info uID AS id, name | head 10",SUCCESS +"source = dev.default.people | eval major = occupation | fields id, name, major, country, salary | LOOKUP dev.default.work_info name REPLACE occupation AS major | stats distinct_count(major)",SUCCESS +"source = dev.default.people | eval major = occupation | fields id, name, major, country, salary | LOOKUP dev.default.work_info name APPEND occupation AS major | stats distinct_count(major)",SUCCESS +"source = dev.default.http_logs | eval res = json('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json('{""f1"":""abc"",""f2"":{""f3"":""a"",""f4"":""b""}}') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json('[1,2,3,{""f1"":1,""f2"":[5,6]},4]') | head 1 | fields res",SUCCESS +source = dev.default.http_logs | eval res = json('[]') | head 1 | fields res,SUCCESS +"source = dev.default.http_logs | eval res = json('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json('{""invalid"": ""json""') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json('[1,2,3]') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json('[1,2') | head 1 | fields res",SUCCESS +source = dev.default.http_logs | eval res = json('[invalid json]') | head 1 | fields res,SUCCESS +source = dev.default.http_logs | eval res = json('invalid json') | head 1 | fields res,SUCCESS +source = dev.default.http_logs | eval res = json(null) | head 1 | fields res,SUCCESS +"source = dev.default.http_logs | eval res = json_array('this', 'is', 'a', 'string', 'array') | head 1 | fields res",SUCCESS +source = dev.default.http_logs | eval res = json_array() | head 1 | fields res,SUCCESS 
+"source = dev.default.http_logs | eval res = json_array(1, 2, 0, -1, 1.1, -0.11) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_array('this', 'is', 1.1, -0.11, true, false) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_array(1,2,0,-1,1.1,-0.11)) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = array_length(json_array(1,2,0,-1,1.1,-0.11)) | head 1 | fields res",SUCCESS +source = dev.default.http_logs | eval res = array_length(json_array()) | head 1 | fields res,SUCCESS +source = dev.default.http_logs | eval res = json_array_length('[]') | head 1 | fields res,SUCCESS +"source = dev.default.http_logs | eval res = json_array_length('[1,2,3,{""f1"":1,""f2"":[5,6]},4]') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_array_length('{\""key\"": 1}') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_array_length('[1,2') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object('key', 'string_value')) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object('key', 123.45)) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object('key', true)) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object(""a"", 1, ""b"", 2, ""c"", 3)) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object('key', array())) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object('key', array(1, 2, 3))) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object('outer', json_object('inner', 123.45))) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = 
to_json_string(json_object(""array"", json_array(1,2,0,-1,1.1,-0.11))) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | where json_valid('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}') | head 1",SUCCESS +"source = dev.default.http_logs | where not json_valid('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}') | head 1",SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}')) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('{""f1"":""abc"",""f2"":{""f3"":""a"",""f4"":""b""}}')) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('[1,2,3,{""f1"":1,""f2"":[5,6]},4]')) | head 1 | fields res",SUCCESS +source = dev.default.http_logs | eval res = json_keys(json('[]')) | head 1 | fields res,SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}')) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('{""invalid"": ""json""')) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('[1,2,3]')) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('[1,2')) | head 1 | fields res",SUCCESS +source = dev.default.http_logs | eval res = json_keys(json('[invalid json]')) | head 1 | fields res,SUCCESS +source = dev.default.http_logs | eval res = json_keys(json('invalid json')) | head 1 | fields res,SUCCESS +source = dev.default.http_logs | eval res = json_keys(json(null)) | head 1 | fields res,SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | 
eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.teacher') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[*]') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[0]') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[*].name') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[1].name') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[0].not_exist_key') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[10]') | head 1 | fields res",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = forall(array, x -> x > 0) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = forall(array, x -> x > -10) | head 1 | fields result",SUCCESS +"source = dev.default.people | 
eval array = json_array(json_object(""a"",1,""b"",-1),json_object(""a"",-1,""b"",-1)), result = forall(array, x -> x.a > 0) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(json_object(""a"",1,""b"",-1),json_object(""a"",-1,""b"",-1)), result = exists(array, x -> x.b < 0) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = exists(array, x -> x > 0) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = exists(array, x -> x > 10) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = filter(array, x -> x > 0) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = filter(array, x -> x > 10) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,3), result = transform(array, x -> x + 1) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,3), result = transform(array, (x, y) -> x + y) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,3), result = reduce(array, 0, (acc, x) -> acc + x) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | head 1 | fields result",SUCCESS +source=dev.default.people | eval age = salary | eventstats avg(age) | sort id | head 10,SUCCESS +"source=dev.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count | sort id | head 10",SUCCESS +source=dev.default.people | eventstats avg(salary) by country | sort id | head 10,SUCCESS +"source=dev.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as 
min_age, count(age) as count by country | sort id | head 10",SUCCESS +"source=dev.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count by span(age, 10) | sort id | head 10",SUCCESS -"source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count by span(age, 10) as age_span, country | sort id | head 10",SUCCESS -"source=myglue_test.default.people | where country != 'USA' | eventstats stddev_samp(salary), stddev_pop(salary), percentile_approx(salary, 60) by span(salary, 1000) as salary_span | sort id | head 10",SUCCESS -"source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age by occupation, country | eventstats avg(avg_age) as avg_state_age by country | sort id | head 10",SUCCESS -"source=myglue_test.default.people | eventstats distinct_count(salary) by span(salary, 1000) as age_span",FAILED -"source = myglue_test.tpch_csv.lineitem +"source=dev.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count by span(age, 10) as age_span, country | sort id | head 10",SUCCESS +"source=dev.default.people | where country != 'USA' | eventstats stddev_samp(salary), stddev_pop(salary), percentile_approx(salary, 60) by span(salary, 1000) as salary_span | sort id | head 10",SUCCESS +"source=dev.default.people | eval age = salary | eventstats avg(age) as avg_age by occupation, country | eventstats avg(avg_age) as avg_state_age by country | sort id | head 10",SUCCESS +"source=dev.default.people | eventstats distinct_count(salary) by span(salary, 1000) as age_span",FAILED +"source = dev.default.lineitem | where l_shipdate <= subdate(date('1998-12-01'), 90) | stats sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, @@ -277,59 +277,59 @@ by span(age, 10) | sort id | head 10",SUCCESS count() as count_order 
by l_returnflag, l_linestatus | sort l_returnflag, l_linestatus",SUCCESS -"source = myglue_test.tpch_csv.part -| join ON p_partkey = ps_partkey myglue_test.tpch_csv.partsupp -| join ON s_suppkey = ps_suppkey myglue_test.tpch_csv.supplier -| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation -| join ON n_regionkey = r_regionkey myglue_test.tpch_csv.region +"source = dev.default.part +| join ON p_partkey = ps_partkey dev.default.partsupp +| join ON s_suppkey = ps_suppkey dev.default.supplier +| join ON s_nationkey = n_nationkey dev.default.nation +| join ON n_regionkey = r_regionkey dev.default.region | where p_size = 15 AND like(p_type, '%BRASS') AND r_name = 'EUROPE' AND ps_supplycost = [ - source = myglue_test.tpch_csv.partsupp - | join ON s_suppkey = ps_suppkey myglue_test.tpch_csv.supplier - | join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation - | join ON n_regionkey = r_regionkey myglue_test.tpch_csv.region + source = dev.default.partsupp + | join ON s_suppkey = ps_suppkey dev.default.supplier + | join ON s_nationkey = n_nationkey dev.default.nation + | join ON n_regionkey = r_regionkey dev.default.region | where r_name = 'EUROPE' | stats MIN(ps_supplycost) ] | sort - s_acctbal, n_name, s_name, p_partkey | head 100",SUCCESS -"source = myglue_test.tpch_csv.customer -| join ON c_custkey = o_custkey myglue_test.tpch_csv.orders -| join ON l_orderkey = o_orderkey myglue_test.tpch_csv.lineitem +"source = dev.default.customer +| join ON c_custkey = o_custkey dev.default.orders +| join ON l_orderkey = o_orderkey dev.default.lineitem | where c_mktsegment = 'BUILDING' AND o_orderdate < date('1995-03-15') AND l_shipdate > date('1995-03-15') | stats sum(l_extendedprice * (1 - l_discount)) as revenue by l_orderkey, o_orderdate, o_shippriority | sort - revenue, o_orderdate | head 10",SUCCESS -"source = myglue_test.tpch_csv.orders +"source = dev.default.orders | where o_orderdate >= date('1993-07-01') and o_orderdate < date_add(date('1993-07-01'), 
interval 3 month) and exists [ - source = myglue_test.tpch_csv.lineitem + source = dev.default.lineitem | where l_orderkey = o_orderkey and l_commitdate < l_receiptdate ] | stats count() as order_count by o_orderpriority | sort o_orderpriority",SUCCESS -"source = myglue_test.tpch_csv.customer -| join ON c_custkey = o_custkey myglue_test.tpch_csv.orders -| join ON l_orderkey = o_orderkey myglue_test.tpch_csv.lineitem -| join ON l_suppkey = s_suppkey AND c_nationkey = s_nationkey myglue_test.tpch_csv.supplier -| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation -| join ON n_regionkey = r_regionkey myglue_test.tpch_csv.region +"source = dev.default.customer +| join ON c_custkey = o_custkey dev.default.orders +| join ON l_orderkey = o_orderkey dev.default.lineitem +| join ON l_suppkey = s_suppkey AND c_nationkey = s_nationkey dev.default.supplier +| join ON s_nationkey = n_nationkey dev.default.nation +| join ON n_regionkey = r_regionkey dev.default.region | where r_name = 'ASIA' AND o_orderdate >= date('1994-01-01') AND o_orderdate < date_add(date('1994-01-01'), interval 1 year) | stats sum(l_extendedprice * (1 - l_discount)) as revenue by n_name | sort - revenue",SUCCESS -"source = myglue_test.tpch_csv.lineitem +"source = dev.default.lineitem | where l_shipdate >= date('1994-01-01') and l_shipdate < adddate(date('1994-01-01'), 365) and l_discount between .06 - 0.01 and .06 + 0.01 and l_quantity < 24 | stats sum(l_extendedprice * l_discount) as revenue",SUCCESS "source = [ - source = myglue_test.tpch_csv.supplier - | join ON s_suppkey = l_suppkey myglue_test.tpch_csv.lineitem - | join ON o_orderkey = l_orderkey myglue_test.tpch_csv.orders - | join ON c_custkey = o_custkey myglue_test.tpch_csv.customer - | join ON s_nationkey = n1.n_nationkey myglue_test.tpch_csv.nation as n1 - | join ON c_nationkey = n2.n_nationkey myglue_test.tpch_csv.nation as n2 + source = dev.default.supplier + | join ON s_suppkey = l_suppkey dev.default.lineitem + | join ON o_orderkey 
= l_orderkey dev.default.orders + | join ON c_custkey = o_custkey dev.default.customer + | join ON s_nationkey = n1.n_nationkey dev.default.nation as n1 + | join ON c_nationkey = n2.n_nationkey dev.default.nation as n2 | where l_shipdate between date('1995-01-01') and date('1996-12-31') and n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY' or n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE' | eval supp_nation = n1.n_name, cust_nation = n2.n_name, l_year = year(l_shipdate), volume = l_extendedprice * (1 - l_discount) @@ -338,14 +338,14 @@ by span(age, 10) | sort id | head 10",SUCCESS | stats sum(volume) as revenue by supp_nation, cust_nation, l_year | sort supp_nation, cust_nation, l_year",SUCCESS "source = [ - source = myglue_test.tpch_csv.part - | join ON p_partkey = l_partkey myglue_test.tpch_csv.lineitem - | join ON s_suppkey = l_suppkey myglue_test.tpch_csv.supplier - | join ON l_orderkey = o_orderkey myglue_test.tpch_csv.orders - | join ON o_custkey = c_custkey myglue_test.tpch_csv.customer - | join ON c_nationkey = n1.n_nationkey myglue_test.tpch_csv.nation as n1 - | join ON s_nationkey = n2.n_nationkey myglue_test.tpch_csv.nation as n2 - | join ON n1.n_regionkey = r_regionkey myglue_test.tpch_csv.region + source = dev.default.part + | join ON p_partkey = l_partkey dev.default.lineitem + | join ON s_suppkey = l_suppkey dev.default.supplier + | join ON l_orderkey = o_orderkey dev.default.orders + | join ON o_custkey = c_custkey dev.default.customer + | join ON c_nationkey = n1.n_nationkey dev.default.nation as n1 + | join ON s_nationkey = n2.n_nationkey dev.default.nation as n2 + | join ON n1.n_regionkey = r_regionkey dev.default.region | where r_name = 'AMERICA' AND p_type = 'ECONOMY ANODIZED STEEL' and o_orderdate between date('1995-01-01') and date('1996-12-31') | eval o_year = year(o_orderdate) @@ -358,12 +358,12 @@ by span(age, 10) | sort id | head 10",SUCCESS | fields mkt_share, o_year | sort o_year",SUCCESS "source = [ - source = myglue_test.tpch_csv.part - 
| join ON p_partkey = l_partkey myglue_test.tpch_csv.lineitem - | join ON s_suppkey = l_suppkey myglue_test.tpch_csv.supplier - | join ON ps_partkey = l_partkey and ps_suppkey = l_suppkey myglue_test.tpch_csv.partsupp - | join ON o_orderkey = l_orderkey myglue_test.tpch_csv.orders - | join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation + source = dev.default.part + | join ON p_partkey = l_partkey dev.default.lineitem + | join ON s_suppkey = l_suppkey dev.default.supplier + | join ON ps_partkey = l_partkey and ps_suppkey = l_suppkey dev.default.partsupp + | join ON o_orderkey = l_orderkey dev.default.orders + | join ON s_nationkey = n_nationkey dev.default.nation | where like(p_name, '%green%') | eval nation = n_name | eval o_year = year(o_orderdate) @@ -372,33 +372,33 @@ by span(age, 10) | sort id | head 10",SUCCESS ] as profit | stats sum(amount) as sum_profit by nation, o_year | sort nation, - o_year",SUCCESS -"source = myglue_test.tpch_csv.customer -| join ON c_custkey = o_custkey myglue_test.tpch_csv.orders -| join ON l_orderkey = o_orderkey myglue_test.tpch_csv.lineitem -| join ON c_nationkey = n_nationkey myglue_test.tpch_csv.nation +"source = dev.default.customer +| join ON c_custkey = o_custkey dev.default.orders +| join ON l_orderkey = o_orderkey dev.default.lineitem +| join ON c_nationkey = n_nationkey dev.default.nation | where o_orderdate >= date('1993-10-01') AND o_orderdate < date_add(date('1993-10-01'), interval 3 month) AND l_returnflag = 'R' | stats sum(l_extendedprice * (1 - l_discount)) as revenue by c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment | sort - revenue | head 20",SUCCESS -"source = myglue_test.tpch_csv.partsupp -| join ON ps_suppkey = s_suppkey myglue_test.tpch_csv.supplier -| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation +"source = dev.default.partsupp +| join ON ps_suppkey = s_suppkey dev.default.supplier +| join ON s_nationkey = n_nationkey dev.default.nation | where n_name = 'GERMANY' 
| stats sum(ps_supplycost * ps_availqty) as value by ps_partkey | where value > [ - source = myglue_test.tpch_csv.partsupp - | join ON ps_suppkey = s_suppkey myglue_test.tpch_csv.supplier - | join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation + source = dev.default.partsupp + | join ON ps_suppkey = s_suppkey dev.default.supplier + | join ON s_nationkey = n_nationkey dev.default.nation | where n_name = 'GERMANY' | stats sum(ps_supplycost * ps_availqty) as check | eval threshold = check * 0.0001000000 | fields threshold ] | sort - value",SUCCESS -"source = myglue_test.tpch_csv.orders -| join ON o_orderkey = l_orderkey myglue_test.tpch_csv.lineitem +"source = dev.default.orders +| join ON o_orderkey = l_orderkey dev.default.lineitem | where l_commitdate < l_receiptdate and l_shipdate < l_commitdate and l_shipmode in ('MAIL', 'SHIP') @@ -409,32 +409,32 @@ by span(age, 10) | sort id | head 10",SUCCESS by l_shipmode | sort l_shipmode",SUCCESS "source = [ - source = myglue_test.tpch_csv.customer + source = dev.default.customer | left outer join ON c_custkey = o_custkey AND not like(o_comment, '%special%requests%') - myglue_test.tpch_csv.orders + dev.default.orders | stats count(o_orderkey) as c_count by c_custkey ] as c_orders | stats count() as custdist by c_count | sort - custdist, - c_count",SUCCESS -"source = myglue_test.tpch_csv.lineitem +"source = dev.default.lineitem | join ON l_partkey = p_partkey AND l_shipdate >= date('1995-09-01') AND l_shipdate < date_add(date('1995-09-01'), interval 1 month) - myglue_test.tpch_csv.part + dev.default.part | stats sum(case(like(p_type, 'PROMO%'), l_extendedprice * (1 - l_discount) else 0)) as sum1, sum(l_extendedprice * (1 - l_discount)) as sum2 | eval promo_revenue = 100.00 * sum1 / sum2 // Stats and Eval commands can combine when issues/819 resolved | fields promo_revenue",SUCCESS -"source = myglue_test.tpch_csv.supplier +"source = dev.default.supplier | join right = revenue0 ON s_suppkey = supplier_no [ - source = 
myglue_test.tpch_csv.lineitem + source = dev.default.lineitem | where l_shipdate >= date('1996-01-01') AND l_shipdate < date_add(date('1996-01-01'), interval 3 month) | eval supplier_no = l_suppkey | stats sum(l_extendedprice * (1 - l_discount)) as total_revenue by supplier_no ] | where total_revenue = [ source = [ - source = myglue_test.tpch_csv.lineitem + source = dev.default.lineitem | where l_shipdate >= date('1996-01-01') AND l_shipdate < date_add(date('1996-01-01'), interval 3 month) | eval supplier_no = l_suppkey | stats sum(l_extendedprice * (1 - l_discount)) as total_revenue by supplier_no @@ -443,24 +443,24 @@ by span(age, 10) | sort id | head 10",SUCCESS ] | sort s_suppkey | fields s_suppkey, s_name, s_address, s_phone, total_revenue",SUCCESS -"source = myglue_test.tpch_csv.partsupp -| join ON p_partkey = ps_partkey myglue_test.tpch_csv.part +"source = dev.default.partsupp +| join ON p_partkey = ps_partkey dev.default.part | where p_brand != 'Brand#45' and not like(p_type, 'MEDIUM POLISHED%') and p_size in (49, 14, 23, 45, 19, 3, 36, 9) and ps_suppkey not in [ - source = myglue_test.tpch_csv.supplier + source = dev.default.supplier | where like(s_comment, '%Customer%Complaints%') | fields s_suppkey ] | stats distinct_count(ps_suppkey) as supplier_cnt by p_brand, p_type, p_size | sort - supplier_cnt, p_brand, p_type, p_size",SUCCESS -"source = myglue_test.tpch_csv.lineitem -| join ON p_partkey = l_partkey myglue_test.tpch_csv.part +"source = dev.default.lineitem +| join ON p_partkey = l_partkey dev.default.part | where p_brand = 'Brand#23' and p_container = 'MED BOX' and l_quantity < [ - source = myglue_test.tpch_csv.lineitem + source = dev.default.lineitem | where l_partkey = p_partkey | stats avg(l_quantity) as avg | eval `0.2 * avg` = 0.2 * avg @@ -469,11 +469,11 @@ by span(age, 10) | sort id | head 10",SUCCESS | stats sum(l_extendedprice) as sum | eval avg_yearly = sum / 7.0 | fields avg_yearly",SUCCESS -"source = myglue_test.tpch_csv.customer -| join 
ON c_custkey = o_custkey myglue_test.tpch_csv.orders -| join ON o_orderkey = l_orderkey myglue_test.tpch_csv.lineitem +"source = dev.default.customer +| join ON c_custkey = o_custkey dev.default.orders +| join ON o_orderkey = l_orderkey dev.default.lineitem | where o_orderkey in [ - source = myglue_test.tpch_csv.lineitem + source = dev.default.lineitem | stats sum(l_quantity) as sum by l_orderkey | where sum > 300 | fields l_orderkey @@ -481,7 +481,7 @@ by span(age, 10) | sort id | head 10",SUCCESS | stats sum(l_quantity) by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice | sort - o_totalprice, o_orderdate | head 100",SUCCESS -"source = myglue_test.tpch_csv.lineitem +"source = dev.default.lineitem | join ON p_partkey = l_partkey and p_brand = 'Brand#12' and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') @@ -503,19 +503,19 @@ by span(age, 10) | sort id | head 10",SUCCESS and p_size between 1 and 15 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' - myglue_test.tpch_csv.part",SUCCESS -"source = myglue_test.tpch_csv.supplier -| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation + dev.default.part",SUCCESS +"source = dev.default.supplier +| join ON s_nationkey = n_nationkey dev.default.nation | where n_name = 'CANADA' and s_suppkey in [ - source = myglue_test.tpch_csv.partsupp + source = dev.default.partsupp | where ps_partkey in [ - source = myglue_test.tpch_csv.part + source = dev.default.part | where like(p_name, 'forest%') | fields p_partkey ] and ps_availqty > [ - source = myglue_test.tpch_csv.lineitem + source = dev.default.lineitem | where l_partkey = ps_partkey and l_suppkey = ps_suppkey and l_shipdate >= date('1994-01-01') @@ -526,19 +526,19 @@ by span(age, 10) | sort id | head 10",SUCCESS ] | fields ps_suppkey ]",SUCCESS -"source = myglue_test.tpch_csv.supplier -| join ON s_suppkey = l1.l_suppkey myglue_test.tpch_csv.lineitem as l1 -| join ON o_orderkey = l1.l_orderkey myglue_test.tpch_csv.orders -| 
join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation +"source = dev.default.supplier +| join ON s_suppkey = l1.l_suppkey dev.default.lineitem as l1 +| join ON o_orderkey = l1.l_orderkey dev.default.orders +| join ON s_nationkey = n_nationkey dev.default.nation | where o_orderstatus = 'F' and l1.l_receiptdate > l1.l_commitdate and exists [ - source = myglue_test.tpch_csv.lineitem as l2 + source = dev.default.lineitem as l2 | where l2.l_orderkey = l1.l_orderkey and l2.l_suppkey != l1.l_suppkey ] and not exists [ - source = myglue_test.tpch_csv.lineitem as l3 + source = dev.default.lineitem as l3 | where l3.l_orderkey = l1.l_orderkey and l3.l_suppkey != l1.l_suppkey and l3.l_receiptdate > l3.l_commitdate @@ -548,16 +548,16 @@ by span(age, 10) | sort id | head 10",SUCCESS | sort - numwait, s_name | head 100",SUCCESS "source = [ - source = myglue_test.tpch_csv.customer + source = dev.default.customer | where substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') and c_acctbal > [ - source = myglue_test.tpch_csv.customer + source = dev.default.customer | where c_acctbal > 0.00 and substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') | stats avg(c_acctbal) ] and not exists [ - source = myglue_test.tpch_csv.orders + source = dev.default.orders | where o_custkey = c_custkey ] | eval cntrycode = substring(c_phone, 1, 2)