Skip to content

Commit

Permalink
update additional queries and expected results
Browse files Browse the repository at this point in the history
Signed-off-by: YANGDB <[email protected]>
  • Loading branch information
YANG-DB committed Nov 4, 2023
1 parent db943da commit ad58298
Show file tree
Hide file tree
Showing 25 changed files with 490 additions and 88 deletions.
13 changes: 12 additions & 1 deletion integ-test/src/test/python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,16 @@ This script is designed to perform sanity checks on OpenSearch queries by execut

## Configuration

Before running the script, ensure that the `s3://path/data/http_log/` is correctly pointing to the http_logs s3 bucket - for additional information see [data-preparation](data-preparation.md).
Before running the script, ensure that `s3://flint-data-dp-eu-west-1-beta/data/http_log/` correctly points to the http_logs S3 bucket - for additional information see [data-preparation](data-preparation.md).

Before running the script, ensure that the `OPENSEARCH_URL` environment variable is set to your OpenSearch cluster's URL.

Before running the script, ensure that the datasource name (in this sample `mys3`) matches the correct location of your EMR Spark cluster.

Before running the script, ensure that the catalog name (in this sample `default`) matches the correct schema name within the AWS Glue catalog.

Before running the script, ensure that the table name (in this sample `http_logs_plain`) matches the name of your table, or [create the table using this script](./http_logs/tables/create_table.sql).

Example:
```bash
export OPENSEARCH_URL="http://localhost:9200"
Expand Down Expand Up @@ -62,11 +68,16 @@ The script accepts several optional parameters to control its behavior:
```bash
./run_sanity.sh --run-tables --run-queries --use-date 20231102
```
4. Run both table (creation) queries and data queries:
```bash
python sanity_script.py --run-tables --run-queries
```

## Output

The script will generate a log file with a timestamp in its name (e.g., `sanity_report_2023-11-02_12-00-00.log`) that contains the results of the sanity checks, including any errors encountered during execution.


## Support

For any queries or issues, please create an issue in the repository or contact the maintainer.
2 changes: 1 addition & 1 deletion integ-test/src/test/python/http_logs/queries/ppl1.sql
Original file line number Diff line number Diff line change
@@ -1 +1 @@
SELECT * FROM mys3.default.http_logs_{date} ORDER BY "@timestamp" LIMIT 5;
source = mys3.default.http_logs_plain
| sort @timestamp
| head 5;
2 changes: 2 additions & 0 deletions integ-test/src/test/python/http_logs/queries/ppl2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
source = mys3.default.http_logs_plain
| where status >= 400
| sort - @timestamp
| head 5
3 changes: 3 additions & 0 deletions integ-test/src/test/python/http_logs/queries/ppl3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
source = mys3.default.http_logs_plain
| where status = 200
| stats count(status) by clientip, status
| sort - clientip
| head 10
1 change: 1 addition & 0 deletions integ-test/src/test/python/http_logs/queries/q0.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/* Sanity check: describe the table that the other q*.sql queries read from. */
DESCRIBE EXTENDED mys3.default.http_logs_plain
2 changes: 1 addition & 1 deletion integ-test/src/test/python/http_logs/queries/q1.sql
Original file line number Diff line number Diff line change
@@ -1 +1 @@
SELECT * FROM mys3.default.http_logs_{date} ORDER BY "@timestamp" LIMIT 5;
/*
 * First five rows ordered by the @timestamp column.
 * The column name contains '@', so it must be quoted as an identifier with
 * backticks; single quotes would make it a string literal and the ORDER BY
 * would sort by a constant, returning arbitrary rows.
 */
SELECT *
FROM mys3.default.http_logs_plain
ORDER BY `@timestamp`
LIMIT 5;
2 changes: 1 addition & 1 deletion integ-test/src/test/python/http_logs/queries/q2.sql
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT COUNT(DISTINCT clientip) as unique_client_ips
FROM mys3.default.http_logs_{date};
FROM mys3.default.http_logs_plain;
2 changes: 1 addition & 1 deletion integ-test/src/test/python/http_logs/queries/q3.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ SELECT
FIRST(day) AS day,
status,
COUNT(status) AS status_count_by_day
FROM (SELECT * FROM mys3.default.http_logs_{date} LIMIT 1000)
FROM (SELECT * FROM mys3.default.http_logs_plain ORDER BY `@timestamp` LIMIT 1000)
GROUP BY day, status
ORDER BY day, status
LIMIT 10;
2 changes: 1 addition & 1 deletion integ-test/src/test/python/http_logs/queries/q4.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ SELECT
FIRST(day) AS day,
status,
COUNT(status) AS status_count_by_day
FROM mys3.default.http_logs_{date}
FROM mys3.default.http_logs_plain
WHERE status >= 400
GROUP BY day, status
ORDER BY day, status
Expand Down
2 changes: 1 addition & 1 deletion integ-test/src/test/python/http_logs/queries/q5.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
SELECT
status,
COUNT(status) AS status_count_by_day
FROM mys3.default.http_logs_{date}
FROM mys3.default.http_logs_plain
WHERE status >= 400
GROUP BY status
ORDER BY status
Expand Down
2 changes: 1 addition & 1 deletion integ-test/src/test/python/http_logs/queries/q6.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT day, SUM(size) as total_size FROM mys3.default.http_logs_{date}
/* Sum of `size` per `day`, restricted to rows with year = 1998 and month = 6. */
SELECT
    day,
    SUM(size) AS total_size
FROM mys3.default.http_logs_plain
WHERE year = 1998
  AND month = 6
GROUP BY day;
6 changes: 3 additions & 3 deletions integ-test/src/test/python/http_logs/queries/q7.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SELECT count(*) as count, clientip
FROM mys3.default.http_logs_{date}
FROM mys3.default.http_logs_plain
WHERE clientip BETWEEN '208.0.0.0' AND '210.0.0.0'
GROUP BY clientip
ORDER BY DESC count
limit 20;
ORDER BY count DESC
limit 20;
8 changes: 8 additions & 0 deletions integ-test/src/test/python/http_logs/queries/q8.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/*
 * Request the execution plan (not the rows) for the distinct (day, status)
 * pairs among rows with status >= 400, capped at 100 groups.
 */
EXPLAIN
SELECT day, status
FROM mys3.default.http_logs_plain
WHERE status >= 400
GROUP BY day, status
LIMIT 100;
60 changes: 30 additions & 30 deletions integ-test/src/test/python/http_logs/results/ppl1.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,54 +39,54 @@
],
"datarows": [
[
"1998-06-10T14:37:23.000Z",
"76.112.16.0",
"GET /images/102325.gif HTTP/1.0",
200,
1555,
"1998-06-14T19:59:55.000Z",
"185.163.25.0",
"GET /images/comp_bg2_hm.gif HTTP/1.1",
404,
340,
1998,
6,
10
14
],
[
"1998-06-10T14:37:23.000Z",
"78.109.16.0",
"GET /english/images/comp_bu_stage1n.gif HTTP/1.0",
200,
1548,
"1998-06-14T19:59:55.000Z",
"161.62.26.0",
"GET /images/comp_bg2_hm.gif HTTP/1.0",
404,
343,
1998,
6,
10
14
],
[
"1998-06-10T14:37:23.000Z",
"140.48.14.0",
"GET /images/102321.gif HTTP/1.0",
200,
1602,
"1998-06-14T19:59:55.000Z",
"63.158.15.0",
"GET /images/comp_bg2_hm.gif HTTP/1.1",
404,
335,
1998,
6,
10
14
],
[
"1998-06-10T14:37:23.000Z",
"114.113.16.0",
"GET /english/images/team_bu_roster_on.gif HTTP/1.0",
200,
1567,
"1998-06-14T19:59:55.000Z",
"190.10.13.0",
"GET /images/comp_bg2_hm.gif HTTP/1.1",
404,
335,
1998,
6,
10
14
],
[
"1998-06-10T14:37:24.000Z",
"79.48.14.0",
"GET /english/images/comp_bu_stage1n.gif HTTP/1.0",
200,
1548,
"1998-06-14T19:59:53.000Z",
"28.87.6.0",
"GET /images/comp_bg2_hm.gif HTTP/1.0",
404,
349,
1998,
6,
10
14
]
],
"total": 5,
Expand Down
96 changes: 96 additions & 0 deletions integ-test/src/test/python/http_logs/results/ppl2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
{
"data": {
"ok": true,
"resp": {
"status": "SUCCESS",
"schema": [
{
"name": "@timestamp",
"type": "date"
},
{
"name": "clientip",
"type": "string"
},
{
"name": "request",
"type": "string"
},
{
"name": "status",
"type": "integer"
},
{
"name": "size",
"type": "integer"
},
{
"name": "year",
"type": "integer"
},
{
"name": "month",
"type": "integer"
},
{
"name": "day",
"type": "integer"
}
],
"datarows": [
[
"1998-06-14T19:59:55.000Z",
"185.163.25.0",
"GET /images/comp_bg2_hm.gif HTTP/1.1",
404,
340,
1998,
6,
14
],
[
"1998-06-14T19:59:55.000Z",
"161.62.26.0",
"GET /images/comp_bg2_hm.gif HTTP/1.0",
404,
343,
1998,
6,
14
],
[
"1998-06-14T19:59:55.000Z",
"63.158.15.0",
"GET /images/comp_bg2_hm.gif HTTP/1.1",
404,
335,
1998,
6,
14
],
[
"1998-06-14T19:59:55.000Z",
"190.10.13.0",
"GET /images/comp_bg2_hm.gif HTTP/1.1",
404,
335,
1998,
6,
14
],
[
"1998-06-14T19:59:53.000Z",
"28.87.6.0",
"GET /images/comp_bg2_hm.gif HTTP/1.0",
404,
349,
1998,
6,
14
]
],
"total": 5,
"size": 5
}
}
}
76 changes: 76 additions & 0 deletions integ-test/src/test/python/http_logs/results/ppl3.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{
"data": {
"ok": true,
"resp": {
"status": "SUCCESS",
"schema": [
{
"name": "count(status)",
"type": "long"
},
{
"name": "clientip",
"type": "string"
},
{
"name": "status",
"type": "integer"
}
],
"datarows": [
[
78,
"99.99.9.0",
200
],
[
133,
"99.99.8.0",
200
],
[
542,
"99.99.6.0",
200
],
[
15,
"99.99.5.0",
200
],
[
4,
"99.99.4.0",
200
],
[
71,
"99.99.3.0",
200
],
[
143,
"99.99.20.0",
200
],
[
39,
"99.99.2.0",
200
],
[
156,
"99.99.19.0",
200
],
[
64,
"99.99.18.0",
200
]
],
"total": 10,
"size": 10
}
}
}
Loading

0 comments on commit ad58298

Please sign in to comment.