diff --git a/docs/index.md b/docs/index.md
index cd52051a1..13fda06f1 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -20,9 +20,9 @@ Please see the following example in which Index Building Logic and Query Rewrite
| Skipping Index | Create Index Statement | Index Building Logic | Query Rewrite Logic |
|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| Partition | CREATE SKIPPING INDEX
ON alb_logs
FOR COLUMNS (
year PARTITION,
month PARTITION,
day PARTITION,
hour PARTITION
) | INSERT INTO flint_alb_logs_skipping_index
SELECT
FIRST(year) AS year,
FIRST(month) AS month,
FIRST(day) AS day,
FIRST(hour) AS hour,
input_file_name() AS file_path
FROM alb_logs
GROUP BY
input_file_name() | SELECT *
FROM alb_logs
WHERE year = 2023 AND month = 4
=>
SELECT *
FROM alb_logs (input_files =
SELECT file_path
FROM flint_alb_logs_skipping_index
WHERE year = 2023 AND month = 4
)
WHERE year = 2023 AND month = 4 |
-| ValueSet | CREATE SKIPPING INDEX
ON alb_logs
FOR COLUMNS (
elb_status_code VALUE_SET
) | INSERT INTO flint_alb_logs_skipping_index
SELECT
COLLECT_SET(elb_status_code) AS elb_status_code,
input_file_name() AS file_path
FROM alb_logs
GROUP BY
input_file_name() | SELECT *
FROM alb_logs
WHERE elb_status_code = 404
=>
SELECT *
FROM alb_logs (input_files =
SELECT file_path
FROM flint_alb_logs_skipping_index
WHERE ARRAY_CONTAINS(elb_status_code, 404)
)
WHERE elb_status_code = 404 |
-| MinMax | CREATE SKIPPING INDEX
ON alb_logs
FOR COLUMNS (
request_processing_time MIN_MAX
) | INSERT INTO flint_alb_logs_skipping_index
SELECT
MIN(request_processing_time) AS request_processing_time_min,
MAX(request_processing_time) AS request_processing_time_max,
input_file_name() AS file_path
FROM alb_logs
GROUP BY
input_file_name() | SELECT *
FROM alb_logs
WHERE request_processing_time = 100
=>
SELECT *
FROM alb_logs (input_files =
SELECT file_path
FROM flint_alb_logs_skipping_index
WHERE request_processing_time_min <= 100
AND 100 <= request_processing_time_max
)
WHERE request_processing_time = 100
+| Partition | CREATE SKIPPING INDEX
ON alb_logs
(
year PARTITION,
month PARTITION,
day PARTITION,
hour PARTITION
) | INSERT INTO flint_alb_logs_skipping_index
SELECT
FIRST(year) AS year,
FIRST(month) AS month,
FIRST(day) AS day,
FIRST(hour) AS hour,
input_file_name() AS file_path
FROM alb_logs
GROUP BY
input_file_name() | SELECT *
FROM alb_logs
WHERE year = 2023 AND month = 4
=>
SELECT *
FROM alb_logs (input_files =
SELECT file_path
FROM flint_alb_logs_skipping_index
WHERE year = 2023 AND month = 4
)
WHERE year = 2023 AND month = 4 |
+| ValueSet | CREATE SKIPPING INDEX
ON alb_logs
(
elb_status_code VALUE_SET
) | INSERT INTO flint_alb_logs_skipping_index
SELECT
COLLECT_SET(elb_status_code) AS elb_status_code,
input_file_name() AS file_path
FROM alb_logs
GROUP BY
input_file_name() | SELECT *
FROM alb_logs
WHERE elb_status_code = 404
=>
SELECT *
FROM alb_logs (input_files =
SELECT file_path
FROM flint_alb_logs_skipping_index
WHERE ARRAY_CONTAINS(elb_status_code, 404)
)
WHERE elb_status_code = 404 |
+| MinMax | CREATE SKIPPING INDEX
ON alb_logs
(
request_processing_time MIN_MAX
) | INSERT INTO flint_alb_logs_skipping_index
SELECT
MIN(request_processing_time) AS request_processing_time_min,
MAX(request_processing_time) AS request_processing_time_max,
input_file_name() AS file_path
FROM alb_logs
GROUP BY
input_file_name() | SELECT *
FROM alb_logs
WHERE request_processing_time = 100
=>
SELECT *
FROM alb_logs (input_files =
SELECT file_path
FROM flint_alb_logs_skipping_index
WHERE request_processing_time_min <= 100
AND 100 <= request_processing_time_max
)
WHERE request_processing_time = 100 |
### Flint Index Specification
@@ -223,7 +223,23 @@ WITH (
### OpenSearch
-OpenSearch stores the Flint index in an OpenSearch index of the given name.
+The OpenSearch index corresponding to a Flint index follows the naming convention below:
+
+1. Skipping index: flint_[catalog_database_table]_skipping_index
+2. Covering index: flint_[catalog_database_table]_[index_name]_index
+
+Note that any uppercase letters in the index name and table name (catalog, database and table) in the SQL statement are automatically converted to lowercase due to a restriction imposed by OpenSearch.
+
+Examples:
+
+```sql
+-- OpenSearch index name is `flint_spark_catalog_default_alb_logs_skipping_index`
+CREATE SKIPPING INDEX ON spark_catalog.default.alb_logs ...
+
+-- OpenSearch index name is `flint_spark_catalog_default_alb_logs_elb_and_requesturi_index`
+CREATE INDEX elb_and_requestUri ON spark_catalog.default.alb_logs ...
+```
+
In the index mapping, the `_meta` and `properties` fields store meta and schema info of a Flint index.
```json
@@ -390,6 +406,22 @@ TODO
## Limitations
+### Flint Index Naming
+
+Because uppercase letters are converted to lowercase in OpenSearch index names, it is not possible to create two Flint indexes whose table names or index names differ only by case.
+
+For instance, only one of the statements in each group can succeed:
+
+```sql
+-- my[G]lue vs my[g]lue
+CREATE SKIPPING INDEX ON myGlue.default.alb_logs ...
+CREATE SKIPPING INDEX ON myglue.default.alb_logs ...
+
+-- [i]dx_elb vs [I]dx_elb
+CREATE INDEX idx_elb ON alb_logs ...
+CREATE INDEX Idx_elb ON alb_logs ...
+```
+
### Query Optimization
For now, only single or conjunctive conditions (conditions connected by AND) in the WHERE clause can be optimized by a skipping index.