From 7b6e485b374e1218da94bef0796da5eb2e5cb712 Mon Sep 17 00:00:00 2001
From: Lantao Jin <ltjin@amazon.com>
Date: Tue, 19 Nov 2024 10:29:19 +0800
Subject: [PATCH] [DOC] Ensure PPL docs have consistent look (#926)

* refactor ppl docs to keep consistent look

Signed-off-by: Lantao Jin <ltjin@amazon.com>

* remove auto generated file

Signed-off-by: Lantao Jin <ltjin@amazon.com>

* minor updates

Signed-off-by: Lantao Jin <ltjin@amazon.com>

* address comments

Signed-off-by: Lantao Jin <ltjin@amazon.com>

* fix hyper-link issue

Signed-off-by: Lantao Jin <ltjin@amazon.com>

---------

Signed-off-by: Lantao Jin <ltjin@amazon.com>
---
 README.md                                     |   4 +-
 docs/ppl-lang/README.md                       |   2 +-
 .../{ppl-lambda.md => ppl-collection.md}      |  80 +++-
 docs/ppl-lang/functions/ppl-json.md           | 214 ++++++++-
 docs/ppl-lang/ppl-correlation-command.md      |   2 +-
 docs/ppl-lang/ppl-dedup-command.md            |   8 +-
 docs/ppl-lang/ppl-eval-command.md             |   6 +-
 docs/ppl-lang/ppl-fields-command.md           |   6 +-
 docs/ppl-lang/ppl-fieldsummary-command.md     |   4 +-
 docs/ppl-lang/ppl-grok-command.md             |   2 +-
 docs/ppl-lang/ppl-head-command.md             |   2 +-
 docs/ppl-lang/ppl-join-command.md             | 258 ++++++-----
 docs/ppl-lang/ppl-lookup-command.md           |  85 ++--
 docs/ppl-lang/ppl-parse-command.md            |   2 +-
 docs/ppl-lang/ppl-rare-command.md             |   8 +-
 docs/ppl-lang/ppl-search-command.md           |   2 +-
 docs/ppl-lang/ppl-sort-command.md             |   4 +-
 docs/ppl-lang/ppl-stats-command.md            |   2 +-
 docs/ppl-lang/ppl-subquery-command.md         | 405 +++++-------------
 docs/ppl-lang/ppl-top-command.md              |   4 +-
 docs/ppl-lang/ppl-trendline-command.md        |   6 +-
 docs/ppl-lang/ppl-where-command.md            |   2 +-
 22 files changed, 587 insertions(+), 521 deletions(-)
 rename docs/ppl-lang/functions/{ppl-lambda.md => ppl-collection.md} (57%)
diff --git a/README.md b/README.md
index 12123b456..db3790e64 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Please refer to the [Flint Index Reference Manual](./docs/index.md) for more inf
 
 * For additional details on Spark PPL commands project, see [PPL Project](https://github.com/orgs/opensearch-project/projects/214/views/2)
 
-* Experiment ppl queries on local spark cluster[PPL on local spark ](docs/ppl-lang/local-spark-ppl-test-instruction.md)
+* Experiment ppl queries on local spark cluster [PPL on local spark](docs/ppl-lang/local-spark-ppl-test-instruction.md)
 
 ## Prerequisites
 
@@ -88,7 +88,7 @@ bin/spark-shell --packages "org.opensearch:opensearch-spark-ppl_2.12:0.7.0-SNAPS
 ```
 
 ### PPL Run queries on a local spark cluster
-See ppl usage sample on local spark cluster[PPL on local spark ](local-spark-ppl-test-instruction.md)
+See ppl usage sample on local spark cluster [PPL on local spark](docs/ppl-lang/local-spark-ppl-test-instruction.md)
 
 
 ## Code of Conduct
diff --git a/docs/ppl-lang/README.md b/docs/ppl-lang/README.md
index 9df9f5986..19e1a6ee0 100644
--- a/docs/ppl-lang/README.md
+++ b/docs/ppl-lang/README.md
@@ -94,7 +94,7 @@ For additional examples see the next [documentation](PPL-Example-Commands.md).
 
     - [`IP Address Functions`](functions/ppl-ip.md)
      
-    - [`Lambda Functions`](functions/ppl-lambda.md)
+    - [`Collection Functions`](functions/ppl-collection.md)
 
 ---
 ### PPL On Spark
diff --git a/docs/ppl-lang/functions/ppl-lambda.md b/docs/ppl-lang/functions/ppl-collection.md
similarity index 57%
rename from docs/ppl-lang/functions/ppl-lambda.md
rename to docs/ppl-lang/functions/ppl-collection.md
index cdb6f9e8f..b98f5f5ca 100644
--- a/docs/ppl-lang/functions/ppl-lambda.md
+++ b/docs/ppl-lang/functions/ppl-collection.md
@@ -1,4 +1,56 @@
-## Lambda Functions
+## PPL Collection Functions
+
+### `ARRAY`
+
+**Description**
+
+`array(<value>...)` Returns an array with the given elements.
+
+**Argument type:**
+- A \<value\> can be any kind of value such as string, number, or boolean.
+
+**Return type:** ARRAY
+
+Example:
+
+    os> source=people | eval `array` = array(1, 2, 0, -1, 1.1, -0.11)
+    fetched rows / total rows = 1/1
+    +------------------------------+
+    | array                        |
+    +------------------------------+
+    | [1.0,2.0,0.0,-1.0,1.1,-0.11] |
+    +------------------------------+
+    os> source=people | eval `array` = array(true, false, true, true)
+    fetched rows / total rows = 1/1
+    +------------------------------+
+    | array                        |
+    +------------------------------+
+    | [true, false, true, true]    |
+    +------------------------------+
+
+
+### `ARRAY_LENGTH`
+
+**Description**
+
+`array_length(array)` Returns the number of elements in the outermost array.
+
+**Argument type:** ARRAY
+
+ARRAY or JSON_ARRAY object.
+
+**Return type:** INTEGER
+
+Example:
+
+    os> source=people | eval `array` = array_length(array(1,2,3,4)), `empty_array` = array_length(array())
+    fetched rows / total rows = 1/1
+    +---------+---------------+
+    | array   | empty_array   |
+    +---------+---------------+
+    | 4       | 0             |
+    +---------+---------------+
+
 
 ### `FORALL`
 
@@ -14,7 +66,7 @@ Returns `TRUE` if all elements in the array satisfy the lambda predicate, otherw
 
 Example:
 
-    os> source=people | eval array = json_array(1, -1, 2), result = forall(array, x -> x > 0) | fields result
+    os> source=people | eval array = array(1, -1, 2), result = forall(array, x -> x > 0) | fields result
     fetched rows / total rows = 1/1
     +-----------+
     | result    |
@@ -22,7 +74,7 @@ Example:
     | false     |
     +-----------+
 
-    os> source=people | eval array = json_array(1, 3, 2), result = forall(array, x -> x > 0) | fields result
+    os> source=people | eval array = array(1, 3, 2), result = forall(array, x -> x > 0) | fields result
     fetched rows / total rows = 1/1
     +-----------+
     | result    |
@@ -41,7 +93,7 @@ Consider constructing the following array:
 
 and perform lambda functions against the nested fields `a` or `b`. See the examples:
 
-    os> source=people | eval array = json_array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.a > 0) | fields result
+    os> source=people | eval array = array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.a > 0) | fields result
     fetched rows / total rows = 1/1
     +-----------+
     | result    |
@@ -49,7 +101,7 @@ and perform lambda functions against the nested fields `a` or `b`. See the examp
     | false     |
     +-----------+
 
-    os> source=people | eval array = json_array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.b > 0) | fields result
+    os> source=people | eval array = array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.b > 0) | fields result
     fetched rows / total rows = 1/1
     +-----------+
     | result    |
@@ -71,7 +123,7 @@ Returns `TRUE` if at least one element in the array satisfies the lambda predica
 
 Example:
 
-    os> source=people | eval array = json_array(1, -1, 2), result = exists(array, x -> x > 0) | fields result
+    os> source=people | eval array = array(1, -1, 2), result = exists(array, x -> x > 0) | fields result
     fetched rows / total rows = 1/1
     +-----------+
     | result    |
@@ -79,7 +131,7 @@ Example:
     | true      |
     +-----------+
 
-    os> source=people | eval array = json_array(-1, -3, -2), result = exists(array, x -> x > 0) | fields result
+    os> source=people | eval array = array(-1, -3, -2), result = exists(array, x -> x > 0) | fields result
     fetched rows / total rows = 1/1
     +-----------+
     | result    |
@@ -102,7 +154,7 @@ An ARRAY that contains all elements in the input array that satisfy the lambda p
 
 Example:
 
-    os> source=people | eval array = json_array(1, -1, 2), result = filter(array, x -> x > 0) | fields result
+    os> source=people | eval array = array(1, -1, 2), result = filter(array, x -> x > 0) | fields result
     fetched rows / total rows = 1/1
     +-----------+
     | result    |
@@ -110,7 +162,7 @@ Example:
     | [1, 2]    |
     +-----------+
 
-    os> source=people | eval array = json_array(-1, -3, -2), result = filter(array, x -> x > 0) | fields result
+    os> source=people | eval array = array(-1, -3, -2), result = filter(array, x -> x > 0) | fields result
     fetched rows / total rows = 1/1
     +-----------+
     | result    |
@@ -132,7 +184,7 @@ An ARRAY that contains the result of applying the lambda transform function to e
 
 Example:
 
-    os> source=people | eval array = json_array(1, 2, 3), result = transform(array, x -> x + 1) | fields result
+    os> source=people | eval array = array(1, 2, 3), result = transform(array, x -> x + 1) | fields result
     fetched rows / total rows = 1/1
     +--------------+
     | result       |
@@ -140,7 +192,7 @@ Example:
     | [2, 3, 4]    |
     +--------------+
 
-    os> source=people | eval array = json_array(1, 2, 3), result = transform(array, (x, i) -> x + i) | fields result
+    os> source=people | eval array = array(1, 2, 3), result = transform(array, (x, i) -> x + i) | fields result
     fetched rows / total rows = 1/1
     +--------------+
     | result       |
@@ -162,7 +214,7 @@ The final result of applying the lambda functions to the start value and the inp
 
 Example:
 
-    os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x) | fields result
+    os> source=people | eval array = array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x) | fields result
     fetched rows / total rows = 1/1
     +-----------+
     | result    |
@@ -170,7 +222,7 @@ Example:
     | 6         |
     +-----------+
 
-    os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 10, (acc, x) -> acc + x) | fields result
+    os> source=people | eval array = array(1, 2, 3), result = reduce(array, 10, (acc, x) -> acc + x) | fields result
     fetched rows / total rows = 1/1
     +-----------+
     | result    |
@@ -178,7 +230,7 @@ Example:
     | 16        |
     +-----------+
 
-    os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | fields result
+    os> source=people | eval array = array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | fields result
     fetched rows / total rows = 1/1
     +-----------+
     | result    |
diff --git a/docs/ppl-lang/functions/ppl-json.md b/docs/ppl-lang/functions/ppl-json.md
index 5b26ee427..2c0c0ca67 100644
--- a/docs/ppl-lang/functions/ppl-json.md
+++ b/docs/ppl-lang/functions/ppl-json.md
@@ -95,6 +95,11 @@ Example:
     | {"array":[1.0,2.0,0.0,-1.0,1.1,-0.11]} |
     +----------------------------------------+
 
+**Limitation**
+
+The parameters of `json_array` must all be of the same type.
+For example, `json_array('this', 'is', 1.1, -0.11, true, false)` throws an exception.
+
 ### `TO_JSON_STRING`
 
 **Description**
@@ -149,29 +154,6 @@ Example:
     +-----------+-----------+-------------+
 
 
-### `ARRAY_LENGTH`
-
-**Description**
-
-`array_length(jsonArray)` Returns the number of elements in the outermost array.
-
-**Argument type:** ARRAY
-
-ARRAY or JSON_ARRAY object.
-
-**Return type:** INTEGER
-
-Example:
-
-    os> source=people | eval `json_array` = json_array_length(json_array(1,2,3,4)), `empty_array` = json_array_length(json_array())
-    fetched rows / total rows = 1/1
-    +--------------+---------------+
-    | json_array   | empty_array   |
-    +--------------+---------------+
-    | 4            | 0             |
-    +--------------+---------------+
-
-
 ### `JSON_EXTRACT`
 
 **Description**
@@ -280,3 +262,189 @@ Example:
     |------------------+---------|
     | 13               | null    |
     +------------------+---------+
+
+### `FORALL`
+
+**Description**
+
+`forall(json_array, lambda)` Evaluates whether a lambda predicate holds for all elements in the json_array.
+
+**Argument type:** ARRAY, LAMBDA
+
+**Return type:** BOOLEAN
+
+Returns `TRUE` if all elements in the array satisfy the lambda predicate, otherwise `FALSE`.
+
+Example:
+
+    os> source=people | eval array = json_array(1, -1, 2), result = forall(array, x -> x > 0) | fields result
+    fetched rows / total rows = 1/1
+    +-----------+
+    | result    |
+    +-----------+
+    | false     |
+    +-----------+
+
+    os> source=people | eval array = json_array(1, 3, 2), result = forall(array, x -> x > 0) | fields result
+    fetched rows / total rows = 1/1
+    +-----------+
+    | result    |
+    +-----------+
+    | true      |
+    +-----------+
+
+**Note:** The lambda expression can access the nested fields of the array elements. This applies to all lambda functions introduced in this document.
+
+Consider constructing the following array:
+
+    array = [
+        {"a":1, "b":1},
+        {"a":-1, "b":2}
+    ]
+
+and perform lambda functions against the nested fields `a` or `b`. See the examples:
+
+    os> source=people | eval array = json_array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.a > 0) | fields result
+    fetched rows / total rows = 1/1
+    +-----------+
+    | result    |
+    +-----------+
+    | false     |
+    +-----------+
+
+    os> source=people | eval array = json_array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.b > 0) | fields result
+    fetched rows / total rows = 1/1
+    +-----------+
+    | result    |
+    +-----------+
+    | true      |
+    +-----------+
+
+### `EXISTS`
+
+**Description**
+
+`exists(json_array, lambda)` Evaluates whether a lambda predicate holds for one or more elements in the json_array.
+
+**Argument type:** ARRAY, LAMBDA
+
+**Return type:** BOOLEAN
+
+Returns `TRUE` if at least one element in the array satisfies the lambda predicate, otherwise `FALSE`.
+
+Example:
+
+    os> source=people | eval array = json_array(1, -1, 2), result = exists(array, x -> x > 0) | fields result
+    fetched rows / total rows = 1/1
+    +-----------+
+    | result    |
+    +-----------+
+    | true      |
+    +-----------+
+
+    os> source=people | eval array = json_array(-1, -3, -2), result = exists(array, x -> x > 0) | fields result
+    fetched rows / total rows = 1/1
+    +-----------+
+    | result    |
+    +-----------+
+    | false     |
+    +-----------+
+
+
+### `FILTER`
+
+**Description**
+
+`filter(json_array, lambda)` Filters the input json_array using the given lambda function.
+
+**Argument type:** ARRAY, LAMBDA
+
+**Return type:** ARRAY
+
+An ARRAY that contains all elements in the input json_array that satisfy the lambda predicate.
+
+Example:
+
+    os> source=people | eval array = json_array(1, -1, 2), result = filter(array, x -> x > 0) | fields result
+    fetched rows / total rows = 1/1
+    +-----------+
+    | result    |
+    +-----------+
+    | [1, 2]    |
+    +-----------+
+
+    os> source=people | eval array = json_array(-1, -3, -2), result = filter(array, x -> x > 0) | fields result
+    fetched rows / total rows = 1/1
+    +-----------+
+    | result    |
+    +-----------+
+    | []        |
+    +-----------+
+
+### `TRANSFORM`
+
+**Description**
+
+`transform(json_array, lambda)` Transforms the elements of a json_array using the lambda transform function. When a binary lambda function is used, its second argument is the index of the element. This is similar to a `map` in functional programming.
+
+**Argument type:** ARRAY, LAMBDA
+
+**Return type:** ARRAY
+
+An ARRAY that contains the result of applying the lambda transform function to each element in the input array.
+
+Example:
+
+    os> source=people | eval array = json_array(1, 2, 3), result = transform(array, x -> x + 1) | fields result
+    fetched rows / total rows = 1/1
+    +--------------+
+    | result       |
+    +--------------+
+    | [2, 3, 4]    |
+    +--------------+
+
+    os> source=people | eval array = json_array(1, 2, 3), result = transform(array, (x, i) -> x + i) | fields result
+    fetched rows / total rows = 1/1
+    +--------------+
+    | result       |
+    +--------------+
+    | [1, 3, 5]    |
+    +--------------+
+
+### `REDUCE`
+
+**Description**
+
+`reduce(json_array, start, merge_lambda, finish_lambda)` Applies a binary merge lambda function to a start value and all elements in the json_array, and reduces this to a single state. The final state is converted into the final result by applying a finish lambda function.
+
+**Argument type:** ARRAY, ANY, LAMBDA, LAMBDA
+
+**Return type:** ANY
+
+The final result of applying the lambda functions to the start value and the input json_array.
+
+Example:
+
+    os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x) | fields result
+    fetched rows / total rows = 1/1
+    +-----------+
+    | result    |
+    +-----------+
+    | 6         |
+    +-----------+
+
+    os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 10, (acc, x) -> acc + x) | fields result
+    fetched rows / total rows = 1/1
+    +-----------+
+    | result    |
+    +-----------+
+    | 16        |
+    +-----------+
+
+    os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | fields result
+    fetched rows / total rows = 1/1
+    +-----------+
+    | result    |
+    +-----------+
+    | 60        |
+    +-----------+
diff --git a/docs/ppl-lang/ppl-correlation-command.md b/docs/ppl-lang/ppl-correlation-command.md
index 2e8507a14..74e04da86 100644
--- a/docs/ppl-lang/ppl-correlation-command.md
+++ b/docs/ppl-lang/ppl-correlation-command.md
@@ -1,4 +1,4 @@
-## PPL Correlation Command
+## PPL `correlation` command
 
 > This is an experimental command - it may be removed in future versions
 
diff --git a/docs/ppl-lang/ppl-dedup-command.md b/docs/ppl-lang/ppl-dedup-command.md
index 28fe7f4a4..4e06d275e 100644
--- a/docs/ppl-lang/ppl-dedup-command.md
+++ b/docs/ppl-lang/ppl-dedup-command.md
@@ -1,6 +1,6 @@
-# PPL dedup command
+## PPL `dedup` command
 
-## Table of contents
+### Table of contents
 
 - [Description](#description)
 - [Syntax](#syntax)
@@ -11,11 +11,11 @@
     - [Example 4: Dedup in consecutive document](#example-4-dedup-in-consecutive-document)
 - [Limitation](#limitation)
 
-## Description
+### Description
 
 Using `dedup` command to remove identical document defined by field from the search result.
 
-## Syntax
+### Syntax
 
 ```sql
 dedup [int] <field-list> [keepempty=<bool>] [consecutive=<bool>]
diff --git a/docs/ppl-lang/ppl-eval-command.md b/docs/ppl-lang/ppl-eval-command.md
index 1908c087c..e98d4d4f2 100644
--- a/docs/ppl-lang/ppl-eval-command.md
+++ b/docs/ppl-lang/ppl-eval-command.md
@@ -1,10 +1,10 @@
-# PPL `eval` command
+## PPL `eval` command
 
-## Description
+### Description
  The ``eval`` command evaluate the expression and append the result to the search result.
 
 
-## Syntax
+### Syntax
 ```sql
 eval <field>=<expression> ["," <field>=<expression> ]...
 ```
diff --git a/docs/ppl-lang/ppl-fields-command.md b/docs/ppl-lang/ppl-fields-command.md
index e37fc644f..4ef041ee2 100644
--- a/docs/ppl-lang/ppl-fields-command.md
+++ b/docs/ppl-lang/ppl-fields-command.md
@@ -1,12 +1,12 @@
 ## PPL `fields` command
 
-**Description**
+### Description
 Using ``field`` command to keep or remove fields from the search result.
 
 
-**Syntax**
+### Syntax
 
-field [+|-] <field-list>
+`fields [+|-] <field-list>`
 
 * index: optional. if the plus (+) is used, only the fields specified in the field list will be keep. if the minus (-) is used, all the fields specified in the field list will be removed. **Default** +
 * field list: mandatory. comma-delimited keep or remove fields.
diff --git a/docs/ppl-lang/ppl-fieldsummary-command.md b/docs/ppl-lang/ppl-fieldsummary-command.md
index 468c2046b..2015cf815 100644
--- a/docs/ppl-lang/ppl-fieldsummary-command.md
+++ b/docs/ppl-lang/ppl-fieldsummary-command.md
@@ -1,11 +1,11 @@
 ## PPL `fieldsummary` command
 
-**Description**
+### Description
 Using `fieldsummary` command to :
  - Calculate basic statistics for each field (count, distinct count, min, max, avg, stddev, mean )
  - Determine the data type of each field
 
-**Syntax**
+### Syntax
 
 `... | fieldsummary <field-list> (nulls=true/false)`
 
diff --git a/docs/ppl-lang/ppl-grok-command.md b/docs/ppl-lang/ppl-grok-command.md
index 06028109b..8d5946563 100644
--- a/docs/ppl-lang/ppl-grok-command.md
+++ b/docs/ppl-lang/ppl-grok-command.md
@@ -1,4 +1,4 @@
-## PPL Correlation Command
+## PPL `grok` command
 
 
 ### Description
diff --git a/docs/ppl-lang/ppl-head-command.md b/docs/ppl-lang/ppl-head-command.md
index e4172b1c6..51a87db3b 100644
--- a/docs/ppl-lang/ppl-head-command.md
+++ b/docs/ppl-lang/ppl-head-command.md
@@ -1,4 +1,4 @@
-## PPL `head` Command
+## PPL `head` command
 
 **Description**
 The ``head`` command returns the first N number of specified results after an optional offset in search order.
diff --git a/docs/ppl-lang/ppl-join-command.md b/docs/ppl-lang/ppl-join-command.md
index b374bce5f..f04f1c5c1 100644
--- a/docs/ppl-lang/ppl-join-command.md
+++ b/docs/ppl-lang/ppl-join-command.md
@@ -1,10 +1,115 @@
-## PPL Join Command
+## PPL `join` command
 
-## Overview
+### Description
 
-[Trace analytics](https://opensearch.org/docs/latest/observability-plugin/trace/ta-dashboards/) considered using SQL/PPL for its queries, but some graphs rely on joining two indices (span index and service map index) together which is not supported by SQL/PPL. Trace analytics was implemented with DSL + javascript, would be good if `join` being added to SQL could support this use case.
+The `JOIN` command combines two datasets together. The left side can be an index or the results of piped commands; the right side can be either an index or a subquery.
 
-### Schema
+### Syntax
+
+`[joinType] join [leftAlias] [rightAlias] [joinHints] on <joinCriteria> <right-dataset>`
+
+**joinType**
+- Syntax: `[INNER] | LEFT [OUTER] | RIGHT [OUTER] | FULL [OUTER] | CROSS | [LEFT] SEMI | [LEFT] ANTI`
+- Optional
+- Description: The type of join to perform. The default is `INNER` if not specified.
+
+**leftAlias**
+- Syntax: `left = <leftAlias>`
+- Optional
+- Description: The subquery alias to use with the left join side, to avoid ambiguous naming.
+
+**rightAlias**
+- Syntax: `right = <rightAlias>`
+- Optional
+- Description: The subquery alias to use with the right join side, to avoid ambiguous naming.
+
+**joinHints**
+- Syntax: `[hint.left.key1 = value1 hint.right.key2 = value2]`
+- Optional
+- Description: Zero or more space-separated join hints in the form of `Key` = `Value`. The key must start with `hint.left.` or `hint.right.`
+
+**joinCriteria**
+- Syntax: `<expression>`
+- Required
+- Description: The syntax starts with `ON`. It can be any comparison expression. Generally, the join criteria look like `<leftAlias>.<leftField>=<rightAlias>.<rightField>`. For example: `l.id = r.id`. If the join criteria contain multiple conditions, you can specify `AND` and `OR` operators between the comparison expressions. For example, `l.id = r.id AND l.email = r.email AND (r.age > 65 OR r.age < 18)`.
+
+**right-dataset**
+- Required
+- Description: Right dataset could be either an index or a subquery with/without alias.
+
+### Example 1: two indices join
+
+PPL query:
+
+    os> source=customer | join ON c_custkey = o_custkey orders
+        | fields c_custkey, c_nationkey, c_mktsegment, o_orderkey, o_orderstatus, o_totalprice | head 10
+    fetched rows / total rows = 10/10
+    +----------+-------------+-------------+------------+---------------+-------------+
+    | c_custkey| c_nationkey | c_mktsegment| o_orderkey | o_orderstatus | o_totalprice|
+    +----------+-------------+-------------+------------+---------------+-------------+
+    | 36901    | 13          | AUTOMOBILE  | 1          | O             | 173665.47   |
+    | 78002    | 10          | AUTOMOBILE  | 2          | O             | 46929.18    |
+    | 123314   | 15          | MACHINERY   | 3          | F             | 193846.25   |
+    | 136777   | 10          | HOUSEHOLD   | 4          | O             | 32151.78    |
+    | 44485    | 20          | FURNITURE   | 5          | F             | 144659.2    |
+    | 55624    | 7           | AUTOMOBILE  | 6          | F             | 58749.59    |
+    | 39136    | 5           | FURNITURE   | 7          | O             | 252004.18   |
+    | 130057   | 9           | FURNITURE   | 32         | O             | 208660.75   |
+    | 66958    | 18          | MACHINERY   | 33         | F             | 163243.98   |
+    | 61001    | 3           | FURNITURE   | 34         | O             | 58949.67    |
+    +----------+-------------+-------------+------------+---------------+-------------+
+
+### Example 2: three indices join
+
+PPL query:
+
+    os> source=customer | join ON c_custkey = o_custkey orders | join ON c_nationkey = n_nationkey nation
+        | fields c_custkey, c_mktsegment, o_orderkey, o_orderstatus, o_totalprice, n_name | head 10
+    fetched rows / total rows = 10/10
+    +----------+-------------+------------+---------------+-------------+--------------+
+    | c_custkey| c_mktsegment| o_orderkey | o_orderstatus | o_totalprice| n_name       |
+    +----------+-------------+------------+---------------+-------------+--------------+
+    | 36901    | AUTOMOBILE  | 1          | O             | 173665.47   | JORDAN       |
+    | 78002    | AUTOMOBILE  | 2          | O             | 46929.18    | IRAN         |
+    | 123314   | MACHINERY   | 3          | F             | 193846.25   | MOROCCO      |
+    | 136777   | HOUSEHOLD   | 4          | O             | 32151.78    | IRAN         |
+    | 44485    | FURNITURE   | 5          | F             | 144659.2    | SAUDI ARABIA |
+    | 55624    | AUTOMOBILE  | 6          | F             | 58749.59    | GERMANY      |
+    | 39136    | FURNITURE   | 7          | O             | 252004.18   | ETHIOPIA     |
+    | 130057   | FURNITURE   | 32         | O             | 208660.75   | INDONESIA    |
+    | 66958    | MACHINERY   | 33         | F             | 163243.98   | CHINA        |
+    | 61001    | FURNITURE   | 34         | O             | 58949.67    | CANADA       |
+    +----------+-------------+------------+---------------+-------------+--------------+
+
+### Example 3: join a subquery in right side
+
+PPL query:
+
+    os> source=supplier | join right = revenue0 ON s_suppkey = supplier_no
+         [
+           source=lineitem | where l_shipdate >= date('1996-01-01') AND l_shipdate < date_add(date('1996-01-01'), interval 3 month)
+           | eval supplier_no = l_suppkey | stats sum(l_extendedprice * (1 - l_discount)) as total_revenue by supplier_no
+         ]
+       | fields s_name, s_phone, total_revenue, supplier_no | head 10
+    fetched rows / total rows = 10/10
+    +---------------------+----------------+-------------------+-------------+
+    | s_name              | s_phone        | total_revenue     | supplier_no |
+    +---------------------+----------------+-------------------+-------------+
+    | Supplier#000007747  | 24-911-546-3505| 636204.0279       | 7747        |
+    | Supplier#000007748  | 29-535-184-2277| 538311.8099       | 7748        |
+    | Supplier#000007749  | 18-225-478-7489| 743462.4473000001 | 7749        |
+    | Supplier#000007750  | 28-680-484-7044| 616828.2220999999 | 7750        |
+    | Supplier#000007751  | 20-990-606-7343| 1092975.1925      | 7751        |
+    | Supplier#000007752  | 12-936-258-6650| 1090399.9666      | 7752        |
+    | Supplier#000007753  | 22-394-329-1153| 777130.7457000001 | 7753        |
+    | Supplier#000007754  | 26-941-591-5320| 866600.0501       | 7754        |
+    | Supplier#000007755  | 32-138-467-4225| 702256.7030000001 | 7755        |
+    | Supplier#000007756  | 29-860-205-8019| 1304979.0511999999| 7756        |
+    +---------------------+----------------+-------------------+-------------+
+
+### Example 4: complex example in OTEL
+
+**Schema**
 
 There will be at least 2 indices, `otel-v1-apm-span-*` (large) and `otel-v1-apm-service-map` (small).
 
@@ -30,154 +135,47 @@ Relevant fields from indices:
 
 Full schemas are defined in data-prepper repo: [`otel-v1-apm-span-*`](https://github.com/opensearch-project/data-prepper/blob/04dd7bd18977294800cf4b77d7f01914def75f23/docs/schemas/trace-analytics/otel-v1-apm-span-index-template.md), [`otel-v1-apm-service-map`](https://github.com/opensearch-project/data-prepper/blob/4e5f83814c4a0eed2a1ca9bab0693b9e32240c97/docs/schemas/trace-analytics/otel-v1-apm-service-map-index-template.md)
 
-### Requirement
-
-Support `join` to calculate the following:
+**Requirement**
 
 For each service, join span index on service map index to calculate metrics under different type of filters.
 
 ![image](https://user-images.githubusercontent.com/28062824/194170062-f0dd1d57-c5eb-44db-95e0-6b3b4e52f25a.png)
 
-This sample query calculates latency when filtered by trace group `client_cancel_order` for the `order` service. I only have a subquery example, don't have the join version of the query..
-
-```sql
-SELECT avg(durationInNanos)
-FROM `otel-v1-apm-span-000001` t1
-WHERE t1.serviceName = `order`
-  AND ((t1.name in
-          (SELECT target.resource
-           FROM `otel-v1-apm-service-map`
-           WHERE serviceName = `order`
-             AND traceGroupName = `client_cancel_order`)
-        AND t1.parentSpanId != NULL)
-       OR (t1.parentSpanId = NULL
-           AND t1.name = `client_cancel_order`))
-  AND t1.traceId in
-    (SELECT traceId
-     FROM `otel-v1-apm-span-000001`
-     WHERE serviceName = `order`)
-```
-## Migrate to PPL
-
-### Syntax of Join Command
-
-```sql
-SEARCH source=<left-table>
-| <other piped command>
-| [joinType] JOIN
-    [leftAlias]
-    [rightAlias]
-    [joinHints]
-    ON joinCriteria
-    <right-table>
-| <other piped command>
-```
-**joinType**
-- Syntax: `[INNER] | LEFT [OUTER] | RIGHT [OUTER] | FULL [OUTER] | CROSS | [LEFT] SEMI | [LEFT] ANTI`
-- Optional
-- Description: The type of join to perform. The default is `INNER` if not specified.
+The following sample query calculates latency when filtered by trace group `client_cancel_order` for the `order` service, using `join` instead of a subquery.
 
-**leftAlias**
-- Syntax: `left = <leftAlias>`
-- Optional
-- Description: The subquery alias to use with the left join side, to avoid ambiguous naming.
-
-**rightAlias**
-- Syntax: `right = <rightAlias>`
-- Optional
-- Description: The subquery alias to use with the right join side, to avoid ambiguous naming.
-
-**joinHints**
-- Syntax: `[hint.left.key1 = value1 hint.right.key2 = value2]`
-- Optional
-- Description: Zero or more space-separated join hints in the form of `Key` = `Value`. The key must start with `hint.left.` or `hint.right.`
-
-**joinCriteria**
-- Syntax: `<expression>`
-- Required
-- Description: The syntax starts with `ON`. It could be any comparison expression. Generally, the join criteria looks like `<leftAlias>.<leftField>=<rightAlias>.<rightField>`. For example: `l.id = r.id`. If the join criteria contains multiple conditions, you can specify `AND` and `OR` operator between each comparison expression. For example, `l.id = r.id AND l.email = r.email AND (r.age > 65 OR r.age < 18)`.
-
-**right-table**
-- Required
-- Description: The index or table name of join right-side. Sub-search is unsupported in join right side for now.
-
-### Rewriting
-```sql
-SEARCH source=otel-v1-apm-span-000001
+PPL query:
+```
+source=otel-v1-apm-span-000001
 | WHERE serviceName = 'order'
 | JOIN left=t1 right=t2
     ON t1.traceId = t2.traceId AND t2.serviceName = 'order'
-    otel-v1-apm-span-000001 -- self inner join
-| EVAL s_name = t1.name -- rename to avoid ambiguous
-| EVAL s_parentSpanId = t1.parentSpanId -- RENAME command would be better when it is supported
-| EVAL s_durationInNanos = t1.durationInNanos 
-| FIELDS s_name, s_parentSpanId, s_durationInNanos -- reduce colunms in join
+    otel-v1-apm-span-000001 // self inner join
+| RENAME t1.name as s_name
+| RENAME t1.parentSpanId as s_parentSpanId
+| RENAME t1.durationInNanos as s_durationInNanos
+| FIELDS s_name, s_parentSpanId, s_durationInNanos // reduce columns in join
 | LEFT JOIN left=s1 right=t3
     ON s_name = t3.target.resource AND t3.serviceName = 'order' AND t3.traceGroupName = 'client_cancel_order'
     otel-v1-apm-service-map
 | WHERE (s_parentSpanId IS NOT NULL OR (s_parentSpanId IS NULL AND s_name = 'client_cancel_order'))
-| STATS avg(s_durationInNanos) -- no need to add alias if there is no ambiguous
-```
-
-
-### More examples
-
-Migration from SQL query (TPC-H Q13):
-```sql
-SELECT c_count, COUNT(*) AS custdist
-FROM
-  ( SELECT c_custkey, COUNT(o_orderkey) c_count
-    FROM customer LEFT OUTER JOIN orders ON c_custkey = o_custkey
-        AND o_comment NOT LIKE '%unusual%packages%'
-    GROUP BY c_custkey
-  ) AS c_orders
-GROUP BY c_count
-ORDER BY custdist DESC, c_count DESC;
-```
-Rewritten by PPL Join query:
-```sql
-SEARCH source=customer
-| FIELDS c_custkey
-| LEFT OUTER JOIN
-    ON c_custkey = o_custkey AND o_comment NOT LIKE '%unusual%packages%'
-    orders
-| STATS count(o_orderkey) AS c_count BY c_custkey
-| STATS count() AS custdist BY c_count
-| SORT - custdist, - c_count
-```
-_- **Limitation: sub-searches is unsupported in join right side**_
-
-If sub-searches is supported, above ppl query could be rewritten as:
-```sql
-SEARCH source=customer
-| FIELDS c_custkey
-| LEFT OUTER JOIN
-   ON c_custkey = o_custkey
-   [
-      SEARCH source=orders
-      | WHERE o_comment NOT LIKE '%unusual%packages%'
-      | FIELDS o_orderkey, o_custkey
-   ]
-| STATS count(o_orderkey) AS c_count BY c_custkey
-| STATS count() AS custdist BY c_count
-| SORT - custdist, - c_count
+| STATS avg(s_durationInNanos)
 ```
 
 ### Comparison with [Correlation](ppl-correlation-command)
 
 A primary difference between `correlate` and `join` is that both sides of `correlate` are tables, but both sides of `join` are subqueries. 
 For example:
-```sql
+```
 source = testTable1
- | where country = 'Canada' OR country = 'England'
- | eval cname = lower(name)
- | fields cname, country, year, month
- | inner join left=l, right=r
-     ON l.cname = r.name AND l.country = r.country AND l.year = 2023 AND r.month = 4
-     testTable2s
+| where country = 'Canada' OR country = 'England'
+| eval cname = lower(name)
+| fields cname, country, year, month
+| inner join left=l right=r
+    ON l.cname = r.name AND l.country = r.country AND l.year = 2023 AND r.month = 4
+    testTable2s
 ```
 The subquery alias `l` does not represent the `testTable1` table itself. Instead, it represents the subquery:
-```sql
+```
 source = testTable1
 | where country = 'Canada' OR country = 'England'
 | eval cname = lower(name)
diff --git a/docs/ppl-lang/ppl-lookup-command.md b/docs/ppl-lang/ppl-lookup-command.md
index 1b8350533..87cf34bac 100644
--- a/docs/ppl-lang/ppl-lookup-command.md
+++ b/docs/ppl-lang/ppl-lookup-command.md
@@ -1,20 +1,18 @@
-## PPL Lookup Command
+## PPL `lookup` command
 
-## Overview
+### Description
 Lookup command enriches your search data by adding or replacing data from a lookup index (dimension table).
 You can extend fields of an index with values from a dimension table, append or replace values when lookup condition is matched.
 As an alternative of [Join command](ppl-join-command), lookup command is more suitable for enriching the source data with a static dataset.
 
 
-### Syntax of Lookup Command
+### Syntax
 
-```sql
-SEARCH source=<sourceIndex>
-| <other piped command>
-| LOOKUP <lookupIndex> (<lookupMappingField> [AS <sourceMappingField>])...
-    [(REPLACE | APPEND) (<inputField> [AS <outputField>])...]
-| <other piped command>
 ```
+LOOKUP <lookupIndex> (<lookupMappingField> [AS <sourceMappingField>])...
+       [(REPLACE | APPEND) (<inputField> [AS <outputField>])...]
+```
+
 **lookupIndex**
 - Required
 - Description: the name of lookup index (dimension table)
@@ -44,26 +42,49 @@ SEARCH source=<sourceIndex>
 - Description: If you specify REPLACE, matched values in \<lookupIndex\> field overwrite the values in result. If you specify APPEND, matched values in \<lookupIndex\> field only append to the missing values in result.
 
 ### Usage
-> LOOKUP <lookupIndex> id AS cid REPLACE mail AS email</br>
-> LOOKUP <lookupIndex> name REPLACE mail AS email</br>
-> LOOKUP <lookupIndex> id AS cid, name APPEND address, mail AS email</br>
-> LOOKUP <lookupIndex> id</br>
-
-### Example
-```sql
-SEARCH source=<sourceIndex>
-| WHERE orderType = 'Cancelled'
-| LOOKUP account_list, mkt_id AS mkt_code REPLACE amount, account_name AS name
-| STATS count(mkt_code), avg(amount) BY name
-```
-```sql
-SEARCH source=<sourceIndex>
-| DEDUP market_id
-| EVAL category=replace(category, "-", ".")
-| EVAL category=ltrim(category, "dvp.")
-| LOOKUP bounce_category category AS category APPEND classification
-```
-```sql
-SEARCH source=<sourceIndex>
-| LOOKUP bounce_category category
-```
+- `LOOKUP <lookupIndex> id AS cid REPLACE mail AS email`
+- `LOOKUP <lookupIndex> name REPLACE mail AS email`
+- `LOOKUP <lookupIndex> id AS cid, name APPEND address, mail AS email`
+- `LOOKUP <lookupIndex> id`
+
+### Example 1: replace
+
+PPL query:
+
+    os>source=people | LOOKUP work_info uid AS id REPLACE department | head 10
+    fetched rows / total rows = 10/10
+    +------+-----------+-------------+-----------+--------+------------------+
+    | id   | name      | occupation  | country   | salary | department       |
+    +------+-----------+-------------+-----------+--------+------------------+
+    | 1000 | Daniel    | Teacher     | Canada    | 56486  | CUSTOMER_SERVICE |
+    | 1001 | Joseph    | Lawyer      | Denmark   | 135943 | FINANCE          |
+    | 1002 | David     | Artist      | Finland   | 60391  | DATA             |
+    | 1003 | Charlotte | Lawyer      | Denmark   | 42173  | LEGAL            |
+    | 1004 | Isabella  | Veterinarian| Australia | 117699 | MARKETING        |
+    | 1005 | Lily      | Engineer    | Italy     | 37526  | IT               |
+    | 1006 | Emily     | Dentist     | Denmark   | 125340 | MARKETING        |
+    | 1007 | James     | Lawyer      | Germany   | 56532  | LEGAL            |
+    | 1008 | Lucas     | Lawyer      | Japan     | 87782  | DATA             |
+    | 1009 | Sophia    | Architect   | Sweden    | 37597  | MARKETING        |
+    +------+-----------+-------------+-----------+--------+------------------+
+
+### Example 2: append
+
+PPL query:
+
+    os>source=people| LOOKUP work_info uid AS ID, name APPEND department | where isnotnull(department) | head 10
+    fetched rows / total rows = 10/10
+    +------+---------+-------------+-------------+--------+------------+
+    | id   | name    | occupation  | country     | salary | department |
+    +------+---------+-------------+-------------+--------+------------+
+    | 1018 | Emma    | Architect   | USA         | 72400  | IT         |
+    | 1032 | James   | Pilot       | Netherlands | 71698  | SALES      |
+    | 1043 | Jane    | Nurse       | Brazil      | 45016  | FINANCE    |
+    | 1046 | Joseph  | Pharmacist  | Mexico      | 109152 | OPERATIONS |
+    | 1064 | Joseph  | Electrician | New Zealand | 50253  | LEGAL      |
+    | 1090 | Matthew | Psychologist| Germany     | 73396  | DATA       |
+    | 1103 | Emily   | Electrician | Switzerland | 98391  | DATA       |
+    | 1114 | Jake    | Nurse       | Denmark     | 53418  | SALES      |
+    | 1115 | Sofia   | Engineer    | Mexico      | 64829  | OPERATIONS |
+    | 1122 | Oliver  | Scientist   | Netherlands | 31146  | DATA       |
+    +------+---------+-------------+-------------+--------+------------+
diff --git a/docs/ppl-lang/ppl-parse-command.md b/docs/ppl-lang/ppl-parse-command.md
index 10be21cc0..0e000756e 100644
--- a/docs/ppl-lang/ppl-parse-command.md
+++ b/docs/ppl-lang/ppl-parse-command.md
@@ -1,4 +1,4 @@
-## PPL Parse Command
+## PPL `parse` command
 
 
 ### Description
diff --git a/docs/ppl-lang/ppl-rare-command.md b/docs/ppl-lang/ppl-rare-command.md
index e3ad21f4e..93967e6fe 100644
--- a/docs/ppl-lang/ppl-rare-command.md
+++ b/docs/ppl-lang/ppl-rare-command.md
@@ -1,11 +1,11 @@
-## PPL rare Command
+## PPL `rare` command
 
-**Description**
-Using ``rare`` command to find the least common tuple of values of all fields in the field list.
+### Description
+Using `rare` command to find the least common tuple of values of all fields in the field list.
 
 **Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields.
 
-**Syntax**
+### Syntax
 `rare [N] <field-list> [by-clause]`
 `rare_approx [N] <field-list> [by-clause]`
 
diff --git a/docs/ppl-lang/ppl-search-command.md b/docs/ppl-lang/ppl-search-command.md
index bccfd04f0..6e1cf0e50 100644
--- a/docs/ppl-lang/ppl-search-command.md
+++ b/docs/ppl-lang/ppl-search-command.md
@@ -1,7 +1,7 @@
 ## PPL `search` command
 
 ### Description
-Using ``search`` command to retrieve document from the index. ``search`` command could be only used as the first command in the PPL query.
+Using `search` command to retrieve documents from the index. The `search` command can only be used as the first command in the PPL query.
 
 
 ### Syntax
diff --git a/docs/ppl-lang/ppl-sort-command.md b/docs/ppl-lang/ppl-sort-command.md
index c3bf304d7..dd9b4b33d 100644
--- a/docs/ppl-lang/ppl-sort-command.md
+++ b/docs/ppl-lang/ppl-sort-command.md
@@ -1,7 +1,7 @@
-## PPL `sort`command
+## PPL `sort` command
 
 ### Description
-Using ``sort`` command to sorts all the search result by the specified fields.
+Using `sort` command to sort all the search results by the specified fields.
 
 
 ### Syntax
diff --git a/docs/ppl-lang/ppl-stats-command.md b/docs/ppl-lang/ppl-stats-command.md
index 552f83e46..a73800b26 100644
--- a/docs/ppl-lang/ppl-stats-command.md
+++ b/docs/ppl-lang/ppl-stats-command.md
@@ -1,7 +1,7 @@
 ## PPL `stats` command
 
 ### Description
-Using ``stats`` command to calculate the aggregation from search result.
+Using `stats` command to calculate the aggregation from search result.
  
 ### NULL/MISSING values handling:
 
diff --git a/docs/ppl-lang/ppl-subquery-command.md b/docs/ppl-lang/ppl-subquery-command.md
index c4a0c337c..766b37130 100644
--- a/docs/ppl-lang/ppl-subquery-command.md
+++ b/docs/ppl-lang/ppl-subquery-command.md
@@ -1,27 +1,27 @@
-## PPL SubQuery Commands:
+## PPL `subquery` command
 
-### Syntax
-The subquery command should be implemented using a clean, logical syntax that integrates with existing PPL structure.
+### Description
+The subquery command has 4 types: `InSubquery`, `ExistsSubquery`, `ScalarSubquery` and `RelationSubquery`.
+`InSubquery`, `ExistsSubquery` and `ScalarSubquery` are subquery expressions; they are commonly used in the Where clause (`where <boolean expression>`) and the Search filter (`search source=* <boolean expression>`).
 
-```sql
-source=logs | where field in [ subquery source=events | where condition | fields field ]
+For example, a subquery expression can be used in a boolean expression:
 ```
-
-In this example, the primary search (`source=logs`) is filtered by results from the subquery (`source=events`).
-
-The subquery command should allow nested queries to be as complex as necessary, supporting multiple levels of nesting.
-
-Example:
-
-```sql
-  source=logs | where id in [ subquery source=users | where user in [ subquery source=actions | where action="login" | fields user] | fields uid ]
+| where orders.order_id in [ source=returns | where return_reason="damaged" | fields order_id ]
 ```
+The `orders.order_id in [ source=... ]` is a `<boolean expression>`.
 
-For additional info See [Issue](https://github.com/opensearch-project/opensearch-spark/issues/661)
-
----
+But `RelationSubquery` is not a subquery expression; it is a subquery plan.
+Recall the [join command doc](ppl-join-command.md): the example there is a subquery/subsearch **plan**, rather than an **expression**.
 
-### InSubquery usage
+### Syntax
+- `where <field> [not] in [ source=... | ... | ... ]` (InSubquery)
+- `where [not] exists [ source=... | ... | ... ]` (ExistsSubquery)
+- `where <field> = [ source=... | ... | ... ]` (ScalarSubquery)
+- `source=[ source= ...]` (RelationSubquery)
+- `| join ON condition [ source= ]` (RelationSubquery in join right side)
+
+### Usage
+InSubquery:
 - `source = outer | where a in [ source = inner | fields b ]`
 - `source = outer | where (a) in [ source = inner | fields b ]`
 - `source = outer | where (a,b,c) in [ source = inner | fields d,e,f ]`
@@ -33,92 +33,9 @@ For additional info See [Issue](https://github.com/opensearch-project/opensearch
 - `source = outer | where a in [ source = inner1 | where b not in [ source = inner2 | fields c ] | fields b ]` (nested)
 - `source = table1 | inner join left = l right = r on l.a = r.a AND r.a in [ source = inner | fields d ] | fields l.a, r.a, b, c` (as join filter)
 
-**_SQL Migration examples with IN-Subquery PPL:_**
-1. tpch q4 (in-subquery with aggregation)
-```sql
-select
-  o_orderpriority,
-  count(*) as order_count
-from
-  orders
-where
-  o_orderdate >= date '1993-07-01'
-  and o_orderdate < date '1993-07-01' + interval '3' month
-  and o_orderkey in (
-    select
-      l_orderkey
-    from
-      lineitem
-    where l_commitdate < l_receiptdate
-  )
-group by
-  o_orderpriority
-order by
-  o_orderpriority
-```
-Rewritten by PPL InSubquery query:
-```sql
-source = orders
-| where o_orderdate >= "1993-07-01" and o_orderdate < "1993-10-01" and o_orderkey IN
-  [ source = lineitem
-    | where l_commitdate < l_receiptdate
-    | fields l_orderkey
-  ]
-| stats count(1) as order_count by o_orderpriority
-| sort o_orderpriority
-| fields o_orderpriority, order_count
-```
-2.tpch q20 (nested in-subquery)
-```sql
-select
-  s_name,
-  s_address
-from
-  supplier,
-  nation
-where
-  s_suppkey in (
-    select
-      ps_suppkey
-    from
-      partsupp
-    where
-      ps_partkey in (
-        select
-          p_partkey
-        from
-          part
-        where
-          p_name like 'forest%'
-      )
-  )
-  and s_nationkey = n_nationkey
-  and n_name = 'CANADA'
-order by
-  s_name
-```
-Rewritten by PPL InSubquery query:
-```sql
-source = supplier
-| where s_suppkey IN [
-    source = partsupp
-    | where ps_partkey IN [
-        source = part
-        | where like(p_name, "forest%")
-        | fields p_partkey
-      ]
-    | fields ps_suppkey
-  ]
-| inner join left=l right=r on s_nationkey = n_nationkey and n_name = 'CANADA'
-  nation
-| sort s_name
-```
----
-
-### ExistsSubquery usage
-
-Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner,  `e`, `f` are fields of table inner2
+ExistsSubquery:
 
+(Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner,  `e`, `f` are fields of table inner2)
 - `source = outer | where exists [ source = inner | where a = c ]`
 - `source = outer | where not exists [ source = inner | where a = c ]`
 - `source = outer | where exists [ source = inner | where a = c and b = d ]`
@@ -132,48 +49,9 @@ Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table in
 - `source = outer | where not exists [ source = inner | where c > 10 ]` (uncorrelated exists)
 - `source = outer | where exists [ source = inner ] | eval l = "nonEmpty" | fields l` (special uncorrelated exists)
 
-**_SQL Migration examples with Exists-Subquery PPL:_**
-
-tpch q4 (exists subquery with aggregation)
-```sql
-select
-  o_orderpriority,
-  count(*) as order_count
-from
-  orders
-where
-  o_orderdate >= date '1993-07-01'
-  and o_orderdate < date '1993-07-01' + interval '3' month
-  and exists (
-    select
-      l_orderkey
-    from
-      lineitem
-    where l_orderkey = o_orderkey
-      and l_commitdate < l_receiptdate
-  )
-group by
-  o_orderpriority
-order by
-  o_orderpriority
-```
-Rewritten by PPL ExistsSubquery query:
-```sql
-source = orders
-| where o_orderdate >= "1993-07-01" and o_orderdate < "1993-10-01"
-    and exists [
-      source = lineitem
-      | where l_orderkey = o_orderkey and l_commitdate < l_receiptdate
-    ]
-| stats count(1) as order_count by o_orderpriority
-| sort o_orderpriority
-| fields o_orderpriority, order_count
-```
----
-
-### ScalarSubquery usage
+ScalarSubquery:
 
-Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner,  `e`, `f` are fields of table nested
+(Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner,  `e`, `f` are fields of table nested)
 
 **Uncorrelated scalar subquery in Select**
 - `source = outer | eval m = [ source = inner | stats max(c) ] | fields m, a`
@@ -203,146 +81,102 @@ Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table in
 - `source = outer | where a = [ source = inner | stats max(c) | sort c ] OR b = [ source = inner | where c = 1 | stats min(d) | sort d ]`
 - `source = outer | where a = [ source = inner | where c =  [ source = nested | stats max(e) by f | sort f ] | stats max(d) by c | sort c | head 1 ]`
 
-_SQL Migration examples with Scalar-Subquery PPL:_
-Example 1
-```sql
-SELECT *
-FROM   outer
-WHERE  a = (SELECT   max(c)
-            FROM     inner1
-            WHERE c = (SELECT   max(e)
-                       FROM     inner2
-                       GROUP BY f
-                       ORDER BY f
-                       )
-            GROUP BY c
-            ORDER BY c
-            LIMIT 1)
-```
-Rewritten by PPL ScalarSubquery query:
-```sql
-source = spark_catalog.default.outer
-| where a = [
-    source = spark_catalog.default.inner1
-    | where c = [
-        source = spark_catalog.default.inner2
-        | stats max(e) by f
-        | sort f
-      ]
-    | stats max(d) by c
-    | sort c
-    | head 1
-  ]
-```
-Example 2
-```sql
-SELECT * FROM outer
-WHERE  a = (SELECT max(c)
-            FROM   inner
-            ORDER BY c)
-OR     b = (SELECT min(d)
-            FROM   inner
-            WHERE  c = 1
-            ORDER BY d)
-```
-Rewritten by PPL ScalarSubquery query:
-```sql
-source = spark_catalog.default.outer
-| where a = [
-    source = spark_catalog.default.inner | stats max(c) | sort c
-  ] OR b = [
-    source = spark_catalog.default.inner | where c = 1 | stats min(d) | sort d
-  ]
-```
----
-
-### (Relation) Subquery
-`InSubquery`, `ExistsSubquery` and `ScalarSubquery` are all subquery expressions. But `RelationSubquery` is not a subquery expression, it is a subquery plan which is common used in Join or From clause.
-
-- `source = table1 | join left = l right = r [ source = table2 | where d > 10 | head 5 ]` (subquery in join right side)
+RelationSubquery:
+- `source = table1 | join left = l right = r on condition [ source = table2 | where d > 10 | head 5 ]` (subquery in join right side)
 - `source = [ source = table1 | join left = l right = r [ source = table2 | where d > 10 | head 5 ] | stats count(a) by b ] as outer | head 1`
 
-**_SQL Migration examples with Subquery PPL:_**
-
-tpch q13
-```sql
-select
-    c_count,
-    count(*) as custdist
-from
-    (
-        select
-            c_custkey,
-            count(o_orderkey) as c_count
-        from
-            customer left outer join orders on
-                c_custkey = o_custkey
-                and o_comment not like '%special%requests%'
-        group by
-            c_custkey
-    ) as c_orders
-group by
-    c_count
-order by
-    custdist desc,
-    c_count desc
-```
-Rewritten by PPL (Relation) Subquery:
-```sql
-SEARCH source = [
-  SEARCH source = customer
-  | LEFT OUTER JOIN left = c right = o ON c_custkey = o_custkey
-    [
-      SEARCH source = orders
-      | WHERE not like(o_comment, '%special%requests%')
-    ]
-  | STATS COUNT(o_orderkey) AS c_count BY c_custkey
-] AS c_orders
-| STATS COUNT(o_orderkey) AS c_count BY c_custkey
-| STATS COUNT(1) AS custdist BY c_count
-| SORT - custdist, - c_count
-```
----
+### Example 1: TPC-H q20
+
+InSubquery and ScalarSubquery
+
+PPL query:
+
+    os> source=supplier
+        | join ON s_nationkey = n_nationkey nation
+        | where n_name = 'CANADA'
+            and s_suppkey in [                      // InSubquery
+                source = partsupp
+                | where ps_partkey in [             // InSubquery
+                    source = part
+                    | where like(p_name, 'forest%')
+                    | fields p_partkey
+                ]
+                and ps_availqty > [                 // ScalarSubquery
+                    source = lineitem
+                    | where l_partkey = ps_partkey
+                        and l_suppkey = ps_suppkey
+                        and l_shipdate >= date('1994-01-01')
+                        and l_shipdate < date_add(date('1994-01-01'), interval 1 year)
+                    | stats sum(l_quantity) as sum_l_quantity
+                    | eval half_sum_l_quantity = 0.5 * sum_l_quantity
+                    | fields half_sum_l_quantity
+                ]
+            | fields ps_suppkey
+        ]
+        | fields s_suppkey, s_name, s_phone, s_acctbal, n_name | head 10
+    fetched rows / total rows = 10/10
+    +-----------+---------------------+----------------+----------+---------+
+    | s_suppkey | s_name              | s_phone        | s_acctbal| n_name  |
+    +-----------+---------------------+----------------+----------+---------+
+    | 8243      | Supplier#000008243  | 13-707-547-1386| 9067.07  | CANADA  |
+    | 736       | Supplier#000000736  | 13-681-806-8650| 5700.83  | CANADA  |
+    | 9032      | Supplier#000009032  | 13-441-662-5539| 3982.32  | CANADA  |
+    | 3201      | Supplier#000003201  | 13-600-413-7165| 3799.41  | CANADA  |
+    | 3849      | Supplier#000003849  | 13-582-965-9117| 52.33    | CANADA  |
+    | 5505      | Supplier#000005505  | 13-531-190-6523| 2023.4   | CANADA  |
+    | 5195      | Supplier#000005195  | 13-622-661-2956| 3717.34  | CANADA  |
+    | 9753      | Supplier#000009753  | 13-724-256-7877| 4406.93  | CANADA  |
+    | 7135      | Supplier#000007135  | 13-367-994-6705| 4950.29  | CANADA  |
+    | 5256      | Supplier#000005256  | 13-180-538-8836| 5624.79  | CANADA  |
+    +-----------+---------------------+----------------+----------+---------+
+
+
+### Example 2: TPC-H q22
+
+RelationSubquery, ScalarSubquery and ExistsSubquery
+
+PPL query:
+
+    os> source = [                                  // RelationSubquery
+            source = customer
+            | where substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17')
+            and c_acctbal > [                       // ScalarSubquery
+                source = customer
+                | where c_acctbal > 0.00
+                    and substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17')
+                | stats avg(c_acctbal)
+            ]
+            and not exists [                        // ExistsSubquery
+                source = orders
+                | where o_custkey = c_custkey
+            ]
+            | eval cntrycode = substring(c_phone, 1, 2)
+            | fields cntrycode, c_acctbal
+        ] as custsale
+        | stats count() as numcust, sum(c_acctbal) as totacctbal by cntrycode
+        | sort cntrycode
+    fetched rows / total rows = 7/7
+    +---------+--------------------+------------+
+    | numcust | totacctbal         | cntrycode  |
+    +---------+--------------------+------------+
+    | 888     | 6737713.989999999  | 13         |
+    | 861     | 6460573.72         | 17         |
+    | 964     | 7236687.4          | 18         |
+    | 892     | 6701457.950000001  | 23         |
+    | 948     | 7158866.630000001  | 29         |
+    | 909     | 6808436.129999999  | 30         |
+    | 922     | 6806670.179999999  | 31         |
+    +---------+--------------------+------------+
 
 ### Additional Context
 
-`InSubquery`, `ExistsSubquery` and `ScalarSubquery` as subquery expressions, their common usage is in `where` clause and `search filter`.
-
-Where command:
-```
-| where <boolean expression> | ...
-```
-Search filter:
-```
-search source=* <boolean expression> | ...
-```
-A subquery expression could be used in boolean expression, for example
-
-```sql
-| where orders.order_id in [ source=returns | where return_reason="damaged" | field order_id ]
-```
-
-The `orders.order_id in [ source=... ]` is a `<boolean expression>`.
-
-In general, we name this kind of subquery clause the `InSubquery` expression, it is a `<boolean expression>`.
-
-**Subquery with Different Join Types**
+#### RelationSubquery
 
-In issue description is a `ScalarSubquery`:
-
-```sql
-source=employees
-| join source=sales on employees.employee_id = sales.employee_id
-| where sales.sale_amount > [ source=targets | where target_met="true" | fields target_value ]
+`RelationSubquery` is a plan rather than an expression, for example
 ```
-
-But `RelationSubquery` is not a subquery expression, it is a subquery plan.
-[Recall the join command doc](ppl-join-command.md), the example is a subquery/subsearch **plan**, rather than a **expression**.
-
-```sql
-SEARCH source=customer
+source=customer
 | FIELDS c_custkey
-| LEFT OUTER JOIN left = c, right = o ON c.c_custkey = o.o_custkey
+| LEFT OUTER JOIN left = c right = o ON c.c_custkey = o.o_custkey
    [
       SEARCH source=orders
       | WHERE o_comment NOT LIKE '%unusual%packages%'
@@ -351,7 +185,7 @@ SEARCH source=customer
 | STATS ...
 ```
 simply into
-```sql
+```
 SEARCH <leftPlan>
 | LEFT OUTER JOIN ON <condition>
    [
@@ -359,21 +193,14 @@ SEARCH <leftPlan>
    ]
 | STATS ...
 ```
-Apply the syntax here and simply into
-
-```sql
-search <leftPlan> | left join on <condition> [ search ... ]
-```
-
-The `[ search ...]` is not a `expression`, it's `plan`, similar to the `relation` plan
 
-**Uncorrelated Subquery**
+#### Uncorrelated Subquery
 
 An uncorrelated subquery is independent of the outer query. It is executed once, and the result is used by the outer query.
 It's **less common** when using `ExistsSubquery` because `ExistsSubquery` typically checks for the presence of rows that are dependent on the outer query’s row.
 
 There is a very special exists subquery which highlight by `(special uncorrelated exists)`:
-```sql
+```
 SELECT 'nonEmpty'
 FROM outer
     WHERE EXISTS (
@@ -382,7 +209,7 @@ FROM outer
     );
 ```
 Rewritten by PPL ExistsSubquery query:
-```sql
+```
 source = outer
 | where exists [
     source = inner
@@ -392,11 +219,11 @@ source = outer
 ```
 This query just print "nonEmpty" if the inner table is not empty.
 
-**Table alias in subquery**
+#### Table alias in subquery
 
 Table alias is useful in query which contains a subquery, for example
 
-```sql
+```
 select a, (
              select sum(b)
              from catalog.schema.table1 as t1
diff --git a/docs/ppl-lang/ppl-top-command.md b/docs/ppl-lang/ppl-top-command.md
index 93d3a7148..2bacdba50 100644
--- a/docs/ppl-lang/ppl-top-command.md
+++ b/docs/ppl-lang/ppl-top-command.md
@@ -1,6 +1,6 @@
-## PPL top Command
+## PPL `top` command
 
-**Description**
+### Description
 Using ``top`` command to find the most common tuple of values of all fields in the field list.
 
 
diff --git a/docs/ppl-lang/ppl-trendline-command.md b/docs/ppl-lang/ppl-trendline-command.md
index b466e2e8f..b2be172cd 100644
--- a/docs/ppl-lang/ppl-trendline-command.md
+++ b/docs/ppl-lang/ppl-trendline-command.md
@@ -1,7 +1,7 @@
-## PPL trendline Command
+## PPL `trendline` command
 
-**Description**
-Using ``trendline`` command to calculate moving averages of fields.
+### Description
+Using `trendline` command to calculate moving averages of fields.
 
 ### Syntax - SMA (Simple Moving Average)
 `TRENDLINE [sort <[+|-] sort-field>] SMA(number-of-datapoints, field) [AS alias] [SMA(number-of-datapoints, field) [AS alias]]...`
diff --git a/docs/ppl-lang/ppl-where-command.md b/docs/ppl-lang/ppl-where-command.md
index aa7d9299e..ec676ab62 100644
--- a/docs/ppl-lang/ppl-where-command.md
+++ b/docs/ppl-lang/ppl-where-command.md
@@ -1,4 +1,4 @@
-## PPL where Command
+## PPL `where` command
 
 ### Description
 The ``where`` command bool-expression to filter the search result. The ``where`` command only return the result when bool-expression evaluated to true.