From cb813c158f362576f90e5646a3d05ddf22cefc6d Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Sat, 12 Oct 2024 00:10:58 +0800 Subject: [PATCH 1/5] AND should have higher precedence than OR in predicate expression (#771) Signed-off-by: Lantao Jin --- .../src/main/antlr4/OpenSearchPPLParser.g4 | 2 +- ...ogicalPlanFiltersTranslatorTestSuite.scala | 34 +++++++++++++------ 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index 7521faefe..af7e0ec14 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -358,8 +358,8 @@ expression logicalExpression : comparisonExpression # comparsion | NOT logicalExpression # logicalNot - | left = logicalExpression OR right = logicalExpression # logicalOr | left = logicalExpression (AND)? right = logicalExpression # logicalAnd + | left = logicalExpression OR right = logicalExpression # logicalOr | left = logicalExpression XOR right = logicalExpression # logicalXor | booleanExpression # booleanExpr | isEmptyExpression # isEmptyExpr diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanFiltersTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanFiltersTranslatorTestSuite.scala index 407b3df84..20809db95 100644 --- a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanFiltersTranslatorTestSuite.scala +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanFiltersTranslatorTestSuite.scala @@ -5,23 +5,15 @@ package org.opensearch.flint.spark.ppl -import org.junit.Assert.assertEquals -import org.mockito.Mockito.when import org.opensearch.flint.spark.ppl.PlaneUtils.plan import org.opensearch.sql.ppl.{CatalystPlanContext, CatalystQueryPlanVisitor} import org.scalatest.matchers.should.Matchers -import org.scalatestplus.mockito.MockitoSugar.mock import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry, TableFunctionRegistry, UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} -import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.expressions.{Alias, And, Ascending, Descending, Divide, EqualTo, Floor, GreaterThan, GreaterThanOrEqual, LessThan, LessThanOrEqual, Like, Literal, NamedExpression, Not, Or, SortOrder, UnixTimestamp} -import org.apache.spark.sql.catalyst.expressions.aggregate._ -import org.apache.spark.sql.catalyst.parser.ParserInterface +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} +import org.apache.spark.sql.catalyst.expressions.{And, Ascending, EqualTo, GreaterThan, GreaterThanOrEqual, LessThan, LessThanOrEqual, Literal, Not, Or, SortOrder} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.types.{IntegerType, StructField, StructType} class PPLLogicalPlanFiltersTranslatorTestSuite extends SparkFunSuite @@ -219,4 +211,26 @@ class PPLLogicalPlanFiltersTranslatorTestSuite comparePlans(expectedPlan, logPlan, false) } + + test("test order of evaluation of predicate expression") { + val context = new CatalystPlanContext + val logPlan = planTransformer.visit( + plan( + pplParser, + "source=employees | 
where department = 'HR' OR job_title = 'Manager' AND salary > 50000"), + context) + + val table = UnresolvedRelation(Seq("employees")) + val filter = + Filter( + Or( + EqualTo(UnresolvedAttribute("department"), Literal("HR")), + And( + EqualTo(UnresolvedAttribute("job_title"), Literal("Manager")), + GreaterThan(UnresolvedAttribute("salary"), Literal(50000)))), + table) + + val expectedPlan = Project(Seq(UnresolvedStar(None)), filter) + comparePlans(expectedPlan, logPlan, false) + } } From fe5148cb4968917e532a1ddd745a80d4f58e576b Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Sat, 12 Oct 2024 00:19:34 +0800 Subject: [PATCH 2/5] Support table identifier contains dot with backticks (#768) * Support table identifier contains dot with backticks Signed-off-by: Lantao Jin * also fix the bug of describe Signed-off-by: Lantao Jin * add doc Signed-off-by: Lantao Jin --------- Signed-off-by: Lantao Jin --- docs/ppl-lang/PPL-Example-Commands.md | 5 ++ .../spark/ppl/FlintSparkPPLBasicITSuite.scala | 83 ++++++++++++++++++- .../org/opensearch/sql/ast/tree/Relation.java | 4 + .../sql/ppl/CatalystQueryPlanVisitor.java | 25 ++---- .../sql/ppl/utils/RelationUtils.java | 26 +++++- ...lPlanBasicQueriesTranslatorTestSuite.scala | 21 ++++- 6 files changed, 138 insertions(+), 26 deletions(-) diff --git a/docs/ppl-lang/PPL-Example-Commands.md b/docs/ppl-lang/PPL-Example-Commands.md index c553d483f..7d57651c3 100644 --- a/docs/ppl-lang/PPL-Example-Commands.md +++ b/docs/ppl-lang/PPL-Example-Commands.md @@ -2,6 +2,11 @@ #### **Describe** - `describe table` This command is equal to the `DESCRIBE EXTENDED table` SQL command +- `describe schema.table` +- `` describe schema.`table` `` +- `describe catalog.schema.table` +- `` describe catalog.schema.`table` `` +- `` describe `catalog`.`schema`.`table` `` #### **Explain** - `explain simple | source = table | where a = 1 | fields a,b,c` diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBasicITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBasicITSuite.scala index 4c38e1471..cbc4308b0 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBasicITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBasicITSuite.scala @@ -5,7 +5,7 @@ package org.opensearch.flint.spark.ppl -import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation, UnresolvedStar} import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Descending, EqualTo, IsNotNull, Literal, Not, SortOrder} @@ -22,12 +22,20 @@ class FlintSparkPPLBasicITSuite /** Test table and index name */ private val testTable = "spark_catalog.default.flint_ppl_test" + private val t1 = "`spark_catalog`.`default`.`flint_ppl_test1`" + private val t2 = "`spark_catalog`.default.`flint_ppl_test2`" + private val t3 = "spark_catalog.`default`.`flint_ppl_test3`" + private val t4 = "`spark_catalog`.`default`.flint_ppl_test4" override def beforeAll(): Unit = { super.beforeAll() // Create test table createPartitionedStateCountryTable(testTable) + createPartitionedStateCountryTable(t1) + createPartitionedStateCountryTable(t2) + createPartitionedStateCountryTable(t3) + createPartitionedStateCountryTable(t4) } protected override def afterEach(): Unit = { @@ -516,4 +524,77 @@ 
class FlintSparkPPLBasicITSuite // Compare the two plans comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) } + + test("test backtick table names and name contains '.'") { + Seq(t1, t2, t3, t4).foreach { table => + val frame = sql(s""" + | source = $table| head 2 + | """.stripMargin) + assert(frame.collect().length == 2) + } + // test read table which is unable to create + val t5 = "`spark_catalog`.default.`flint/ppl/test5.log`" + val t6 = "spark_catalog.default.`flint_ppl_test6.log`" + Seq(t5, t6).foreach { table => + val ex = intercept[AnalysisException](sql(s""" + | source = $table| head 2 + | """.stripMargin)) + assert(ex.getMessage().contains("TABLE_OR_VIEW_NOT_FOUND")) + } + val t7 = "spark_catalog.default.flint_ppl_test7.log" + val ex = intercept[IllegalArgumentException](sql(s""" + | source = $t7| head 2 + | """.stripMargin)) + assert(ex.getMessage().contains("Invalid table name")) + } + + test("test describe backtick table names and name contains '.'") { + Seq(t1, t2, t3, t4).foreach { table => + val frame = sql(s""" + | describe $table + | """.stripMargin) + assert(frame.collect().length > 0) + } + // test read table which is unable to create + val t5 = "`spark_catalog`.default.`flint/ppl/test5.log`" + val t6 = "spark_catalog.default.`flint_ppl_test6.log`" + Seq(t5, t6).foreach { table => + val ex = intercept[AnalysisException](sql(s""" + | describe $table + | """.stripMargin)) + assert(ex.getMessage().contains("TABLE_OR_VIEW_NOT_FOUND")) + } + val t7 = "spark_catalog.default.flint_ppl_test7.log" + val ex = intercept[IllegalArgumentException](sql(s""" + | describe $t7 + | """.stripMargin)) + assert(ex.getMessage().contains("Invalid table name")) + } + + test("test explain backtick table names and name contains '.'") { + Seq(t1, t2, t3, t4).foreach { table => + val frame = sql(s""" + | explain extended | source = $table + | """.stripMargin) + assert(frame.collect().length > 0) + } + // test read table which is unable to create + val table = "`spark_catalog`.default.`flint/ppl/test4.log`" + val frame = sql(s""" + | explain extended | source = $table + | """.stripMargin) + val logicalPlan: LogicalPlan = frame.queryExecution.logical + val relation = UnresolvedRelation(Seq("spark_catalog", "default", "flint/ppl/test4.log")) + val expectedPlan: LogicalPlan = + ExplainCommand( + Project(Seq(UnresolvedStar(None)), relation), + ExplainMode.fromString("extended")) + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + + val t7 = "spark_catalog.default.flint_ppl_test7.log" + val ex = intercept[IllegalArgumentException](sql(s""" + | explain extended | source = $t7 + | """.stripMargin)) + assert(ex.getMessage().contains("Invalid table name")) + } } diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/Relation.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/Relation.java index cb9bbd64d..e1732f75f 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/Relation.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/Relation.java @@ -49,6 +49,10 @@ public List getTableName() { return tableName.stream().map(Object::toString).collect(Collectors.toList()); } + public List getQualifiedNames() { + return tableName.stream().map(t -> (QualifiedName) t).collect(Collectors.toList()); + } + /** * Return alias. 
* diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java index 26ad4198a..38dc4092e 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java @@ -86,7 +86,6 @@ import org.opensearch.sql.ppl.utils.ParseStrategy; import org.opensearch.sql.ppl.utils.SortUtils; import scala.Option; -import scala.Option$; import scala.Tuple2; import scala.collection.IterableLike; import scala.collection.Seq; @@ -111,6 +110,7 @@ import static org.opensearch.sql.ppl.utils.LookupTransformer.buildLookupRelationProjectList; import static org.opensearch.sql.ppl.utils.LookupTransformer.buildOutputProjectList; import static org.opensearch.sql.ppl.utils.LookupTransformer.buildProjectListFromFields; +import static org.opensearch.sql.ppl.utils.RelationUtils.getTableIdentifier; import static org.opensearch.sql.ppl.utils.RelationUtils.resolveField; import static org.opensearch.sql.ppl.utils.WindowSpecTransformer.window; @@ -150,22 +150,7 @@ public LogicalPlan visitExplain(Explain node, CatalystPlanContext context) { @Override public LogicalPlan visitRelation(Relation node, CatalystPlanContext context) { if (node instanceof DescribeRelation) { - TableIdentifier identifier; - if (node.getTableQualifiedName().getParts().size() == 1) { - identifier = new TableIdentifier(node.getTableQualifiedName().getParts().get(0)); - } else if (node.getTableQualifiedName().getParts().size() == 2) { - identifier = new TableIdentifier( - node.getTableQualifiedName().getParts().get(1), - Option$.MODULE$.apply(node.getTableQualifiedName().getParts().get(0))); - } else if (node.getTableQualifiedName().getParts().size() == 3) { - identifier = new TableIdentifier( - node.getTableQualifiedName().getParts().get(2), - Option$.MODULE$.apply(node.getTableQualifiedName().getParts().get(0)), - Option$.MODULE$.apply(node.getTableQualifiedName().getParts().get(1))); - } else { - throw new IllegalArgumentException("Invalid table name: " + node.getTableQualifiedName() - + " Syntax: [ database_name. 
] table_name"); - } + TableIdentifier identifier = getTableIdentifier(node.getTableQualifiedName()); return context.with( new DescribeTableCommand( identifier, @@ -174,9 +159,9 @@ public LogicalPlan visitRelation(Relation node, CatalystPlanContext context) { DescribeRelation$.MODULE$.getOutputAttrs())); } //regular sql algebraic relations - node.getTableName().forEach(t -> + node.getQualifiedNames().forEach(q -> // Resolving the qualifiedName which is composed of a datasource.schema.table - context.withRelation(new UnresolvedRelation(seq(of(t.split("\\."))), CaseInsensitiveStringMap.empty(), false)) + context.withRelation(new UnresolvedRelation(getTableIdentifier(q).nameParts(), CaseInsensitiveStringMap.empty(), false)) ); return context.getPlan(); } @@ -325,7 +310,7 @@ public LogicalPlan visitAggregation(Aggregation node, CatalystPlanContext contex seq(new ArrayList()))); context.apply(p -> new org.apache.spark.sql.catalyst.plans.logical.Sort(sortElements, true, logicalPlan)); } - //visit TopAggregation results limit + //visit TopAggregation results limit if ((node instanceof TopAggregation) && ((TopAggregation) node).getResults().isPresent()) { context.apply(p -> (LogicalPlan) Limit.apply(new org.apache.spark.sql.catalyst.expressions.Literal( ((TopAggregation) node).getResults().get().getValue(), org.apache.spark.sql.types.DataTypes.IntegerType), p)); diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/RelationUtils.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/RelationUtils.java index 33cb5611d..7be7f1f45 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/RelationUtils.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/RelationUtils.java @@ -1,8 +1,10 @@ package org.opensearch.sql.ppl.utils; +import org.apache.spark.sql.catalyst.TableIdentifier; import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation; import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; import org.opensearch.sql.ast.expression.QualifiedName; +import scala.Option$; import java.util.List; import java.util.Optional; @@ -15,7 +17,7 @@ public interface RelationUtils { * * @param relations * @param node - * @param contextRelations + * @param tables * @return */ static Optional resolveField(List relations, QualifiedName node, List tables) { @@ -29,4 +31,26 @@ static Optional resolveField(List relations, .findFirst() .map(rel -> node); } + + static TableIdentifier getTableIdentifier(QualifiedName qualifiedName) { + TableIdentifier identifier; + if (qualifiedName.getParts().isEmpty()) { + throw new IllegalArgumentException("Empty table name is invalid"); + } else if (qualifiedName.getParts().size() == 1) { + identifier = new TableIdentifier(qualifiedName.getParts().get(0)); + } else if (qualifiedName.getParts().size() == 2) { + identifier = new TableIdentifier( + qualifiedName.getParts().get(1), + Option$.MODULE$.apply(qualifiedName.getParts().get(0))); + } else if (qualifiedName.getParts().size() == 3) { + identifier = new TableIdentifier( + qualifiedName.getParts().get(2), + Option$.MODULE$.apply(qualifiedName.getParts().get(1)), + Option$.MODULE$.apply(qualifiedName.getParts().get(0))); + } else { + throw new IllegalArgumentException("Invalid table name: " + qualifiedName + + " Syntax: [ database_name. 
] table_name"); + } + return identifier; + } } diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBasicQueriesTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBasicQueriesTranslatorTestSuite.scala index cc87e8853..96176982e 100644 --- a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBasicQueriesTranslatorTestSuite.scala +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBasicQueriesTranslatorTestSuite.scala @@ -37,13 +37,26 @@ class PPLLogicalPlanBasicQueriesTranslatorTestSuite thrown.getMessage === "Invalid table name: t.b.c.d Syntax: [ database_name. ] table_name") } + test("test describe with backticks") { + val context = new CatalystPlanContext + val logPlan = + planTransformer.visit(plan(pplParser, "describe t.b.`c.d`"), context) + + val expectedPlan = DescribeTableCommand( + TableIdentifier("c.d", Option("b"), Option("t")), + Map.empty[String, String].empty, + isExtended = true, + output = DescribeRelation.getOutputAttrs) + comparePlans(expectedPlan, logPlan, false) + } + test("test describe FQN table clause") { val context = new CatalystPlanContext val logPlan = - planTransformer.visit(plan(pplParser, "describe schema.default.http_logs"), context) + planTransformer.visit(plan(pplParser, "describe catalog.schema.http_logs"), context) val expectedPlan = DescribeTableCommand( - TableIdentifier("http_logs", Option("schema"), Option("default")), + TableIdentifier("http_logs", Option("schema"), Option("catalog")), Map.empty[String, String].empty, isExtended = true, output = DescribeRelation.getOutputAttrs) @@ -64,10 +77,10 @@ class PPLLogicalPlanBasicQueriesTranslatorTestSuite test("test FQN table describe table clause") { val context = new CatalystPlanContext - val logPlan = planTransformer.visit(plan(pplParser, "describe catalog.t"), context) + val logPlan = planTransformer.visit(plan(pplParser, "describe schema.t"), context) val expectedPlan = DescribeTableCommand( - TableIdentifier("t", Option("catalog")), + TableIdentifier("t", Option("schema")), Map.empty[String, String].empty, isExtended = true, output = DescribeRelation.getOutputAttrs) From d83f61dc7845132743d834a13a79c25bba4bf6d1 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Sat, 12 Oct 2024 11:21:57 +0800 Subject: [PATCH 3/5] Support Exists Subquery in PPL (#769) Signed-off-by: Lantao Jin --- docs/ppl-lang/PPL-Example-Commands.md | 61 +-- docs/ppl-lang/ppl-subquery-command.md | 90 ++++- .../FlintSparkPPLExistsSubqueryITSuite.scala | 373 ++++++++++++++++++ .../ppl/FlintSparkPPLInSubqueryITSuite.scala | 3 - .../src/main/antlr4/OpenSearchPPLLexer.g4 | 1 + .../src/main/antlr4/OpenSearchPPLParser.g4 | 12 +- .../sql/ast/AbstractNodeVisitor.java | 9 +- .../expression/subquery/ExistsSubquery.java | 29 ++ .../expression/{ => subquery}/InSubquery.java | 4 +- .../{ => subquery}/ScalarSubquery.java | 3 +- .../sql/ppl/CatalystQueryPlanVisitor.java | 20 +- .../sql/ppl/parser/AstExpressionBuilder.java | 10 +- ...lanExistsSubqueryTranslatorTestSuite.scala | 315 +++++++++++++++ 13 files changed, 860 insertions(+), 70 deletions(-) create mode 100644 integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLExistsSubqueryITSuite.scala create mode 100644 ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/subquery/ExistsSubquery.java rename ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/{ => 
subquery}/InSubquery.java (87%) rename ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/{ => subquery}/ScalarSubquery.java (84%) create mode 100644 ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanExistsSubqueryTranslatorTestSuite.scala diff --git a/docs/ppl-lang/PPL-Example-Commands.md b/docs/ppl-lang/PPL-Example-Commands.md index 7d57651c3..8e6cbaae9 100644 --- a/docs/ppl-lang/PPL-Example-Commands.md +++ b/docs/ppl-lang/PPL-Example-Commands.md @@ -273,7 +273,7 @@ _- **Limitation: "REPLACE" or "APPEND" clause must contain "AS"**_ **SQL Migration examples with IN-Subquery PPL:** -1. tpch q4 (in-subquery with aggregation) +tpch q4 (in-subquery with aggregation) ```sql select o_orderpriority, @@ -309,52 +309,21 @@ source = orders | fields o_orderpriority, order_count ``` -2.tpch q20 (nested in-subquery) -```sql -select - s_name, - s_address -from - supplier, - nation -where - s_suppkey in ( - select - ps_suppkey - from - partsupp - where - ps_partkey in ( - select - p_partkey - from - part - where - p_name like 'forest%' - ) - ) - and s_nationkey = n_nationkey - and n_name = 'CANADA' -order by - s_name -``` +#### **ExistsSubquery** +[See additional command details](ppl-subquery-command.md) + +Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner, `e`, `f` are fields of table inner2 +- `source = outer | where exists [ source = inner | where a = c ]` +- `source = outer | where not exists [ source = inner | where a = c ]` +- `source = outer | where exists [ source = inner | where a = c and b = d ]` +- `source = outer | where not exists [ source = inner | where a = c and b = d ]` +- `source = outer | where exists [ source = inner1 | where a = c and exists [ source = inner2 | where c = e ] ]` (nested) +- `source = outer | where exists [ source = inner1 | where a = c | where exists [ source = inner2 | where c = e ] ]` (nested) +- `source = outer | where exists [ source = inner | where c > 10 ]` (uncorrelated exists) +- `source = outer | where not exists [ source = inner | where c > 10 ]` (uncorrelated exists) +- `source = outer | where exists [ source = inner ] | eval l = "Bala" | fields l` (special uncorrelated exists) + -Rewritten by PPL InSubquery query: -```sql -source = supplier -| where s_suppkey IN [ - source = partsupp - | where ps_partkey IN [ - source = part - | where like(p_name, "forest%") - | fields p_partkey - ] - | fields ps_suppkey - ] -| inner join left=l right=r on s_nationkey = n_nationkey and n_name = 'CANADA' - nation -| sort s_name -``` #### **ScalarSubquery** [See additional command details](ppl-subquery-command.md) diff --git a/docs/ppl-lang/ppl-subquery-command.md b/docs/ppl-lang/ppl-subquery-command.md index 1762306d2..ac0f98fe8 100644 --- a/docs/ppl-lang/ppl-subquery-command.md +++ b/docs/ppl-lang/ppl-subquery-command.md @@ -112,6 +112,58 @@ source = supplier | sort s_name ``` +**ExistsSubquery usage** + +Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner, `e`, `f` are fields of table inner2 + +- `source = outer | where exists [ source = inner | where a = c ]` +- `source = outer | where not exists [ source = inner | where a = c ]` +- `source = outer | where exists [ source = inner | where a = c and b = d ]` +- `source = outer | where not exists [ source = inner | where a = c and b = d ]` +- `source = outer | where exists [ source = inner1 | where a = c and exists [ source = inner2 | where c = e ] ]` (nested) +- `source = outer | where exists [ source = 
inner1 | where a = c | where exists [ source = inner2 | where c = e ] ]` (nested) +- `source = outer | where exists [ source = inner | where c > 10 ]` (uncorrelated exists) +- `source = outer | where not exists [ source = inner | where c > 10 ]` (uncorrelated exists) +- `source = outer | where exists [ source = inner ] | eval l = "nonEmpty" | fields l` (special uncorrelated exists) + +**_SQL Migration examples with Exists-Subquery PPL:_** + +tpch q4 (exists subquery with aggregation) +```sql +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and exists ( + select + l_orderkey + from + lineitem + where l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority +``` +Rewritten by PPL ExistsSubquery query: +```sql +source = orders +| where o_orderdate >= "1993-07-01" and o_orderdate < "1993-10-01" + and exists [ + source = lineitem + | where l_orderkey = o_orderkey and l_commitdate < l_receiptdate + ] +| stats count(1) as order_count by o_orderpriority +| sort o_orderpriority +| fields o_orderpriority, order_count +``` + **ScalarSubquery usage** Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner, `e`, `f` are fields of table nested @@ -191,14 +243,14 @@ source = spark_catalog.default.outer ### **Additional Context** -The most cases in the description is to request a `InSubquery` expression. +`InSubquery`, `ExistsSubquery` and `ScalarSubquery` are all subquery expression. The common usage of subquery expression is in `where` clause: The `where` command syntax is: ``` | where ``` -So the subquery in description is part of boolean expression, such as +So the subquery is part of boolean expression, such as ```sql | where orders.order_id in (subquery source=returns | where return_reason="damaged" | return order_id) @@ -217,10 +269,11 @@ In issue description is a `ScalarSubquery`: ```sql source=employees | join source=sales on employees.employee_id = sales.employee_id -| where sales.sale_amount > (subquery source=targets | where target_met="true" | return target_value) +| where sales.sale_amount > [ source=targets | where target_met="true" | fields target_value ] ``` -Recall the join command doc: https://github.com/opensearch-project/opensearch-spark/blob/main/docs/PPL-Join-command.md#more-examples, the example is a subquery/subsearch **plan**, rather than a **expression**. +But `RelationSubquery` is not a subquery expression, it is a subquery plan. +[Recall the join command doc](ppl-join-command.md), the example is a subquery/subsearch **plan**, rather than a **expression**. ```sql SEARCH source=customer @@ -245,7 +298,32 @@ SEARCH Apply the syntax here and simply into ```sql -search | left join on (subquery search ...) +search | left join on [ search ... ] ``` -The `(subquery search ...)` is not a `expression`, it's `plan`, similar to the `relation` plan \ No newline at end of file +The `[ search ...]` is not a `expression`, it's `plan`, similar to the `relation` plan + +**Uncorrelated Subquery** + +An uncorrelated subquery is independent of the outer query. It is executed once, and the result is used by the outer query. +It's **less common** when using `ExistsSubquery` because `ExistsSubquery` typically checks for the presence of rows that are dependent on the outer query’s row. 
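+For reference, the integration tests added in this patch expect an uncorrelated `exists` to reduce to a Catalyst `Filter` whose condition is a bare `Exists` over the inner plan, with no correlation predicate. A minimal sketch of that expected plan shape (the table names here are illustrative, not part of the test fixtures):
+
+```scala
+import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
+import org.apache.spark.sql.catalyst.expressions.Exists
+import org.apache.spark.sql.catalyst.plans.logical.Filter
+
+// `source = outer | where exists [ source = inner ]` is expected to translate to:
+val outer = UnresolvedRelation(Seq("outer"))
+val inner = UnresolvedRelation(Seq("inner"))
+val expectedPlan = Filter(Exists(inner), outer) // inner plan is independent of the outer row
+```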
+ +There is a very special exists subquery which highlight by `(special uncorrelated exists)`: +```sql +SELECT 'nonEmpty' +FROM outer + WHERE EXISTS ( + SELECT * + FROM inner + ); +``` +Rewritten by PPL ExistsSubquery query: +```sql +source = outer +| where exists [ + source = inner + ] +| eval l = "nonEmpty" +| fields l +``` +This query just print "nonEmpty" if the inner table is not empty. \ No newline at end of file diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLExistsSubqueryITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLExistsSubqueryITSuite.scala new file mode 100644 index 000000000..81bdd99df --- /dev/null +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLExistsSubqueryITSuite.scala @@ -0,0 +1,373 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.flint.spark.ppl + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation, UnresolvedStar} +import org.apache.spark.sql.catalyst.expressions.{Alias, And, Ascending, Descending, EqualTo, Exists, GreaterThan, InSubquery, ListQuery, Literal, Not, Or, ScalarSubquery, SortOrder} +import org.apache.spark.sql.catalyst.plans.Inner +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.streaming.StreamTest + +class FlintSparkPPLExistsSubqueryITSuite + extends QueryTest + with LogicalPlanTestUtils + with FlintPPLSuite + with StreamTest { + + /** Test table and index name */ + private val outerTable = "spark_catalog.default.flint_ppl_test1" + private val innerTable = "spark_catalog.default.flint_ppl_test2" + private val nestedInnerTable = "spark_catalog.default.flint_ppl_test3" + + override def beforeAll(): Unit = { + super.beforeAll() + createPeopleTable(outerTable) + sql(s""" + | INSERT INTO $outerTable + | VALUES (1006, 'Tommy', 'Teacher', 'USA', 30000) + | """.stripMargin) + createWorkInformationTable(innerTable) + createOccupationTable(nestedInnerTable) + } + + protected override def afterEach(): Unit = { + super.afterEach() + // Stop all streaming jobs if any + spark.streams.active.foreach { job => + job.stop() + job.awaitTermination() + } + } + + test("test simple exists subquery") { + val frame = sql(s""" + | source = $outerTable + | | where exists [ + | source = $innerTable | where id = uid + | ] + | | sort - salary + | | fields id, name, salary + | """.stripMargin) + val results: Array[Row] = frame.collect() + val expectedResults: Array[Row] = Array( + Row(1002, "John", 120000), + Row(1003, "David", 120000), + Row(1000, "Jake", 100000), + Row(1005, "Jane", 90000), + Row(1006, "Tommy", 30000)) + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) + + val logicalPlan: LogicalPlan = frame.queryExecution.logical + + val outer = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test1")) + val inner = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test2")) + val existsSubquery = Filter( + Exists(Filter(EqualTo(UnresolvedAttribute("id"), UnresolvedAttribute("uid")), inner)), + outer) + val sortedPlan = Sort( + Seq(SortOrder(UnresolvedAttribute("salary"), Descending)), + global = true, + existsSubquery) + val expectedPlan = + Project( + Seq( + UnresolvedAttribute("id"), + UnresolvedAttribute("name"), + 
UnresolvedAttribute("salary")), + sortedPlan) + + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + + test("test not exists subquery") { + val frame = sql(s""" + | source = $outerTable + | | where not exists [ + | source = $innerTable | where id = uid + | ] + | | sort - salary + | | fields id, name, salary + | """.stripMargin) + val results: Array[Row] = frame.collect() + val expectedResults: Array[Row] = Array(Row(1001, "Hello", 70000), Row(1004, "David", 0)) + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) + + val logicalPlan: LogicalPlan = frame.queryExecution.logical + + val outer = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test1")) + val inner = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test2")) + val existsSubquery = + Filter( + Not( + Exists(Filter(EqualTo(UnresolvedAttribute("id"), UnresolvedAttribute("uid")), inner))), + outer) + val sortedPlan = Sort( + Seq(SortOrder(UnresolvedAttribute("salary"), Descending)), + global = true, + existsSubquery) + val expectedPlan = + Project( + Seq( + UnresolvedAttribute("id"), + UnresolvedAttribute("name"), + UnresolvedAttribute("salary")), + sortedPlan) + + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + + test("test empty exists subquery") { + var frame = sql(s""" + | source = $outerTable + | | where exists [ + | source = $innerTable | where uid = 0000 AND id = uid + | ] + | | sort - salary + | | fields id, name, salary + | """.stripMargin) + var results: Array[Row] = frame.collect() + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0)) + var expectedResults: Array[Row] = Array() + assert(results.sorted.sameElements(expectedResults.sorted)) + + frame = sql(s""" + source = $outerTable + | | where not exists [ + | source = $innerTable | where uid = 0000 AND id = uid + | ] + | | sort - salary + | | fields id, name, salary + | """.stripMargin) + results = frame.collect() + expectedResults = Array( + Row(1000, "Jake", 100000), + Row(1001, "Hello", 70000), + Row(1002, "John", 120000), + Row(1003, "David", 120000), + Row(1004, "David", 0), + Row(1005, "Jane", 90000), + Row(1006, "Tommy", 30000)) + assert(results.sorted.sameElements(expectedResults.sorted)) + } + + test("test uncorrelated exists subquery") { + var frame = sql(s""" + | source = $outerTable + | | where exists [ + | source = $innerTable | where like(name, 'J%') + | ] + | | sort - salary + | | fields id, name, salary + | """.stripMargin) + val results: Array[Row] = frame.collect() + assert(results.length == 7) + + val logicalPlan: LogicalPlan = frame.queryExecution.logical + + val outer = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test1")) + val inner = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test2")) + val existsSubquery = + Filter( + Exists( + Filter( + UnresolvedFunction( + "like", + Seq(UnresolvedAttribute("name"), Literal("J%")), + isDistinct = false), + inner)), + outer) + val sortedPlan = Sort( + Seq(SortOrder(UnresolvedAttribute("salary"), Descending)), + global = true, + existsSubquery) + val expectedPlan = + Project( + Seq( + UnresolvedAttribute("id"), + UnresolvedAttribute("name"), + UnresolvedAttribute("salary")), + sortedPlan) + + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + + frame = sql(s""" + | source = $outerTable + | | where not exists [ + | source = $innerTable | where like(name, 'J%') + | ] 
+ | | sort - salary + | | fields id, name, salary + | """.stripMargin) + assert(frame.collect().length == 0) + + frame = sql(s""" + | source = $outerTable + | | where exists [ + | source = $innerTable | where like(name, 'X%') + | ] + | | sort - salary + | | fields id, name, salary + | """.stripMargin) + assert(frame.collect().length == 0) + } + + test("uncorrelated exists subquery check the return content of inner table is empty or not") { + var frame = sql(s""" + | source = $outerTable + | | where exists [ + | source = $innerTable + | ] + | | eval constant = "Bala" + | | fields constant + | """.stripMargin) + val results: Array[Row] = frame.collect() + val expectedResults: Array[Row] = Array( + Row("Bala"), + Row("Bala"), + Row("Bala"), + Row("Bala"), + Row("Bala"), + Row("Bala"), + Row("Bala")) + assert(results.sameElements(expectedResults)) + + val logicalPlan: LogicalPlan = frame.queryExecution.logical + + val outer = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test1")) + val inner = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test2")) + val existsSubquery = Filter(Exists(inner), outer) + val evalProject = + Project(Seq(UnresolvedStar(None), Alias(Literal("Bala"), "constant")()), existsSubquery) + val expectedPlan = Project(Seq(UnresolvedAttribute("constant")), evalProject) + + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + + frame = sql(s""" + | source = $outerTable + | | where exists [ + | source = $innerTable | where uid = 999 + | ] + | | eval constant = "Bala" + | | fields constant + | """.stripMargin) + frame.show + assert(frame.collect().length == 0) + } + + test("test nested exists subquery") { + val frame = sql(s""" + | source = $outerTable + | | where exists [ + | source = $innerTable + | | where exists [ + | source = $nestedInnerTable + | | where $nestedInnerTable.occupation = $innerTable.occupation + | ] + | | where id = uid + | ] + | | sort - salary + | | fields id, name, salary + | """.stripMargin) + val results: Array[Row] = frame.collect() + val expectedResults: Array[Row] = Array( + Row(1003, "David", 120000), + Row(1002, "John", 120000), + Row(1000, "Jake", 100000), + Row(1005, "Jane", 90000), + Row(1006, "Tommy", 30000)) + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) + + val logicalPlan: LogicalPlan = frame.queryExecution.logical + + val outer = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test1")) + val inner1 = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test2")) + val inner2 = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test3")) + val existsSubqueryForOccupation = + Filter( + Exists( + Filter( + EqualTo( + UnresolvedAttribute("spark_catalog.default.flint_ppl_test3.occupation"), + UnresolvedAttribute("spark_catalog.default.flint_ppl_test2.occupation")), + inner2)), + inner1) + val existsSubqueryForId = + Filter( + Exists( + Filter( + EqualTo(UnresolvedAttribute("id"), UnresolvedAttribute("uid")), + existsSubqueryForOccupation)), + outer) + val sortedPlan: LogicalPlan = + Sort( + Seq(SortOrder(UnresolvedAttribute("salary"), Descending)), + global = true, + existsSubqueryForId) + val expectedPlan = + Project( + Seq( + UnresolvedAttribute("id"), + UnresolvedAttribute("name"), + UnresolvedAttribute("salary")), + sortedPlan) + + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + + test("test exists subquery with conjunction of conditions") { + 
val frame = sql(s""" + | source = $outerTable + | | where exists [ + | source = $innerTable + | | where id = uid AND + | $outerTable.name = $innerTable.name AND + | $outerTable.occupation = $innerTable.occupation + | ] + | | sort - salary + | | fields id, name, salary + | """.stripMargin) + val results: Array[Row] = frame.collect() + val expectedResults: Array[Row] = Array(Row(1003, "David", 120000), Row(1000, "Jake", 100000)) + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) + + val logicalPlan: LogicalPlan = frame.queryExecution.logical + + val outer = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test1")) + val inner = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test2")) + val existsSubquery = Filter( + Exists( + Filter( + And( + And( + EqualTo(UnresolvedAttribute("id"), UnresolvedAttribute("uid")), + EqualTo( + UnresolvedAttribute("spark_catalog.default.flint_ppl_test1.name"), + UnresolvedAttribute("spark_catalog.default.flint_ppl_test2.name"))), + EqualTo( + UnresolvedAttribute("spark_catalog.default.flint_ppl_test1.occupation"), + UnresolvedAttribute("spark_catalog.default.flint_ppl_test2.occupation"))), + inner)), + outer) + val sortedPlan = Sort( + Seq(SortOrder(UnresolvedAttribute("salary"), Descending)), + global = true, + existsSubquery) + val expectedPlan = + Project( + Seq( + UnresolvedAttribute("id"), + UnresolvedAttribute("name"), + UnresolvedAttribute("salary")), + sortedPlan) + + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } +} diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLInSubqueryITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLInSubqueryITSuite.scala index ee08e692a..9d8c2c12d 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLInSubqueryITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLInSubqueryITSuite.scala @@ -305,8 +305,6 @@ class FlintSparkPPLInSubqueryITSuite | | sort - salary | | fields id, name, salary | """.stripMargin) - frame.show() - frame.explain(true) val results: Array[Row] = frame.collect() val expectedResults: Array[Row] = Array(Row(1003, "David", 120000), Row(1002, "John", 120000), Row(1006, "Tommy", 30000)) @@ -358,7 +356,6 @@ class FlintSparkPPLInSubqueryITSuite | $innerTable | | fields a.id, a.name, a.salary | """.stripMargin) - frame.explain(true) val results: Array[Row] = frame.collect() val expectedResults: Array[Row] = Array(Row(1003, "David", 120000), Row(1002, "John", 120000), Row(1006, "Tommy", 30000)) diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 index dd43007f4..2b916a245 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -113,6 +113,7 @@ APPEND: 'APPEND'; CASE: 'CASE'; ELSE: 'ELSE'; IN: 'IN'; +EXISTS: 'EXISTS'; // LOGICAL KEYWORDS NOT: 'NOT'; diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index af7e0ec14..7a6f14839 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -351,24 +351,21 @@ percentileAggFunction // expressions expression : logicalExpression - | 
comparisonExpression | valueExpression ; logicalExpression - : comparisonExpression # comparsion - | NOT logicalExpression # logicalNot + : NOT logicalExpression # logicalNot + | comparisonExpression # comparsion | left = logicalExpression (AND)? right = logicalExpression # logicalAnd | left = logicalExpression OR right = logicalExpression # logicalOr | left = logicalExpression XOR right = logicalExpression # logicalXor | booleanExpression # booleanExpr - | isEmptyExpression # isEmptyExpr ; comparisonExpression : left = valueExpression comparisonOperator right = valueExpression # compareExpr | valueExpression IN valueList # inExpr - | valueExpressionList NOT? IN LT_SQR_PRTHS subSearch RT_SQR_PRTHS # inSubqueryExpr ; valueExpressionList @@ -397,7 +394,10 @@ positionFunction ; booleanExpression - : booleanFunctionCall + : booleanFunctionCall # booleanFunctionCallExpr + | isEmptyExpression # isEmptyExpr + | valueExpressionList NOT? IN LT_SQR_PRTHS subSearch RT_SQR_PRTHS # inSubqueryExpr + | EXISTS LT_SQR_PRTHS subSearch RT_SQR_PRTHS # existsSubqueryExpr ; isEmptyExpression diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index d7db9d0c8..c361ded08 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -19,7 +19,8 @@ import org.opensearch.sql.ast.expression.FieldsMapping; import org.opensearch.sql.ast.expression.Function; import org.opensearch.sql.ast.expression.In; -import org.opensearch.sql.ast.expression.InSubquery; +import org.opensearch.sql.ast.expression.subquery.ExistsSubquery; +import org.opensearch.sql.ast.expression.subquery.InSubquery; import org.opensearch.sql.ast.expression.Interval; import org.opensearch.sql.ast.expression.IsEmpty; import org.opensearch.sql.ast.expression.Let; @@ -28,7 +29,7 @@ import org.opensearch.sql.ast.expression.Not; import org.opensearch.sql.ast.expression.Or; import org.opensearch.sql.ast.expression.QualifiedName; -import org.opensearch.sql.ast.expression.ScalarSubquery; +import org.opensearch.sql.ast.expression.subquery.ScalarSubquery; import org.opensearch.sql.ast.expression.Span; import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedAttribute; @@ -302,4 +303,8 @@ public T visitFillNull(FillNull fillNull, C context) { public T visitScalarSubquery(ScalarSubquery node, C context) { return visitChildren(node, context); } + + public T visitExistsSubquery(ExistsSubquery node, C context) { + return visitChildren(node, context); + } } diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/subquery/ExistsSubquery.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/subquery/ExistsSubquery.java new file mode 100644 index 000000000..bdd1683ee --- /dev/null +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/subquery/ExistsSubquery.java @@ -0,0 +1,29 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.expression.subquery; + +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.UnresolvedExpression; +import 
org.opensearch.sql.ast.tree.UnresolvedPlan; + +import java.util.List; + +@Getter +@ToString +@EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor +public class ExistsSubquery extends UnresolvedExpression { + private final UnresolvedPlan query; + + @Override + public R accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitExistsSubquery(this, context); + } +} diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/InSubquery.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/subquery/InSubquery.java similarity index 87% rename from ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/InSubquery.java rename to ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/subquery/InSubquery.java index ed40e4b45..4a15453e5 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/InSubquery.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/subquery/InSubquery.java @@ -3,16 +3,16 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.sql.ast.expression; +package org.opensearch.sql.ast.expression.subquery; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.tree.UnresolvedPlan; -import java.util.Arrays; import java.util.List; @Getter diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/ScalarSubquery.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/subquery/ScalarSubquery.java similarity index 84% rename from ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/ScalarSubquery.java rename to ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/subquery/ScalarSubquery.java index cccadb717..7c3721ffb 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/ScalarSubquery.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/expression/subquery/ScalarSubquery.java @@ -3,13 +3,14 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.sql.ast.expression; +package org.opensearch.sql.ast.expression.subquery; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.tree.UnresolvedPlan; @Getter diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java index 38dc4092e..902fc72e3 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java @@ -13,6 +13,7 @@ import org.apache.spark.sql.catalyst.expressions.Ascending$; import org.apache.spark.sql.catalyst.expressions.CaseWhen; import org.apache.spark.sql.catalyst.expressions.Descending$; +import org.apache.spark.sql.catalyst.expressions.Exists$; import org.apache.spark.sql.catalyst.expressions.Expression; import org.apache.spark.sql.catalyst.expressions.InSubquery$; import org.apache.spark.sql.catalyst.expressions.ListQuery$; @@ -40,7 +41,8 @@ import 
org.opensearch.sql.ast.expression.FieldsMapping; import org.opensearch.sql.ast.expression.Function; import org.opensearch.sql.ast.expression.In; -import org.opensearch.sql.ast.expression.InSubquery; +import org.opensearch.sql.ast.expression.subquery.ExistsSubquery; +import org.opensearch.sql.ast.expression.subquery.InSubquery; import org.opensearch.sql.ast.expression.Interval; import org.opensearch.sql.ast.expression.IsEmpty; import org.opensearch.sql.ast.expression.Let; @@ -49,7 +51,7 @@ import org.opensearch.sql.ast.expression.Or; import org.opensearch.sql.ast.expression.ParseMethod; import org.opensearch.sql.ast.expression.QualifiedName; -import org.opensearch.sql.ast.expression.ScalarSubquery; +import org.opensearch.sql.ast.expression.subquery.ScalarSubquery; import org.opensearch.sql.ast.expression.Span; import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.expression.When; @@ -813,5 +815,19 @@ public Expression visitScalarSubquery(ScalarSubquery node, CatalystPlanContext c Option.empty()); return context.getNamedParseExpressions().push(scalarSubQuery); } + + @Override + public Expression visitExistsSubquery(ExistsSubquery node, CatalystPlanContext context) { + CatalystPlanContext innerContext = new CatalystPlanContext(); + UnresolvedPlan outerPlan = node.getQuery(); + LogicalPlan subSearch = CatalystQueryPlanVisitor.this.visitSubSearch(outerPlan, innerContext); + Expression existsSubQuery = Exists$.MODULE$.apply( + subSearch, + seq(new java.util.ArrayList()), + NamedExpression.newExprId(), + seq(new java.util.ArrayList()), + Option.empty()); + return context.getNamedParseExpressions().push(existsSubQuery); + } } } diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 4b4697b45..3b98edd77 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -22,7 +22,8 @@ import org.opensearch.sql.ast.expression.EqualTo; import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Function; -import org.opensearch.sql.ast.expression.InSubquery; +import org.opensearch.sql.ast.expression.subquery.ExistsSubquery; +import org.opensearch.sql.ast.expression.subquery.InSubquery; import org.opensearch.sql.ast.expression.Interval; import org.opensearch.sql.ast.expression.IntervalUnit; import org.opensearch.sql.ast.expression.IsEmpty; @@ -31,7 +32,7 @@ import org.opensearch.sql.ast.expression.Not; import org.opensearch.sql.ast.expression.Or; import org.opensearch.sql.ast.expression.QualifiedName; -import org.opensearch.sql.ast.expression.ScalarSubquery; +import org.opensearch.sql.ast.expression.subquery.ScalarSubquery; import org.opensearch.sql.ast.expression.Span; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.expression.UnresolvedArgument; @@ -393,6 +394,11 @@ public UnresolvedExpression visitScalarSubqueryExpr(OpenSearchPPLParser.ScalarSu return new ScalarSubquery(astBuilder.visitSubSearch(ctx.subSearch())); } + @Override + public UnresolvedExpression visitExistsSubqueryExpr(OpenSearchPPLParser.ExistsSubqueryExprContext ctx) { + return new ExistsSubquery(astBuilder.visitSubSearch(ctx.subSearch())); + } + private QualifiedName visitIdentifiers(List ctx) { return new QualifiedName( ctx.stream() diff --git 
a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanExistsSubqueryTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanExistsSubqueryTranslatorTestSuite.scala new file mode 100644 index 000000000..02dfe1096 --- /dev/null +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanExistsSubqueryTranslatorTestSuite.scala @@ -0,0 +1,315 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.flint.spark.ppl + +import org.opensearch.flint.spark.ppl.PlaneUtils.plan +import org.opensearch.sql.ppl.{CatalystPlanContext, CatalystQueryPlanVisitor} +import org.scalatest.matchers.should.Matchers + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation, UnresolvedStar} +import org.apache.spark.sql.catalyst.expressions.{Alias, And, Ascending, Descending, EqualTo, Exists, GreaterThanOrEqual, LessThan, Literal, Not, SortOrder} +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical._ + +class PPLLogicalPlanExistsSubqueryTranslatorTestSuite + extends SparkFunSuite + with PlanTest + with LogicalPlanTestUtils + with Matchers { + + // Assume outer table contains fields [a, b] + // and inner table contains fields [c, d] + private val planTransformer = new CatalystQueryPlanVisitor() + private val pplParser = new PPLSyntaxParser() + + test("test where exists (select * from inner where a = c)") { + val context = new CatalystPlanContext + val logPlan = + planTransformer.visit( + plan( + pplParser, + s""" + | source = spark_catalog.default.outer + | | where exists [ + | source = spark_catalog.default.inner | where a = c + | ] + | | sort - a + | | fields a, c + | """.stripMargin), + context) + val outer = UnresolvedRelation(Seq("spark_catalog", "default", "outer")) + val inner = UnresolvedRelation(Seq("spark_catalog", "default", "inner")) + val subquery = + Filter( + Exists(Filter(EqualTo(UnresolvedAttribute("a"), UnresolvedAttribute("c")), inner)), + outer) + val sortedPlan: LogicalPlan = + Sort(Seq(SortOrder(UnresolvedAttribute("a"), Descending)), global = true, subquery) + val expectedPlan = + Project(Seq(UnresolvedAttribute("a"), UnresolvedAttribute("c")), sortedPlan) + + comparePlans(expectedPlan, logPlan, false) + } + + test("test where exists (select * from inner where a = c and b = d)") { + val context = new CatalystPlanContext + val logPlan = + planTransformer.visit( + plan( + pplParser, + s""" + | source = spark_catalog.default.outer + | | where exists [ + | source = spark_catalog.default.inner | where a = c AND b = d + | ] + | | sort - a + | """.stripMargin), + context) + val outer = UnresolvedRelation(Seq("spark_catalog", "default", "outer")) + val inner = UnresolvedRelation(Seq("spark_catalog", "default", "inner")) + val existsSubquery = + Filter( + Exists( + Filter( + And( + EqualTo(UnresolvedAttribute("a"), UnresolvedAttribute("c")), + EqualTo(UnresolvedAttribute("b"), UnresolvedAttribute("d"))), + inner)), + outer) + val sortedPlan: LogicalPlan = + Sort(Seq(SortOrder(UnresolvedAttribute("a"), Descending)), global = true, existsSubquery) + val expectedPlan = Project(Seq(UnresolvedStar(None)), sortedPlan) + + comparePlans(expectedPlan, logPlan, false) + } + + test("test where not exists (select * from inner where a = c)") { + val context = new CatalystPlanContext + val logPlan = + 
planTransformer.visit( + plan( + pplParser, + s""" + | source = spark_catalog.default.outer + | | where not exists [ + | source = spark_catalog.default.inner | where a = c + | ] + | | sort - a + | | fields a, c + | """.stripMargin), + context) + val outer = UnresolvedRelation(Seq("spark_catalog", "default", "outer")) + val inner = UnresolvedRelation(Seq("spark_catalog", "default", "inner")) + val subquery = + Filter( + Not(Exists(Filter(EqualTo(UnresolvedAttribute("a"), UnresolvedAttribute("c")), inner))), + outer) + val sortedPlan: LogicalPlan = + Sort(Seq(SortOrder(UnresolvedAttribute("a"), Descending)), global = true, subquery) + val expectedPlan = + Project(Seq(UnresolvedAttribute("a"), UnresolvedAttribute("c")), sortedPlan) + + comparePlans(expectedPlan, logPlan, false) + } + + test("test where not exists (select * from inner where a = c and b = d)") { + val context = new CatalystPlanContext + val logPlan = + planTransformer.visit( + plan( + pplParser, + s""" + | source = spark_catalog.default.outer + | | where not exists [ + | source = spark_catalog.default.inner | where a = c AND b = d + | ] + | | sort - a + | """.stripMargin), + context) + val outer = UnresolvedRelation(Seq("spark_catalog", "default", "outer")) + val inner = UnresolvedRelation(Seq("spark_catalog", "default", "inner")) + val existsSubquery = + Filter( + Not( + Exists( + Filter( + And( + EqualTo(UnresolvedAttribute("a"), UnresolvedAttribute("c")), + EqualTo(UnresolvedAttribute("b"), UnresolvedAttribute("d"))), + inner))), + outer) + val sortedPlan: LogicalPlan = + Sort(Seq(SortOrder(UnresolvedAttribute("a"), Descending)), global = true, existsSubquery) + val expectedPlan = Project(Seq(UnresolvedStar(None)), sortedPlan) + + comparePlans(expectedPlan, logPlan, false) + } + + // Assume outer table contains fields [a, b] + // and inner1 table contains fields [c, d] + // and inner2 table contains fields [e, f] + test("test nested exists subquery") { + val context = new CatalystPlanContext + val logPlan = + planTransformer.visit( + plan( + pplParser, + s""" + | source = spark_catalog.default.outer + | | where exists [ + | source = spark_catalog.default.inner1 + | | where exists [ + | source = spark_catalog.default.inner2 + | | where c = e + | ] + | | where a = c + | ] + | | sort - a + | | fields a, b + | """.stripMargin), + context) + val outer = UnresolvedRelation(Seq("spark_catalog", "default", "outer")) + val inner1 = UnresolvedRelation(Seq("spark_catalog", "default", "inner1")) + val inner2 = UnresolvedRelation(Seq("spark_catalog", "default", "inner2")) + val subqueryOuter = + Filter( + Exists(Filter(EqualTo(UnresolvedAttribute("c"), UnresolvedAttribute("e")), inner2)), + inner1) + val subqueryInner = + Filter( + Exists( + Filter(EqualTo(UnresolvedAttribute("a"), UnresolvedAttribute("c")), subqueryOuter)), + outer) + val sortedPlan: LogicalPlan = + Sort(Seq(SortOrder(UnresolvedAttribute("a"), Descending)), global = true, subqueryInner) + val expectedPlan = + Project(Seq(UnresolvedAttribute("a"), UnresolvedAttribute("b")), sortedPlan) + + comparePlans(expectedPlan, logPlan, false) + } + + test("test tpch q4: exists subquery with aggregation") { + val context = new CatalystPlanContext + val logPlan = + planTransformer.visit( + plan( + pplParser, + s""" + | source = orders + | | where o_orderdate >= "1993-07-01" AND o_orderdate < "1993-10-01" + | AND exists [ + | source = lineitem + | | where l_orderkey = o_orderkey + | AND l_commitdate < l_receiptdate + | ] + | | stats count(1) as order_count by o_orderpriority + | | 
sort o_orderpriority + | | fields o_orderpriority, order_count + | """.stripMargin), + context) + + val outer = UnresolvedRelation(Seq("orders")) + val inner = UnresolvedRelation(Seq("lineitem")) + val inSubquery = + Filter( + And( + And( + GreaterThanOrEqual(UnresolvedAttribute("o_orderdate"), Literal("1993-07-01")), + LessThan(UnresolvedAttribute("o_orderdate"), Literal("1993-10-01"))), + Exists( + Filter( + And( + EqualTo(UnresolvedAttribute("l_orderkey"), UnresolvedAttribute("o_orderkey")), + LessThan( + UnresolvedAttribute("l_commitdate"), + UnresolvedAttribute("l_receiptdate"))), + inner))), + outer) + val o_orderpriorityAlias = Alias(UnresolvedAttribute("o_orderpriority"), "o_orderpriority")() + val groupByAttributes = Seq(o_orderpriorityAlias) + val aggregateExpressions = + Alias( + UnresolvedFunction(Seq("COUNT"), Seq(Literal(1)), isDistinct = false), + "order_count")() + val aggregatePlan = + Aggregate(groupByAttributes, Seq(aggregateExpressions, o_orderpriorityAlias), inSubquery) + val sortedPlan: LogicalPlan = + Sort( + Seq(SortOrder(UnresolvedAttribute("o_orderpriority"), Ascending)), + global = true, + aggregatePlan) + val expectedPlan = Project( + Seq(UnresolvedAttribute("o_orderpriority"), UnresolvedAttribute("order_count")), + sortedPlan) + comparePlans(expectedPlan, logPlan, false) + } + + // We can support q21 when the table alias is supported + ignore("test tpch q21 (partial): multiple exists subquery") { + // select + // s_name, + // count(*) as numwait + // from + // supplier, + // lineitem l1, + // where + // s_suppkey = l1.l_suppkey + // and l1.l_receiptdate > l1.l_commitdate + // and exists ( + // select + // * + // from + // lineitem l2 + // where + // l2.l_orderkey = l1.l_orderkey + // and l2.l_suppkey <> l1.l_suppkey + // ) + // and not exists ( + // select + // * + // from + // lineitem l3 + // where + // l3.l_orderkey = l1.l_orderkey + // and l3.l_suppkey <> l1.l_suppkey + // and l3.l_receiptdate > l3.l_commitdate + // ) + // group by + // s_name + // order by + // numwait desc, + // s_name + // limit 100 + val context = new CatalystPlanContext + val logPlan = + planTransformer.visit( + plan( + pplParser, + s""" + | source = supplier + | | join left=s right=l1 on s_suppkey = l1.l_suppkey + | lineitem as l1 + | | where l1.l_receiptdate > l1.l_commitdate + | | where exists [ + | source = lineitem as l2 + | | where l2.l_orderkey = l1.l_orderkey and + | l2.l_suppkey <> l1.l_suppkey + | ] + | | where not exists [ + | source = lineitem as l3 + | | where l3.l_orderkey = l1.l_orderkey and + | l3.l_suppkey <> l1.l_suppkey and + | l3.l_receiptdate > l3.l_commitdate + | ] + | | stats count(1) as numwait by s_name + | | sort - numwait, s_name + | | fields s_name, numwait + | | limit 100 + | """.stripMargin), + context) + } +} From a3453739d34053f2700b168e4c5a762399fc2bf6 Mon Sep 17 00:00:00 2001 From: Louis Chu Date: Mon, 14 Oct 2024 07:16:03 -0700 Subject: [PATCH 4/5] Support recovery for index with external scheduler (#717) * Support recovery for index with external scheduler Signed-off-by: Louis Chu * Improve default option update logic Signed-off-by: Louis Chu * Resolve comments Signed-off-by: Louis Chu * Add index metrics Signed-off-by: Louis Chu * Remove debugging log and refactor updateSchedulerMode Signed-off-by: Louis Chu * refactor metrics with aop Signed-off-by: Louis Chu * Add more IT Signed-off-by: Louis Chu --------- Signed-off-by: Louis Chu --- build.sbt | 2 +- .../flint/core/metrics/MetricsUtil.java | 90 +++++--- .../metrics/source/FlintMetricSource.scala 
| 14 +- .../opensearch/flint/core/FlintOptions.java | 7 +- .../flint/core/metrics/MetricsUtilTest.java | 114 +++++++--- .../sql/flint/config/FlintSparkConf.scala | 8 +- .../opensearch/flint/spark/FlintSpark.scala | 169 ++++++++++---- .../flint/spark/FlintSparkIndexBuilder.scala | 64 +----- .../flint/spark/FlintSparkIndexFactory.scala | 21 ++ .../flint/spark/FlintSparkIndexOptions.scala | 64 +++++- .../refresh/IncrementalIndexRefresh.scala | 30 ++- .../refresh/util/RefreshMetricsAspect.scala | 62 +++++ .../refresh/util/RefreshMetricsHelper.scala | 75 ++++++ .../scheduler/AsyncQuerySchedulerBuilder.java | 2 +- ...intSparkJobExternalSchedulingService.scala | 68 +++--- ...intSparkJobInternalSchedulingService.scala | 9 +- .../FlintSparkJobSchedulingService.scala | 9 + .../util/RefreshQueryGenerator.scala | 7 +- ...FlintSparkMaterializedViewAstBuilder.scala | 1 - .../flint/config/FlintSparkConfSuite.scala | 13 ++ .../spark/FlintSparkIndexBuilderSuite.scala | 214 ++++++++++++------ .../util/RefreshQueryGeneratorSuite.scala | 22 +- .../apache/spark/sql/FlintJobITSuite.scala | 45 ++++ .../FlintSparkCoveringIndexITSuite.scala | 4 +- .../FlintSparkMaterializedViewITSuite.scala | 4 +- ...FlintSparkMaterializedViewSqlITSuite.scala | 4 +- .../FlintSparkSkippingIndexITSuite.scala | 5 +- .../spark/FlintSparkUpdateIndexITSuite.scala | 118 ++++++++++ 28 files changed, 959 insertions(+), 286 deletions(-) create mode 100644 flint-spark-integration/src/main/scala/org/opensearch/flint/spark/refresh/util/RefreshMetricsAspect.scala create mode 100644 flint-spark-integration/src/main/scala/org/opensearch/flint/spark/refresh/util/RefreshMetricsHelper.scala diff --git a/build.sbt b/build.sbt index 73fb481a6..0a7b8fcb0 100644 --- a/build.sbt +++ b/build.sbt @@ -117,7 +117,7 @@ lazy val flintCommons = (project in file("flint-commons")) "org.scalatest" %% "scalatest" % "3.2.15" % "test", "org.scalatest" %% "scalatest-flatspec" % "3.2.15" % "test", "org.scalatestplus" %% "mockito-4-6" % "3.2.15.0" % "test", - "org.projectlombok" % "lombok" % "1.18.30", + "org.projectlombok" % "lombok" % "1.18.30" % "provided", ), libraryDependencies ++= deps(sparkVersion), publish / skip := true, diff --git a/flint-core/src/main/java/org/opensearch/flint/core/metrics/MetricsUtil.java b/flint-core/src/main/java/org/opensearch/flint/core/metrics/MetricsUtil.java index 8e63992f5..81a482d5e 100644 --- a/flint-core/src/main/java/org/opensearch/flint/core/metrics/MetricsUtil.java +++ b/flint-core/src/main/java/org/opensearch/flint/core/metrics/MetricsUtil.java @@ -11,6 +11,7 @@ import com.codahale.metrics.Timer; import org.apache.spark.SparkEnv; import org.apache.spark.metrics.source.FlintMetricSource; +import org.apache.spark.metrics.source.FlintIndexMetricSource; import org.apache.spark.metrics.source.Source; import scala.collection.Seq; @@ -33,10 +34,20 @@ private MetricsUtil() { * If the counter does not exist, it is created before being incremented. * * @param metricName The name of the metric for which the counter is incremented. - * This name is used to retrieve or create the counter. */ public static void incrementCounter(String metricName) { - Counter counter = getOrCreateCounter(metricName); + incrementCounter(metricName, false); + } + + /** + * Increments the Counter metric associated with the given metric name. + * If the counter does not exist, it is created before being incremented. + * + * @param metricName The name of the metric for which the counter is incremented. 
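+   *                   For illustration, {@code incrementCounter("testPrefix.2xx.count", true)} (the metric
+   *                   name used in the unit tests) bumps the counter in the index-specific registry.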
+ * @param isIndexMetric Whether this metric is an index-specific metric. + */ + public static void incrementCounter(String metricName, boolean isIndexMetric) { + Counter counter = getOrCreateCounter(metricName, isIndexMetric); if (counter != null) { counter.inc(); } @@ -48,7 +59,17 @@ public static void incrementCounter(String metricName) { * @param metricName The name of the metric counter to be decremented. */ public static void decrementCounter(String metricName) { - Counter counter = getOrCreateCounter(metricName); + decrementCounter(metricName, false); + } + + /** + * Decrements the value of the specified metric counter by one, if the counter exists and its current count is greater than zero. + * + * @param metricName The name of the metric counter to be decremented. + * @param isIndexMetric Whether this metric is an index-specific metric. + */ + public static void decrementCounter(String metricName, boolean isIndexMetric) { + Counter counter = getOrCreateCounter(metricName, isIndexMetric); if (counter != null && counter.getCount() > 0) { counter.dec(); } @@ -56,21 +77,30 @@ public static void decrementCounter(String metricName) { /** * Retrieves a {@link Timer.Context} for the specified metric name, creating a new timer if one does not already exist. - * This context can be used to measure the duration of a particular operation or event. * * @param metricName The name of the metric timer to retrieve the context for. * @return A {@link Timer.Context} instance for timing operations, or {@code null} if the timer could not be created or retrieved. */ public static Timer.Context getTimerContext(String metricName) { - Timer timer = getOrCreateTimer(metricName); + return getTimerContext(metricName, false); + } + + /** + * Retrieves a {@link Timer.Context} for the specified metric name, creating a new timer if one does not already exist. + * + * @param metricName The name of the metric timer to retrieve the context for. + * @param isIndexMetric Whether this metric is an index-specific metric. + * @return A {@link Timer.Context} instance for timing operations, or {@code null} if the timer could not be created or retrieved. + */ + public static Timer.Context getTimerContext(String metricName, boolean isIndexMetric) { + Timer timer = getOrCreateTimer(metricName, isIndexMetric); return timer != null ? timer.time() : null; } /** - * Stops the timer associated with the given {@link Timer.Context}, effectively recording the elapsed time since the timer was started - * and returning the duration. If the context is {@code null}, this method does nothing and returns {@code null}. + * Stops the timer associated with the given {@link Timer.Context}. * - * @param context The {@link Timer.Context} to stop. May be {@code null}, in which case this method has no effect and returns {@code null}. + * @param context The {@link Timer.Context} to stop. May be {@code null}. * @return The elapsed time in nanoseconds since the timer was started, or {@code null} if the context was {@code null}. */ public static Long stopTimer(Timer.Context context) { @@ -79,13 +109,23 @@ public static Long stopTimer(Timer.Context context) { /** * Registers a gauge metric with the provided name and value. - * The gauge will reflect the current value of the AtomicInteger provided. * * @param metricName The name of the gauge metric to register. - * @param value The AtomicInteger whose current value should be reflected by the gauge. + * @param value The AtomicInteger whose current value should be reflected by the gauge. 
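+   *              For illustration, {@code registerGauge("test.gauge", new AtomicInteger(0))} exposes
+   *              a gauge whose reported value tracks the AtomicInteger as it changes.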
*/ public static void registerGauge(String metricName, final AtomicInteger value) { - MetricRegistry metricRegistry = getMetricRegistry(); + registerGauge(metricName, value, false); + } + + /** + * Registers a gauge metric with the provided name and value. + * + * @param metricName The name of the gauge metric to register. + * @param value The AtomicInteger whose current value should be reflected by the gauge. + * @param isIndexMetric Whether this metric is an index-specific metric. + */ + public static void registerGauge(String metricName, final AtomicInteger value, boolean isIndexMetric) { + MetricRegistry metricRegistry = getMetricRegistry(isIndexMetric); if (metricRegistry == null) { LOG.warning("MetricRegistry not available, cannot register gauge: " + metricName); return; @@ -93,39 +133,37 @@ public static void registerGauge(String metricName, final AtomicInteger value) { metricRegistry.register(metricName, (Gauge) value::get); } - // Retrieves or creates a new counter for the given metric name - private static Counter getOrCreateCounter(String metricName) { - MetricRegistry metricRegistry = getMetricRegistry(); + private static Counter getOrCreateCounter(String metricName, boolean isIndexMetric) { + MetricRegistry metricRegistry = getMetricRegistry(isIndexMetric); return metricRegistry != null ? metricRegistry.counter(metricName) : null; } - // Retrieves or creates a new Timer for the given metric name - private static Timer getOrCreateTimer(String metricName) { - MetricRegistry metricRegistry = getMetricRegistry(); + private static Timer getOrCreateTimer(String metricName, boolean isIndexMetric) { + MetricRegistry metricRegistry = getMetricRegistry(isIndexMetric); return metricRegistry != null ? metricRegistry.timer(metricName) : null; } - // Retrieves the MetricRegistry from the current Spark environment. - private static MetricRegistry getMetricRegistry() { + private static MetricRegistry getMetricRegistry(boolean isIndexMetric) { SparkEnv sparkEnv = SparkEnv.get(); if (sparkEnv == null) { LOG.warning("Spark environment not available, cannot access MetricRegistry."); return null; } - FlintMetricSource flintMetricSource = getOrInitFlintMetricSource(sparkEnv); - return flintMetricSource.metricRegistry(); + Source metricSource = isIndexMetric ? 
+ getOrInitMetricSource(sparkEnv, FlintMetricSource.FLINT_INDEX_METRIC_SOURCE_NAME(), FlintIndexMetricSource::new) : + getOrInitMetricSource(sparkEnv, FlintMetricSource.FLINT_METRIC_SOURCE_NAME(), FlintMetricSource::new); + return metricSource.metricRegistry(); } - // Gets or initializes the FlintMetricSource - private static FlintMetricSource getOrInitFlintMetricSource(SparkEnv sparkEnv) { - Seq metricSourceSeq = sparkEnv.metricsSystem().getSourcesByName(FlintMetricSource.FLINT_METRIC_SOURCE_NAME()); + private static Source getOrInitMetricSource(SparkEnv sparkEnv, String sourceName, java.util.function.Supplier sourceSupplier) { + Seq metricSourceSeq = sparkEnv.metricsSystem().getSourcesByName(sourceName); if (metricSourceSeq == null || metricSourceSeq.isEmpty()) { - FlintMetricSource metricSource = new FlintMetricSource(); + Source metricSource = sourceSupplier.get(); sparkEnv.metricsSystem().registerSource(metricSource); return metricSource; } - return (FlintMetricSource) metricSourceSeq.head(); + return metricSourceSeq.head(); } } diff --git a/flint-core/src/main/scala/apache/spark/metrics/source/FlintMetricSource.scala b/flint-core/src/main/scala/apache/spark/metrics/source/FlintMetricSource.scala index d5f241572..7bdfa11e6 100644 --- a/flint-core/src/main/scala/apache/spark/metrics/source/FlintMetricSource.scala +++ b/flint-core/src/main/scala/apache/spark/metrics/source/FlintMetricSource.scala @@ -7,13 +7,25 @@ package org.apache.spark.metrics.source import com.codahale.metrics.MetricRegistry -class FlintMetricSource() extends Source { +/** + * Metric source for general Flint metrics. + */ +class FlintMetricSource extends Source { // Implementing the Source trait override val sourceName: String = FlintMetricSource.FLINT_METRIC_SOURCE_NAME override val metricRegistry: MetricRegistry = new MetricRegistry } +/** + * Metric source for Flint index-specific metrics. + */ +class FlintIndexMetricSource extends Source { + override val sourceName: String = FlintMetricSource.FLINT_INDEX_METRIC_SOURCE_NAME + override val metricRegistry: MetricRegistry = new MetricRegistry +} + object FlintMetricSource { val FLINT_METRIC_SOURCE_NAME = "Flint" // Default source name + val FLINT_INDEX_METRIC_SOURCE_NAME = "FlintIndex" // Index specific source name } diff --git a/flint-core/src/main/scala/org/opensearch/flint/core/FlintOptions.java b/flint-core/src/main/scala/org/opensearch/flint/core/FlintOptions.java index e505cf45d..6ddc6ae9c 100644 --- a/flint-core/src/main/scala/org/opensearch/flint/core/FlintOptions.java +++ b/flint-core/src/main/scala/org/opensearch/flint/core/FlintOptions.java @@ -105,8 +105,11 @@ public class FlintOptions implements Serializable { public static final String DEFAULT_SUPPORT_SHARD = "true"; + private static final String UNKNOWN = "UNKNOWN"; + public static final String BULK_REQUEST_RATE_LIMIT_PER_NODE = "bulkRequestRateLimitPerNode"; public static final String DEFAULT_BULK_REQUEST_RATE_LIMIT_PER_NODE = "0"; + public static final String DEFAULT_EXTERNAL_SCHEDULER_INTERVAL = "5 minutes"; public FlintOptions(Map options) { this.options = options; @@ -185,9 +188,9 @@ public String getDataSourceName() { * @return the AWS accountId */ public String getAWSAccountId() { - String clusterName = System.getenv().getOrDefault("FLINT_CLUSTER_NAME", ""); + String clusterName = System.getenv().getOrDefault("FLINT_CLUSTER_NAME", UNKNOWN + ":" + UNKNOWN); String[] parts = clusterName.split(":"); - return parts.length == 2 ? parts[0] : ""; + return parts.length == 2 ? 
parts[0] : UNKNOWN; } public String getSystemIndexName() { diff --git a/flint-core/src/test/java/org/opensearch/flint/core/metrics/MetricsUtilTest.java b/flint-core/src/test/java/org/opensearch/flint/core/metrics/MetricsUtilTest.java index 3b8940536..b54269ce0 100644 --- a/flint-core/src/test/java/org/opensearch/flint/core/metrics/MetricsUtilTest.java +++ b/flint-core/src/test/java/org/opensearch/flint/core/metrics/MetricsUtilTest.java @@ -5,6 +5,8 @@ import com.codahale.metrics.Timer; import org.apache.spark.SparkEnv; import org.apache.spark.metrics.source.FlintMetricSource; +import org.apache.spark.metrics.source.FlintIndexMetricSource; +import org.apache.spark.metrics.source.Source; import org.junit.Test; import org.junit.jupiter.api.Assertions; import org.mockito.MockedStatic; @@ -26,55 +28,73 @@ public class MetricsUtilTest { @Test public void testIncrementDecrementCounter() { + testIncrementDecrementCounterHelper(false); + } + + @Test + public void testIncrementDecrementCounterForIndexMetrics() { + testIncrementDecrementCounterHelper(true); + } + + private void testIncrementDecrementCounterHelper(boolean isIndexMetric) { try (MockedStatic sparkEnvMock = mockStatic(SparkEnv.class)) { // Mock SparkEnv SparkEnv sparkEnv = mock(SparkEnv.class, RETURNS_DEEP_STUBS); sparkEnvMock.when(SparkEnv::get).thenReturn(sparkEnv); - // Mock FlintMetricSource - FlintMetricSource flintMetricSource = Mockito.spy(new FlintMetricSource()); - when(sparkEnv.metricsSystem().getSourcesByName(FlintMetricSource.FLINT_METRIC_SOURCE_NAME()).head()) - .thenReturn(flintMetricSource); + // Mock appropriate MetricSource + String sourceName = isIndexMetric ? FlintMetricSource.FLINT_INDEX_METRIC_SOURCE_NAME() : FlintMetricSource.FLINT_METRIC_SOURCE_NAME(); + Source metricSource = isIndexMetric ? 
Mockito.spy(new FlintIndexMetricSource()) : Mockito.spy(new FlintMetricSource()); + when(sparkEnv.metricsSystem().getSourcesByName(sourceName).head()).thenReturn(metricSource); // Test the methods String testMetric = "testPrefix.2xx.count"; - MetricsUtil.incrementCounter(testMetric); - MetricsUtil.incrementCounter(testMetric); - MetricsUtil.decrementCounter(testMetric); + MetricsUtil.incrementCounter(testMetric, isIndexMetric); + MetricsUtil.incrementCounter(testMetric, isIndexMetric); + MetricsUtil.decrementCounter(testMetric, isIndexMetric); // Verify interactions verify(sparkEnv.metricsSystem(), times(0)).registerSource(any()); - verify(flintMetricSource, times(3)).metricRegistry(); - Counter counter = flintMetricSource.metricRegistry().getCounters().get(testMetric); + verify(metricSource, times(3)).metricRegistry(); + Counter counter = metricSource.metricRegistry().getCounters().get(testMetric); Assertions.assertNotNull(counter); - Assertions.assertEquals(counter.getCount(), 1); + Assertions.assertEquals(1, counter.getCount()); } } @Test public void testStartStopTimer() { + testStartStopTimerHelper(false); + } + + @Test + public void testStartStopTimerForIndexMetrics() { + testStartStopTimerHelper(true); + } + + private void testStartStopTimerHelper(boolean isIndexMetric) { try (MockedStatic sparkEnvMock = mockStatic(SparkEnv.class)) { // Mock SparkEnv SparkEnv sparkEnv = mock(SparkEnv.class, RETURNS_DEEP_STUBS); sparkEnvMock.when(SparkEnv::get).thenReturn(sparkEnv); - // Mock FlintMetricSource - FlintMetricSource flintMetricSource = Mockito.spy(new FlintMetricSource()); - when(sparkEnv.metricsSystem().getSourcesByName(FlintMetricSource.FLINT_METRIC_SOURCE_NAME()).head()) - .thenReturn(flintMetricSource); + // Mock appropriate MetricSource + String sourceName = isIndexMetric ? FlintMetricSource.FLINT_INDEX_METRIC_SOURCE_NAME() : FlintMetricSource.FLINT_METRIC_SOURCE_NAME(); + Source metricSource = isIndexMetric ? 
Mockito.spy(new FlintIndexMetricSource()) : Mockito.spy(new FlintMetricSource()); + when(sparkEnv.metricsSystem().getSourcesByName(sourceName).head()).thenReturn(metricSource); // Test the methods String testMetric = "testPrefix.processingTime"; - Timer.Context context = MetricsUtil.getTimerContext(testMetric); + Timer.Context context = MetricsUtil.getTimerContext(testMetric, isIndexMetric); TimeUnit.MILLISECONDS.sleep(500); MetricsUtil.stopTimer(context); // Verify interactions verify(sparkEnv.metricsSystem(), times(0)).registerSource(any()); - verify(flintMetricSource, times(1)).metricRegistry(); - Timer timer = flintMetricSource.metricRegistry().getTimers().get(testMetric); + verify(metricSource, times(1)).metricRegistry(); + Timer timer = metricSource.metricRegistry().getTimers().get(testMetric); Assertions.assertNotNull(timer); - Assertions.assertEquals(timer.getCount(), 1L); + Assertions.assertEquals(1L, timer.getCount()); assertEquals(1.9, timer.getMeanRate(), 0.1); } catch (InterruptedException e) { throw new RuntimeException(e); @@ -82,33 +102,71 @@ public void testStartStopTimer() { } @Test - public void testRegisterGaugeWhenMetricRegistryIsAvailable() { + public void testRegisterGauge() { + testRegisterGaugeHelper(false); + } + + @Test + public void testRegisterGaugeForIndexMetrics() { + testRegisterGaugeHelper(true); + } + + private void testRegisterGaugeHelper(boolean isIndexMetric) { try (MockedStatic sparkEnvMock = mockStatic(SparkEnv.class)) { // Mock SparkEnv SparkEnv sparkEnv = mock(SparkEnv.class, RETURNS_DEEP_STUBS); sparkEnvMock.when(SparkEnv::get).thenReturn(sparkEnv); - // Mock FlintMetricSource - FlintMetricSource flintMetricSource = Mockito.spy(new FlintMetricSource()); - when(sparkEnv.metricsSystem().getSourcesByName(FlintMetricSource.FLINT_METRIC_SOURCE_NAME()).head()) - .thenReturn(flintMetricSource); + // Mock appropriate MetricSource + String sourceName = isIndexMetric ? FlintMetricSource.FLINT_INDEX_METRIC_SOURCE_NAME() : FlintMetricSource.FLINT_METRIC_SOURCE_NAME(); + Source metricSource = isIndexMetric ? 
Mockito.spy(new FlintIndexMetricSource()) : Mockito.spy(new FlintMetricSource()); + when(sparkEnv.metricsSystem().getSourcesByName(sourceName).head()).thenReturn(metricSource); // Setup gauge AtomicInteger testValue = new AtomicInteger(1); String gaugeName = "test.gauge"; - MetricsUtil.registerGauge(gaugeName, testValue); + MetricsUtil.registerGauge(gaugeName, testValue, isIndexMetric); verify(sparkEnv.metricsSystem(), times(0)).registerSource(any()); - verify(flintMetricSource, times(1)).metricRegistry(); + verify(metricSource, times(1)).metricRegistry(); - Gauge gauge = flintMetricSource.metricRegistry().getGauges().get(gaugeName); + Gauge gauge = metricSource.metricRegistry().getGauges().get(gaugeName); Assertions.assertNotNull(gauge); - Assertions.assertEquals(gauge.getValue(), 1); + Assertions.assertEquals(1, gauge.getValue()); testValue.incrementAndGet(); testValue.incrementAndGet(); testValue.decrementAndGet(); - Assertions.assertEquals(gauge.getValue(), 2); + Assertions.assertEquals(2, gauge.getValue()); + } + } + + @Test + public void testDefaultBehavior() { + try (MockedStatic sparkEnvMock = mockStatic(SparkEnv.class)) { + // Mock SparkEnv + SparkEnv sparkEnv = mock(SparkEnv.class, RETURNS_DEEP_STUBS); + sparkEnvMock.when(SparkEnv::get).thenReturn(sparkEnv); + + // Mock FlintMetricSource + FlintMetricSource flintMetricSource = Mockito.spy(new FlintMetricSource()); + when(sparkEnv.metricsSystem().getSourcesByName(FlintMetricSource.FLINT_METRIC_SOURCE_NAME()).head()) + .thenReturn(flintMetricSource); + + // Test default behavior (non-index metrics) + String testCountMetric = "testDefault.count"; + String testTimerMetric = "testDefault.time"; + String testGaugeMetric = "testDefault.gauge"; + MetricsUtil.incrementCounter(testCountMetric); + MetricsUtil.getTimerContext(testTimerMetric); + MetricsUtil.registerGauge(testGaugeMetric, new AtomicInteger(0), false); + + // Verify interactions + verify(sparkEnv.metricsSystem(), times(0)).registerSource(any()); + verify(flintMetricSource, times(3)).metricRegistry(); + Assertions.assertNotNull(flintMetricSource.metricRegistry().getCounters().get(testCountMetric)); + Assertions.assertNotNull(flintMetricSource.metricRegistry().getTimers().get(testTimerMetric)); + Assertions.assertNotNull(flintMetricSource.metricRegistry().getGauges().get(testGaugeMetric)); } } } \ No newline at end of file diff --git a/flint-spark-integration/src/main/scala/org/apache/spark/sql/flint/config/FlintSparkConf.scala b/flint-spark-integration/src/main/scala/org/apache/spark/sql/flint/config/FlintSparkConf.scala index 43dc43ad0..68721d235 100644 --- a/flint-spark-integration/src/main/scala/org/apache/spark/sql/flint/config/FlintSparkConf.scala +++ b/flint-spark-integration/src/main/scala/org/apache/spark/sql/flint/config/FlintSparkConf.scala @@ -174,7 +174,7 @@ object FlintSparkConf { val EXTERNAL_SCHEDULER_INTERVAL_THRESHOLD = FlintConfig("spark.flint.job.externalScheduler.interval") .doc("Interval threshold in minutes for external scheduler to trigger index refresh") - .createWithDefault("5 minutes") + .createWithDefault(FlintOptions.DEFAULT_EXTERNAL_SCHEDULER_INTERVAL) val CHECKPOINT_LOCATION_ROOT_DIR = FlintConfig("spark.flint.index.checkpointLocation.rootDir") .doc("Root directory of a user specified checkpoint location for index refresh") @@ -294,8 +294,10 @@ case class FlintSparkConf(properties: JMap[String, String]) extends Serializable def isExternalSchedulerEnabled: Boolean = EXTERNAL_SCHEDULER_ENABLED.readFrom(reader).toBoolean - def 
externalSchedulerIntervalThreshold(): String = - EXTERNAL_SCHEDULER_INTERVAL_THRESHOLD.readFrom(reader) + def externalSchedulerIntervalThreshold(): String = { + val value = EXTERNAL_SCHEDULER_INTERVAL_THRESHOLD.readFrom(reader) + if (value.trim.isEmpty) FlintOptions.DEFAULT_EXTERNAL_SCHEDULER_INTERVAL else value + } def checkpointLocationRootDir: Option[String] = CHECKPOINT_LOCATION_ROOT_DIR.readFrom(reader) diff --git a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSpark.scala b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSpark.scala index 72039bddf..779b7e013 100644 --- a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSpark.scala +++ b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSpark.scala @@ -23,7 +23,7 @@ import org.opensearch.flint.spark.covering.FlintSparkCoveringIndex import org.opensearch.flint.spark.mv.FlintSparkMaterializedView import org.opensearch.flint.spark.refresh.FlintSparkIndexRefresh import org.opensearch.flint.spark.refresh.FlintSparkIndexRefresh.RefreshMode._ -import org.opensearch.flint.spark.scheduler.{AsyncQuerySchedulerBuilder, FlintSparkJobSchedulingService} +import org.opensearch.flint.spark.scheduler.{AsyncQuerySchedulerBuilder, FlintSparkJobExternalSchedulingService, FlintSparkJobInternalSchedulingService, FlintSparkJobSchedulingService} import org.opensearch.flint.spark.scheduler.AsyncQuerySchedulerBuilder.AsyncQuerySchedulerAction import org.opensearch.flint.spark.skipping.FlintSparkSkippingIndex import org.opensearch.flint.spark.skipping.FlintSparkSkippingStrategy.SkippingKindSerializer @@ -225,17 +225,22 @@ class FlintSpark(val spark: SparkSession) extends FlintSparkTransactionSupport w */ def updateIndex(index: FlintSparkIndex): Option[String] = { val indexName = index.name() - validateUpdateAllowed( - describeIndex(indexName) - .getOrElse(throw new IllegalStateException(s"Index $indexName doesn't exist")) - .options, - index.options) + val originalOptions = describeIndex(indexName) + .getOrElse(throw new IllegalStateException(s"Index $indexName doesn't exist")) + .options + validateUpdateAllowed(originalOptions, index.options) + val isSchedulerModeChanged = + index.options.isExternalSchedulerEnabled() != originalOptions.isExternalSchedulerEnabled() withTransaction[Option[String]](indexName, "Update Flint index") { tx => - // Relies on validation to forbid auto-to-auto and manual-to-manual updates - index.options.autoRefresh() match { - case true => updateIndexManualToAuto(index, tx) - case false => updateIndexAutoToManual(index, tx) + // Relies on validation to prevent: + // 1. auto-to-auto updates besides scheduler_mode + // 2. any manual-to-manual updates + // 3. 
both refresh_mode and scheduler_mode updated + (index.options.autoRefresh(), isSchedulerModeChanged) match { + case (true, true) => updateSchedulerMode(index, tx) + case (true, false) => updateIndexManualToAuto(index, tx) + case (false, false) => updateIndexAutoToManual(index, tx) } } } @@ -325,19 +330,29 @@ class FlintSpark(val spark: SparkSession) extends FlintSparkTransactionSupport w def recoverIndex(indexName: String): Boolean = withTransaction[Boolean](indexName, "Recover Flint index") { tx => val index = describeIndex(indexName) + if (index.exists(_.options.autoRefresh())) { + val updatedIndex = FlintSparkIndexFactory.createWithDefaultOptions(index.get).get + FlintSparkIndexRefresh + .create(updatedIndex.name(), updatedIndex) + .validate(spark) + val jobSchedulingService = FlintSparkJobSchedulingService.create( + updatedIndex, + spark, + flintAsyncQueryScheduler, + flintSparkConf, + flintIndexMonitor) tx .initialLog(latest => Set(ACTIVE, REFRESHING, FAILED).contains(latest.state)) .transientLog(latest => latest.copy(state = RECOVERING, createTime = System.currentTimeMillis())) .finalLog(latest => { - flintIndexMonitor.startMonitor(indexName) - latest.copy(state = REFRESHING) + latest.copy(state = jobSchedulingService.stateTransitions.finalStateForUpdate) }) .commit(_ => { - FlintSparkIndexRefresh - .create(indexName, index.get) - .start(spark, flintSparkConf) + flintIndexMetadataService.updateIndexMetadata(indexName, updatedIndex.metadata()) + logInfo("Update index options complete") + jobSchedulingService.handleJob(updatedIndex, AsyncQuerySchedulerAction.UPDATE) true }) } else { @@ -430,37 +445,78 @@ class FlintSpark(val spark: SparkSession) extends FlintSparkTransactionSupport w private def validateUpdateAllowed( originalOptions: FlintSparkIndexOptions, updatedOptions: FlintSparkIndexOptions): Unit = { - // auto_refresh must change - if (updatedOptions.autoRefresh() == originalOptions.autoRefresh()) { - throw new IllegalArgumentException("auto_refresh option must be updated") - } + val isAutoRefreshChanged = updatedOptions.autoRefresh() != originalOptions.autoRefresh() - val refreshMode = (updatedOptions.autoRefresh(), updatedOptions.incrementalRefresh()) match { - case (true, false) => AUTO - case (false, false) => FULL - case (false, true) => INCREMENTAL + val changedOptions = updatedOptions.options.filterNot { case (k, v) => + originalOptions.options.get(k).contains(v) + }.keySet + + if (changedOptions.isEmpty) { + throw new IllegalArgumentException("No index option updated") } - // validate allowed options depending on refresh mode - val allowedOptionNames = refreshMode match { - case FULL => Set(AUTO_REFRESH, INCREMENTAL_REFRESH) - case AUTO | INCREMENTAL => - Set( + // Validate based on auto_refresh state and changes + (isAutoRefreshChanged, updatedOptions.autoRefresh()) match { + case (true, true) => + // Changing from manual to auto refresh + if (updatedOptions.incrementalRefresh()) { + throw new IllegalArgumentException( + "Altering index to auto refresh while incremental refresh remains true") + } + + val allowedOptions = Set( AUTO_REFRESH, INCREMENTAL_REFRESH, SCHEDULER_MODE, REFRESH_INTERVAL, CHECKPOINT_LOCATION, WATERMARK_DELAY) + validateChangedOptions(changedOptions, allowedOptions, s"Altering index to auto refresh") + case (true, false) => + val allowedOptions = if (updatedOptions.incrementalRefresh()) { + // Changing from auto refresh to incremental refresh + Set( + AUTO_REFRESH, + INCREMENTAL_REFRESH, + REFRESH_INTERVAL, + CHECKPOINT_LOCATION, + WATERMARK_DELAY) 
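+            // (illustrative) e.g. an ALTER ... WITH (auto_refresh = false, incremental_refresh = true)
+            // request lands here and may also update refresh_interval, checkpoint_location or watermark_delay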
+ } else { + // Changing from auto refresh to full refresh + Set(AUTO_REFRESH) + } + validateChangedOptions( + changedOptions, + allowedOptions, + "Altering index to full/incremental refresh") + + case (false, true) => + // original refresh_mode is auto, only allow changing scheduler_mode + validateChangedOptions( + changedOptions, + Set(SCHEDULER_MODE), + "Altering index when auto_refresh remains true") + + case (false, false) => + // original refresh_mode is full/incremental, not allowed to change any options + if (changedOptions.nonEmpty) { + throw new IllegalArgumentException( + "No options can be updated when auto_refresh remains false") + } } + } - // Get the changed option names - val updateOptionNames = updatedOptions.options.filterNot { case (k, v) => - originalOptions.options.get(k).contains(v) - }.keys - if (!updateOptionNames.forall(allowedOptionNames.map(_.toString).contains)) { + private def validateChangedOptions( + changedOptions: Set[String], + allowedOptions: Set[OptionName], + context: String): Unit = { + + val allowedOptionStrings = allowedOptions.map(_.toString) + + if (!changedOptions.subsetOf(allowedOptionStrings)) { + val invalidOptions = changedOptions -- allowedOptionStrings throw new IllegalArgumentException( - s"Altering index to ${refreshMode} refresh only allows options: ${allowedOptionNames}") + s"$context only allows changing: $allowedOptions. Invalid options: $invalidOptions") } } @@ -477,9 +533,12 @@ class FlintSpark(val spark: SparkSession) extends FlintSparkTransactionSupport w flintIndexMonitor) tx .initialLog(latest => - latest.state == REFRESHING && latest.entryVersion == indexLogEntry.entryVersion) + // Index in external scheduler mode should be in active or refreshing state + Set(jobSchedulingService.stateTransitions.initialStateForUnschedule).contains( + latest.state) && latest.entryVersion == indexLogEntry.entryVersion) .transientLog(latest => latest.copy(state = UPDATING)) - .finalLog(latest => latest.copy(state = ACTIVE)) + .finalLog(latest => + latest.copy(state = jobSchedulingService.stateTransitions.finalStateForUnschedule)) .commit(_ => { flintIndexMetadataService.updateIndexMetadata(indexName, index.metadata) logInfo("Update index options complete") @@ -501,13 +560,11 @@ class FlintSpark(val spark: SparkSession) extends FlintSparkTransactionSupport w flintIndexMonitor) tx .initialLog(latest => - latest.state == ACTIVE && latest.entryVersion == indexLogEntry.entryVersion) + latest.state == jobSchedulingService.stateTransitions.initialStateForUpdate && latest.entryVersion == indexLogEntry.entryVersion) .transientLog(latest => latest.copy(state = UPDATING, createTime = System.currentTimeMillis())) .finalLog(latest => { - logInfo("Scheduling index state monitor") - flintIndexMonitor.startMonitor(indexName) - latest.copy(state = REFRESHING) + latest.copy(state = jobSchedulingService.stateTransitions.finalStateForUpdate) }) .commit(_ => { flintIndexMetadataService.updateIndexMetadata(indexName, index.metadata) @@ -515,4 +572,36 @@ class FlintSpark(val spark: SparkSession) extends FlintSparkTransactionSupport w jobSchedulingService.handleJob(index, AsyncQuerySchedulerAction.UPDATE) }) } + + private def updateSchedulerMode( + index: FlintSparkIndex, + tx: OptimisticTransaction[Option[String]]): Option[String] = { + val indexName = index.name + val indexLogEntry = index.latestLogEntry.get + val internalSchedulingService = + new FlintSparkJobInternalSchedulingService(spark, flintIndexMonitor) + val externalSchedulingService = + new 
FlintSparkJobExternalSchedulingService(flintAsyncQueryScheduler, flintSparkConf) + + val isExternal = index.options.isExternalSchedulerEnabled() + val (initialState, finalState, oldService, newService) = + if (isExternal) { + (REFRESHING, ACTIVE, internalSchedulingService, externalSchedulingService) + } else { + (ACTIVE, REFRESHING, externalSchedulingService, internalSchedulingService) + } + + tx + .initialLog(latest => + latest.state == initialState && latest.entryVersion == indexLogEntry.entryVersion) + .transientLog(latest => latest.copy(state = UPDATING)) + .finalLog(latest => latest.copy(state = finalState)) + .commit(_ => { + flintIndexMetadataService.updateIndexMetadata(indexName, index.metadata) + logInfo("Update index options complete") + oldService.handleJob(index, AsyncQuerySchedulerAction.UNSCHEDULE) + logInfo(s"Unscheduled ${if (isExternal) "internal" else "external"} jobs") + newService.handleJob(index, AsyncQuerySchedulerAction.UPDATE) + }) + } } diff --git a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndexBuilder.scala b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndexBuilder.scala index afd53724e..0391741cf 100644 --- a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndexBuilder.scala +++ b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndexBuilder.scala @@ -5,20 +5,14 @@ package org.opensearch.flint.spark -import java.util.Collections - import scala.collection.JavaConverters.mapAsJavaMapConverter -import org.opensearch.flint.spark.FlintSparkIndexOptions.OptionName.{CHECKPOINT_LOCATION, REFRESH_INTERVAL, SCHEDULER_MODE} -import org.opensearch.flint.spark.FlintSparkIndexOptions.empty +import org.opensearch.flint.spark.FlintSparkIndexOptions.{empty, updateOptionsWithDefaults} import org.opensearch.flint.spark.refresh.FlintSparkIndexRefresh -import org.opensearch.flint.spark.refresh.FlintSparkIndexRefresh.SchedulerMode -import org.opensearch.flint.spark.scheduler.util.IntervalSchedulerParser import org.apache.spark.sql.catalog.Column import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.flint.{findField, loadTable, parseTableName, qualifyTableName} -import org.apache.spark.sql.flint.config.FlintSparkConf import org.apache.spark.sql.types.{StructField, StructType} /** @@ -156,60 +150,4 @@ abstract class FlintSparkIndexBuilder(flint: FlintSpark) { isPartition = false, // useless for now so just set to false isBucket = false) } - - /** - * Updates the options with a default values for Create and Alter index. 
- * - * @param indexName - * The index name string - * @param options - * The original FlintSparkIndexOptions - * @return - * Updated FlintSparkIndexOptions - */ - private def updateOptionsWithDefaults( - indexName: String, - options: FlintSparkIndexOptions): FlintSparkIndexOptions = { - val flintSparkConf = new FlintSparkConf(Collections.emptyMap[String, String]) - - val updatedOptions = - new scala.collection.mutable.HashMap[String, String]() ++= options.options - - // Add checkpoint location if not present - options.checkpointLocation(indexName, flintSparkConf).foreach { location => - updatedOptions += (CHECKPOINT_LOCATION.toString -> location) - } - - // Update scheduler mode and refresh interval only if auto refresh is enabled - if (!options.autoRefresh()) { - return FlintSparkIndexOptions(updatedOptions.toMap) - } - - val externalSchedulerEnabled = flintSparkConf.isExternalSchedulerEnabled - val thresholdInterval = - IntervalSchedulerParser.parse(flintSparkConf.externalSchedulerIntervalThreshold()) - val currentInterval = options.refreshInterval().map(IntervalSchedulerParser.parse) - - ( - externalSchedulerEnabled, - currentInterval, - updatedOptions.get(SCHEDULER_MODE.toString)) match { - case (true, Some(interval), _) if interval.getInterval >= thresholdInterval.getInterval => - updatedOptions += (SCHEDULER_MODE.toString -> SchedulerMode.EXTERNAL.toString) - case (true, None, Some("external")) => - updatedOptions += (REFRESH_INTERVAL.toString -> flintSparkConf - .externalSchedulerIntervalThreshold()) - case (true, None, None) => - updatedOptions += (SCHEDULER_MODE.toString -> SchedulerMode.EXTERNAL.toString) - updatedOptions += (REFRESH_INTERVAL.toString -> flintSparkConf - .externalSchedulerIntervalThreshold()) - case (false, _, Some("external")) => - throw new IllegalArgumentException( - "External scheduler mode spark conf is not enabled but refresh interval is set to external scheduler mode") - case _ => - updatedOptions += (SCHEDULER_MODE.toString -> SchedulerMode.INTERNAL.toString) - } - - FlintSparkIndexOptions(updatedOptions.toMap) - } } diff --git a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndexFactory.scala b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndexFactory.scala index 6c34e00e1..78636d992 100644 --- a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndexFactory.scala +++ b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndexFactory.scala @@ -7,6 +7,7 @@ package org.opensearch.flint.spark import java.util.Collections +import scala.collection.JavaConverters.mapAsJavaMapConverter import scala.collection.JavaConverters.mapAsScalaMapConverter import org.opensearch.flint.common.metadata.FlintMetadata @@ -49,6 +50,26 @@ object FlintSparkIndexFactory extends Logging { } } + /** + * Creates Flint index with default options. 
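+   * Currently used by index recovery (recoverIndex) to re-apply default options such as
+   * scheduler_mode and refresh_interval to an auto-refresh index.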
+ * + * @param index + * Flint index + * @param metadata + * Flint metadata + * @return + * Flint index with default options + */ + def createWithDefaultOptions(index: FlintSparkIndex): Option[FlintSparkIndex] = { + val originalOptions = index.options + val updatedOptions = + FlintSparkIndexOptions.updateOptionsWithDefaults(index.name(), originalOptions) + val updatedMetadata = index + .metadata() + .copy(options = updatedOptions.options.mapValues(_.asInstanceOf[AnyRef]).asJava) + this.create(updatedMetadata) + } + private def doCreate(metadata: FlintMetadata): FlintSparkIndex = { val indexOptions = FlintSparkIndexOptions( metadata.options.asScala.mapValues(_.asInstanceOf[String]).toMap) diff --git a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndexOptions.scala b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndexOptions.scala index e73e07d79..4bfc50c55 100644 --- a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndexOptions.scala +++ b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndexOptions.scala @@ -5,14 +5,16 @@ package org.opensearch.flint.spark -import java.util.UUID +import java.util.{Collections, UUID} import org.json4s.{Formats, NoTypeHints} import org.json4s.native.JsonMethods._ import org.json4s.native.Serialization +import org.opensearch.flint.core.logging.CustomLogging.logInfo import org.opensearch.flint.spark.FlintSparkIndexOptions.OptionName.{AUTO_REFRESH, CHECKPOINT_LOCATION, EXTRA_OPTIONS, INCREMENTAL_REFRESH, INDEX_SETTINGS, OptionName, OUTPUT_MODE, REFRESH_INTERVAL, SCHEDULER_MODE, WATERMARK_DELAY} import org.opensearch.flint.spark.FlintSparkIndexOptions.validateOptionNames import org.opensearch.flint.spark.refresh.FlintSparkIndexRefresh.SchedulerMode +import org.opensearch.flint.spark.scheduler.util.IntervalSchedulerParser import org.apache.spark.sql.flint.config.FlintSparkConf @@ -201,4 +203,64 @@ object FlintSparkIndexOptions { require(invalidOptions.isEmpty, s"option name ${invalidOptions.mkString(",")} is invalid") } + + /** + * Updates the options with default values. 
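+   * Adds a checkpoint location when one can be derived and, for auto-refresh indexes, resolves
+   * scheduler_mode and refresh_interval against the external scheduler configuration.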
+ * + * @param indexName + * The index name string + * @param options + * The original FlintSparkIndexOptions + * @return + * Updated FlintSparkIndexOptions + */ + def updateOptionsWithDefaults( + indexName: String, + options: FlintSparkIndexOptions): FlintSparkIndexOptions = { + val flintSparkConf = new FlintSparkConf(Collections.emptyMap[String, String]) + + val updatedOptions = + new scala.collection.mutable.HashMap[String, String]() ++= options.options + + // Add checkpoint location if not present + options.checkpointLocation(indexName, flintSparkConf).foreach { location => + updatedOptions += (CHECKPOINT_LOCATION.toString -> location) + } + + // Update scheduler mode and refresh interval only if auto refresh is enabled + if (!options.autoRefresh()) { + return FlintSparkIndexOptions(updatedOptions.toMap) + } + + val externalSchedulerEnabled = flintSparkConf.isExternalSchedulerEnabled + val thresholdInterval = + IntervalSchedulerParser.parse(flintSparkConf.externalSchedulerIntervalThreshold()) + val currentInterval = options.refreshInterval().map(IntervalSchedulerParser.parse) + ( + externalSchedulerEnabled, + currentInterval.isDefined, + updatedOptions.get(SCHEDULER_MODE.toString)) match { + case (true, true, None | Some("external")) + if currentInterval.get.getInterval >= thresholdInterval.getInterval => + updatedOptions += (SCHEDULER_MODE.toString -> SchedulerMode.EXTERNAL.toString) + case (true, true, Some("external")) + if currentInterval.get.getInterval < thresholdInterval.getInterval => + throw new IllegalArgumentException( + s"Input refresh_interval is ${options.refreshInterval().get}, required above the interval threshold of external scheduler: ${flintSparkConf + .externalSchedulerIntervalThreshold()}") + case (true, false, Some("external")) => + updatedOptions += (REFRESH_INTERVAL.toString -> flintSparkConf + .externalSchedulerIntervalThreshold()) + case (true, false, None) => + updatedOptions += (SCHEDULER_MODE.toString -> SchedulerMode.EXTERNAL.toString) + updatedOptions += (REFRESH_INTERVAL.toString -> flintSparkConf + .externalSchedulerIntervalThreshold()) + case (false, _, Some("external")) => + throw new IllegalArgumentException( + "spark.flint.job.externalScheduler.enabled is false but refresh interval is set to external scheduler mode") + case _ => + updatedOptions += (SCHEDULER_MODE.toString -> SchedulerMode.INTERNAL.toString) + } + FlintSparkIndexOptions(updatedOptions.toMap) + } } diff --git a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/refresh/IncrementalIndexRefresh.scala b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/refresh/IncrementalIndexRefresh.scala index 98f0d838f..f675df75a 100644 --- a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/refresh/IncrementalIndexRefresh.scala +++ b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/refresh/IncrementalIndexRefresh.scala @@ -7,6 +7,7 @@ package org.opensearch.flint.spark.refresh import org.opensearch.flint.spark.{FlintSparkIndex, FlintSparkValidationHelper} import org.opensearch.flint.spark.refresh.FlintSparkIndexRefresh.RefreshMode.{INCREMENTAL, RefreshMode} +import org.opensearch.flint.spark.refresh.util.RefreshMetricsAspect import org.apache.spark.sql.SparkSession import org.apache.spark.sql.flint.config.FlintSparkConf @@ -19,9 +20,10 @@ import org.apache.spark.sql.flint.config.FlintSparkConf * @param index * Flint index */ -class IncrementalIndexRefresh(indexName: String, index: FlintSparkIndex) +class IncrementalIndexRefresh(val 
indexName: String, index: FlintSparkIndex) extends FlintSparkIndexRefresh - with FlintSparkValidationHelper { + with FlintSparkValidationHelper + with RefreshMetricsAspect { override def refreshMode: RefreshMode = INCREMENTAL @@ -43,15 +45,21 @@ class IncrementalIndexRefresh(indexName: String, index: FlintSparkIndex) override def start(spark: SparkSession, flintSparkConf: FlintSparkConf): Option[String] = { logInfo(s"Start refreshing index $indexName in incremental mode") - // Reuse auto refresh which uses AvailableNow trigger and will stop once complete - val jobId = - new AutoIndexRefresh(indexName, index) - .start(spark, flintSparkConf) + val clientId = flintSparkConf.flintOptions().getAWSAccountId() + val dataSource = flintSparkConf.flintOptions().getDataSourceName() - // Blocks the calling thread until the streaming query finishes - spark.streams - .get(jobId.get) - .awaitTermination() - None + withMetrics(clientId, dataSource, indexName, "incrementalRefresh") { + // Reuse auto refresh which uses AvailableNow trigger and will stop once complete + val jobId = + new AutoIndexRefresh(indexName, index) + .start(spark, flintSparkConf) + + // Blocks the calling thread until the streaming query finishes + spark.streams + .get(jobId.get) + .awaitTermination() + + None + } } } diff --git a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/refresh/util/RefreshMetricsAspect.scala b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/refresh/util/RefreshMetricsAspect.scala new file mode 100644 index 000000000..c5832e01c --- /dev/null +++ b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/refresh/util/RefreshMetricsAspect.scala @@ -0,0 +1,62 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.flint.spark.refresh.util + +import org.opensearch.flint.core.metrics.MetricsUtil + +/** + * A trait that provides aspect-oriented metrics functionality for refresh operations. + * + * This trait can be mixed into classes that need to track metrics for various operations, + * particularly those related to index refreshing. It provides a method to wrap operations with + * metric collection, including timing and success/failure counting. + */ +trait RefreshMetricsAspect { + + /** + * Wraps an operation with metric collection. + * + * @param clientId + * The ID of the client performing the operation + * @param dataSource + * The name of the data source being used + * @param indexName + * The name of the index being operated on + * @param metricPrefix + * The prefix for the metrics (e.g., "incrementalRefresh") + * @param block + * The operation to be performed and measured + * @return + * The result of the operation + * + * This method will: + * 1. Start a timer for the operation 2. Execute the provided operation 3. Increment a success + * or failure counter based on the outcome 4. Stop the timer 5. 
Return the result of the + * operation or throw any exception that occurred + */ + def withMetrics(clientId: String, dataSource: String, indexName: String, metricPrefix: String)( + block: => Option[String]): Option[String] = { + val refreshMetricsHelper = new RefreshMetricsHelper(clientId, dataSource, indexName) + + val processingTimeMetric = s"$metricPrefix.processingTime" + val successMetric = s"$metricPrefix.success.count" + val failedMetric = s"$metricPrefix.failed.count" + + val timerContext = refreshMetricsHelper.getTimerContext(processingTimeMetric) + + try { + val result = block + refreshMetricsHelper.incrementCounter(successMetric) + result + } catch { + case e: Exception => + refreshMetricsHelper.incrementCounter(failedMetric) + throw e + } finally { + MetricsUtil.stopTimer(timerContext) + } + } +} diff --git a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/refresh/util/RefreshMetricsHelper.scala b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/refresh/util/RefreshMetricsHelper.scala new file mode 100644 index 000000000..4b91b0be2 --- /dev/null +++ b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/refresh/util/RefreshMetricsHelper.scala @@ -0,0 +1,75 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.flint.spark.refresh.util + +import com.codahale.metrics.Timer +import org.opensearch.flint.core.metrics.MetricsUtil + +/** + * Helper class for constructing dimensioned metric names used in refresh operations. + */ +class RefreshMetricsHelper(clientId: String, dataSource: String, indexName: String) { + private val isIndexMetric = true + + /** + * Increments a counter metric with the specified dimensioned name. + * + * @param metricName + * The name of the metric to increment + */ + def incrementCounter(metricName: String): Unit = { + MetricsUtil.incrementCounter( + RefreshMetricsHelper.constructDimensionedMetricName( + metricName, + clientId, + dataSource, + indexName), + isIndexMetric) + } + + /** + * Gets a timer context for the specified metric name. + * + * @param metricName + * The name of the metric + * @return + * A Timer.Context object + */ + def getTimerContext(metricName: String): Timer.Context = { + MetricsUtil.getTimerContext( + RefreshMetricsHelper.constructDimensionedMetricName( + metricName, + clientId, + dataSource, + indexName), + isIndexMetric) + } +} + +object RefreshMetricsHelper { + + /** + * Constructs a dimensioned metric name for external scheduler request count. 
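+   * For example (illustrative values), constructDimensionedMetricName("processingTime", "123456789012",
+   * "mys3", "flint_index") returns "processingTime[clientId##123456789012,dataSource##mys3,indexName##flint_index]".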
+ * + * @param metricName + * The name of the metric + * @param clientId + * The ID of the client making the request + * @param dataSource + * The data source being used + * @param indexName + * The name of the index being refreshed + * @return + * A formatted string representing the dimensioned metric name + */ + private def constructDimensionedMetricName( + metricName: String, + clientId: String, + dataSource: String, + indexName: String): String = { + s"${metricName}[clientId##${clientId},dataSource##${dataSource},indexName##${indexName}]" + } +} diff --git a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/AsyncQuerySchedulerBuilder.java b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/AsyncQuerySchedulerBuilder.java index 9865081c8..3620608b0 100644 --- a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/AsyncQuerySchedulerBuilder.java +++ b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/AsyncQuerySchedulerBuilder.java @@ -30,13 +30,13 @@ public enum AsyncQuerySchedulerAction { public static AsyncQueryScheduler build(FlintOptions options) { String className = options.getCustomAsyncQuerySchedulerClass(); - logger.info("Attempting to instantiate AsyncQueryScheduler with class name: {}", className); if (className.isEmpty()) { return new OpenSearchAsyncQueryScheduler(options); } // Attempts to instantiate AsyncQueryScheduler using reflection + logger.info("Attempting to instantiate AsyncQueryScheduler with class name: {}", className); try { Class asyncQuerySchedulerClass = Class.forName(className); Constructor constructor = asyncQuerySchedulerClass.getConstructor(); diff --git a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/FlintSparkJobExternalSchedulingService.scala b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/FlintSparkJobExternalSchedulingService.scala index 87226d99f..d043746c0 100644 --- a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/FlintSparkJobExternalSchedulingService.scala +++ b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/FlintSparkJobExternalSchedulingService.scala @@ -7,9 +7,12 @@ package org.opensearch.flint.spark.scheduler import java.time.Instant +import org.opensearch.flint.common.metadata.log.FlintMetadataLogEntry.IndexState import org.opensearch.flint.common.scheduler.AsyncQueryScheduler import org.opensearch.flint.common.scheduler.model.{AsyncQuerySchedulerRequest, LangType} +import org.opensearch.flint.core.storage.OpenSearchClientUtils import org.opensearch.flint.spark.FlintSparkIndex +import org.opensearch.flint.spark.refresh.util.RefreshMetricsAspect import org.opensearch.flint.spark.scheduler.AsyncQuerySchedulerBuilder.AsyncQuerySchedulerAction import org.opensearch.flint.spark.scheduler.util.RefreshQueryGenerator @@ -32,45 +35,56 @@ class FlintSparkJobExternalSchedulingService( flintAsyncQueryScheduler: AsyncQueryScheduler, flintSparkConf: FlintSparkConf) extends FlintSparkJobSchedulingService + with RefreshMetricsAspect with Logging { + override val stateTransitions: StateTransitions = StateTransitions( + initialStateForUpdate = IndexState.ACTIVE, + finalStateForUpdate = IndexState.ACTIVE, + initialStateForUnschedule = IndexState.ACTIVE, + finalStateForUnschedule = IndexState.ACTIVE) + override def handleJob( index: FlintSparkIndex, action: AsyncQuerySchedulerAction): Option[String] = { val dataSource = 
flintSparkConf.flintOptions().getDataSourceName() val clientId = flintSparkConf.flintOptions().getAWSAccountId() - val indexName = index.name() + // This is to make sure jobId is consistent with the index name + val indexName = OpenSearchClientUtils.sanitizeIndexName(index.name()) logInfo(s"handleAsyncQueryScheduler invoked: $action") - val baseRequest = AsyncQuerySchedulerRequest - .builder() - .accountId(clientId) - .jobId(indexName) - .dataSource(dataSource) + withMetrics(clientId, dataSource, indexName, "externalScheduler") { + val baseRequest = AsyncQuerySchedulerRequest + .builder() + .accountId(clientId) + .jobId(indexName) + .dataSource(dataSource) - val request = action match { - case AsyncQuerySchedulerAction.SCHEDULE | AsyncQuerySchedulerAction.UPDATE => - val currentTime = Instant.now() - baseRequest - .scheduledQuery(RefreshQueryGenerator.generateRefreshQuery(index)) - .queryLang(LangType.SQL) - .interval(index.options.refreshInterval().get) - .enabled(true) - .enabledTime(currentTime) - .lastUpdateTime(currentTime) - .build() - case _ => baseRequest.build() - } + val request = action match { + case AsyncQuerySchedulerAction.SCHEDULE | AsyncQuerySchedulerAction.UPDATE => + val currentTime = Instant.now() + baseRequest + .scheduledQuery(RefreshQueryGenerator.generateRefreshQuery(index)) + .queryLang(LangType.SQL) + .interval(index.options.refreshInterval().get) + .enabled(true) + .enabledTime(currentTime) + .lastUpdateTime(currentTime) + .build() + case _ => baseRequest.build() + } - action match { - case AsyncQuerySchedulerAction.SCHEDULE => flintAsyncQueryScheduler.scheduleJob(request) - case AsyncQuerySchedulerAction.UPDATE => flintAsyncQueryScheduler.updateJob(request) - case AsyncQuerySchedulerAction.UNSCHEDULE => flintAsyncQueryScheduler.unscheduleJob(request) - case AsyncQuerySchedulerAction.REMOVE => flintAsyncQueryScheduler.removeJob(request) - case _ => throw new IllegalArgumentException(s"Unsupported action: $action") - } + action match { + case AsyncQuerySchedulerAction.SCHEDULE => flintAsyncQueryScheduler.scheduleJob(request) + case AsyncQuerySchedulerAction.UPDATE => flintAsyncQueryScheduler.updateJob(request) + case AsyncQuerySchedulerAction.UNSCHEDULE => + flintAsyncQueryScheduler.unscheduleJob(request) + case AsyncQuerySchedulerAction.REMOVE => flintAsyncQueryScheduler.removeJob(request) + case _ => throw new IllegalArgumentException(s"Unsupported action: $action") + } - None // Return None for all cases + None // Return None for all cases + } } } diff --git a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/FlintSparkJobInternalSchedulingService.scala b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/FlintSparkJobInternalSchedulingService.scala index ab22941bb..d22eff2c9 100644 --- a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/FlintSparkJobInternalSchedulingService.scala +++ b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/FlintSparkJobInternalSchedulingService.scala @@ -7,6 +7,7 @@ package org.opensearch.flint.spark.scheduler import scala.collection.JavaConverters.mapAsJavaMapConverter +import org.opensearch.flint.common.metadata.log.FlintMetadataLogEntry.IndexState import org.opensearch.flint.spark.{FlintSparkIndex, FlintSparkIndexMonitor} import org.opensearch.flint.spark.refresh.FlintSparkIndexRefresh import org.opensearch.flint.spark.scheduler.AsyncQuerySchedulerBuilder.AsyncQuerySchedulerAction @@ -33,6 +34,12 @@ class 
FlintSparkJobInternalSchedulingService( extends FlintSparkJobSchedulingService with Logging { + override val stateTransitions: StateTransitions = StateTransitions( + initialStateForUpdate = IndexState.ACTIVE, + finalStateForUpdate = IndexState.REFRESHING, + initialStateForUnschedule = IndexState.REFRESHING, + finalStateForUnschedule = IndexState.ACTIVE) + /** * Handles job-related actions for a given Flint Spark index. * @@ -52,7 +59,7 @@ class FlintSparkJobInternalSchedulingService( action match { case AsyncQuerySchedulerAction.SCHEDULE => None // No-op case AsyncQuerySchedulerAction.UPDATE => - logInfo("Updating index state monitor") + logInfo("Scheduling index state monitor") flintIndexMonitor.startMonitor(indexName) startRefreshingJob(index) case AsyncQuerySchedulerAction.UNSCHEDULE => diff --git a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/FlintSparkJobSchedulingService.scala b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/FlintSparkJobSchedulingService.scala index 40ef9fcbe..6e25d8a8c 100644 --- a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/FlintSparkJobSchedulingService.scala +++ b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/FlintSparkJobSchedulingService.scala @@ -5,6 +5,7 @@ package org.opensearch.flint.spark.scheduler +import org.opensearch.flint.common.metadata.log.FlintMetadataLogEntry.IndexState.IndexState import org.opensearch.flint.common.scheduler.AsyncQueryScheduler import org.opensearch.flint.spark.{FlintSparkIndex, FlintSparkIndexMonitor} import org.opensearch.flint.spark.scheduler.AsyncQuerySchedulerBuilder.AsyncQuerySchedulerAction @@ -17,6 +18,14 @@ import org.apache.spark.sql.flint.config.FlintSparkConf */ trait FlintSparkJobSchedulingService { + case class StateTransitions( + initialStateForUpdate: IndexState, + finalStateForUpdate: IndexState, + initialStateForUnschedule: IndexState, + finalStateForUnschedule: IndexState) + + val stateTransitions: StateTransitions + /** * Handles a job action for a given Flint Spark index. 
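* The declared `stateTransitions` describe the index states expected before and after the update and unschedule actions handled here.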
* diff --git a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/util/RefreshQueryGenerator.scala b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/util/RefreshQueryGenerator.scala index 510e0b9d5..86363b252 100644 --- a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/util/RefreshQueryGenerator.scala +++ b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/scheduler/util/RefreshQueryGenerator.scala @@ -6,6 +6,7 @@ package org.opensearch.flint.spark.scheduler.util import org.opensearch.flint.spark.FlintSparkIndex +import org.opensearch.flint.spark.FlintSparkIndex.quotedTableName import org.opensearch.flint.spark.covering.FlintSparkCoveringIndex import org.opensearch.flint.spark.mv.FlintSparkMaterializedView import org.opensearch.flint.spark.skipping.FlintSparkSkippingIndex @@ -25,11 +26,11 @@ object RefreshQueryGenerator { def generateRefreshQuery(index: FlintSparkIndex): String = { index match { case skippingIndex: FlintSparkSkippingIndex => - s"REFRESH SKIPPING INDEX ON ${skippingIndex.tableName}" + s"REFRESH SKIPPING INDEX ON ${quotedTableName(skippingIndex.tableName)}" case coveringIndex: FlintSparkCoveringIndex => - s"REFRESH INDEX ${coveringIndex.indexName} ON ${coveringIndex.tableName}" + s"REFRESH INDEX ${coveringIndex.indexName} ON ${quotedTableName(coveringIndex.tableName)}" case materializedView: FlintSparkMaterializedView => - s"REFRESH MATERIALIZED VIEW ${materializedView.mvName}" + s"REFRESH MATERIALIZED VIEW ${quotedTableName(materializedView.mvName)}" case _ => throw new IllegalArgumentException( s"Unsupported index type: ${index.getClass.getSimpleName}") diff --git a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/sql/mv/FlintSparkMaterializedViewAstBuilder.scala b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/sql/mv/FlintSparkMaterializedViewAstBuilder.scala index ede5379a1..8f3aa9917 100644 --- a/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/sql/mv/FlintSparkMaterializedViewAstBuilder.scala +++ b/flint-spark-integration/src/main/scala/org/opensearch/flint/spark/sql/mv/FlintSparkMaterializedViewAstBuilder.scala @@ -10,7 +10,6 @@ import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable` import org.antlr.v4.runtime.tree.RuleNode import org.opensearch.flint.spark.FlintSpark import org.opensearch.flint.spark.mv.FlintSparkMaterializedView -import org.opensearch.flint.spark.refresh.FlintSparkIndexRefresh.SchedulerMode import org.opensearch.flint.spark.sql.{FlintSparkSqlCommand, FlintSparkSqlExtensionsVisitor, SparkSqlAstBuilder} import org.opensearch.flint.spark.sql.FlintSparkSqlAstBuilder.{getFullTableName, getSqlText, IndexBelongsTo} import org.opensearch.flint.spark.sql.FlintSparkSqlExtensionsParser._ diff --git a/flint-spark-integration/src/test/scala/org/apache/spark/sql/flint/config/FlintSparkConfSuite.scala b/flint-spark-integration/src/test/scala/org/apache/spark/sql/flint/config/FlintSparkConfSuite.scala index 1a164a9f2..0cde6ab0f 100644 --- a/flint-spark-integration/src/test/scala/org/apache/spark/sql/flint/config/FlintSparkConfSuite.scala +++ b/flint-spark-integration/src/test/scala/org/apache/spark/sql/flint/config/FlintSparkConfSuite.scala @@ -114,6 +114,19 @@ class FlintSparkConfSuite extends FlintSuite { } } + test("externalSchedulerIntervalThreshold should return default value when empty") { + val options = FlintSparkConf(Map("spark.flint.job.externalScheduler.interval" -> "").asJava) + 
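// An empty interval value falls back to FlintOptions.DEFAULT_EXTERNAL_SCHEDULER_INTERVAL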
assert(options + .externalSchedulerIntervalThreshold() === FlintOptions.DEFAULT_EXTERNAL_SCHEDULER_INTERVAL) + } + + test("externalSchedulerIntervalThreshold should return configured value when set") { + val configuredValue = "30" + val options = + FlintSparkConf(Map("spark.flint.job.externalScheduler.interval" -> configuredValue).asJava) + assert(options.externalSchedulerIntervalThreshold() === configuredValue) + } + /** * Delete index `indexNames` after calling `f`. */ diff --git a/flint-spark-integration/src/test/scala/org/opensearch/flint/spark/FlintSparkIndexBuilderSuite.scala b/flint-spark-integration/src/test/scala/org/opensearch/flint/spark/FlintSparkIndexBuilderSuite.scala index a2ec85df9..063c32074 100644 --- a/flint-spark-integration/src/test/scala/org/opensearch/flint/spark/FlintSparkIndexBuilderSuite.scala +++ b/flint-spark-integration/src/test/scala/org/opensearch/flint/spark/FlintSparkIndexBuilderSuite.scala @@ -7,13 +7,20 @@ package org.opensearch.flint.spark import org.opensearch.flint.spark.FlintSparkIndexOptions.OptionName.{CHECKPOINT_LOCATION, REFRESH_INTERVAL, SCHEDULER_MODE} import org.opensearch.flint.spark.refresh.FlintSparkIndexRefresh.SchedulerMode +import org.scalatest.Inspectors.forAll +import org.scalatest.matchers.should.Matchers import org.scalatest.matchers.should.Matchers.convertToAnyShouldWrapper import org.scalatest.matchers.should.Matchers.not.include +import org.scalatest.prop.TableDrivenPropertyChecks +import org.scalatest.wordspec.AnyWordSpec import org.apache.spark.FlintSuite import org.apache.spark.sql.flint.config.FlintSparkConf -class FlintSparkIndexBuilderSuite extends FlintSuite { +class FlintSparkIndexBuilderSuite + extends FlintSuite + with Matchers + with TableDrivenPropertyChecks { val indexName: String = "test_index" val testCheckpointLocation = "/test/checkpoints/" @@ -143,71 +150,148 @@ class FlintSparkIndexBuilderSuite extends FlintSuite { } } - test( - "updateOptionsWithDefaults should set internal scheduler mode when auto refresh is false") { - val options = FlintSparkIndexOptions(Map("auto_refresh" -> "false")) - val builder = new FakeFlintSparkIndexBuilder - - val updatedOptions = builder.options(options, indexName).testOptions - updatedOptions.options.get(SCHEDULER_MODE.toString) shouldBe None - } - - test( - "updateOptionsWithDefaults should set internal scheduler mode when external scheduler is disabled") { - setFlintSparkConf(FlintSparkConf.EXTERNAL_SCHEDULER_ENABLED, false) - val options = FlintSparkIndexOptions(Map("auto_refresh" -> "true")) - val builder = new FakeFlintSparkIndexBuilder - - val updatedOptions = builder.options(options, indexName).testOptions - updatedOptions.options(SCHEDULER_MODE.toString) shouldBe SchedulerMode.INTERNAL.toString - } - - test( - "updateOptionsWithDefaults should set external scheduler mode when interval is above threshold") { - setFlintSparkConf(FlintSparkConf.EXTERNAL_SCHEDULER_ENABLED, true) - setFlintSparkConf(FlintSparkConf.EXTERNAL_SCHEDULER_INTERVAL_THRESHOLD, "5 minutes") - val options = - FlintSparkIndexOptions(Map("auto_refresh" -> "true", "refresh_interval" -> "10 minutes")) - val builder = new FakeFlintSparkIndexBuilder - - val updatedOptions = builder.options(options, indexName).testOptions - updatedOptions.options(SCHEDULER_MODE.toString) shouldBe SchedulerMode.EXTERNAL.toString - } - - test( - "updateOptionsWithDefaults should set external scheduler mode and default interval when no interval is provided") { - setFlintSparkConf(FlintSparkConf.EXTERNAL_SCHEDULER_ENABLED, true) - 
setFlintSparkConf(FlintSparkConf.EXTERNAL_SCHEDULER_INTERVAL_THRESHOLD, "5 minutes") - val options = FlintSparkIndexOptions(Map("auto_refresh" -> "true")) - val builder = new FakeFlintSparkIndexBuilder - - val updatedOptions = builder.options(options, indexName).testOptions - updatedOptions.options(SCHEDULER_MODE.toString) shouldBe SchedulerMode.EXTERNAL.toString - updatedOptions.options(REFRESH_INTERVAL.toString) shouldBe "5 minutes" - } - - test("updateOptionsWithDefaults should set external scheduler mode when explicitly specified") { - setFlintSparkConf(FlintSparkConf.EXTERNAL_SCHEDULER_ENABLED, true) - val options = - FlintSparkIndexOptions(Map("auto_refresh" -> "true", "scheduler_mode" -> "external")) - val builder = new FakeFlintSparkIndexBuilder - - val updatedOptions = builder.options(options, indexName).testOptions - updatedOptions.options(SCHEDULER_MODE.toString) shouldBe SchedulerMode.EXTERNAL.toString - } - - test( - "updateOptionsWithDefaults should throw exception when external scheduler is disabled but mode is external") { - setFlintSparkConf(FlintSparkConf.EXTERNAL_SCHEDULER_ENABLED, false) - val options = - FlintSparkIndexOptions(Map("auto_refresh" -> "true", "scheduler_mode" -> "external")) - val builder = new FakeFlintSparkIndexBuilder - - val exception = intercept[IllegalArgumentException] { - builder.options(options, indexName) + test("updateOptionsWithDefaults scenarios") { + val scenarios = Table( + ( + "testName", + "externalSchedulerEnabled", + "thresholdInterval", + "inputOptions", + "expectedMode", + "expectedInterval", + "expectedException"), + ( + "set internal mode when auto refresh is false", + false, + "5 minutes", + Map("auto_refresh" -> "false"), + None, + None, + None), + ( + "set internal mode when external scheduler is disabled", + false, + "5 minutes", + Map("auto_refresh" -> "true"), + Some(SchedulerMode.INTERNAL.toString), + None, + None), + ( + "set external mode when interval is above threshold", + true, + "5 minutes", + Map("auto_refresh" -> "true", "refresh_interval" -> "10 minutes"), + Some(SchedulerMode.EXTERNAL.toString), + Some("10 minutes"), + None), + ( + "set external mode and default interval when no interval provided", + true, + "5 minutes", + Map("auto_refresh" -> "true"), + Some(SchedulerMode.EXTERNAL.toString), + Some("5 minutes"), + None), + ( + "set external mode when explicitly specified", + true, + "5 minutes", + Map("auto_refresh" -> "true", "scheduler_mode" -> "external"), + Some(SchedulerMode.EXTERNAL.toString), + None, + None), + ( + "throw exception when external scheduler disabled but mode is external", + false, + "5 minutes", + Map("auto_refresh" -> "true", "scheduler_mode" -> "external"), + None, + None, + Some( + "spark.flint.job.externalScheduler.enabled is false but refresh interval is set to external scheduler mode")), + ( + "set external mode when interval above threshold and no mode specified", + true, + "5 minutes", + Map("auto_refresh" -> "true", "refresh_interval" -> "10 minutes"), + Some(SchedulerMode.EXTERNAL.toString), + Some("10 minutes"), + None), + ( + "throw exception when interval below threshold but mode is external", + true, + "5 minutes", + Map( + "auto_refresh" -> "true", + "refresh_interval" -> "1 minute", + "scheduler_mode" -> "external"), + None, + None, + Some("Input refresh_interval is 1 minute, required above the interval threshold")), + ( + "set external mode when interval above threshold and mode specified", + true, + "5 minutes", + Map( + "auto_refresh" -> "true", + "refresh_interval" -> "10 
minute", + "scheduler_mode" -> "external"), + Some(SchedulerMode.EXTERNAL.toString), + None, + None), + ( + "set default interval when mode is external but no interval provided", + true, + "5 minutes", + Map("auto_refresh" -> "true", "scheduler_mode" -> "external"), + Some(SchedulerMode.EXTERNAL.toString), + Some("5 minutes"), + None), + ( + "set external mode when external scheduler enabled but no mode or interval specified", + true, + "5 minutes", + Map("auto_refresh" -> "true"), + Some(SchedulerMode.EXTERNAL.toString), + None, + None)) + + forAll(scenarios) { + ( + testName, + externalSchedulerEnabled, + thresholdInterval, + inputOptions, + expectedMode, + expectedInterval, + expectedException) => + withClue(s"Scenario: $testName - ") { + setFlintSparkConf(FlintSparkConf.EXTERNAL_SCHEDULER_ENABLED, externalSchedulerEnabled) + setFlintSparkConf( + FlintSparkConf.EXTERNAL_SCHEDULER_INTERVAL_THRESHOLD, + thresholdInterval) + + val options = FlintSparkIndexOptions(inputOptions) + val builder = new FakeFlintSparkIndexBuilder + + expectedException match { + case Some(exceptionMessage) => + val exception = intercept[IllegalArgumentException] { + builder.options(options, indexName) + } + exception.getMessage should include(exceptionMessage) + + case None => + val updatedOptions = builder.options(options, indexName).testOptions + expectedMode.foreach { mode => + updatedOptions.options(SCHEDULER_MODE.toString) shouldBe mode + } + expectedInterval.foreach { interval => + updatedOptions.options(REFRESH_INTERVAL.toString) shouldBe interval + } + } + } } - exception.getMessage should include( - "External scheduler mode is not enabled in the configuration") } override def afterEach(): Unit = { diff --git a/flint-spark-integration/src/test/scala/org/opensearch/flint/spark/scheduler/util/RefreshQueryGeneratorSuite.scala b/flint-spark-integration/src/test/scala/org/opensearch/flint/spark/scheduler/util/RefreshQueryGeneratorSuite.scala index 1cd83c38d..0d154e407 100644 --- a/flint-spark-integration/src/test/scala/org/opensearch/flint/spark/scheduler/util/RefreshQueryGeneratorSuite.scala +++ b/flint-spark-integration/src/test/scala/org/opensearch/flint/spark/scheduler/util/RefreshQueryGeneratorSuite.scala @@ -6,6 +6,7 @@ package org.opensearch.flint.spark.scheduler.util; import org.mockito.Mockito._ +import org.opensearch.flint.common.metadata.FlintMetadata import org.opensearch.flint.spark.FlintSparkIndex import org.opensearch.flint.spark.covering.FlintSparkCoveringIndex import org.opensearch.flint.spark.mv.FlintSparkMaterializedView @@ -16,33 +17,42 @@ import org.apache.spark.SparkFunSuite class RefreshQueryGeneratorTest extends SparkFunSuite with Matchers { + val testTable = "dummy.default.testTable" + val expectedTableName = "dummy.default.`testTable`" + + val mockMetadata = mock(classOf[FlintMetadata]) + test("generateRefreshQuery should return correct query for FlintSparkSkippingIndex") { val mockIndex = mock(classOf[FlintSparkSkippingIndex]) - when(mockIndex.tableName).thenReturn("testTable") + when(mockIndex.metadata()).thenReturn(mockMetadata) + when(mockIndex.tableName).thenReturn(testTable) val result = RefreshQueryGenerator.generateRefreshQuery(mockIndex) - result shouldBe "REFRESH SKIPPING INDEX ON testTable" + result shouldBe s"REFRESH SKIPPING INDEX ON ${expectedTableName}" } test("generateRefreshQuery should return correct query for FlintSparkCoveringIndex") { val mockIndex = mock(classOf[FlintSparkCoveringIndex]) when(mockIndex.indexName).thenReturn("testIndex") - 
when(mockIndex.tableName).thenReturn("testTable") + when(mockIndex.tableName).thenReturn(testTable) val result = RefreshQueryGenerator.generateRefreshQuery(mockIndex) - result shouldBe "REFRESH INDEX testIndex ON testTable" + result shouldBe s"REFRESH INDEX testIndex ON ${expectedTableName}" } test("generateRefreshQuery should return correct query for FlintSparkMaterializedView") { val mockIndex = mock(classOf[FlintSparkMaterializedView]) - when(mockIndex.mvName).thenReturn("testMV") + when(mockIndex.metadata()).thenReturn(mockMetadata) + when(mockIndex.mvName).thenReturn(testTable) val result = RefreshQueryGenerator.generateRefreshQuery(mockIndex) - result shouldBe "REFRESH MATERIALIZED VIEW testMV" + result shouldBe s"REFRESH MATERIALIZED VIEW ${expectedTableName}" } test("generateRefreshQuery should throw IllegalArgumentException for unsupported index type") { val mockIndex = mock(classOf[FlintSparkIndex]) + when(mockIndex.metadata()).thenReturn(mockMetadata) + when(mockIndex.metadata().source).thenReturn(testTable) val exception = intercept[IllegalArgumentException] { RefreshQueryGenerator.generateRefreshQuery(mockIndex) diff --git a/integ-test/src/integration/scala/org/apache/spark/sql/FlintJobITSuite.scala b/integ-test/src/integration/scala/org/apache/spark/sql/FlintJobITSuite.scala index 57277440e..11bc7271c 100644 --- a/integ-test/src/integration/scala/org/apache/spark/sql/FlintJobITSuite.scala +++ b/integ-test/src/integration/scala/org/apache/spark/sql/FlintJobITSuite.scala @@ -23,6 +23,7 @@ import org.scalatest.matchers.must.Matchers.{contain, defined} import org.scalatest.matchers.should.Matchers.convertToAnyShouldWrapper import org.apache.spark.sql.flint.FlintDataSourceV2.FLINT_DATASOURCE +import org.apache.spark.sql.flint.config.FlintSparkConf import org.apache.spark.sql.flint.config.FlintSparkConf._ import org.apache.spark.sql.streaming.StreamingQueryListener import org.apache.spark.sql.streaming.StreamingQueryListener._ @@ -209,6 +210,50 @@ class FlintJobITSuite extends FlintSparkSuite with JobTest { } } + test("create skipping index with invalid refresh interval") { + setFlintSparkConf(FlintSparkConf.EXTERNAL_SCHEDULER_ENABLED, "true") + + val query = + s""" + | CREATE SKIPPING INDEX ON $testTable + | ( + | year PARTITION, + | name VALUE_SET, + | age MIN_MAX + | ) + | WITH (auto_refresh = true, refresh_interval = '2 minutes', scheduler_mode = 'external') + | """.stripMargin + val queryStartTime = System.currentTimeMillis() + val jobRunId = "00ff4o3b5091080t" + threadLocalFuture.set(startJob(query, jobRunId)) + + val validation: REPLResult => Boolean = result => { + assert( + result.results.size == 0, + s"expected result size is 0, but got ${result.results.size}") + assert( + result.schemas.size == 0, + s"expected schema size is 0, but got ${result.schemas.size}") + + assert(result.status == "FAILED", s"expected status is FAILED, but got ${result.status}") + assert(!result.error.isEmpty, s"we expect error, but got ${result.error}") + + // Check for the specific error message + assert( + result.error.contains( + "Input refresh_interval is 2 minutes, required above the interval threshold of external scheduler: 5 minutes"), + s"Expected error message about invalid refresh interval, but got: ${result.error}") + + commonAssert(result, jobRunId, query, queryStartTime) + true + } + pollForResultAndAssert(validation, jobRunId) + + // Ensure no streaming job was started + assert(spark.streams.active.isEmpty, "No streaming job should have been started") + 
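// Clean up the external scheduler flag so it does not leak into subsequent tests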
conf.unsetConf(FlintSparkConf.EXTERNAL_SCHEDULER_ENABLED.key) + } + test("create skipping index with auto refresh and streaming job early exit") { // Custom listener to force streaming job to fail at the beginning val listener = new StreamingQueryListener { diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkCoveringIndexITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkCoveringIndexITSuite.scala index 54d3ba6dc..378131eb0 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkCoveringIndexITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkCoveringIndexITSuite.scala @@ -15,6 +15,7 @@ import org.opensearch.client.RequestOptions import org.opensearch.flint.common.FlintVersion.current import org.opensearch.flint.core.FlintOptions import org.opensearch.flint.core.storage.{FlintOpenSearchIndexMetadataService, OpenSearchClientUtils} +import org.opensearch.flint.spark.FlintSparkIndex.quotedTableName import org.opensearch.flint.spark.covering.FlintSparkCoveringIndex.getFlintIndexName import org.opensearch.flint.spark.scheduler.OpenSearchAsyncQueryScheduler import org.scalatest.matchers.must.Matchers.{contain, defined} @@ -194,7 +195,8 @@ class FlintSparkCoveringIndexITSuite extends FlintSparkSuite { val sourceMap = response.getSourceAsMap sourceMap.get("jobId") shouldBe testFlintIndex - sourceMap.get("scheduledQuery") shouldBe s"REFRESH INDEX $testIndex ON $testTable" + sourceMap + .get("scheduledQuery") shouldBe s"REFRESH INDEX $testIndex ON ${quotedTableName(testTable)}" sourceMap.get("enabled") shouldBe true sourceMap.get("queryLang") shouldBe "sql" diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkMaterializedViewITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkMaterializedViewITSuite.scala index c00e982e0..c2f0f9101 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkMaterializedViewITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkMaterializedViewITSuite.scala @@ -16,6 +16,7 @@ import org.opensearch.client.RequestOptions import org.opensearch.flint.common.FlintVersion.current import org.opensearch.flint.core.FlintOptions import org.opensearch.flint.core.storage.{FlintOpenSearchIndexMetadataService, OpenSearchClientUtils} +import org.opensearch.flint.spark.FlintSparkIndex.quotedTableName import org.opensearch.flint.spark.FlintSparkIndexOptions.OptionName.CHECKPOINT_LOCATION import org.opensearch.flint.spark.mv.FlintSparkMaterializedView.getFlintIndexName import org.opensearch.flint.spark.scheduler.OpenSearchAsyncQueryScheduler @@ -365,7 +366,8 @@ class FlintSparkMaterializedViewITSuite extends FlintSparkSuite { val sourceMap = response.getSourceAsMap sourceMap.get("jobId") shouldBe testFlintIndex - sourceMap.get("scheduledQuery") shouldBe s"REFRESH MATERIALIZED VIEW $testMvName" + sourceMap + .get("scheduledQuery") shouldBe s"REFRESH MATERIALIZED VIEW ${quotedTableName(testMvName)}" sourceMap.get("enabled") shouldBe true sourceMap.get("queryLang") shouldBe "sql" diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkMaterializedViewSqlITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkMaterializedViewSqlITSuite.scala index f569bf123..9e75078d2 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkMaterializedViewSqlITSuite.scala +++ 
b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkMaterializedViewSqlITSuite.scala @@ -47,6 +47,8 @@ class FlintSparkMaterializedViewSqlITSuite extends FlintSparkSuite { super.afterEach() deleteTestIndex(testFlintIndex) sql(s"DROP TABLE $testTable") + conf.unsetConf(FlintSparkConf.CUSTOM_FLINT_SCHEDULER_CLASS.key) + conf.unsetConf(FlintSparkConf.EXTERNAL_SCHEDULER_ENABLED.key) } test("create materialized view with auto refresh") { @@ -119,8 +121,6 @@ class FlintSparkMaterializedViewSqlITSuite extends FlintSparkSuite { // Drop index with test scheduler sql(s"DROP MATERIALIZED VIEW $testMvName") - conf.unsetConf(FlintSparkConf.CUSTOM_FLINT_SCHEDULER_CLASS.key) - conf.unsetConf(FlintSparkConf.EXTERNAL_SCHEDULER_ENABLED.key) } } diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkSkippingIndexITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkSkippingIndexITSuite.scala index b535173e3..a2a7c9799 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkSkippingIndexITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkSkippingIndexITSuite.scala @@ -16,7 +16,7 @@ import org.opensearch.client.RequestOptions import org.opensearch.flint.common.FlintVersion.current import org.opensearch.flint.core.FlintOptions import org.opensearch.flint.core.storage.{FlintOpenSearchIndexMetadataService, OpenSearchClientUtils} -import org.opensearch.flint.spark.FlintSparkIndex.ID_COLUMN +import org.opensearch.flint.spark.FlintSparkIndex.{quotedTableName, ID_COLUMN} import org.opensearch.flint.spark.scheduler.OpenSearchAsyncQueryScheduler import org.opensearch.flint.spark.skipping.FlintSparkSkippingFileIndex import org.opensearch.flint.spark.skipping.FlintSparkSkippingIndex.getSkippingIndexName @@ -338,7 +338,8 @@ class FlintSparkSkippingIndexITSuite extends FlintSparkSuite { val sourceMap = response.getSourceAsMap sourceMap.get("jobId") shouldBe testIndex - sourceMap.get("scheduledQuery") shouldBe s"REFRESH SKIPPING INDEX ON $testTable" + sourceMap + .get("scheduledQuery") shouldBe s"REFRESH SKIPPING INDEX ON ${quotedTableName(testTable)}" sourceMap.get("enabled") shouldBe true sourceMap.get("queryLang") shouldBe "sql" diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkUpdateIndexITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkUpdateIndexITSuite.scala index 7bbf24567..53889045f 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkUpdateIndexITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkUpdateIndexITSuite.scala @@ -15,6 +15,8 @@ import org.opensearch.index.reindex.DeleteByQueryRequest import org.scalatest.matchers.must.Matchers._ import org.scalatest.matchers.should.Matchers.convertToAnyShouldWrapper +import org.apache.spark.sql.flint.config.FlintSparkConf + class FlintSparkUpdateIndexITSuite extends FlintSparkSuite { /** Test table and index name */ @@ -32,6 +34,7 @@ class FlintSparkUpdateIndexITSuite extends FlintSparkSuite { // Delete all test indices deleteTestIndex(testIndex) sql(s"DROP TABLE $testTable") + conf.unsetConf(FlintSparkConf.EXTERNAL_SCHEDULER_ENABLED.key) } test("update index with index options successfully") { @@ -177,6 +180,121 @@ class FlintSparkUpdateIndexITSuite extends FlintSparkSuite { } } + // Test update options validation failure with external scheduler + Seq( + ( + "update index without changing 
index option", + Seq( + ( + Map("auto_refresh" -> "true", "checkpoint_location" -> "s3a://test/"), + Map("auto_refresh" -> "true")), + ( + Map("auto_refresh" -> "true", "checkpoint_location" -> "s3a://test/"), + Map("checkpoint_location" -> "s3a://test/")), + ( + Map("auto_refresh" -> "true", "checkpoint_location" -> "s3a://test/"), + Map("auto_refresh" -> "true", "checkpoint_location" -> "s3a://test/"))), + "No index option updated"), + ( + "update index option when auto_refresh is false", + Seq( + ( + Map.empty[String, String], + Map("auto_refresh" -> "false", "checkpoint_location" -> "s3a://test/")), + ( + Map.empty[String, String], + Map("incremental_refresh" -> "true", "checkpoint_location" -> "s3a://test/")), + (Map.empty[String, String], Map("checkpoint_location" -> "s3a://test/"))), + "No options can be updated when auto_refresh remains false"), + ( + "update other index option besides scheduler_mode when auto_refresh is true", + Seq( + ( + Map("auto_refresh" -> "true", "checkpoint_location" -> "s3a://test/"), + Map("watermark_delay" -> "1 Minute"))), + "Altering index when auto_refresh remains true only allows changing: Set(scheduler_mode). Invalid options"), + ( + "convert to full refresh with disallowed options", + Seq( + ( + Map("auto_refresh" -> "true", "checkpoint_location" -> "s3a://test/"), + Map("auto_refresh" -> "false", "scheduler_mode" -> "internal")), + ( + Map("auto_refresh" -> "true", "checkpoint_location" -> "s3a://test/"), + Map("auto_refresh" -> "false", "refresh_interval" -> "5 Minute")), + ( + Map("auto_refresh" -> "true", "checkpoint_location" -> "s3a://test/"), + Map("auto_refresh" -> "false", "watermark_delay" -> "1 Minute"))), + "Altering index to full/incremental refresh only allows changing"), + ( + "convert to auto refresh with disallowed options", + Seq( + ( + Map.empty[String, String], + Map( + "auto_refresh" -> "true", + "output_mode" -> "complete", + "checkpoint_location" -> "s3a://test/"))), + "Altering index to auto refresh only allows changing: Set(auto_refresh, watermark_delay, scheduler_mode, " + + "refresh_interval, incremental_refresh, checkpoint_location). 
Invalid options: Set(output_mode)"), + ( + "convert to invalid refresh mode", + Seq( + ( + Map.empty[String, String], + Map( + "auto_refresh" -> "true", + "incremental_refresh" -> "true", + "checkpoint_location" -> "s3a://test/"))), + "Altering index to auto refresh while incremental refresh remains true")) + .foreach { case (testName, testCases, expectedErrorMessage) => + test(s"should fail if $testName and external scheduler enabled") { + setFlintSparkConf(FlintSparkConf.EXTERNAL_SCHEDULER_ENABLED, "true") + testCases.foreach { case (initialOptionsMap, updateOptionsMap) => + logInfo(s"initialOptionsMap: ${initialOptionsMap}") + logInfo(s"updateOptionsMap: ${updateOptionsMap}") + + withTempDir { checkpointDir => + flint + .skippingIndex() + .onTable(testTable) + .addPartitions("year", "month") + .options( + FlintSparkIndexOptions( + initialOptionsMap + .get("checkpoint_location") + .map(_ => + initialOptionsMap + .updated("checkpoint_location", checkpointDir.getAbsolutePath)) + .getOrElse(initialOptionsMap)), + testIndex) + .create() + flint.refreshIndex(testIndex) + + val index = flint.describeIndex(testIndex).get + val exception = the[IllegalArgumentException] thrownBy { + val updatedIndex = flint + .skippingIndex() + .copyWithUpdate( + index, + FlintSparkIndexOptions( + updateOptionsMap + .get("checkpoint_location") + .map(_ => + updateOptionsMap + .updated("checkpoint_location", checkpointDir.getAbsolutePath)) + .getOrElse(updateOptionsMap))) + flint.updateIndex(updatedIndex) + } + + exception.getMessage should include(expectedErrorMessage) + + deleteTestIndex(testIndex) + } + } + } + } + // Test update options validation success Seq( ( From 0b106f184d8ea8017def8084a7c6ca88b36ba772 Mon Sep 17 00:00:00 2001 From: Louis Chu Date: Mon, 14 Oct 2024 13:12:20 -0700 Subject: [PATCH 5/5] Add auth-crt as runtime dependency (#778) Signed-off-by: Louis Chu --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 0a7b8fcb0..f7653c50c 100644 --- a/build.sbt +++ b/build.sbt @@ -88,7 +88,7 @@ lazy val flintCore = (project in file("flint-core")) exclude ("com.fasterxml.jackson.core", "jackson-databind"), "com.amazonaws" % "aws-java-sdk-cloudwatch" % "1.12.593" exclude("com.fasterxml.jackson.core", "jackson-databind"), - "software.amazon.awssdk" % "auth-crt" % "2.28.10" % "provided", + "software.amazon.awssdk" % "auth-crt" % "2.28.10", "org.scalactic" %% "scalactic" % "3.2.15" % "test", "org.scalatest" %% "scalatest" % "3.2.15" % "test", "org.scalatest" %% "scalatest-flatspec" % "3.2.15" % "test",