diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index b1902aab5f01..d7255d841afb 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -12,7 +12,7 @@ _If medium or high, explain what verification was done to mitigate the risks._ ### Documentation Update -_Describe any necessary documentation update if there is any new feature, config, or user-facing change_ +_Describe any necessary documentation update if there is any new feature, config, or user-facing change. If not, put "none"._ - _The config description must be updated if new configs are added or the default value of the configs are changed_ - _Any new feature or user-facing change requires updating the Hudi website. Please create a Jira ticket, attach the diff --git a/.github/workflows/azure_ci.js b/.github/workflows/azure_ci.js new file mode 100644 index 000000000000..737b8db9917d --- /dev/null +++ b/.github/workflows/azure_ci.js @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +async function checkAzureCiAndCreateCommitStatus({ github, context, prNumber, latestCommitHash }) { + console.log(`- Checking Azure CI status of PR: ${prNumber} ${latestCommitHash}`); + const botUsername = 'hudi-bot'; + + const comments = await github.paginate(github.rest.issues.listComments, { + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + sort: 'updated', + direction: 'desc', + per_page: 100 + }); + + // Find the latest comment from hudi-bot containing the Azure CI report + const botComments = comments.filter(comment => comment.user.login === botUsername); + + let status = 'pending'; + let message = 'In progress'; + let azureRunLink = ''; + + if (botComments.length > 0) { + const lastComment = botComments[0]; + const reportPrefix = `${latestCommitHash} Azure: ` + const successReportString = `${reportPrefix}[SUCCESS]` + const failureReportString = `${reportPrefix}[FAILURE]` + + if (lastComment.body.includes(reportPrefix)) { + if (lastComment.body.includes(successReportString)) { + message = 'Successful on the latest commit'; + status = 'success'; + } else if (lastComment.body.includes(failureReportString)) { + message = 'Failed on the latest commit'; + status = 'failure'; + } + } + + const linkRegex = /\[[a-zA-Z]+\]\((https?:\/\/[^\s]+)\)/; + const parts = lastComment.body.split(reportPrefix); + const secondPart = parts.length > 1 ? 
parts[1] : ''; + const match = secondPart.match(linkRegex); + + if (match) { + azureRunLink = match[1]; + } + } + + console.log(`Status: ${status}`); + console.log(`Azure Run Link: ${azureRunLink}`); + console.log(`${message}`); + + console.log(`- Create commit status of PR based on Azure CI status: ${prNumber} ${latestCommitHash}`); + // Create or update the commit status for Azure CI + await github.rest.repos.createCommitStatus({ + owner: context.repo.owner, + repo: context.repo.repo, + sha: latestCommitHash, + state: status, + target_url: azureRunLink, + description: message, + context: 'Azure CI' + }); + + return { status, message, azureRunLink }; +} + +module.exports = checkAzureCiAndCreateCommitStatus; diff --git a/.github/workflows/azure_ci_check.yml b/.github/workflows/azure_ci_check.yml new file mode 100644 index 000000000000..1e33e6b8fa50 --- /dev/null +++ b/.github/workflows/azure_ci_check.yml @@ -0,0 +1,80 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Azure CI + +on: + issue_comment: + types: [ created, edited, deleted ] + +permissions: + statuses: write + pull-requests: read + issues: read + +jobs: + check-azure-ci-and-add-commit-status: + if: | + github.event.issue.pull_request != null && + github.event.comment.user.login == 'hudi-bot' + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Check PR state + id: check_pr_state + uses: actions/github-script@v7 + with: + github-token: ${{secrets.GITHUB_TOKEN}} + script: | + const issueNumber = context.issue.number; + const { data: pullRequest } = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: issueNumber + }); + + // Only check open PRs and a PR that is not a HOTFIX + const shouldSkip = (pullRequest.body.includes('HOTFIX: SKIP AZURE CI') + || pullRequest.state != 'open'); + + if (!shouldSkip) { + const commitHash = pullRequest.head.sha; + console.log(`Latest commit hash: ${commitHash}`); + // Set the output variable to be used in subsequent step + core.setOutput("latest_commit_hash", commitHash); + } + console.log(`Should skip Azure CI? 
${shouldSkip}`); + return shouldSkip; + + - name: Check Azure CI report and create commit status to PR + if: steps.check_pr_state.outputs.result != 'true' + uses: actions/github-script@v7 + with: + github-token: ${{secrets.GITHUB_TOKEN}} + script: | + const latestCommitHash = '${{ steps.check_pr_state.outputs.latest_commit_hash }}' + const issueNumber = context.issue.number; + const checkAzureCiAndCreateCommitStatus = require(`${process.env.GITHUB_WORKSPACE}/.github/workflows/azure_ci.js`); + + await checkAzureCiAndCreateCommitStatus({ + github, + context, + prNumber: issueNumber, + latestCommitHash: latestCommitHash + }); diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index a31c2e3ea35c..123660b119e3 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -5,6 +5,7 @@ on: branches: - master - 'release-*' + - branch-0.x pull_request: paths-ignore: - '**.bmp' @@ -20,10 +21,11 @@ on: branches: - master - 'release-*' + - branch-0.x concurrency: group: ${{ github.ref }} - cancel-in-progress: ${{ !contains(github.ref, 'master') }} + cancel-in-progress: ${{ !contains(github.ref, 'master') && !contains(github.ref, 'branch-0.x') }} env: MVN_ARGS: -e -ntp -B -V -Dgpg.skip -Djacoco.skip -Pwarn-log -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=warn -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn -Dmaven.wagon.httpconnectionManager.ttlSeconds=25 -Dmaven.wagon.http.retryHandler.count=5 @@ -38,7 +40,7 @@ jobs: uses: actions/setup-java@v3 with: java-version: '8' - distribution: 'adopt' + distribution: 'temurin' architecture: x64 cache: maven - name: Check Binary Files @@ -51,7 +53,7 @@ jobs: - name: RAT check run: ./scripts/release/validate_source_rat.sh - test-spark: + test-spark-java-tests: runs-on: ubuntu-latest strategy: matrix: @@ -90,7 +92,7 @@ jobs: uses: actions/setup-java@v3 with: java-version: '8' - distribution: 'adopt' + distribution: 'temurin' architecture: x64 cache: maven - name: Build Project @@ -105,22 +107,87 @@ jobs: SPARK_PROFILE: ${{ matrix.sparkProfile }} run: mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS - - name: UT - Common & Spark + - name: Java UT - Common & Spark env: SCALA_PROFILE: ${{ matrix.scalaProfile }} SPARK_PROFILE: ${{ matrix.sparkProfile }} SPARK_MODULES: ${{ matrix.sparkModules }} if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI run: - mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS - - name: FT - Spark + mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DwildcardSuites=skipScalaTests -DfailIfNoTests=false -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS + - name: Java FT - Spark env: SCALA_PROFILE: ${{ matrix.scalaProfile }} SPARK_PROFILE: ${{ matrix.sparkProfile }} SPARK_MODULES: ${{ matrix.sparkModules }} if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI run: - mvn test -Pfunctional-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS + mvn test -Pfunctional-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DwildcardSuites=skipScalaTests -DfailIfNoTests=false -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS + + test-spark-scala-tests: + runs-on: ubuntu-latest + strategy: + matrix: + include: + - scalaProfile: "scala-2.11" + sparkProfile: "spark2.4" + sparkModules: "hudi-spark-datasource/hudi-spark2" + + - 
scalaProfile: "scala-2.12" + sparkProfile: "spark3.0" + sparkModules: "hudi-spark-datasource/hudi-spark3.0.x" + + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.1" + sparkModules: "hudi-spark-datasource/hudi-spark3.1.x" + + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.2" + sparkModules: "hudi-spark-datasource/hudi-spark3.2.x" + + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.3" + sparkModules: "hudi-spark-datasource/hudi-spark3.3.x" + + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.4" + sparkModules: "hudi-spark-datasource/hudi-spark3.4.x" + + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.5" + sparkModules: "hudi-spark-datasource/hudi-spark3.5.x" + + steps: + - uses: actions/checkout@v3 + - name: Set up JDK 8 + uses: actions/setup-java@v3 + with: + java-version: '8' + distribution: 'temurin' + architecture: x64 + cache: maven + - name: Build Project + env: + SCALA_PROFILE: ${{ matrix.scalaProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + run: + mvn clean install -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS -am -pl "hudi-examples/hudi-examples-spark,$SPARK_COMMON_MODULES,$SPARK_MODULES" + - name: Scala UT - Common & Spark + env: + SCALA_PROFILE: ${{ matrix.scalaProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + SPARK_MODULES: ${{ matrix.sparkModules }} + if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI + run: + mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -Dtest=skipJavaTests -DfailIfNoTests=false -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS + - name: Scala FT - Spark + env: + SCALA_PROFILE: ${{ matrix.scalaProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + SPARK_MODULES: ${{ matrix.sparkModules }} + if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI + run: + mvn test -Pfunctional-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -Dtest=skipJavaTests -DfailIfNoTests=false -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS test-hudi-hadoop-mr-and-hudi-java-client: runs-on: ubuntu-latest @@ -138,7 +205,7 @@ jobs: uses: actions/setup-java@v3 with: java-version: '8' - distribution: 'adopt' + distribution: 'temurin' architecture: x64 cache: maven - name: Generate Maven Wrapper @@ -159,7 +226,7 @@ jobs: run: ./mvnw test -Punit-tests -fae -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"FLINK_PROFILE" -pl hudi-hadoop-mr,hudi-client/hudi-java-client $MVN_ARGS - test-spark-java17: + test-spark-java17-java-tests: runs-on: ubuntu-latest strategy: matrix: @@ -180,7 +247,7 @@ jobs: uses: actions/setup-java@v3 with: java-version: '8' - distribution: 'adopt' + distribution: 'temurin' architecture: x64 cache: maven - name: Build Project @@ -193,7 +260,7 @@ jobs: uses: actions/setup-java@v3 with: java-version: '17' - distribution: 'adopt' + distribution: 'temurin' architecture: x64 cache: maven - name: Quickstart Test @@ -201,16 +268,16 @@ jobs: SCALA_PROFILE: ${{ matrix.scalaProfile }} SPARK_PROFILE: ${{ matrix.sparkProfile }} run: - mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS - - name: UT - Common & Spark + mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DwildcardSuites=skipScalaTests -DfailIfNoTests=false -pl hudi-examples/hudi-examples-spark $MVN_ARGS + - name: Java UT - Common & Spark env: SCALA_PROFILE: ${{ matrix.scalaProfile }} SPARK_PROFILE: ${{ matrix.sparkProfile }} SPARK_MODULES: ${{ matrix.sparkModules }} if: ${{ 
!endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI run: - mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS - - name: FT - Spark + mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DwildcardSuites=skipScalaTests -DfailIfNoTests=false -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS + - name: Java FT - Spark env: SCALA_PROFILE: ${{ matrix.scalaProfile }} SPARK_PROFILE: ${{ matrix.sparkProfile }} @@ -219,6 +286,60 @@ jobs: run: mvn test -Pfunctional-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS + test-spark-java17-scala-tests: + runs-on: ubuntu-latest + strategy: + matrix: + include: + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.3" + sparkModules: "hudi-spark-datasource/hudi-spark3.3.x" + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.4" + sparkModules: "hudi-spark-datasource/hudi-spark3.4.x" + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.5" + sparkModules: "hudi-spark-datasource/hudi-spark3.5.x" + + steps: + - uses: actions/checkout@v3 + - name: Set up JDK 8 + uses: actions/setup-java@v3 + with: + java-version: '8' + distribution: 'temurin' + architecture: x64 + cache: maven + - name: Build Project + env: + SCALA_PROFILE: ${{ matrix.scalaProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + run: + mvn clean install -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS -am -pl "hudi-examples/hudi-examples-spark,hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: 'temurin' + architecture: x64 + cache: maven + - name: Scala UT - Common & Spark + env: + SCALA_PROFILE: ${{ matrix.scalaProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + SPARK_MODULES: ${{ matrix.sparkModules }} + if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI + run: + mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -Dtest=skipJavaTests -DfailIfNoTests=false -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS + - name: Scala FT - Spark + env: + SCALA_PROFILE: ${{ matrix.scalaProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + SPARK_MODULES: ${{ matrix.sparkModules }} + if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI + run: + mvn test -Pfunctional-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -Dtest=skipJavaTests -DfailIfNoTests=false -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS + test-flink: runs-on: ubuntu-latest strategy: @@ -235,7 +356,7 @@ jobs: uses: actions/setup-java@v3 with: java-version: '8' - distribution: 'adopt' + distribution: 'temurin' architecture: x64 cache: maven - name: Build Project @@ -277,7 +398,7 @@ jobs: uses: actions/setup-java@v3 with: java-version: '8' - distribution: 'adopt' + distribution: 'temurin' architecture: x64 cache: maven - name: UT/FT - Docker Test - OpenJDK 17 @@ -326,7 +447,7 @@ jobs: uses: actions/setup-java@v3 with: java-version: '8' - distribution: 'adopt' + distribution: 'temurin' architecture: x64 cache: maven - name: Build Project @@ -368,78 +489,6 @@ jobs: HUDI_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION openjdk17 - validate-release-candidate-bundles: - if: false - runs-on: ubuntu-latest - env: - 
HUDI_VERSION: 0.13.1-rcx - STAGING_REPO_NUM: 1123 - strategy: - matrix: - include: - - flinkProfile: 'flink1.18' - sparkProfile: 'spark3' - sparkRuntime: 'spark3.5.0' - - flinkProfile: 'flink1.18' - sparkProfile: 'spark3.5' - sparkRuntime: 'spark3.5.0' - - flinkProfile: 'flink1.18' - sparkProfile: 'spark3.4' - sparkRuntime: 'spark3.4.0' - - flinkProfile: 'flink1.17' - sparkProfile: 'spark3.3' - sparkRuntime: 'spark3.3.2' - - flinkProfile: 'flink1.16' - sparkProfile: 'spark3.3' - sparkRuntime: 'spark3.3.1' - - flinkProfile: 'flink1.15' - sparkProfile: 'spark3.2' - sparkRuntime: 'spark3.2.3' - - flinkProfile: 'flink1.14' - sparkProfile: 'spark3.1' - sparkRuntime: 'spark3.1.3' - - flinkProfile: 'flink1.14' - sparkProfile: 'spark3.0' - sparkRuntime: 'spark3.0.2' - - flinkProfile: 'flink1.14' - sparkProfile: 'spark' - sparkRuntime: 'spark2.4.8' - - flinkProfile: 'flink1.14' - sparkProfile: 'spark2.4' - sparkRuntime: 'spark2.4.8' - steps: - - uses: actions/checkout@v3 - - name: Set up JDK 8 - uses: actions/setup-java@v3 - with: - java-version: '8' - distribution: 'adopt' - architecture: x64 - cache: maven - - name: IT - Bundle Validation - OpenJDK 8 - env: - FLINK_PROFILE: ${{ matrix.flinkProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_RUNTIME: ${{ matrix.sparkRuntime }} - run: | - ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION openjdk8 $STAGING_REPO_NUM - - name: IT - Bundle Validation - OpenJDK 11 - env: - FLINK_PROFILE: ${{ matrix.flinkProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_RUNTIME: ${{ matrix.sparkRuntime }} - if: ${{ startsWith(env.SPARK_PROFILE, 'spark3') }} # Only Spark 3.x supports Java 11 as of now - run: | - ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION openjdk11 $STAGING_REPO_NUM - - name: IT - Bundle Validation - OpenJDK 17 - env: - FLINK_PROFILE: ${{ matrix.flinkProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_RUNTIME: ${{ matrix.sparkRuntime }} - if: ${{ endsWith(env.SPARK_PROFILE, '3.3') }} # Only Spark 3.3 supports Java 17 as of now - run: | - ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION openjdk17 $STAGING_REPO_NUM - integration-tests: runs-on: ubuntu-latest strategy: @@ -453,7 +502,7 @@ jobs: uses: actions/setup-java@v3 with: java-version: '8' - distribution: 'adopt' + distribution: 'temurin' architecture: x64 cache: maven - name: Build Project diff --git a/.github/workflows/labeler.js b/.github/workflows/labeler.js new file mode 100644 index 000000000000..77cd48337fb1 --- /dev/null +++ b/.github/workflows/labeler.js @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +async function labelDocsPr({ github, context, prNumber }) { + await github.rest.issues.addLabels({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + labels: ['docs'] + }); + + console.log(`- Labeled Docs PR: ${prNumber}`); +} + +async function labelPrWithSize({ github, context, prNumber, prData }) { + console.log(`Label PR based on size: ${prNumber} ${prData.html_url}`); + const additions = prData.additions; + const deletions = prData.deletions; + const totalChanges = additions + deletions; + + let newSizeLabel = ""; + + if (totalChanges <= 10) { + // size:XS : <= 10 LoC + newSizeLabel = "size:XS"; + } else if (totalChanges <= 100) { + // size:S : (10, 100] LoC + newSizeLabel = "size:S"; + } else if (totalChanges <= 300) { + // size:M : (100, 300] LoC + newSizeLabel = "size:M"; + } else if (totalChanges <= 1000) { + // size:L : (300, 1000] LoC + newSizeLabel = "size:L"; + } else { + // size:XL : > 1000 LoC + newSizeLabel = "size:XL"; + } + + // Check existing size label + const { data: labels } = await github.rest.issues.listLabelsOnIssue({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber + }); + + const existingSizeLabels = labels.filter(label => label.name.startsWith("size:") && label.name !== newSizeLabel); + const newSizeLabelInExisting = labels.filter(label => label.name === newSizeLabel); + + // Remove stale labels that do not match the new one + for (const label of existingSizeLabels) { + await github.rest.issues.removeLabel({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + name: label.name, + }); + console.log(`Removed stale size label: ${label.name}`); + } + + console.log(`Total lines of changes: ${totalChanges}`); + + // Add the new size label if needed + if (newSizeLabelInExisting.length > 0) { + console.log(`Accurate size Label already exists: ${newSizeLabel}`); + } else { + // Add the new label + await github.rest.issues.addLabels({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + labels: [newSizeLabel] + }); + console.log(`Added size Label: ${newSizeLabel}`); + } +} + +module.exports = { + labelDocsPr, + labelPrWithSize +}; diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml deleted file mode 100644 index d0b809c29587..000000000000 --- a/.github/workflows/labeler.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Label PR - -on: [ pull_request ] - -jobs: - labeler: - runs-on: ubuntu-latest - name: Label the PR size - steps: - - uses: codelytv/pr-size-labeler@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - xs_label: 'size-xs' - xs_max_size: '10' - s_label: 'size-s' - s_max_size: '100' - m_label: 'size-m' - m_max_size: '500' - l_label: 'size-l' - l_max_size: '1000' - xl_label: 'size-xl' - fail_if_xl: 'false' - github_api_url: 'api.github.com' - files_to_ignore: '' \ No newline at end of file diff --git a/.github/workflows/pr_compliance.yml b/.github/workflows/pr_compliance.yml index 3f58ceafcf3d..104a933db7d0 100644 --- a/.github/workflows/pr_compliance.yml +++ b/.github/workflows/pr_compliance.yml @@ -4,6 +4,7 @@ on: types: [opened, edited, reopened, synchronize] branches: - master + - branch-0.x jobs: validate-pr: diff --git a/.github/workflows/release_candidate_validation.yml b/.github/workflows/release_candidate_validation.yml new file mode 100644 index 000000000000..02a598888ea1 --- /dev/null +++ b/.github/workflows/release_candidate_validation.yml @@ -0,0 +1,100 @@ +name: Release Candidate Validation + +on: 
+ push: + branches: + - 'release-*' + pull_request: + paths-ignore: + - '**.bmp' + - '**.gif' + - '**.jpg' + - '**.jpeg' + - '**.md' + - '**.pdf' + - '**.png' + - '**.svg' + - '**.yaml' + - '.gitignore' + branches: + - 'release-*' + +concurrency: + group: ${{ github.ref }} + cancel-in-progress: ${{ !contains(github.ref, 'master') }} + +env: + MVN_ARGS: -e -ntp -B -V -Dgpg.skip -Djacoco.skip -Pwarn-log -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=warn -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn -Dmaven.wagon.httpconnectionManager.ttlSeconds=25 -Dmaven.wagon.http.retryHandler.count=5 + SPARK_COMMON_MODULES: hudi-spark-datasource/hudi-spark,hudi-spark-datasource/hudi-spark-common + +jobs: + validate-release-candidate-bundles: + runs-on: ubuntu-latest + env: + HUDI_VERSION: 0.14.1 + STAGING_REPO_NUM: 1123 + strategy: + matrix: + include: + - flinkProfile: 'flink1.18' + sparkProfile: 'spark3' + sparkRuntime: 'spark3.5.0' + - flinkProfile: 'flink1.18' + sparkProfile: 'spark3.5' + sparkRuntime: 'spark3.5.0' + - flinkProfile: 'flink1.18' + sparkProfile: 'spark3.4' + sparkRuntime: 'spark3.4.0' + - flinkProfile: 'flink1.17' + sparkProfile: 'spark3.3' + sparkRuntime: 'spark3.3.2' + - flinkProfile: 'flink1.16' + sparkProfile: 'spark3.3' + sparkRuntime: 'spark3.3.1' + - flinkProfile: 'flink1.15' + sparkProfile: 'spark3.2' + sparkRuntime: 'spark3.2.3' + - flinkProfile: 'flink1.14' + sparkProfile: 'spark3.1' + sparkRuntime: 'spark3.1.3' + - flinkProfile: 'flink1.14' + sparkProfile: 'spark3.0' + sparkRuntime: 'spark3.0.2' + - flinkProfile: 'flink1.14' + sparkProfile: 'spark' + sparkRuntime: 'spark2.4.8' + - flinkProfile: 'flink1.14' + sparkProfile: 'spark2.4' + sparkRuntime: 'spark2.4.8' + steps: + - uses: actions/checkout@v3 + - name: Set up JDK 8 + uses: actions/setup-java@v3 + with: + java-version: '8' + distribution: 'temurin' + architecture: x64 + cache: maven + - name: IT - Bundle Validation - OpenJDK 8 + env: + FLINK_PROFILE: ${{ matrix.flinkProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + SPARK_RUNTIME: ${{ matrix.sparkRuntime }} + run: | + ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION openjdk8 $STAGING_REPO_NUM + - name: IT - Bundle Validation - OpenJDK 11 + env: + FLINK_PROFILE: ${{ matrix.flinkProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + SPARK_RUNTIME: ${{ matrix.sparkRuntime }} + if: ${{ startsWith(env.SPARK_PROFILE, 'spark3') }} # Only Spark 3.x supports Java 11 as of now + run: | + ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION openjdk11 $STAGING_REPO_NUM + - name: IT - Bundle Validation - OpenJDK 17 + env: + FLINK_PROFILE: ${{ matrix.flinkProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + SPARK_RUNTIME: ${{ matrix.sparkRuntime }} + if: ${{ endsWith(env.SPARK_PROFILE, '3.3') }} # Only Spark 3.3 supports Java 17 as of now + run: | + ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION openjdk17 $STAGING_REPO_NUM diff --git a/.github/workflows/scheduled_workflow.yml b/.github/workflows/scheduled_workflow.yml new file mode 100644 index 000000000000..e6992d6b3838 --- /dev/null +++ b/.github/workflows/scheduled_workflow.yml @@ -0,0 +1,105 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Scheduled Workflow + +on: + schedule: + # Runs every 5 minutes + - cron: '*/5 * * * *' + +permissions: + statuses: write + pull-requests: write + issues: read + +jobs: + process-new-and-updated-prs: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Process new and updated PRs + # We have to run any actions that require write permissions here + # since the workflow triggered by events from a PR in a fork + # (not apache/hudi but other_owner/hudi) does not run on a + # GITHUB_TOKEN with write permissions (this is prohibited by + # Apache). + uses: actions/github-script@v7 + with: + github-token: ${{secrets.GITHUB_TOKEN}} + script: | + // Cron schedule may not be reliable so giving buffer time to avoid missing recent PRs + const since = new Date(new Date().getTime() - (900 * 1000)).toISOString(); + const query = `repo:${context.repo.owner}/${context.repo.repo} type:pr state:open updated:>=${since}`; + const openPrs = await github.paginate(github.rest.search.issuesAndPullRequests, { + q: query, + sort: 'updated', + order: 'desc', + per_page: 100 + }); + + const { labelDocsPr, labelPrWithSize } = require(`${process.env.GITHUB_WORKSPACE}/.github/workflows/labeler.js`); + const checkAzureCiAndCreateCommitStatus = require(`${process.env.GITHUB_WORKSPACE}/.github/workflows/azure_ci.js`); + + console.log(`Number of PRs to process: ${openPrs.length}`); + + for (const pr of openPrs) { + console.log(`*** Processing PR: ${pr.title}, URL: ${pr.html_url}`); + + const { data: pullRequest } = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: pr.number + }); + + const targetBase = pullRequest.base.ref; + console.log(`Target base branch: ${targetBase}`); + + // Label docs PR (targeting "asf-site" branch) + if (targetBase === 'asf-site') { + await labelDocsPr({ + github, + context, + prNumber: pr.number + }); + } + + // Label PR size + await labelPrWithSize({ + github, + context, + prNumber: pr.number, + prData: pullRequest + }); + + // Check Azure CI and create commit status (targeting "master", "release*", or "branch-0.x" branch) + const targetBaseRegex = /^(master|release.*|branch-0\.x)$/; + if (targetBaseRegex.test(targetBase) + && !pr.body.includes('HOTFIX: SKIP AZURE CI')) { + const latestCommitHash = pullRequest.head.sha; + + // Create commit status based on Azure CI report to PR + await checkAzureCiAndCreateCommitStatus({ + github, + context, + prNumber: pr.number, + latestCommitHash: latestCommitHash + }); + } + } diff --git a/.gitignore b/.gitignore index 6c77bdab59de..3f72a1fced51 100644 --- a/.gitignore +++ b/.gitignore @@ -65,6 +65,7 @@ local.properties .out .idea/* !.idea/vcs.xml +!.idea/icon.png *.ipr *.iws *.iml diff --git a/.idea/icon.png b/.idea/icon.png new file mode 100644 index 000000000000..94e623516d86 Binary files /dev/null and b/.idea/icon.png differ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000000..71b2f1077a09 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# 
contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Use a home made image as the base, which includes: +# utuntu:latest +# git +# thrift +# maven +# java8 +FROM apachehudi/hudi-ci-bundle-validation-base:azure_ci_test_base_new + +CMD ["java", "-version"] + +# Set the working directory to /app +WORKDIR /hudi + +# Copy git repo into the working directory +COPY . /hudi diff --git a/azure-pipelines-20230430.yml b/azure-pipelines-20230430.yml index 0767d179b243..e61057a4649d 100644 --- a/azure-pipelines-20230430.yml +++ b/azure-pipelines-20230430.yml @@ -30,6 +30,10 @@ parameters: type: object default: - 'hudi-common' + - 'hudi-client/hudi-spark-client' + - name: job2UTModules + type: object + default: - 'hudi-flink-datasource' - 'hudi-flink-datasource/hudi-flink' - 'hudi-flink-datasource/hudi-flink1.14.x' @@ -37,12 +41,20 @@ parameters: - 'hudi-flink-datasource/hudi-flink1.16.x' - 'hudi-flink-datasource/hudi-flink1.17.x' - 'hudi-flink-datasource/hudi-flink1.18.x' - - name: job2Modules + - name: job2FTModules type: object default: + - 'hudi-common' + - 'hudi-flink-datasource' + - 'hudi-flink-datasource/hudi-flink' + - 'hudi-flink-datasource/hudi-flink1.14.x' + - 'hudi-flink-datasource/hudi-flink1.15.x' + - 'hudi-flink-datasource/hudi-flink1.16.x' + - 'hudi-flink-datasource/hudi-flink1.17.x' + - 'hudi-flink-datasource/hudi-flink1.18.x' - 'hudi-client/hudi-spark-client' - 'hudi-spark-datasource/hudi-spark' - - name: job3UTModules + - name: job34UTModules type: object default: - 'hudi-spark-datasource' @@ -51,12 +63,13 @@ parameters: - 'hudi-spark-datasource/hudi-spark3.2plus-common' - 'hudi-spark-datasource/hudi-spark3-common' - 'hudi-spark-datasource/hudi-spark-common' - - name: job4UTModules + - name: job6UTModules type: object default: - '!hudi-hadoop-mr' - '!hudi-client/hudi-java-client' - '!hudi-client/hudi-spark-client' + - '!hudi-cli' - '!hudi-common' - '!hudi-examples' - '!hudi-examples/hudi-examples-common' @@ -76,10 +89,11 @@ parameters: - '!hudi-spark-datasource/hudi-spark3.2plus-common' - '!hudi-spark-datasource/hudi-spark3-common' - '!hudi-spark-datasource/hudi-spark-common' - - name: job4FTModules + - name: job6FTModules type: object default: - '!hudi-client/hudi-spark-client' + - '!hudi-cli' - '!hudi-common' - '!hudi-examples' - '!hudi-examples/hudi-examples-common' @@ -94,50 +108,59 @@ parameters: - '!hudi-flink-datasource/hudi-flink1.17.x' - '!hudi-flink-datasource/hudi-flink1.18.x' - '!hudi-spark-datasource/hudi-spark' + - name: job4HudiSparkDmlOthersWildcardSuites + type: object + default: + - 'org.apache.hudi' + - 'org.apache.spark.hudi' + - 'org.apache.spark.sql.avro' + - 'org.apache.spark.sql.execution' + - 'org.apache.spark.sql.hudi.analysis' + - 'org.apache.spark.sql.hudi.command' + - 'org.apache.spark.sql.hudi.common' + - 'org.apache.spark.sql.hudi.dml' variables: BUILD_PROFILES: '-Dscala-2.12 -Dspark3.2 -Dflink1.18' 
PLUGIN_OPTS: '-Dcheckstyle.skip=true -Drat.skip=true -Djacoco.skip=true -ntp -B -V -Pwarn-log -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=warn -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn' - MVN_OPTS_INSTALL: '-Phudi-platform-service -DskipTests $(BUILD_PROFILES) $(PLUGIN_OPTS) -Dmaven.wagon.httpconnectionManager.ttlSeconds=25 -Dmaven.wagon.http.retryHandler.count=5' + MVN_OPTS_INSTALL: '-T 3 -Phudi-platform-service -DskipTests $(BUILD_PROFILES) $(PLUGIN_OPTS) -Dmaven.wagon.httpconnectionManager.ttlSeconds=25 -Dmaven.wagon.http.retryHandler.count=5' MVN_OPTS_TEST: '-fae -Pwarn-log $(BUILD_PROFILES) $(PLUGIN_OPTS)' + JAVA_MVN_TEST_FILTER: '-DwildcardSuites=skipScalaTests -DfailIfNoTests=false' + SCALA_MVN_TEST_FILTER: '-Dtest=skipJavaTests -DfailIfNoTests=false' JOB1_MODULES: ${{ join(',',parameters.job1Modules) }} - JOB2_MODULES: ${{ join(',',parameters.job2Modules) }} - JOB3_MODULES: ${{ join(',',parameters.job3UTModules) }} - JOB4_UT_MODULES: ${{ join(',',parameters.job4UTModules) }} - JOB4_FT_MODULES: ${{ join(',',parameters.job4FTModules) }} + JOB2_UT_MODULES: ${{ join(',',parameters.job2UTModules) }} + JOB2_FT_MODULES: ${{ join(',',parameters.job2FTModules) }} + JOB34_MODULES: ${{ join(',',parameters.job34UTModules) }} + JOB3_SPARK_DDL_WILDCARD_SUITES: 'org.apache.spark.sql.hudi.ddl' + JOB6_SPARK_PROCEDURE_WILDCARD_SUITES: 'org.apache.spark.sql.hudi.procedure' + JOB4_SPARK_DML_OTHERS_WILDCARD_SUITES: ${{ join(',',parameters.job4HudiSparkDmlOthersWildcardSuites) }} + JOB6_UT_MODULES: ${{ join(',',parameters.job6UTModules) }} + JOB6_FT_MODULES: ${{ join(',',parameters.job6FTModules) }} stages: - stage: test + variables: + - name: DOCKER_BUILDKIT + value: 1 jobs: - job: UT_FT_1 - displayName: UT FT common & flink & UT client/spark-client - timeoutInMinutes: '150' + displayName: UT common & client/spark-client + timeoutInMinutes: '90' steps: - task: Maven@4 displayName: maven install inputs: mavenPomFile: 'pom.xml' goals: 'clean install' - options: $(MVN_OPTS_INSTALL) - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' + options: $(MVN_OPTS_INSTALL) -pl $(JOB1_MODULES) -am + publishJUnitResults: false jdkVersionOption: '1.8' - task: Maven@4 - displayName: UT common flink client/spark-client + displayName: UT common & client/spark-client inputs: mavenPomFile: 'pom.xml' goals: 'test' - options: $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB1_MODULES),hudi-client/hudi-spark-client - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' - jdkVersionOption: '1.8' - mavenOptions: '-Xmx4g' - - task: Maven@4 - displayName: FT common flink - inputs: - mavenPomFile: 'pom.xml' - goals: 'test' - options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl $(JOB1_MODULES) + options: $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB1_MODULES) publishJUnitResults: true testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' @@ -146,24 +169,32 @@ stages: grep "testcase" */target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100 displayName: Top 100 long-running testcases - job: UT_FT_2 - displayName: FT client/spark-client & hudi-spark-datasource/hudi-spark - timeoutInMinutes: '150' + displayName: UT flink & FT common & flink & spark-client & hudi-spark + timeoutInMinutes: '90' steps: - task: Maven@4 displayName: maven install inputs: mavenPomFile: 'pom.xml' goals: 'clean install' - options: $(MVN_OPTS_INSTALL) - publishJUnitResults: true - 
testResultsFiles: '**/surefire-reports/TEST-*.xml' + options: $(MVN_OPTS_INSTALL) -pl $(JOB2_FT_MODULES) -am + publishJUnitResults: false jdkVersionOption: '1.8' - task: Maven@4 - displayName: FT client/spark-client & hudi-spark-datasource/hudi-spark + displayName: UT flink inputs: mavenPomFile: 'pom.xml' goals: 'test' - options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl $(JOB2_MODULES) + options: $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB2_UT_MODULES) + publishJUnitResults: false + jdkVersionOption: '1.8' + mavenOptions: '-Xmx4g' + - task: Maven@4 + displayName: FT common & flink & client/spark-client & hudi-spark-datasource/hudi-spark + inputs: + mavenPomFile: 'pom.xml' + goals: 'test' + options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl $(JOB2_FT_MODULES) publishJUnitResults: true testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' @@ -172,24 +203,32 @@ stages: grep "testcase" */target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100 displayName: Top 100 long-running testcases - job: UT_FT_3 - displayName: UT spark-datasource - timeoutInMinutes: '240' + displayName: UT spark-datasource Java Tests & DDL + timeoutInMinutes: '90' steps: - task: Maven@4 displayName: maven install inputs: mavenPomFile: 'pom.xml' goals: 'clean install' - options: $(MVN_OPTS_INSTALL) - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' + options: $(MVN_OPTS_INSTALL) -pl $(JOB34_MODULES) -am + publishJUnitResults: false jdkVersionOption: '1.8' - task: Maven@4 - displayName: UT spark-datasource + displayName: Java UT spark-datasource inputs: mavenPomFile: 'pom.xml' goals: 'test' - options: $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB3_MODULES) + options: $(MVN_OPTS_TEST) -Punit-tests $(JAVA_MVN_TEST_FILTER) -pl $(JOB34_MODULES) + publishJUnitResults: false + jdkVersionOption: '1.8' + mavenOptions: '-Xmx4g' + - task: Maven@4 + displayName: Scala UT spark-datasource DDL + inputs: + mavenPomFile: 'pom.xml' + goals: 'test' + options: $(MVN_OPTS_TEST) -Punit-tests $(SCALA_MVN_TEST_FILTER) -DwildcardSuites="$(JOB3_SPARK_DDL_WILDCARD_SUITES)" -pl $(JOB34_MODULES) publishJUnitResults: true testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' @@ -198,38 +237,116 @@ stages: grep "testcase" */target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100 displayName: Top 100 long-running testcases - job: UT_FT_4 - displayName: UT FT other modules - timeoutInMinutes: '240' + displayName: UT spark-datasource DML & others + timeoutInMinutes: '90' steps: - task: Maven@4 displayName: maven install inputs: mavenPomFile: 'pom.xml' goals: 'clean install' - options: $(MVN_OPTS_INSTALL) - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' + options: $(MVN_OPTS_INSTALL) -pl $(JOB34_MODULES) -am + publishJUnitResults: false jdkVersionOption: '1.8' - task: Maven@4 - displayName: UT other modules + displayName: Scala UT spark-datasource DML & others inputs: mavenPomFile: 'pom.xml' goals: 'test' - options: $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB4_UT_MODULES) + options: $(MVN_OPTS_TEST) -Punit-tests $(SCALA_MVN_TEST_FILTER) -DwildcardSuites="$(JOB4_SPARK_DML_OTHERS_WILDCARD_SUITES)" -pl $(JOB34_MODULES) publishJUnitResults: true testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' mavenOptions: '-Xmx4g' - - task: Maven@4 - displayName: FT other modules + - script: | + grep "testcase" 
*/target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100 + displayName: Top 100 long-running testcases + - job: UT_FT_5 + displayName: UT FT Hudi Streamer + timeoutInMinutes: '90' + steps: + - task: Docker@2 + displayName: "login to docker hub" inputs: - mavenPomFile: 'pom.xml' - goals: 'test' - options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl $(JOB4_FT_MODULES) - publishJUnitResults: true + command: "login" + containerRegistry: "apachehudi-docker-hub" + - task: Docker@2 + displayName: "load repo into image" + inputs: + containerRegistry: 'apachehudi-docker-hub' + repository: 'apachehudi/hudi-ci-bundle-validation-base' + command: 'build' + Dockerfile: '**/Dockerfile' + ImageName: $(Build.BuildId) + - task: Docker@2 + displayName: "UT FT other modules" + inputs: + containerRegistry: 'apachehudi-docker-hub' + repository: 'apachehudi/hudi-ci-bundle-validation-base' + command: 'run' + arguments: > + -v $(Build.SourcesDirectory):/hudi + -i docker.io/apachehudi/hudi-ci-bundle-validation-base:$(Build.BuildId) + /bin/bash -c "pwd + && rm -rf /hudi/scripts/ci/results + && mvn clean install $(MVN_OPTS_INSTALL) -Phudi-platform-service -Pthrift-gen-source -pl hudi-utilities -am + && mvn test $(MVN_OPTS_TEST) -Punit-tests -Dtest="Test*DeltaStreamer*" -DfailIfNoTests=false -pl hudi-utilities + && mvn test $(MVN_OPTS_TEST) -Pfunctional-tests -Dtest="Test*DeltaStreamer*" -DfailIfNoTests=false -pl hudi-utilities + && ./scripts/ci/move_surefire_reports.sh /hudi /hudi/scripts/ci/results + && echo 'All surefire report files:' + && find . -type f -name \"TEST-*.xml\"" + - task: PublishTestResults@2 + displayName: 'Publish Test Results' + inputs: + testResultsFormat: 'JUnit' testResultsFiles: '**/surefire-reports/TEST-*.xml' - jdkVersionOption: '1.8' - mavenOptions: '-Xmx4g' + searchFolder: '$(Build.SourcesDirectory)/scripts/ci/results' + failTaskOnFailedTests: true - script: | - grep "testcase" */target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100 + grep "testcase" scripts/ci/results/*/target/surefire-reports/*.xml scripts/ci/results/*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100 + displayName: Top 100 long-running testcases + - job: UT_FT_6 + displayName: UT FT other modules + timeoutInMinutes: '90' + steps: + - task: Docker@2 + displayName: "login to docker hub" + inputs: + command: "login" + containerRegistry: "apachehudi-docker-hub" + - task: Docker@2 + displayName: "load repo into image" + inputs: + containerRegistry: 'apachehudi-docker-hub' + repository: 'apachehudi/hudi-ci-bundle-validation-base' + command: 'build' + Dockerfile: '**/Dockerfile' + ImageName: $(Build.BuildId) + - task: Docker@2 + displayName: "UT FT other modules" + inputs: + containerRegistry: 'apachehudi-docker-hub' + repository: 'apachehudi/hudi-ci-bundle-validation-base' + command: 'run' + arguments: > + -v $(Build.SourcesDirectory):/hudi + -i docker.io/apachehudi/hudi-ci-bundle-validation-base:$(Build.BuildId) + /bin/bash -c "pwd + && rm -rf /hudi/scripts/ci/results + && mvn clean install $(MVN_OPTS_INSTALL) -Phudi-platform-service -Pthrift-gen-source + && mvn test $(MVN_OPTS_TEST) -Punit-tests $(SCALA_MVN_TEST_FILTER) -DwildcardSuites="$(JOB6_SPARK_PROCEDURE_WILDCARD_SUITES)" -pl $(JOB34_MODULES) + && mvn test $(MVN_OPTS_TEST) -Punit-tests -Dtest="!Test*DeltaStreamer*" -DfailIfNoTests=false -pl $(JOB6_UT_MODULES) + && mvn test $(MVN_OPTS_TEST) 
-Pfunctional-tests -Dtest="!Test*DeltaStreamer*" -DfailIfNoTests=false -pl $(JOB6_FT_MODULES) + && ./scripts/ci/move_surefire_reports.sh /hudi /hudi/scripts/ci/results + && echo 'All surefire report files:' + && find . -type f -name \"TEST-*.xml\"" + - task: PublishTestResults@2 + displayName: 'Publish Test Results' + inputs: + testResultsFormat: 'JUnit' + testResultsFiles: '**/surefire-reports/TEST-*.xml' + searchFolder: '$(Build.SourcesDirectory)/scripts/ci/results' + failTaskOnFailedTests: true + - script: | + grep "testcase" scripts/ci/results/*/target/surefire-reports/*.xml scripts/ci/results/*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100 displayName: Top 100 long-running testcases diff --git a/hudi-aws/pom.xml b/hudi-aws/pom.xml index 9da22ff32c2a..5aadd6b1cca9 100644 --- a/hudi-aws/pom.xml +++ b/hudi-aws/pom.xml @@ -299,9 +299,6 @@ src/main/resources - - src/test/resources - diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java index 1e19b44a4992..2050e4f60ffe 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java @@ -22,6 +22,8 @@ import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.util.CollectionUtils; +import org.apache.hudi.common.util.CustomizedThreadFactory; +import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.Option; import org.apache.hudi.config.GlueCatalogSyncClientConfig; import org.apache.hudi.hive.HiveSyncConfig; @@ -37,6 +39,8 @@ import software.amazon.awssdk.services.glue.model.BatchCreatePartitionResponse; import software.amazon.awssdk.services.glue.model.BatchDeletePartitionRequest; import software.amazon.awssdk.services.glue.model.BatchDeletePartitionResponse; +import software.amazon.awssdk.services.glue.model.BatchGetPartitionRequest; +import software.amazon.awssdk.services.glue.model.BatchGetPartitionResponse; import software.amazon.awssdk.services.glue.model.BatchUpdatePartitionRequest; import software.amazon.awssdk.services.glue.model.BatchUpdatePartitionRequestEntry; import software.amazon.awssdk.services.glue.model.BatchUpdatePartitionResponse; @@ -59,6 +63,7 @@ import software.amazon.awssdk.services.glue.model.PartitionIndexDescriptor; import software.amazon.awssdk.services.glue.model.PartitionInput; import software.amazon.awssdk.services.glue.model.PartitionValueList; +import software.amazon.awssdk.services.glue.model.Segment; import software.amazon.awssdk.services.glue.model.SerDeInfo; import software.amazon.awssdk.services.glue.model.StorageDescriptor; import software.amazon.awssdk.services.glue.model.Table; @@ -81,14 +86,21 @@ import java.util.Objects; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.function.Consumer; import java.util.stream.Collectors; import static org.apache.hudi.aws.utils.S3Utils.s3aToS3; import static org.apache.hudi.common.util.MapUtils.containsAll; import static org.apache.hudi.common.util.MapUtils.isNullOrEmpty; -import static org.apache.hudi.config.GlueCatalogSyncClientConfig.GLUE_METADATA_FILE_LISTING; +import static 
org.apache.hudi.config.GlueCatalogSyncClientConfig.CHANGED_PARTITIONS_READ_PARALLELISM; import static org.apache.hudi.config.GlueCatalogSyncClientConfig.META_SYNC_PARTITION_INDEX_FIELDS; import static org.apache.hudi.config.GlueCatalogSyncClientConfig.META_SYNC_PARTITION_INDEX_FIELDS_ENABLE; +import static org.apache.hudi.config.GlueCatalogSyncClientConfig.PARTITION_CHANGE_PARALLELISM; +import static org.apache.hudi.config.GlueCatalogSyncClientConfig.GLUE_METADATA_FILE_LISTING; +import static org.apache.hudi.config.GlueCatalogSyncClientConfig.ALL_PARTITIONS_READ_PARALLELISM; import static org.apache.hudi.config.HoodieAWSConfig.AWS_GLUE_ENDPOINT; import static org.apache.hudi.config.HoodieAWSConfig.AWS_GLUE_REGION; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_CREATE_MANAGED_TABLE; @@ -109,12 +121,12 @@ public class AWSGlueCatalogSyncClient extends HoodieSyncClient { private static final Logger LOG = LoggerFactory.getLogger(AWSGlueCatalogSyncClient.class); - private static final int MAX_PARTITIONS_PER_REQUEST = 100; + private static final int MAX_PARTITIONS_PER_CHANGE_REQUEST = 100; + private static final int MAX_PARTITIONS_PER_READ_REQUEST = 1000; private static final int MAX_DELETE_PARTITIONS_PER_REQUEST = 25; protected final GlueAsyncClient awsGlue; private static final String GLUE_PARTITION_INDEX_ENABLE = "partition_filtering.enabled"; private static final int PARTITION_INDEX_MAX_NUMBER = 3; - private static final int GLUE_EXPRESSION_MAX_CHARS = 2048; /** * athena v2/v3 table property * see https://docs.aws.amazon.com/athena/latest/ug/querying-hudi.html @@ -124,6 +136,9 @@ public class AWSGlueCatalogSyncClient extends HoodieSyncClient { private final Boolean skipTableArchive; private final String enableMetadataTable; + private final int allPartitionsReadParallelism; + private final int changedPartitionsReadParallelism; + private final int changeParallelism; public AWSGlueCatalogSyncClient(HiveSyncConfig config) { super(config); @@ -141,105 +156,196 @@ public AWSGlueCatalogSyncClient(HiveSyncConfig config) { this.databaseName = config.getStringOrDefault(META_SYNC_DATABASE_NAME); this.skipTableArchive = config.getBooleanOrDefault(GlueCatalogSyncClientConfig.GLUE_SKIP_TABLE_ARCHIVE); this.enableMetadataTable = Boolean.toString(config.getBoolean(GLUE_METADATA_FILE_LISTING)).toUpperCase(); + this.allPartitionsReadParallelism = config.getIntOrDefault(ALL_PARTITIONS_READ_PARALLELISM); + this.changedPartitionsReadParallelism = config.getIntOrDefault(CHANGED_PARTITIONS_READ_PARALLELISM); + this.changeParallelism = config.getIntOrDefault(PARTITION_CHANGE_PARALLELISM); + } + + private List getPartitionsSegment(Segment segment, String tableName) { + try { + List partitions = new ArrayList<>(); + String nextToken = null; + do { + GetPartitionsResponse result = awsGlue.getPartitions(GetPartitionsRequest.builder() + .databaseName(databaseName) + .tableName(tableName) + .segment(segment) + .nextToken(nextToken) + .build()).get(); + partitions.addAll(result.partitions().stream() + .map(p -> new Partition(p.values(), p.storageDescriptor().location())) + .collect(Collectors.toList())); + nextToken = result.nextToken(); + } while (nextToken != null); + return partitions; + } catch (Exception e) { + throw new HoodieGlueSyncException("Failed to get all partitions for table " + tableId(databaseName, tableName), e); + } } @Override public List getAllPartitions(String tableName) { + ExecutorService executorService = Executors.newFixedThreadPool(this.allPartitionsReadParallelism, new 
CustomizedThreadFactory("glue-sync-all-partitions", true)); try { - return getPartitions(GetPartitionsRequest.builder() - .databaseName(databaseName) - .tableName(tableName)); + List segments = new ArrayList<>(); + for (int i = 0; i < allPartitionsReadParallelism; i++) { + segments.add(Segment.builder() + .segmentNumber(i) + .totalSegments(allPartitionsReadParallelism).build()); + } + List>> futures = segments.stream() + .map(segment -> executorService.submit(() -> this.getPartitionsSegment(segment, tableName))) + .collect(Collectors.toList()); + + List partitions = new ArrayList<>(); + for (Future> future : futures) { + partitions.addAll(future.get()); + } + + return partitions; } catch (Exception e) { throw new HoodieGlueSyncException("Failed to get all partitions for table " + tableId(databaseName, tableName), e); + } finally { + executorService.shutdownNow(); } } @Override - public List getPartitionsByFilter(String tableName, String filter) { + public List getPartitionsFromList(String tableName, List partitionList) { + if (partitionList.isEmpty()) { + LOG.info("No partitions to read for " + tableId(this.databaseName, tableName)); + return Collections.emptyList(); + } + HoodieTimer timer = HoodieTimer.start(); + List> batches = CollectionUtils.batches(partitionList, MAX_PARTITIONS_PER_READ_REQUEST); + ExecutorService executorService = Executors.newFixedThreadPool( + Math.min(this.changedPartitionsReadParallelism, batches.size()), + new CustomizedThreadFactory("glue-sync-get-partitions-" + tableName, true) + ); try { - if (filter.length() <= GLUE_EXPRESSION_MAX_CHARS) { - LOG.info("Pushdown filters: {}", filter); - return getPartitions(GetPartitionsRequest.builder() - .databaseName(databaseName) - .tableName(tableName) - .expression(filter)); - } else { - LOG.warn("Falling back to listing all partition since expression filter length > {}", GLUE_EXPRESSION_MAX_CHARS); - return getAllPartitions(tableName); + List>> futures = batches + .stream() + .map(batch -> executorService.submit(() -> this.getChangedPartitions(batch, tableName))) + .collect(Collectors.toList()); + + List partitions = new ArrayList<>(); + for (Future> future : futures) { + partitions.addAll(future.get()); } + LOG.info( + "Requested {} partitions, found existing {} partitions, new {} partitions", + partitionList.size(), + partitions.size(), + partitionList.size() - partitions.size()); + + return partitions; } catch (Exception e) { - throw new HoodieGlueSyncException("Failed to get partitions for table " + tableId(databaseName, tableName) + " from expression: " + filter, e); + throw new HoodieGlueSyncException("Failed to get all partitions for table " + tableId(this.databaseName, tableName), e); + } finally { + executorService.shutdownNow(); + LOG.info("Took {} ms to get {} partitions for table {}", timer.endTimer(), partitionList.size(), tableId(this.databaseName, tableName)); } } - private List getPartitions(GetPartitionsRequest.Builder partitionRequestBuilder) throws InterruptedException, ExecutionException { - List partitions = new ArrayList<>(); - String nextToken = null; - do { - GetPartitionsResponse result = awsGlue.getPartitions(partitionRequestBuilder - .excludeColumnSchema(true) - .nextToken(nextToken) - .build()).get(); - partitions.addAll(result.partitions().stream() - .map(p -> new Partition(p.values(), p.storageDescriptor().location())) - .collect(Collectors.toList())); - nextToken = result.nextToken(); - } while (nextToken != null); - return partitions; + private List getChangedPartitions(List 
changedPartitions, String tableName) throws ExecutionException, InterruptedException { + List partitionValueList = changedPartitions.stream().map(str -> + PartitionValueList.builder().values(partitionValueExtractor.extractPartitionValuesInPath(str)).build() + ).collect(Collectors.toList()); + BatchGetPartitionRequest request = BatchGetPartitionRequest.builder() + .databaseName(this.databaseName) + .tableName(tableName) + .partitionsToGet(partitionValueList) + .build(); + BatchGetPartitionResponse callResult = awsGlue.batchGetPartition(request).get(); + List result = callResult + .partitions() + .stream() + .map(p -> new Partition(p.values(), p.storageDescriptor().location())) + .collect(Collectors.toList()); + + return result; } @Override public void addPartitionsToTable(String tableName, List partitionsToAdd) { - if (partitionsToAdd.isEmpty()) { - LOG.info("No partitions to add for " + tableId(databaseName, tableName)); - return; - } - LOG.info("Adding " + partitionsToAdd.size() + " partition(s) in table " + tableId(databaseName, tableName)); + HoodieTimer timer = HoodieTimer.start(); try { + if (partitionsToAdd.isEmpty()) { + LOG.info("No partitions to add for " + tableId(this.databaseName, tableName)); + return; + } Table table = getTable(awsGlue, databaseName, tableName); + parallelizeChange(partitionsToAdd, this.changeParallelism, partitions -> this.addPartitionsToTableInternal(table, partitions), MAX_PARTITIONS_PER_CHANGE_REQUEST); + } finally { + LOG.info("Added {} partitions to table {} in {} ms", partitionsToAdd.size(), tableId(this.databaseName, tableName), timer.endTimer()); + } + } + + private void parallelizeChange(List items, int parallelism, Consumer> consumer, int sliceSize) { + List> batches = CollectionUtils.batches(items, sliceSize); + ExecutorService executorService = Executors.newFixedThreadPool(Math.min(parallelism, batches.size()), new CustomizedThreadFactory("glue-sync", true)); + try { + List> futures = batches.stream() + .map(item -> executorService.submit(() -> { + consumer.accept(item); + })) + .collect(Collectors.toList()); + for (Future future : futures) { + future.get(); + } + } catch (Exception e) { + throw new HoodieGlueSyncException("Failed to parallelize operation", e); + } finally { + executorService.shutdownNow(); + } + } + + private void addPartitionsToTableInternal(Table table, List partitionsToAdd) { + try { StorageDescriptor sd = table.storageDescriptor(); - List partitionInputs = partitionsToAdd.stream().map(partition -> { + List partitionInputList = partitionsToAdd.stream().map(partition -> { String fullPartitionPath = FSUtils.getPartitionPath(s3aToS3(getBasePath()), partition).toString(); List partitionValues = partitionValueExtractor.extractPartitionValuesInPath(partition); StorageDescriptor partitionSD = sd.copy(copySd -> copySd.location(fullPartitionPath)); return PartitionInput.builder().values(partitionValues).storageDescriptor(partitionSD).build(); }).collect(Collectors.toList()); - List> futures = new ArrayList<>(); - - for (List batch : CollectionUtils.batches(partitionInputs, MAX_PARTITIONS_PER_REQUEST)) { - BatchCreatePartitionRequest request = BatchCreatePartitionRequest.builder() - .databaseName(databaseName).tableName(tableName).partitionInputList(batch).build(); - futures.add(awsGlue.batchCreatePartition(request)); - } - - for (CompletableFuture future : futures) { - BatchCreatePartitionResponse response = future.get(); - if (CollectionUtils.nonEmpty(response.errors())) { - if (response.errors().stream() - .allMatch( - (error) -> 
"AlreadyExistsException".equals(error.errorDetail().errorCode()))) { - LOG.warn("Partitions already exist in glue: " + response.errors()); - } else { - throw new HoodieGlueSyncException("Fail to add partitions to " + tableId(databaseName, tableName) + BatchCreatePartitionRequest request = BatchCreatePartitionRequest.builder() + .databaseName(databaseName).tableName(table.name()).partitionInputList(partitionInputList).build(); + CompletableFuture future = awsGlue.batchCreatePartition(request); + BatchCreatePartitionResponse response = future.get(); + if (CollectionUtils.nonEmpty(response.errors())) { + if (response.errors().stream() + .allMatch( + (error) -> "AlreadyExistsException".equals(error.errorDetail().errorCode()))) { + LOG.warn("Partitions already exist in glue: " + response.errors()); + } else { + throw new HoodieGlueSyncException("Fail to add partitions to " + tableId(databaseName, table.name()) + " with error(s): " + response.errors()); - } } } } catch (Exception e) { - throw new HoodieGlueSyncException("Fail to add partitions to " + tableId(databaseName, tableName), e); + throw new HoodieGlueSyncException("Fail to add partitions to " + tableId(databaseName, table.name()), e); } } @Override public void updatePartitionsToTable(String tableName, List changedPartitions) { - if (changedPartitions.isEmpty()) { - LOG.info("No partitions to change for " + tableName); - return; - } - LOG.info("Updating " + changedPartitions.size() + "partition(s) in table " + tableId(databaseName, tableName)); + HoodieTimer timer = HoodieTimer.start(); try { + if (changedPartitions.isEmpty()) { + LOG.info("No partitions to update for " + tableId(this.databaseName, tableName)); + return; + } Table table = getTable(awsGlue, databaseName, tableName); + parallelizeChange(changedPartitions, this.changeParallelism, partitions -> this.updatePartitionsToTableInternal(table, partitions), MAX_PARTITIONS_PER_CHANGE_REQUEST); + } finally { + LOG.info("Updated {} partitions to table {} in {} ms", changedPartitions.size(), tableId(this.databaseName, tableName), timer.endTimer()); + } + } + + private void updatePartitionsToTableInternal(Table table, List changedPartitions) { + try { StorageDescriptor sd = table.storageDescriptor(); List updatePartitionEntries = changedPartitions.stream().map(partition -> { String fullPartitionPath = FSUtils.getPartitionPath(s3aToS3(getBasePath()), partition).toString(); @@ -249,57 +355,52 @@ public void updatePartitionsToTable(String tableName, List changedPartit return BatchUpdatePartitionRequestEntry.builder().partitionInput(partitionInput).partitionValueList(partitionValues).build(); }).collect(Collectors.toList()); - List> futures = new ArrayList<>(); - for (List batch : CollectionUtils.batches(updatePartitionEntries, MAX_PARTITIONS_PER_REQUEST)) { - BatchUpdatePartitionRequest request = BatchUpdatePartitionRequest.builder() - .databaseName(databaseName).tableName(tableName).entries(batch).build(); - futures.add(awsGlue.batchUpdatePartition(request)); - } + BatchUpdatePartitionRequest request = BatchUpdatePartitionRequest.builder() + .databaseName(databaseName).tableName(table.name()).entries(updatePartitionEntries).build(); + CompletableFuture future = awsGlue.batchUpdatePartition(request); - for (CompletableFuture future : futures) { - BatchUpdatePartitionResponse response = future.get(); - if (CollectionUtils.nonEmpty(response.errors())) { - throw new HoodieGlueSyncException("Fail to update partitions to " + tableId(databaseName, tableName) - + " with error(s): " + 
response.errors()); - } + BatchUpdatePartitionResponse response = future.get(); + if (CollectionUtils.nonEmpty(response.errors())) { + throw new HoodieGlueSyncException("Fail to update partitions to " + tableId(databaseName, table.name()) + + " with error(s): " + response.errors()); } } catch (Exception e) { - throw new HoodieGlueSyncException("Fail to update partitions to " + tableId(databaseName, tableName), e); + throw new HoodieGlueSyncException("Fail to update partitions to " + tableId(databaseName, table.name()), e); } } @Override public void dropPartitions(String tableName, List partitionsToDrop) { - if (CollectionUtils.isNullOrEmpty(partitionsToDrop)) { - LOG.info("No partitions to drop for " + tableName); - return; - } - LOG.info("Drop " + partitionsToDrop.size() + "partition(s) in table " + tableId(databaseName, tableName)); + HoodieTimer timer = HoodieTimer.start(); try { - List> futures = new ArrayList<>(); - for (List batch : CollectionUtils.batches(partitionsToDrop, MAX_DELETE_PARTITIONS_PER_REQUEST)) { + if (partitionsToDrop.isEmpty()) { + LOG.info("No partitions to drop for " + tableId(this.databaseName, tableName)); + return; + } + parallelizeChange(partitionsToDrop, this.changeParallelism, partitions -> this.dropPartitionsInternal(tableName, partitions), MAX_DELETE_PARTITIONS_PER_REQUEST); + } finally { + LOG.info("Deleted {} partitions to table {} in {} ms", partitionsToDrop.size(), tableId(this.databaseName, tableName), timer.endTimer()); + } + } - List partitionValueLists = batch.stream().map(partition -> { - PartitionValueList partitionValueList = PartitionValueList.builder() - .values(partitionValueExtractor.extractPartitionValuesInPath(partition)) - .build(); - return partitionValueList; - }).collect(Collectors.toList()); + private void dropPartitionsInternal(String tableName, List partitionsToDrop) { + try { + List partitionValueLists = partitionsToDrop.stream().map(partition -> PartitionValueList.builder() + .values(partitionValueExtractor.extractPartitionValuesInPath(partition)) + .build() + ).collect(Collectors.toList()); - BatchDeletePartitionRequest batchDeletePartitionRequest = BatchDeletePartitionRequest.builder() - .databaseName(databaseName) - .tableName(tableName) - .partitionsToDelete(partitionValueLists) - .build(); - futures.add(awsGlue.batchDeletePartition(batchDeletePartitionRequest)); - } + BatchDeletePartitionRequest batchDeletePartitionRequest = BatchDeletePartitionRequest.builder() + .databaseName(databaseName) + .tableName(tableName) + .partitionsToDelete(partitionValueLists) + .build(); + CompletableFuture future = awsGlue.batchDeletePartition(batchDeletePartitionRequest); - for (CompletableFuture future : futures) { - BatchDeletePartitionResponse response = future.get(); - if (CollectionUtils.nonEmpty(response.errors())) { - throw new HoodieGlueSyncException("Fail to drop partitions to " + tableId(databaseName, tableName) - + " with error(s): " + response.errors()); - } + BatchDeletePartitionResponse response = future.get(); + if (CollectionUtils.nonEmpty(response.errors())) { + throw new HoodieGlueSyncException("Fail to drop partitions to " + tableId(databaseName, tableName) + + " with error(s): " + response.errors()); } } catch (Exception e) { throw new HoodieGlueSyncException("Fail to drop partitions to " + tableId(databaseName, tableName), e); diff --git a/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java b/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java index 
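/*
 * A minimal sketch (not part of the patch, illustrative class/variable names) of the
 * segment-parallel listing pattern the new getAllPartitions()/getPartitionsSegment code
 * above relies on, written against the AWS SDK v2 Glue async client. The parallelism
 * value corresponds to the new all_partitions_read_parallelism config (bounded to 1..10)
 * introduced just below, and the same fixed-thread-pool-plus-batching idea drives the
 * add/update/drop paths via parallelizeChange().
 */
import software.amazon.awssdk.services.glue.GlueAsyncClient;
import software.amazon.awssdk.services.glue.model.GetPartitionsRequest;
import software.amazon.awssdk.services.glue.model.GetPartitionsResponse;
import software.amazon.awssdk.services.glue.model.Partition;
import software.amazon.awssdk.services.glue.model.Segment;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class GlueSegmentedListingSketch {

  // Lists one segment of a table's partitions, paging through nextToken as before.
  static List<Partition> listSegment(GlueAsyncClient glue, String db, String table, Segment segment) throws Exception {
    List<Partition> partitions = new ArrayList<>();
    String nextToken = null;
    do {
      GetPartitionsResponse resp = glue.getPartitions(GetPartitionsRequest.builder()
          .databaseName(db)
          .tableName(table)
          .segment(segment)
          .excludeColumnSchema(true)
          .nextToken(nextToken)
          .build()).get();
      partitions.addAll(resp.partitions());
      nextToken = resp.nextToken();
    } while (nextToken != null);
    return partitions;
  }

  // Fans the full listing out over N disjoint segments, mirroring getAllPartitions() above.
  static List<Partition> listAllPartitions(GlueAsyncClient glue, String db, String table, int parallelism) throws Exception {
    ExecutorService pool = Executors.newFixedThreadPool(parallelism);
    try {
      List<Future<List<Partition>>> futures = new ArrayList<>();
      for (int i = 0; i < parallelism; i++) {
        Segment segment = Segment.builder().segmentNumber(i).totalSegments(parallelism).build();
        futures.add(pool.submit(() -> listSegment(glue, db, table, segment)));
      }
      List<Partition> all = new ArrayList<>();
      for (Future<List<Partition>> f : futures) {
        all.addAll(f.get());
      }
      return all;
    } finally {
      pool.shutdownNow();
    }
  }
}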
21244e651547..0f6ac76a166e 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java +++ b/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java @@ -26,6 +26,8 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; +import java.util.stream.IntStream; + /** * Hoodie Configs for Glue. */ @@ -43,6 +45,28 @@ public class GlueCatalogSyncClientConfig extends HoodieConfig { .sinceVersion("0.14.0") .withDocumentation("Glue catalog sync based client will skip archiving the table version if this config is set to true"); + public static final ConfigProperty ALL_PARTITIONS_READ_PARALLELISM = ConfigProperty + .key(GLUE_CLIENT_PROPERTY_PREFIX + "all_partitions_read_parallelism") + .defaultValue(1) + .markAdvanced() + .withValidValues(IntStream.rangeClosed(1, 10).mapToObj(Integer::toString).toArray(String[]::new)) + .sinceVersion("1.0.0") + .withDocumentation("Parallelism for listing all partitions(first time sync). Should be in interval [1, 10]."); + + public static final ConfigProperty CHANGED_PARTITIONS_READ_PARALLELISM = ConfigProperty + .key(GLUE_CLIENT_PROPERTY_PREFIX + "changed_partitions_read_parallelism") + .defaultValue(1) + .markAdvanced() + .sinceVersion("1.0.0") + .withDocumentation("Parallelism for listing changed partitions(second and subsequent syncs)."); + + public static final ConfigProperty PARTITION_CHANGE_PARALLELISM = ConfigProperty + .key(GLUE_CLIENT_PROPERTY_PREFIX + "partition_change_parallelism") + .defaultValue(1) + .markAdvanced() + .sinceVersion("1.0.0") + .withDocumentation("Parallelism for change operations - such as create/update/delete."); + public static final ConfigProperty GLUE_METADATA_FILE_LISTING = ConfigProperty .key(GLUE_CLIENT_PROPERTY_PREFIX + "metadata_file_listing") .defaultValue(false) diff --git a/hudi-aws/src/test/java/org/apache/hudi/aws/sync/ITTestGluePartitionPushdown.java b/hudi-aws/src/test/java/org/apache/hudi/aws/sync/ITTestGluePartitionPushdown.java index b0aa34bdfce1..9601482b65af 100644 --- a/hudi-aws/src/test/java/org/apache/hudi/aws/sync/ITTestGluePartitionPushdown.java +++ b/hudi-aws/src/test/java/org/apache/hudi/aws/sync/ITTestGluePartitionPushdown.java @@ -31,6 +31,7 @@ import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import software.amazon.awssdk.services.glue.model.Column; import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest; @@ -47,15 +48,14 @@ import java.io.IOException; import java.nio.file.Files; import java.time.Instant; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.concurrent.ExecutionException; -import static org.apache.hudi.hive.HiveSyncConfig.HIVE_SYNC_FILTER_PUSHDOWN_MAX_SIZE; import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH; import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME; +@Disabled("HUDI-7475 The tests do not work. 
Disabling them to unblock Azure CI") public class ITTestGluePartitionPushdown { private static final String MOTO_ENDPOINT = "http://localhost:5000"; @@ -125,35 +125,14 @@ private void createPartitions(String...partitions) throws ExecutionException, In @Test public void testEmptyPartitionShouldReturnEmpty() { - Assertions.assertEquals(0, glueSync.getPartitionsByFilter(TABLE_NAME, - glueSync.generatePushDownFilter(Arrays.asList("1/bar"), partitionsFieldSchema)).size()); + Assertions.assertEquals(0, glueSync.getPartitionsFromList(TABLE_NAME, + Arrays.asList("1/bar")).size()); } @Test public void testPresentPartitionShouldReturnIt() throws ExecutionException, InterruptedException { createPartitions("1", "b'ar"); - Assertions.assertEquals(1, glueSync.getPartitionsByFilter(TABLE_NAME, - glueSync.generatePushDownFilter(Arrays.asList("1/b'ar", "2/foo", "1/b''ar"), partitionsFieldSchema)).size()); - } - - @Test - public void testPresentPartitionShouldReturnAllWhenExpressionFilterLengthTooLong() throws ExecutionException, InterruptedException { - createPartitions("1", "b'ar"); - - // this will generate an expression larger than GLUE_EXPRESSION_MAX_CHARS - List tooLargePartitionPredicate = new ArrayList<>(); - for (int i = 0; i < 500; i++) { - tooLargePartitionPredicate.add(i + "/foo"); - } - Assertions.assertEquals(1, glueSync.getPartitionsByFilter(TABLE_NAME, - glueSync.generatePushDownFilter(tooLargePartitionPredicate, partitionsFieldSchema)).size(), - "Should fallback to listing all existing partitions"); - - // now set the pushdown max size to a low value to transform the expression in lower/upper bound - hiveSyncProps.setProperty(HIVE_SYNC_FILTER_PUSHDOWN_MAX_SIZE.key(), "10"); - glueSync = new AWSGlueCatalogSyncClient(new HiveSyncConfig(hiveSyncProps)); - Assertions.assertEquals(0, glueSync.getPartitionsByFilter(TABLE_NAME, - glueSync.generatePushDownFilter(tooLargePartitionPredicate, partitionsFieldSchema)).size(), - "No partitions should match"); + Assertions.assertEquals(1, glueSync.getPartitionsFromList(TABLE_NAME, + Arrays.asList("1/b'ar", "2/foo", "1/b''ar")).size()); } } diff --git a/hudi-aws/src/test/java/org/apache/hudi/aws/transaction/integ/ITTestDynamoDBBasedLockProvider.java b/hudi-aws/src/test/java/org/apache/hudi/aws/transaction/integ/ITTestDynamoDBBasedLockProvider.java index 473861712595..b874f4f3c3cc 100644 --- a/hudi-aws/src/test/java/org/apache/hudi/aws/transaction/integ/ITTestDynamoDBBasedLockProvider.java +++ b/hudi-aws/src/test/java/org/apache/hudi/aws/transaction/integ/ITTestDynamoDBBasedLockProvider.java @@ -18,6 +18,7 @@ package org.apache.hudi.aws.transaction.integ; +import org.junit.jupiter.api.Disabled; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; @@ -44,6 +45,7 @@ * Test for {@link DynamoDBBasedLockProvider}. * Set it as integration test because it requires setting up docker environment. */ +@Disabled("HUDI-7475 The tests do not work. 
Disabling them to unblock Azure CI") public class ITTestDynamoDBBasedLockProvider { private static LockConfiguration lockConfiguration; diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieCLI.java b/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieCLI.java index 79adad368450..46a1b715ca3c 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieCLI.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieCLI.java @@ -39,6 +39,7 @@ public class HoodieCLI { public static Configuration conf; public static ConsistencyGuardConfig consistencyGuardConfig = ConsistencyGuardConfig.newBuilder().build(); + public static HoodieTimeGeneratorConfig timeGeneratorConfig; public static FileSystem fs; public static CLIState state = CLIState.INIT; public static String basePath; @@ -58,6 +59,10 @@ public static void setConsistencyGuardConfig(ConsistencyGuardConfig config) { consistencyGuardConfig = config; } + public static void setTimeGeneratorConfig(HoodieTimeGeneratorConfig config) { + timeGeneratorConfig = config; + } + private static void setTableMetaClient(HoodieTableMetaClient tableMetadata) { HoodieCLI.tableMetadata = tableMetadata; } @@ -88,8 +93,7 @@ public static void initFS(boolean force) throws IOException { public static void refreshTableMetadata() { setTableMetaClient(HoodieTableMetaClient.builder().setConf(HoodieCLI.conf).setBasePath(basePath).setLoadActiveTimelineOnLoad(false) .setConsistencyGuardConfig(HoodieCLI.consistencyGuardConfig) - // TODO [HUDI-6884] Generate HoodieTimeGeneratorConfig from props user set - .setTimeGeneratorConfig(HoodieTimeGeneratorConfig.defaultConfig(basePath)) + .setTimeGeneratorConfig(timeGeneratorConfig == null ? HoodieTimeGeneratorConfig.defaultConfig(basePath) : timeGeneratorConfig) .setLayoutVersion(Option.of(layoutVersion)).build()); } diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/MetadataCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/MetadataCommand.java index d106d8375e7a..7cebf43db029 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/MetadataCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/MetadataCommand.java @@ -151,7 +151,7 @@ public String delete(@ShellOption(value = "--backup", help = "Backup the metadat public String deleteRecordIndex(@ShellOption(value = "--backup", help = "Backup the record index before delete", defaultValue = "true", arity = 1) final boolean backup) throws Exception { HoodieTableMetaClient dataMetaClient = HoodieCLI.getTableMetaClient(); String backupPath = HoodieTableMetadataUtil.deleteMetadataTablePartition(dataMetaClient, new HoodieSparkEngineContext(jsc), - MetadataPartitionType.RECORD_INDEX, backup); + MetadataPartitionType.RECORD_INDEX.getPartitionPath(), backup); if (backup) { return "Record Index has been deleted from the Metadata Table and backed up to " + backupPath; } else { diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java index 158c79f52a74..e3c3e810c5cb 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java @@ -22,6 +22,7 @@ import org.apache.hudi.cli.HoodiePrintHelper; import org.apache.hudi.cli.HoodieTableHeaderFields; import org.apache.hudi.cli.TableHeader; +import org.apache.hudi.common.config.HoodieTimeGeneratorConfig; import org.apache.hudi.common.fs.ConsistencyGuardConfig; import 
org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.HoodieTableConfig; @@ -30,6 +31,7 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.table.timeline.TimeGeneratorType; import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.exception.HoodieException; @@ -85,13 +87,25 @@ public String connect( @ShellOption(value = {"--maxWaitIntervalMs"}, defaultValue = "300000", help = "Max wait time for eventual consistency") final Integer maxConsistencyIntervalMs, @ShellOption(value = {"--maxCheckIntervalMs"}, defaultValue = "7", - help = "Max checks for eventual consistency") final Integer maxConsistencyChecks) + help = "Max checks for eventual consistency") final Integer maxConsistencyChecks, + @ShellOption(value = {"--timeGeneratorType"}, defaultValue = "WAIT_TO_ADJUST_SKEW", + help = "Time generator type, which is used to generate globally monotonically increasing timestamp") final String timeGeneratorType, + @ShellOption(value = {"--maxExpectedClockSkewMs"}, defaultValue = "200", + help = "The max expected clock skew time for WaitBasedTimeGenerator in ms") final Long maxExpectedClockSkewMs, + @ShellOption(value = {"--useDefaultLockProvider"}, defaultValue = "false", + help = "Use org.apache.hudi.client.transaction.lock.InProcessLockProvider") final boolean useDefaultLockProvider) throws IOException { HoodieCLI .setConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(eventuallyConsistent) .withInitialConsistencyCheckIntervalMs(initialConsistencyIntervalMs) .withMaxConsistencyCheckIntervalMs(maxConsistencyIntervalMs).withMaxConsistencyChecks(maxConsistencyChecks) .build()); + HoodieCLI + .setTimeGeneratorConfig(HoodieTimeGeneratorConfig.newBuilder().withPath(path) + .withTimeGeneratorType(TimeGeneratorType.valueOf(timeGeneratorType)) + .withMaxExpectedClockSkewMs(maxExpectedClockSkewMs) + .withDefaultLockProvider(useDefaultLockProvider) + .build()); HoodieCLI.initConf(); HoodieCLI.connectTo(path, layoutVersion); HoodieCLI.initFS(true); @@ -144,7 +158,8 @@ public String createTable( .setTimelineLayoutVersion(layoutVersion) .initTable(HoodieCLI.conf, path); // Now connect to ensure loading works - return connect(path, layoutVersion, false, 0, 0, 0); + return connect(path, layoutVersion, false, 0, 0, 0, + "WAIT_TO_ADJUST_SKEW", 200L, true); } /** diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestFileSystemViewCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestFileSystemViewCommand.java index 80cf9cd34ed3..7bafe2f432f9 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestFileSystemViewCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestFileSystemViewCommand.java @@ -101,11 +101,11 @@ private void createNonpartitionedTable() throws IOException { // Write date files and log file String testWriteToken = "2-0-2"; Files.createFile(Paths.get(nonpartitionedTablePath, FSUtils - .makeBaseFileName(commitTime1, testWriteToken, fileId1))); + .makeBaseFileName(commitTime1, testWriteToken, fileId1, BASE_FILE_EXTENSION))); Files.createFile(Paths.get(nonpartitionedTablePath, FSUtils .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0, testWriteToken))); Files.createFile(Paths.get(nonpartitionedTablePath, FSUtils - 
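/*
 * A minimal sketch (illustrative class/method names, not part of the patch) of the time
 * generator wiring that the new --timeGeneratorType, --maxExpectedClockSkewMs and
 * --useDefaultLockProvider options feed into HoodieCLI.setTimeGeneratorConfig. The values
 * shown are the CLI defaults from the patch above.
 */
import org.apache.hudi.common.config.HoodieTimeGeneratorConfig;
import org.apache.hudi.common.table.timeline.TimeGeneratorType;

public class CliTimeGeneratorSketch {

  // Builds the config that TableCommand#connect now hands to HoodieCLI before the
  // table meta client is refreshed.
  static HoodieTimeGeneratorConfig defaultCliTimeGeneratorConfig(String basePath) {
    return HoodieTimeGeneratorConfig.newBuilder()
        .withPath(basePath)
        .withTimeGeneratorType(TimeGeneratorType.valueOf("WAIT_TO_ADJUST_SKEW"))
        .withMaxExpectedClockSkewMs(200L)
        .withDefaultLockProvider(true) // resolves to InProcessLockProvider per the TestTableCommand assertions
        .build();
  }
}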
.makeBaseFileName(commitTime2, testWriteToken, fileId1))); + .makeBaseFileName(commitTime2, testWriteToken, fileId1, BASE_FILE_EXTENSION))); Files.createFile(Paths.get(nonpartitionedTablePath, FSUtils .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0, testWriteToken))); @@ -144,11 +144,11 @@ private void createPartitionedTable() throws IOException { // Write date files and log file String testWriteToken = "1-0-1"; Files.createFile(Paths.get(fullPartitionPath, FSUtils - .makeBaseFileName(commitTime1, testWriteToken, fileId1))); + .makeBaseFileName(commitTime1, testWriteToken, fileId1, BASE_FILE_EXTENSION))); Files.createFile(Paths.get(fullPartitionPath, FSUtils .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0, testWriteToken))); Files.createFile(Paths.get(fullPartitionPath, FSUtils - .makeBaseFileName(commitTime2, testWriteToken, fileId1))); + .makeBaseFileName(commitTime2, testWriteToken, fileId1, BASE_FILE_EXTENSION))); Files.createFile(Paths.get(fullPartitionPath, FSUtils .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0, testWriteToken))); @@ -323,7 +323,8 @@ public void testShowLatestFileSlices() throws IOException { .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_FILES_UNSCHEDULED); // Test show with partition path '2016/03/15' - new TableCommand().connect(partitionedTablePath, null, false, 0, 0, 0); + new TableCommand().connect(partitionedTablePath, null, false, 0, 0, 0, + "WAIT_TO_ADJUST_SKEW", 200L, false); Object partitionedTable = shell.evaluate(() -> "show fsview latest --partitionPath " + partitionPath); assertTrue(ShellEvaluationResultUtil.isSuccess(partitionedTable)); @@ -336,7 +337,8 @@ public void testShowLatestFileSlices() throws IOException { assertEquals(partitionedExpected, partitionedResults); // Test show for non-partitioned table - new TableCommand().connect(nonpartitionedTablePath, null, false, 0, 0, 0); + new TableCommand().connect(nonpartitionedTablePath, null, false, 0, 0, 0, + "WAIT_TO_ADJUST_SKEW", 200L, false); Object nonpartitionedTable = shell.evaluate(() -> "show fsview latest"); assertTrue(ShellEvaluationResultUtil.isSuccess(nonpartitionedTable)); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestMetadataCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestMetadataCommand.java index 3214bb2cfccd..5dbc1c59b9b5 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestMetadataCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestMetadataCommand.java @@ -96,7 +96,8 @@ public void testMetadataDelete() throws Exception { HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf()).setBasePath(tablePath).build(); assertFalse(metaClient.getTableConfig().getMetadataPartitions().isEmpty()); - new TableCommand().connect(tablePath, null, false, 0, 0, 0); + new TableCommand().connect(tablePath, null, false, 0, 0, 0, + "WAIT_TO_ADJUST_SKEW", 200L, false); Object result = shell.evaluate(() -> "metadata delete"); assertTrue(ShellEvaluationResultUtil.isSuccess(result)); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestTableCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestTableCommand.java index 5b6abf25f60d..8f6442379661 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestTableCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestTableCommand.java @@ -23,12 +23,14 @@ import 
org.apache.hudi.cli.functional.CLIFunctionalTestHarness; import org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator; import org.apache.hudi.cli.testutils.ShellEvaluationResultUtil; +import org.apache.hudi.common.config.HoodieTimeGeneratorConfig; import org.apache.hudi.common.fs.ConsistencyGuardConfig; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.table.timeline.TimeGeneratorType; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.Option; @@ -105,7 +107,7 @@ public void testConnectTable() { // Test connect with specified values Object result = shell.evaluate(() -> "connect --path " + tablePath + " --initialCheckIntervalMs 3000 " - + "--maxWaitIntervalMs 40000 --maxCheckIntervalMs 8"); + + "--maxWaitIntervalMs 40000 --maxCheckIntervalMs 8 --maxExpectedClockSkewMs 888 --useDefaultLockProvider true"); assertTrue(ShellEvaluationResultUtil.isSuccess(result)); // Check specified values @@ -113,10 +115,16 @@ public void testConnectTable() { assertEquals(3000, conf.getInitialConsistencyCheckIntervalMs()); assertEquals(40000, conf.getMaxConsistencyCheckIntervalMs()); assertEquals(8, conf.getMaxConsistencyChecks()); + HoodieTimeGeneratorConfig timeGeneratorConfig = HoodieCLI.timeGeneratorConfig; + assertEquals(tablePath, timeGeneratorConfig.getBasePath()); + assertEquals(888L, timeGeneratorConfig.getMaxExpectedClockSkewMs()); + assertEquals("org.apache.hudi.client.transaction.lock.InProcessLockProvider", + timeGeneratorConfig.getLockConfiguration().getConfig().getString(HoodieTimeGeneratorConfig.LOCK_PROVIDER_KEY)); // Check default values assertFalse(conf.isConsistencyCheckEnabled()); assertEquals(new Integer(1), HoodieCLI.layoutVersion.getVersion()); + assertEquals(TimeGeneratorType.valueOf("WAIT_TO_ADJUST_SKEW"), timeGeneratorConfig.getTimeGeneratorType()); } /** @@ -134,6 +142,13 @@ public void testDefaultCreate() { assertEquals(metaPath, client.getMetaPath()); assertEquals(HoodieTableType.COPY_ON_WRITE, client.getTableType()); assertEquals(new Integer(1), client.getTimelineLayoutVersion().getVersion()); + + HoodieTimeGeneratorConfig timeGeneratorConfig = HoodieCLI.timeGeneratorConfig; + assertEquals(tablePath, timeGeneratorConfig.getBasePath()); + assertEquals(200L, timeGeneratorConfig.getMaxExpectedClockSkewMs()); + assertEquals("org.apache.hudi.client.transaction.lock.InProcessLockProvider", + timeGeneratorConfig.getLockConfiguration().getConfig().getString(HoodieTimeGeneratorConfig.LOCK_PROVIDER_KEY)); + assertEquals(TimeGeneratorType.valueOf("WAIT_TO_ADJUST_SKEW"), timeGeneratorConfig.getTimeGeneratorType()); } /** diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/functional/CLIFunctionalTestHarness.java b/hudi-cli/src/test/java/org/apache/hudi/cli/functional/CLIFunctionalTestHarness.java index 6d6335ab0fb1..7c72417504bc 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/functional/CLIFunctionalTestHarness.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/functional/CLIFunctionalTestHarness.java @@ -21,6 +21,7 @@ import org.apache.hudi.client.SparkRDDReadClient; import org.apache.hudi.client.common.HoodieSparkEngineContext; +import 
org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.testutils.HoodieClientTestUtils; import org.apache.hudi.testutils.providers.SparkProvider; @@ -40,6 +41,8 @@ public class CLIFunctionalTestHarness implements SparkProvider { + protected static final String BASE_FILE_EXTENSION = HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension(); + protected static int timelineServicePort = FileSystemViewStorageConfig.REMOTE_PORT_NUM.defaultValue(); protected static transient TimelineService timelineService; diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestBootstrapCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestBootstrapCommand.java index 2d73eb02e46d..271885df9bce 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestBootstrapCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestBootstrapCommand.java @@ -91,7 +91,8 @@ public void testBootstrapRunCommand() throws IOException { assertTrue(ShellEvaluationResultUtil.isSuccess(resultForBootstrapRun)); // Connect & check Hudi table exist - new TableCommand().connect(tablePath, TimelineLayoutVersion.VERSION_1, false, 2000, 300000, 7); + new TableCommand().connect(tablePath, TimelineLayoutVersion.VERSION_1, false, 2000, 300000, 7, + "WAIT_TO_ADJUST_SKEW", 200L, false); metaClient = HoodieCLI.getTableMetaClient(); assertEquals(1, metaClient.getActiveTimeline().getCommitsTimeline().countInstants(), "Should have 1 commit."); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java index 3575b85344e0..7786a57896e4 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java @@ -113,7 +113,8 @@ public void testConvertWithInsert() throws IOException { assertTrue(Files.exists(Paths.get(metaPath)), "Hoodie table not exist."); // Load meta data - new TableCommand().connect(targetPath.toString(), TimelineLayoutVersion.VERSION_1, false, 2000, 300000, 7); + new TableCommand().connect(targetPath.toString(), TimelineLayoutVersion.VERSION_1, false, 2000, 300000, 7, + "WAIT_TO_ADJUST_SKEW", 200L, false); metaClient = HoodieCLI.getTableMetaClient(); assertEquals(1, metaClient.getActiveTimeline().getCommitsTimeline().countInstants(), "Should only 1 commit."); @@ -137,7 +138,8 @@ public void testConvertWithUpsert() throws IOException, ParseException { dataImporter.dataImport(jsc, 0); // Load meta data - new TableCommand().connect(targetPath.toString(), TimelineLayoutVersion.VERSION_1, false, 2000, 300000, 7); + new TableCommand().connect(targetPath.toString(), TimelineLayoutVersion.VERSION_1, false, 2000, 300000, 7, + "WAIT_TO_ADJUST_SKEW", 200L, false); metaClient = HoodieCLI.getTableMetaClient(); // check if insert instant exist diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestRepairsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestRepairsCommand.java index a95ed9ff7787..d7e40c8f2f65 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestRepairsCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestRepairsCommand.java @@ -320,7 +320,8 @@ public void testDeduplicateWithReal(HoodieTableType tableType) throws IOExceptio } private void connectTableAndReloadMetaClient(String 
tablePath) throws IOException { - new TableCommand().connect(tablePath, TimelineLayoutVersion.VERSION_1, false, 0, 0, 0); + new TableCommand().connect(tablePath, TimelineLayoutVersion.VERSION_1, false, 0, 0, 0, + "WAIT_TO_ADJUST_SKEW", 200L, false); metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()); } diff --git a/hudi-client/hudi-client-common/pom.xml b/hudi-client/hudi-client-common/pom.xml index 67705edb316f..9a4a7f2104b9 100644 --- a/hudi-client/hudi-client-common/pom.xml +++ b/hudi-client/hudi-client-common/pom.xml @@ -120,6 +120,16 @@ io.prometheus simpleclient_pushgateway + + com.uber.m3 + tally-m3 + ${tally.version} + + + com.uber.m3 + tally-core + ${tally.version} + @@ -236,9 +246,6 @@ src/main/resources - - src/test/resources - diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java index a88030c9a2c1..009fbf140f04 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java @@ -24,6 +24,7 @@ import org.apache.hudi.client.heartbeat.HoodieHeartbeatClient; import org.apache.hudi.client.transaction.TransactionManager; import org.apache.hudi.client.utils.TransactionUtils; +import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieWriteStat; @@ -41,6 +42,7 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieWriteConflictException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.hudi.metrics.HoodieMetrics; import org.apache.hudi.table.HoodieTable; @@ -258,7 +260,7 @@ protected void finalizeWrite(HoodieTable table, String instantTime, List durationInMs = Option.of(metrics.getDurationInMs(finalizeCtx.stop())); durationInMs.ifPresent(duration -> { - LOG.info("Finalize write elapsed time (milliseconds): " + duration); + LOG.info("Finalize write elapsed time (milliseconds): {}", duration); metrics.updateFinalizeWriteMetrics(duration, stats.size()); }); } @@ -266,4 +268,28 @@ protected void finalizeWrite(HoodieTable table, String instantTime, List writeStatuses) { + context.setJobStatus(this.getClass().getSimpleName(), "Committing to metadata table: " + config.getTableName()); + Option metadataWriterOpt = table.getMetadataWriter(instantTime); + if (metadataWriterOpt.isPresent()) { + try (HoodieTableMetadataWriter metadataWriter = metadataWriterOpt.get()) { + metadataWriter.updateFromWriteStatuses(metadata, writeStatuses, instantTime); + } catch (Exception e) { + if (e instanceof HoodieException) { + throw (HoodieException) e; + } else { + throw new HoodieException("Failed to update metadata", e); + } + } + } + } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java index 967aaa4f68e4..c472be33f3d3 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java @@ -58,7 +58,6 @@ import 
org.apache.hudi.exception.HoodieLogCompactException; import org.apache.hudi.exception.HoodieRollbackException; import org.apache.hudi.metadata.HoodieTableMetadataUtil; -import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.compact.CompactHelpers; @@ -248,7 +247,7 @@ protected Option inlineLogCompact(Option> extraMetad protected void runAnyPendingCompactions(HoodieTable table) { table.getActiveTimeline().getWriteTimeline().filterPendingCompactionTimeline().getInstants() .forEach(instant -> { - LOG.info("Running previously failed inflight compaction at instant " + instant); + LOG.info("Running previously failed inflight compaction at instant {}", instant); compact(instant.getTimestamp(), true); }); } @@ -256,7 +255,7 @@ protected void runAnyPendingCompactions(HoodieTable table) { protected void runAnyPendingLogCompactions(HoodieTable table) { table.getActiveTimeline().getWriteTimeline().filterPendingLogCompactionTimeline().getInstantsAsStream() .forEach(instant -> { - LOG.info("Running previously failed inflight log compaction at instant " + instant); + LOG.info("Running previously failed inflight log compaction at instant {}", instant); logCompact(instant.getTimestamp(), true); }); } @@ -329,7 +328,7 @@ protected void completeCompaction(HoodieCommitMetadata metadata, HoodieTable tab finalizeWrite(table, compactionCommitTime, writeStats); // commit to data table after committing to metadata table. writeTableMetadata(table, compactionCommitTime, metadata, context.emptyHoodieData()); - LOG.info("Committing Compaction " + compactionCommitTime + ". Finished with result " + metadata); + LOG.info("Committing Compaction {}. Finished with result {}", compactionCommitTime, metadata); CompactHelpers.getInstance().completeInflightCompaction(table, compactionCommitTime, metadata); } finally { this.txnManager.endTransaction(Option.of(compactionInstant)); @@ -342,7 +341,7 @@ protected void completeCompaction(HoodieCommitMetadata metadata, HoodieTable tab metrics.updateCommitMetrics(parsedInstant.getTime(), durationInMs, metadata, COMPACTION_ACTION) ); } - LOG.info("Compacted successfully on commit " + compactionCommitTime); + LOG.info("Compacted successfully on commit {}", compactionCommitTime); } /** @@ -389,7 +388,7 @@ protected void completeLogCompaction(HoodieCommitMetadata metadata, HoodieTable finalizeWrite(table, logCompactionCommitTime, writeStats); // commit to data table after committing to metadata table. writeTableMetadata(table, logCompactionCommitTime, metadata, context.emptyHoodieData()); - LOG.info("Committing Log Compaction " + logCompactionCommitTime + ". Finished with result " + metadata); + LOG.info("Committing Log Compaction {}. 
Finished with result {}", logCompactionCommitTime, metadata); CompactHelpers.getInstance().completeInflightLogCompaction(table, logCompactionCommitTime, metadata); } finally { this.txnManager.endTransaction(Option.of(logCompactionInstant)); @@ -402,7 +401,7 @@ protected void completeLogCompaction(HoodieCommitMetadata metadata, HoodieTable metrics.updateCommitMetrics(parsedInstant.getTime(), durationInMs, metadata, HoodieActiveTimeline.LOG_COMPACTION_ACTION) ); } - LOG.info("Log Compacted successfully on commit " + logCompactionCommitTime); + LOG.info("Log Compacted successfully on commit {}", logCompactionCommitTime); } /** @@ -450,7 +449,7 @@ public HoodieWriteMetadata cluster(String clusteringInstant, boolean shouldCo table.getMetaClient().reloadActiveTimeline(); } clusteringTimer = metrics.getClusteringCtx(); - LOG.info("Starting clustering at " + clusteringInstant); + LOG.info("Starting clustering at {}", clusteringInstant); HoodieWriteMetadata writeMetadata = table.cluster(context, clusteringInstant); HoodieWriteMetadata clusteringMetadata = convertToOutputMetadata(writeMetadata); // Validation has to be done after cloning. if not, it could result in referencing the write status twice which means clustering could get executed twice. @@ -484,6 +483,17 @@ public boolean purgePendingClustering(String clusteringInstant) { return false; } + /** + * Delete expired partition by config. + * + * @param instantTime Instant Time for the action + * @return HoodieWriteMetadata + */ + public HoodieWriteMetadata managePartitionTTL(String instantTime) { + HoodieTable table = createTable(config, context.getHadoopConf().get()); + return table.managePartitionTTL(context, instantTime); + } + protected abstract void validateClusteringCommit(HoodieWriteMetadata clusteringMetadata, String clusteringCommitTime, HoodieTable table); protected abstract HoodieWriteMetadata convertToOutputMetadata(HoodieWriteMetadata writeMetadata); @@ -509,7 +519,7 @@ private void completeClustering(HoodieReplaceCommitMetadata metadata, // Update table's metadata (table) writeTableMetadata(table, clusteringInstant.getTimestamp(), metadata, writeStatuses.orElseGet(context::emptyHoodieData)); - LOG.info("Committing Clustering " + clusteringCommitTime + ". Finished with result " + metadata); + LOG.info("Committing Clustering {}. 
Finished with result {}", clusteringCommitTime, metadata); table.getActiveTimeline().transitionReplaceInflightToComplete( false, @@ -528,7 +538,7 @@ private void completeClustering(HoodieReplaceCommitMetadata metadata, metrics.updateCommitMetrics(parsedInstant.getTime(), durationInMs, metadata, HoodieActiveTimeline.REPLACE_COMMIT_ACTION) ); } - LOG.info("Clustering successfully on commit " + clusteringCommitTime); + LOG.info("Clustering successfully on commit {}", clusteringCommitTime); } protected void runTableServicesInline(HoodieTable table, HoodieCommitMetadata metadata, Option> extraMetadata) { @@ -583,6 +593,12 @@ protected void runTableServicesInline(HoodieTable table, HoodieCommitMetadata me metadata.addMetadata(HoodieClusteringConfig.SCHEDULE_INLINE_CLUSTERING.key(), "true"); inlineScheduleClustering(extraMetadata); } + + // Do an inline partition ttl management if enabled + if (config.isInlinePartitionTTLEnable()) { + String instantTime = createNewInstantTime(); + table.managePartitionTTL(table.getContext(), instantTime); + } } /** @@ -599,7 +615,7 @@ public Option scheduleTableService(String instantTime, Option scheduleTableServiceInternal(String instantTime, Option LOG.info("Scheduling archiving is not supported. Skipping."); break; case CLUSTER: - LOG.info("Scheduling clustering at instant time :" + instantTime); + LOG.info("Scheduling clustering at instant time: {}", instantTime); Option clusteringPlan = table .scheduleClustering(context, instantTime, extraMetadata); option = clusteringPlan.isPresent() ? Option.of(instantTime) : Option.empty(); break; case COMPACT: - LOG.info("Scheduling compaction at instant time :" + instantTime); + LOG.info("Scheduling compaction at instant time: {}", instantTime); Option compactionPlan = table .scheduleCompaction(context, instantTime, extraMetadata); option = compactionPlan.isPresent() ? Option.of(instantTime) : Option.empty(); break; case LOG_COMPACT: - LOG.info("Scheduling log compaction at instant time :" + instantTime); + LOG.info("Scheduling log compaction at instant time: {}", instantTime); Option logCompactionPlan = table .scheduleLogCompaction(context, instantTime, extraMetadata); option = logCompactionPlan.isPresent() ? Option.of(instantTime) : Option.empty(); break; case CLEAN: - LOG.info("Scheduling cleaning at instant time :" + instantTime); + LOG.info("Scheduling cleaning at instant time: {}", instantTime); Option cleanerPlan = table .scheduleCleaning(context, instantTime, extraMetadata); option = cleanerPlan.isPresent() ? Option.of(instantTime) : Option.empty(); @@ -649,7 +665,7 @@ protected Option scheduleTableServiceInternal(String instantTime, Option Option instantRange = delegateToTableServiceManager(tableServiceType, table); if (instantRange.isPresent()) { - LOG.info("Delegate instant [" + instantRange.get() + "] to table service manager"); + LOG.info("Delegate instant [{}] to table service manager", instantRange.get()); } return option; @@ -693,36 +709,12 @@ protected void runAnyPendingClustering(HoodieTable table) { table.getActiveTimeline().filterPendingReplaceTimeline().getInstants().forEach(instant -> { Option> instantPlan = ClusteringUtils.getClusteringPlan(table.getMetaClient(), instant); if (instantPlan.isPresent()) { - LOG.info("Running pending clustering at instant " + instantPlan.get().getLeft()); + LOG.info("Running pending clustering at instant {}", instantPlan.get().getLeft()); cluster(instant.getTimestamp(), true); } }); } - /** - * Write the HoodieCommitMetadata to metadata table if available. 
- * - * @param table {@link HoodieTable} of interest. - * @param instantTime instant time of the commit. - * @param metadata instance of {@link HoodieCommitMetadata}. - * @param writeStatuses Write statuses of the commit - */ - protected void writeTableMetadata(HoodieTable table, String instantTime, HoodieCommitMetadata metadata, HoodieData writeStatuses) { - context.setJobStatus(this.getClass().getSimpleName(), "Committing to metadata table: " + config.getTableName()); - Option metadataWriterOpt = table.getMetadataWriter(instantTime); - if (metadataWriterOpt.isPresent()) { - try (HoodieTableMetadataWriter metadataWriter = metadataWriterOpt.get()) { - metadataWriter.updateFromWriteStatuses(metadata, writeStatuses, instantTime); - } catch (Exception e) { - if (e instanceof HoodieException) { - throw (HoodieException) e; - } else { - throw new HoodieException("Failed to update metadata", e); - } - } - } - } - /** * Clean up any stale/old files/data lying around (either on file storage or index storage) based on the * configurations and CleaningPolicy used. (typically files that no longer can be used by a running query can be diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java index 9b69d819e712..9ade694d3409 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java @@ -347,30 +347,6 @@ protected void preCommit(HoodieInstant inflightInstant, HoodieCommitMetadata met resolveWriteConflict(table, metadata, this.pendingInflightAndRequestedInstants); } - /** - * Write the HoodieCommitMetadata to metadata table if available. - * - * @param table {@link HoodieTable} of interest. - * @param instantTime instant time of the commit. - * @param metadata instance of {@link HoodieCommitMetadata}. - * @param writeStatuses WriteStatuses for the completed action. - */ - protected void writeTableMetadata(HoodieTable table, String instantTime, HoodieCommitMetadata metadata, HoodieData writeStatuses) { - context.setJobStatus(this.getClass().getSimpleName(), "Committing to metadata table: " + config.getTableName()); - Option metadataWriterOpt = table.getMetadataWriter(instantTime); - if (metadataWriterOpt.isPresent()) { - try (HoodieTableMetadataWriter metadataWriter = metadataWriterOpt.get()) { - metadataWriter.updateFromWriteStatuses(metadata, writeStatuses, instantTime); - } catch (Exception e) { - if (e instanceof HoodieException) { - throw (HoodieException) e; - } else { - throw new HoodieException("Failed to update metadata", e); - } - } - } - } - /** * Filter out HoodieRecords that already exists in the output folder. This is useful in deduplication. * @@ -987,10 +963,10 @@ public boolean scheduleCompactionAtInstant(String instantTime, Option scheduleIndexing(List partitionTypes) { + public Option scheduleIndexing(List partitionTypes, List partitionPaths) { String instantTime = createNewInstantTime(); Option indexPlan = createTable(config, hadoopConf) - .scheduleIndexing(context, instantTime, partitionTypes); + .scheduleIndexing(context, instantTime, partitionTypes, partitionPaths); return indexPlan.isPresent() ? 
Option.of(instantTime) : Option.empty(); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/LSMTimelineWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/LSMTimelineWriter.java index 4f487410a8c9..e5acf775a193 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/LSMTimelineWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/LSMTimelineWriter.java @@ -20,6 +20,7 @@ package org.apache.hudi.client.timeline; import org.apache.hudi.avro.model.HoodieLSMTimelineInstant; +import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.table.timeline.MetadataConversionUtils; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; @@ -72,21 +73,29 @@ public class LSMTimelineWriter { public static final long MAX_FILE_SIZE_IN_BYTES = 1024 * 1024 * 1000; private final HoodieWriteConfig config; - private final HoodieTable table; + private final TaskContextSupplier taskContextSupplier; private final HoodieTableMetaClient metaClient; private HoodieWriteConfig writeConfig; private LSMTimelineWriter(HoodieWriteConfig config, HoodieTable table) { + this(config, table.getTaskContextSupplier(), table.getMetaClient()); + } + + private LSMTimelineWriter(HoodieWriteConfig config, TaskContextSupplier taskContextSupplier, HoodieTableMetaClient metaClient) { this.config = config; - this.table = table; - this.metaClient = table.getMetaClient(); + this.taskContextSupplier = taskContextSupplier; + this.metaClient = metaClient; } public static LSMTimelineWriter getInstance(HoodieWriteConfig config, HoodieTable table) { return new LSMTimelineWriter(config, table); } + public static LSMTimelineWriter getInstance(HoodieWriteConfig config, TaskContextSupplier taskContextSupplier, HoodieTableMetaClient metaClient) { + return new LSMTimelineWriter(config, taskContextSupplier, metaClient); + } + /** * Writes the list of active actions into the timeline. 
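/*
 * A small sketch (illustrative names, not part of the patch) of the new LSMTimelineWriter
 * factory overload above, which decouples the archived-timeline writer from HoodieTable:
 * callers holding only a write config, a TaskContextSupplier and a meta client can now
 * obtain a writer directly.
 */
import org.apache.hudi.client.timeline.LSMTimelineWriter;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.config.HoodieWriteConfig;

public class ArchivedTimelineWriterSketch {

  // Equivalent to LSMTimelineWriter.getInstance(config, table) when the task context
  // supplier and meta client are already at hand without a HoodieTable instance.
  static LSMTimelineWriter writerFor(HoodieWriteConfig config,
                                     TaskContextSupplier taskContextSupplier,
                                     HoodieTableMetaClient metaClient) {
    return LSMTimelineWriter.getInstance(config, taskContextSupplier, metaClient);
  }
}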
* @@ -115,10 +124,14 @@ public void write( exceptionHandler.ifPresent(handler -> handler.accept(e)); } } - updateManifest(filePath.getName()); } catch (Exception e) { throw new HoodieCommitException("Failed to write commits", e); } + try { + updateManifest(filePath.getName()); + } catch (Exception e) { + throw new HoodieCommitException("Failed to update archiving manifest", e); + } } /** @@ -366,7 +379,7 @@ private HoodieWriteConfig getOrCreateWriterConfig() { private HoodieFileWriter openWriter(Path filePath) { try { return HoodieFileWriterFactory.getFileWriter("", filePath, metaClient.getHadoopConf(), getOrCreateWriterConfig(), - HoodieLSMTimelineInstant.getClassSchema(), table.getTaskContextSupplier(), HoodieRecord.HoodieRecordType.AVRO); + HoodieLSMTimelineInstant.getClassSchema(), taskContextSupplier, HoodieRecord.HoodieRecordType.AVRO); } catch (IOException e) { throw new HoodieException("Unable to initialize archiving writer", e); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/PreferWriterConflictResolutionStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/PreferWriterConflictResolutionStrategy.java index 61ed673fc62f..62fbf64a7f9c 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/PreferWriterConflictResolutionStrategy.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/PreferWriterConflictResolutionStrategy.java @@ -55,7 +55,7 @@ public Stream getCandidateInstants(HoodieTableMetaClient metaClie Option lastSuccessfulInstant) { HoodieActiveTimeline activeTimeline = metaClient.reloadActiveTimeline(); if ((REPLACE_COMMIT_ACTION.equals(currentInstant.getAction()) - && ClusteringUtils.isClusteringCommit(metaClient, currentInstant)) + && ClusteringUtils.isClusteringInstant(activeTimeline, currentInstant)) || COMPACTION_ACTION.equals(currentInstant.getAction())) { return getCandidateInstantsForTableServicesCommits(activeTimeline, currentInstant); } else { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodiePayloadConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodiePayloadConfig.java index 3929dcba0471..5c70000bd6c7 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodiePayloadConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodiePayloadConfig.java @@ -22,7 +22,7 @@ import org.apache.hudi.common.config.ConfigGroups; import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.config.HoodieConfig; -import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; +import org.apache.hudi.common.model.DefaultHoodieRecordPayload; import java.io.File; import java.io.FileReader; @@ -50,7 +50,7 @@ public class HoodiePayloadConfig extends HoodieConfig { public static final ConfigProperty PAYLOAD_CLASS_NAME = ConfigProperty .key("hoodie.compaction.payload.class") - .defaultValue(OverwriteWithLatestAvroPayload.class.getName()) + .defaultValue(DefaultHoodieRecordPayload.class.getName()) .markAdvanced() .withDocumentation("This needs to be same as class used during insert/upserts. 
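/*
 * A hedged sketch (not part of the patch) related to the default-payload change above:
 * jobs that depended on the old OverwriteWithLatestAvroPayload merge semantics can pin it
 * explicitly. The compaction-side key is the one shown above; the write-side key
 * "hoodie.datasource.write.payload.class" has its default switched to
 * DefaultHoodieRecordPayload further down in this diff.
 */
import java.util.Properties;

import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;

public class LegacyPayloadPropsSketch {

  // Returns writer/compaction properties that keep the pre-change merge behavior.
  static Properties legacyPayloadProps() {
    Properties props = new Properties();
    props.setProperty("hoodie.datasource.write.payload.class", OverwriteWithLatestAvroPayload.class.getName());
    props.setProperty("hoodie.compaction.payload.class", OverwriteWithLatestAvroPayload.class.getName());
    return props;
  }
}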
Just like writing, compaction also uses " + "the record payload class to merge records in the log against each other, merge again with the base file and " diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieTTLConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieTTLConfig.java new file mode 100644 index 000000000000..1f9a4e40e983 --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieTTLConfig.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.config; + +import org.apache.hudi.common.config.ConfigClassProperty; +import org.apache.hudi.common.config.ConfigGroups; +import org.apache.hudi.common.config.ConfigProperty; +import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.table.action.ttl.strategy.PartitionTTLStrategyType; + +import javax.annotation.concurrent.Immutable; + +import java.util.Properties; + +/** + * Hoodie Configs for partition/record level ttl management. 
+ */
+@Immutable
+@ConfigClassProperty(name = "TTL management Configs",
+    groupName = ConfigGroups.Names.WRITE_CLIENT,
+    description = "Data ttl management")
+public class HoodieTTLConfig extends HoodieConfig {
+
+  public static final String PARTITION_TTL_STRATEGY_PARAM_PREFIX = "hoodie.partition.ttl.strategy.";
+
+  public static final String KEEP_BY_TIME_PARTITION_TTL_STRATEGY =
+      "org.apache.hudi.table.action.ttl.strategy.KeepByTimeStrategy";
+  public static final ConfigProperty<Boolean> INLINE_PARTITION_TTL = ConfigProperty
+      .key("hoodie.partition.ttl.inline")
+      .defaultValue(false)
+      .sinceVersion("1.0.0")
+      .markAdvanced()
+      .withDocumentation("When enabled, the partition ttl management service is invoked immediately after each commit, "
+          + "to delete expired partitions");
+
+  public static final ConfigProperty<String> PARTITION_TTL_STRATEGY_CLASS_NAME = ConfigProperty
+      .key("hoodie.partition.ttl.strategy.class")
+      .noDefaultValue()
+      .sinceVersion("1.0.0")
+      .markAdvanced()
+      .withDocumentation("Config to provide a strategy class (subclass of PartitionTTLStrategy) to get the expired partitions");
+
+  public static final ConfigProperty<String> PARTITION_TTL_STRATEGY_TYPE = ConfigProperty
+      .key("hoodie.partition.ttl.management.strategy.type")
+      .defaultValue(PartitionTTLStrategyType.KEEP_BY_TIME.name())
+      .sinceVersion("1.0.0")
+      .markAdvanced()
+      .withDocumentation("Partition ttl management strategy type to determine the strategy class");
+
+  public static final ConfigProperty<Integer> DAYS_RETAIN = ConfigProperty
+      .key(PARTITION_TTL_STRATEGY_PARAM_PREFIX + "days.retain")
+      .defaultValue(-1)
+      .sinceVersion("1.0.0")
+      .markAdvanced()
+      .withDocumentation("Partition ttl management KEEP_BY_TIME strategy days retain");
+
+  public static final ConfigProperty<String> PARTITION_SELECTED = ConfigProperty
+      .key(PARTITION_TTL_STRATEGY_PARAM_PREFIX + "partition.selected")
+      .noDefaultValue()
+      .markAdvanced()
+      .sinceVersion("1.0.0")
+      .withDocumentation("Partitions to manage ttl");
+
+  public static final ConfigProperty<Integer> MAX_PARTITION_TO_DELETE = ConfigProperty
+      .key(PARTITION_TTL_STRATEGY_PARAM_PREFIX + "max.delete.partitions")
+      .defaultValue(1000)
+      .markAdvanced()
+      .sinceVersion("1.0.0")
+      .withDocumentation("Max partitions to delete in partition ttl management");
+
+  public static class Builder {
+    private final HoodieTTLConfig ttlConfig = new HoodieTTLConfig();
+
+    public HoodieTTLConfig.Builder withTTLPartitionSelected(String partitionSelected) {
+      ttlConfig.setValue(PARTITION_SELECTED, partitionSelected);
+      return this;
+    }
+
+    public HoodieTTLConfig.Builder withTTLDaysRetain(Integer daysRetain) {
+      ttlConfig.setValue(DAYS_RETAIN, daysRetain.toString());
+      return this;
+    }
+
+    public HoodieTTLConfig.Builder enableInlinePartitionTTL(Boolean enable) {
+      ttlConfig.setValue(INLINE_PARTITION_TTL, enable.toString());
+      return this;
+    }
+
+    public HoodieTTLConfig.Builder withTTLStrategyClass(String clazz) {
+      ttlConfig.setValue(PARTITION_TTL_STRATEGY_CLASS_NAME, clazz);
+      return this;
+    }
+
+    public HoodieTTLConfig.Builder withTTLStrategyType(PartitionTTLStrategyType ttlStrategyType) {
+      ttlConfig.setValue(PARTITION_TTL_STRATEGY_TYPE, ttlStrategyType.name());
+      return this;
+    }
+
+    public HoodieTTLConfig.Builder fromProperties(Properties props) {
+      this.ttlConfig.getProps().putAll(props);
+      return this;
+    }
+
+    public HoodieTTLConfig build() {
+      ttlConfig.setDefaults(HoodieTTLConfig.class.getName());
+      return ttlConfig;
+    }
+  }
+
+  public static Builder newBuilder() {
+    return new Builder();
+  }
+
+}
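/*
 * A minimal usage sketch (not part of the patch) for the TTL config above. It assumes the
 * HoodieWriteConfig.Builder#withTTLConfig hook added later in this diff; the 30-day
 * retention value is illustrative, since DAYS_RETAIN defaults to -1 and must be set.
 */
import org.apache.hudi.config.HoodieTTLConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.action.ttl.strategy.PartitionTTLStrategyType;

public class PartitionTtlConfigSketch {

  // Builds a writer config that drops partitions older than 30 days right after each commit.
  static HoodieWriteConfig.Builder withPartitionTtl(HoodieWriteConfig.Builder builder) {
    return builder.withTTLConfig(HoodieTTLConfig.newBuilder()
        .enableInlinePartitionTTL(true)
        .withTTLStrategyType(PartitionTTLStrategyType.KEEP_BY_TIME)
        .withTTLDaysRetain(30)
        .build());
  }
}
diff --git 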
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index 8fd3546671e5..cca3836ec6e8 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -39,13 +39,13 @@ import org.apache.hudi.common.engine.EngineType; import org.apache.hudi.common.fs.ConsistencyGuardConfig; import org.apache.hudi.common.fs.FileSystemRetryConfig; +import org.apache.hudi.common.model.DefaultHoodieRecordPayload; import org.apache.hudi.common.model.HoodieAvroRecordMerger; import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieRecordMerger; import org.apache.hudi.common.model.HoodieTableType; -import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; import org.apache.hudi.common.model.RecordPayloadType; import org.apache.hudi.common.model.WriteConcurrencyMode; import org.apache.hudi.common.model.WriteOperationType; @@ -68,6 +68,7 @@ import org.apache.hudi.config.metrics.HoodieMetricsDatadogConfig; import org.apache.hudi.config.metrics.HoodieMetricsGraphiteConfig; import org.apache.hudi.config.metrics.HoodieMetricsJmxConfig; +import org.apache.hudi.config.metrics.HoodieMetricsM3Config; import org.apache.hudi.config.metrics.HoodieMetricsPrometheusConfig; import org.apache.hudi.exception.HoodieNotSupportedException; import org.apache.hudi.execution.bulkinsert.BulkInsertSortMode; @@ -153,14 +154,14 @@ public class HoodieWriteConfig extends HoodieConfig { public static final ConfigProperty WRITE_PAYLOAD_CLASS_NAME = ConfigProperty .key("hoodie.datasource.write.payload.class") - .defaultValue(OverwriteWithLatestAvroPayload.class.getName()) + .defaultValue(DefaultHoodieRecordPayload.class.getName()) .markAdvanced() .withDocumentation("Payload class used. Override this, if you like to roll your own merge logic, when upserting/inserting. " + "This will render any value set for PRECOMBINE_FIELD_OPT_VAL in-effective"); public static final ConfigProperty WRITE_PAYLOAD_TYPE = ConfigProperty .key("hoodie.datasource.write.payload.type") - .defaultValue(RecordPayloadType.OVERWRITE_LATEST_AVRO.name()) + .defaultValue(RecordPayloadType.HOODIE_AVRO_DEFAULT.name()) .markAdvanced() .withDocumentation(RecordPayloadType.class); @@ -562,7 +563,7 @@ public class HoodieWriteConfig extends HoodieConfig { public static final ConfigProperty MERGE_ALLOW_DUPLICATE_ON_INSERTS_ENABLE = ConfigProperty .key("hoodie.merge.allow.duplicate.on.inserts") - .defaultValue("false") + .defaultValue("true") .markAdvanced() .withDocumentation("When enabled, we allow duplicate keys even if inserts are routed to merge with an existing file (for ensuring file sizing)." 
+ " This is only relevant for insert operation, since upsert, delete operations will ensure unique key constraints are maintained."); @@ -2238,6 +2239,26 @@ public int getGraphiteReportPeriodSeconds() { return getInt(HoodieMetricsGraphiteConfig.GRAPHITE_REPORT_PERIOD_IN_SECONDS); } + public String getM3ServerHost() { + return getString(HoodieMetricsM3Config.M3_SERVER_HOST_NAME); + } + + public int getM3ServerPort() { + return getInt(HoodieMetricsM3Config.M3_SERVER_PORT_NUM); + } + + public String getM3Tags() { + return getString(HoodieMetricsM3Config.M3_TAGS); + } + + public String getM3Env() { + return getString(HoodieMetricsM3Config.M3_ENV); + } + + public String getM3Service() { + return getString(HoodieMetricsM3Config.M3_SERVICE); + } + public String getJmxHost() { return getString(HoodieMetricsJmxConfig.JMX_HOST_NAME); } @@ -2697,6 +2718,29 @@ public boolean isNonBlockingConcurrencyControl() { return getWriteConcurrencyMode().isNonBlockingConcurrencyControl(); } + /** + * TTL configs. + */ + public boolean isInlinePartitionTTLEnable() { + return getBoolean(HoodieTTLConfig.INLINE_PARTITION_TTL); + } + + public String getPartitionTTLStrategyClassName() { + return getString(HoodieTTLConfig.PARTITION_TTL_STRATEGY_CLASS_NAME); + } + + public Integer getPartitionTTLStrategyDaysRetain() { + return getInt(HoodieTTLConfig.DAYS_RETAIN); + } + + public String getPartitionTTLPartitionSelected() { + return getString(HoodieTTLConfig.PARTITION_SELECTED); + } + + public Integer getPartitionTTLMaxPartitionsToDelete() { + return getInt(HoodieTTLConfig.MAX_PARTITION_TO_DELETE); + } + public static class Builder { protected final HoodieWriteConfig writeConfig = new HoodieWriteConfig(); @@ -2716,10 +2760,13 @@ public static class Builder { private boolean isCallbackConfigSet = false; private boolean isPayloadConfigSet = false; private boolean isMetadataConfigSet = false; + + private boolean isTTLConfigSet = false; private boolean isLockConfigSet = false; private boolean isPreCommitValidationConfigSet = false; private boolean isMetricsJmxConfigSet = false; private boolean isMetricsGraphiteConfigSet = false; + private boolean isMetricsM3ConfigSet = false; private boolean isLayoutConfigSet = false; public Builder withEngineType(EngineType engineType) { @@ -2959,6 +3006,12 @@ public Builder withMetricsGraphiteConfig(HoodieMetricsGraphiteConfig mericsGraph return this; } + public Builder withMetricsM3Config(HoodieMetricsM3Config metricsM3Config) { + writeConfig.getProps().putAll(metricsM3Config.getProps()); + isMetricsM3ConfigSet = true; + return this; + } + public Builder withPreCommitValidatorConfig(HoodiePreCommitValidatorConfig validatorConfig) { writeConfig.getProps().putAll(validatorConfig.getProps()); isPreCommitValidationConfigSet = true; @@ -2995,6 +3048,12 @@ public Builder withMetadataConfig(HoodieMetadataConfig metadataConfig) { return this; } + public Builder withTTLConfig(HoodieTTLConfig ttlConfig) { + writeConfig.getProps().putAll(ttlConfig.getProps()); + isTTLConfigSet = true; + return this; + } + public Builder withAutoCommit(boolean autoCommit) { writeConfig.setValue(AUTO_COMMIT_ENABLE, String.valueOf(autoCommit)); return this; @@ -3262,6 +3321,8 @@ protected void setDefaults() { final boolean isLockProviderPropertySet = writeConfigProperties.containsKey(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key()); writeConfig.setDefaultOnCondition(!isLockConfigSet, HoodieLockConfig.newBuilder().fromProperties(writeConfig.getProps()).build()); + writeConfig.setDefaultOnCondition(!isTTLConfigSet, + 
HoodieTTLConfig.newBuilder().fromProperties(writeConfig.getProps()).build()); autoAdjustConfigsForConcurrencyMode(isLockProviderPropertySet); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/metrics/HoodieMetricsConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/metrics/HoodieMetricsConfig.java index e1d0afeb6fa4..328619f5e9c8 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/metrics/HoodieMetricsConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/metrics/HoodieMetricsConfig.java @@ -220,6 +220,8 @@ public HoodieMetricsConfig build() { HoodieMetricsGraphiteConfig.newBuilder().fromProperties(hoodieMetricsConfig.getProps()).build()); hoodieMetricsConfig.setDefaultOnCondition(reporterType == MetricsReporterType.CLOUDWATCH, HoodieMetricsCloudWatchConfig.newBuilder().fromProperties(hoodieMetricsConfig.getProps()).build()); + hoodieMetricsConfig.setDefaultOnCondition(reporterType == MetricsReporterType.M3, + HoodieMetricsM3Config.newBuilder().fromProperties(hoodieMetricsConfig.getProps()).build()); return hoodieMetricsConfig; } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/metrics/HoodieMetricsM3Config.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/metrics/HoodieMetricsM3Config.java new file mode 100644 index 000000000000..cc675eebfbbf --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/metrics/HoodieMetricsM3Config.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.config.metrics; + +import static org.apache.hudi.config.metrics.HoodieMetricsConfig.METRIC_PREFIX; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.Properties; +import org.apache.hudi.common.config.ConfigClassProperty; +import org.apache.hudi.common.config.ConfigGroups; +import org.apache.hudi.common.config.ConfigProperty; +import org.apache.hudi.common.config.HoodieConfig; + +/** + * Configs for M3 reporter type. + *

+ * {@link org.apache.hudi.metrics.MetricsReporterType#M3} + */ +@ConfigClassProperty(name = "Metrics Configurations for M3", + groupName = ConfigGroups.Names.METRICS, + description = "Enables reporting on Hudi metrics using M3. " + + " Hudi publishes metrics on every commit, clean, rollback etc.") +public class HoodieMetricsM3Config extends HoodieConfig { + + public static final String M3_PREFIX = METRIC_PREFIX + ".m3"; + + public static final ConfigProperty M3_SERVER_HOST_NAME = ConfigProperty + .key(M3_PREFIX + ".host") + .defaultValue("localhost") + .withDocumentation("M3 host to connect to."); + + public static final ConfigProperty M3_SERVER_PORT_NUM = ConfigProperty + .key(M3_PREFIX + ".port") + .defaultValue(9052) + .withDocumentation("M3 port to connect to."); + + public static final ConfigProperty M3_TAGS = ConfigProperty + .key(M3_PREFIX + ".tags") + .defaultValue("") + .withDocumentation("Optional M3 tags applied to all metrics."); + + public static final ConfigProperty M3_ENV = ConfigProperty + .key(M3_PREFIX + ".env") + .defaultValue("production") + .withDocumentation("M3 tag to label the environment (defaults to 'production'), " + + "applied to all metrics."); + + public static final ConfigProperty M3_SERVICE = ConfigProperty + .key(M3_PREFIX + ".service") + .defaultValue("hoodie") + .withDocumentation("M3 tag to label the service name (defaults to 'hoodie'), " + + "applied to all metrics."); + + private HoodieMetricsM3Config() { + super(); + } + + public static HoodieMetricsM3Config.Builder newBuilder() { + return new HoodieMetricsM3Config.Builder(); + } + + public static class Builder { + + private final HoodieMetricsM3Config hoodieMetricsM3Config = new HoodieMetricsM3Config(); + + public HoodieMetricsM3Config.Builder fromFile(File propertiesFile) throws IOException { + try (FileReader reader = new FileReader(propertiesFile)) { + this.hoodieMetricsM3Config.getProps().load(reader); + return this; + } + } + + public HoodieMetricsM3Config.Builder fromProperties(Properties props) { + this.hoodieMetricsM3Config.getProps().putAll(props); + return this; + } + + public HoodieMetricsM3Config.Builder toM3Host(String host) { + hoodieMetricsM3Config.setValue(M3_SERVER_HOST_NAME, host); + return this; + } + + public HoodieMetricsM3Config.Builder onM3Port(int port) { + hoodieMetricsM3Config.setValue(M3_SERVER_PORT_NUM, String.valueOf(port)); + return this; + } + + public HoodieMetricsM3Config.Builder useM3Tags(String tags) { + hoodieMetricsM3Config.setValue(M3_TAGS, tags); + return this; + } + + public HoodieMetricsM3Config.Builder useM3Env(String env) { + hoodieMetricsM3Config.setValue(M3_ENV, env); + return this; + } + + public HoodieMetricsM3Config.Builder useM3Service(String service) { + hoodieMetricsM3Config.setValue(M3_SERVICE, service); + return this; + } + + public HoodieMetricsM3Config build() { + hoodieMetricsM3Config.setDefaults(HoodieMetricsM3Config.class.getName()); + return hoodieMetricsM3Config; + } + } +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java index d22e4b21a5ec..0e47d0a688ab 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java @@ -210,7 +210,16 @@ private static void createCommitMarker(HoodieTable table, Path 
fileStatus, Path if (fs.exists(fullPath)) { return; } - FileIOUtils.createFileInPath(fs, fullPath, Option.of(getUTF8Bytes(StringUtils.EMPTY_STRING))); + //prevent exception from race condition. We are ok with the file being created in another thread, so we should + // check for the marker after catching the exception and we don't need to fail if the file exists + try { + FileIOUtils.createFileInPath(fs, fullPath, Option.of(getUTF8Bytes(StringUtils.EMPTY_STRING))); + } catch (HoodieIOException e) { + if (!fs.exists(fullPath)) { + throw e; + } + LOG.warn("Failed to create marker but " + fullPath + " exists", e); + } } /*** diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java index f8cc77274c2e..1301f046ae29 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java @@ -89,7 +89,6 @@ public class HoodieAppendHandle extends HoodieWriteHandle recordList = new ArrayList<>(); @@ -158,7 +157,6 @@ public HoodieAppendHandle(HoodieWriteConfig config, String instantTime, HoodieTa ? Option.of(new Schema.Parser().parse(config.getPartialUpdateSchema())) : Option.empty(), taskContextSupplier); - this.fileId = fileId; this.recordItr = recordItr; this.sizeEstimator = new DefaultSizeEstimator(); this.statuses = new ArrayList<>(); @@ -173,50 +171,52 @@ public HoodieAppendHandle(HoodieWriteConfig config, String instantTime, HoodieTa } private void init(HoodieRecord record) { - if (doInit) { - String prevCommit = instantTime; - String baseFile = ""; - List logFiles = new ArrayList<>(); - if (config.isCDCEnabled()) { - // the cdc reader needs the base file metadata to have deterministic update sequence. - TableFileSystemView.SliceView rtView = hoodieTable.getSliceView(); - Option fileSlice = rtView.getLatestFileSlice(partitionPath, fileId); - if (fileSlice.isPresent()) { - prevCommit = fileSlice.get().getBaseInstantTime(); - baseFile = fileSlice.get().getBaseFile().map(BaseFile::getFileName).orElse(""); - logFiles = fileSlice.get().getLogFiles().map(HoodieLogFile::getFileName).collect(Collectors.toList()); - } + if (!doInit) { + return; + } + + String prevCommit = instantTime; + String baseFile = ""; + List logFiles = new ArrayList<>(); + if (config.isCDCEnabled()) { + // the cdc reader needs the base file metadata to have deterministic update sequence. 
+ TableFileSystemView.SliceView rtView = hoodieTable.getSliceView(); + Option fileSlice = rtView.getLatestFileSlice(partitionPath, fileId); + if (fileSlice.isPresent()) { + prevCommit = fileSlice.get().getBaseInstantTime(); + baseFile = fileSlice.get().getBaseFile().map(BaseFile::getFileName).orElse(""); + logFiles = fileSlice.get().getLogFiles().map(HoodieLogFile::getFileName).collect(Collectors.toList()); } + } - // Prepare the first write status - HoodieDeltaWriteStat deltaWriteStat = new HoodieDeltaWriteStat(); - writeStatus.setStat(deltaWriteStat); - writeStatus.setFileId(fileId); - writeStatus.setPartitionPath(partitionPath); - averageRecordSize = sizeEstimator.sizeEstimate(record); + // Prepare the first write status + HoodieDeltaWriteStat deltaWriteStat = new HoodieDeltaWriteStat(); + writeStatus.setStat(deltaWriteStat); + writeStatus.setFileId(fileId); + writeStatus.setPartitionPath(partitionPath); + averageRecordSize = sizeEstimator.sizeEstimate(record); - deltaWriteStat.setPrevCommit(prevCommit); - deltaWriteStat.setPartitionPath(partitionPath); - deltaWriteStat.setFileId(fileId); - deltaWriteStat.setBaseFile(baseFile); - deltaWriteStat.setLogFiles(logFiles); + deltaWriteStat.setPrevCommit(prevCommit); + deltaWriteStat.setPartitionPath(partitionPath); + deltaWriteStat.setFileId(fileId); + deltaWriteStat.setBaseFile(baseFile); + deltaWriteStat.setLogFiles(logFiles); - try { - // Save hoodie partition meta in the partition path - HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, instantTime, - new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath), - hoodieTable.getPartitionMetafileFormat()); - partitionMetadata.trySave(getPartitionId()); - - this.writer = createLogWriter(getFileInstant(record)); - } catch (Exception e) { - LOG.error("Error in update task at commit " + instantTime, e); - writeStatus.setGlobalError(e); - throw new HoodieUpsertException("Failed to initialize HoodieAppendHandle for FileId: " + fileId + " on commit " - + instantTime + " on HDFS path " + hoodieTable.getMetaClient().getBasePathV2() + "/" + partitionPath, e); - } - doInit = false; + try { + // Save hoodie partition meta in the partition path + HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, instantTime, + new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath), + hoodieTable.getPartitionMetafileFormat()); + partitionMetadata.trySave(getPartitionId()); + + this.writer = createLogWriter(getFileInstant(record)); + } catch (Exception e) { + LOG.error("Error in update task at commit " + instantTime, e); + writeStatus.setGlobalError(e); + throw new HoodieUpsertException("Failed to initialize HoodieAppendHandle for FileId: " + fileId + " on commit " + + instantTime + " on HDFS path " + hoodieTable.getMetaClient().getBasePathV2() + "/" + partitionPath, e); } + doInit = false; } /** @@ -324,12 +324,7 @@ private MetadataValues populateMetadataFields(HoodieRecord hoodieRecord) { private void initNewStatus() { HoodieDeltaWriteStat prevStat = (HoodieDeltaWriteStat) this.writeStatus.getStat(); // Make a new write status and copy basic fields over. 
- HoodieDeltaWriteStat stat = new HoodieDeltaWriteStat(); - stat.setFileId(fileId); - stat.setPartitionPath(partitionPath); - stat.setPrevCommit(prevStat.getPrevCommit()); - stat.setBaseFile(prevStat.getBaseFile()); - stat.setLogFiles(new ArrayList<>(prevStat.getLogFiles())); + HoodieDeltaWriteStat stat = prevStat.copy(); this.writeStatus = (WriteStatus) ReflectionUtils.loadClass(config.getWriteStatusClassName(), hoodieTable.shouldTrackSuccessRecords(), config.getWriteStatusFailureFraction()); @@ -567,7 +562,7 @@ public IOType getIOType() { return IOType.APPEND; } - public List writeStatuses() { + public List getWriteStatuses() { return statuses; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java index bdb35641f268..0a0f3352069a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java @@ -34,7 +34,6 @@ import org.apache.hudi.exception.HoodieInsertException; import org.apache.hudi.io.storage.HoodieFileWriter; import org.apache.hudi.io.storage.HoodieFileWriterFactory; -import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.table.HoodieTable; import org.apache.avro.Schema; @@ -115,8 +114,7 @@ public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTa public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTable hoodieTable, String partitionPath, String fileId, Map> recordMap, TaskContextSupplier taskContextSupplier) { - // preserveMetadata is disabled by default for MDT but enabled otherwise - this(config, instantTime, hoodieTable, partitionPath, fileId, taskContextSupplier, !HoodieTableMetadata.isMetadataTable(config.getBasePath())); + this(config, instantTime, hoodieTable, partitionPath, fileId, taskContextSupplier, true); this.recordMap = recordMap; this.useWriterSchema = true; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java index a9b22d083326..4f5f240c4fd0 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java @@ -47,7 +47,6 @@ import org.apache.hudi.io.storage.HoodieFileWriter; import org.apache.hudi.io.storage.HoodieFileWriterFactory; import org.apache.hudi.keygen.BaseKeyGenerator; -import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.table.HoodieTable; import org.apache.avro.Schema; @@ -145,8 +144,7 @@ public HoodieMergeHandle(HoodieWriteConfig config, String instantTime, HoodieTab super(config, instantTime, partitionPath, fileId, hoodieTable, taskContextSupplier); this.keyToNewRecords = keyToNewRecords; this.useWriterSchemaForCompaction = true; - // preserveMetadata is disabled by default for MDT but enabled otherwise - this.preserveMetadata = !HoodieTableMetadata.isMetadataTable(config.getBasePath()); + this.preserveMetadata = true; init(fileId, this.partitionPath, dataFileToBeMerged); validateAndSetAndKeyGenProps(keyGeneratorOpt, config.populateMetaFields()); } @@ -484,6 +482,15 @@ public void performMergeDataValidationCheck(WriteStatus writeStatus) { } } + public Iterator> getWriteStatusesAsIterator() { + List statuses = getWriteStatuses(); + // 
TODO(vc): This needs to be revisited + if (getPartitionPath() == null) { + LOG.info("Upsert Handle has partition path as null {}, {}", getOldFilePath(), statuses); + } + return Collections.singletonList(statuses).iterator(); + } + public Path getOldFilePath() { return oldFilePath; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java index 9d1bb6d511e8..ab80629c941b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java @@ -190,7 +190,7 @@ protected void markClosed() { public abstract List close(); - public List writeStatuses() { + public List getWriteStatuses() { return Collections.singletonList(writeStatus); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java index 7b88a0ab979b..4d7c83a7794d 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java @@ -146,21 +146,24 @@ public static String[] extractRecordKeysByFields(String recordKey, List public static String getRecordKey(GenericRecord record, List recordKeyFields, boolean consistentLogicalTimestampEnabled) { boolean keyIsNullEmpty = true; StringBuilder recordKey = new StringBuilder(); - for (String recordKeyField : recordKeyFields) { + for (int i = 0; i < recordKeyFields.size(); i++) { + String recordKeyField = recordKeyFields.get(i); String recordKeyValue = HoodieAvroUtils.getNestedFieldValAsString(record, recordKeyField, true, consistentLogicalTimestampEnabled); if (recordKeyValue == null) { - recordKey.append(recordKeyField + DEFAULT_COMPOSITE_KEY_FILED_VALUE + NULL_RECORDKEY_PLACEHOLDER + DEFAULT_RECORD_KEY_PARTS_SEPARATOR); + recordKey.append(recordKeyField).append(DEFAULT_COMPOSITE_KEY_FILED_VALUE).append(NULL_RECORDKEY_PLACEHOLDER); } else if (recordKeyValue.isEmpty()) { - recordKey.append(recordKeyField + DEFAULT_COMPOSITE_KEY_FILED_VALUE + EMPTY_RECORDKEY_PLACEHOLDER + DEFAULT_RECORD_KEY_PARTS_SEPARATOR); + recordKey.append(recordKeyField).append(DEFAULT_COMPOSITE_KEY_FILED_VALUE).append(EMPTY_RECORDKEY_PLACEHOLDER); } else { - recordKey.append(recordKeyField + DEFAULT_COMPOSITE_KEY_FILED_VALUE + recordKeyValue + DEFAULT_RECORD_KEY_PARTS_SEPARATOR); + recordKey.append(recordKeyField).append(DEFAULT_COMPOSITE_KEY_FILED_VALUE).append(recordKeyValue); keyIsNullEmpty = false; } + if (i != recordKeyFields.size() - 1) { + recordKey.append(DEFAULT_RECORD_KEY_PARTS_SEPARATOR); + } } - recordKey.deleteCharAt(recordKey.length() - 1); if (keyIsNullEmpty) { throw new HoodieKeyException("recordKey values: \"" + recordKey + "\" for fields: " - + recordKeyFields.toString() + " cannot be entirely null or empty."); + + recordKeyFields + " cannot be entirely null or empty."); } return recordKey.toString(); } @@ -172,20 +175,27 @@ public static String getRecordPartitionPath(GenericRecord record, List p } StringBuilder partitionPath = new StringBuilder(); - for (String partitionPathField : partitionPathFields) { + for (int i = 0; i < partitionPathFields.size(); i++) { + String partitionPathField = partitionPathFields.get(i); String fieldVal = HoodieAvroUtils.getNestedFieldValAsString(record, partitionPathField, 
true, consistentLogicalTimestampEnabled); if (fieldVal == null || fieldVal.isEmpty()) { - partitionPath.append(hiveStylePartitioning ? partitionPathField + "=" + HUDI_DEFAULT_PARTITION_PATH - : HUDI_DEFAULT_PARTITION_PATH); + if (hiveStylePartitioning) { + partitionPath.append(partitionPathField).append("="); + } + partitionPath.append(HUDI_DEFAULT_PARTITION_PATH); } else { if (encodePartitionPath) { fieldVal = PartitionPathEncodeUtils.escapePathName(fieldVal); } - partitionPath.append(hiveStylePartitioning ? partitionPathField + "=" + fieldVal : fieldVal); + if (hiveStylePartitioning) { + partitionPath.append(partitionPathField).append("="); + } + partitionPath.append(fieldVal); + } + if (i != partitionPathFields.size() - 1) { + partitionPath.append(DEFAULT_PARTITION_PATH_SEPARATOR); } - partitionPath.append(DEFAULT_PARTITION_PATH_SEPARATOR); } - partitionPath.deleteCharAt(partitionPath.length() - 1); return partitionPath.toString(); } @@ -257,7 +267,7 @@ public static List getRecordKeyFields(TypedProperties props) { * @param props props of interest. * @return true if record keys need to be auto generated. false otherwise. */ - public static boolean enableAutoGenerateRecordKeys(TypedProperties props) { + public static boolean isAutoGeneratedRecordKeysEnabled(TypedProperties props) { return !props.containsKey(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()); } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/factory/HoodieAvroKeyGeneratorFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/factory/HoodieAvroKeyGeneratorFactory.java index f375095122da..f68e3232753a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/factory/HoodieAvroKeyGeneratorFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/factory/HoodieAvroKeyGeneratorFactory.java @@ -98,7 +98,7 @@ public static KeyGenerator createAvroKeyGeneratorByType(TypedProperties props) t throw new HoodieKeyGeneratorException("Unsupported keyGenerator Type " + keyGeneratorType); } - if (KeyGenUtils.enableAutoGenerateRecordKeys(props)) { + if (KeyGenUtils.isAutoGeneratedRecordKeysEnabled(props)) { return new AutoRecordGenWrapperAvroKeyGenerator(props, keyGenerator); } else { return keyGenerator; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java index 8d40fc240952..99739947077c 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java @@ -101,7 +101,7 @@ import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_POPULATE_META_FIELDS; import static org.apache.hudi.common.table.HoodieTableConfig.ARCHIVELOG_FOLDER; import static org.apache.hudi.common.table.timeline.HoodieInstant.State.REQUESTED; -import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION; +import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION; import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS; import static org.apache.hudi.common.table.timeline.HoodieTimeline.getIndexInflightInstant; import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeIndexPlan; @@ -392,7 +392,6 
@@ private boolean initializeFromFilesystem(String initializationTime, List dataMetaClient.getTableConfig().isMetadataPartitionAvailable((metadataPartition))); - // Get a complete list of files and partitions from the file system or from already initialized FILES partition of MDT List partitionInfoList; if (filesPartitionAvailable) { @@ -462,7 +461,9 @@ private boolean initializeFromFilesystem(String initializationTime, List records = fileGroupCountAndRecordsPair.getValue(); bulkCommit(commitTimeForPartition, partitionType, records, fileGroupCount); metadataMetaClient.reloadActiveTimeline(); - dataMetaClient.getTableConfig().setMetadataPartitionState(dataMetaClient, partitionType, true); + String partitionPath = (partitionType == FUNCTIONAL_INDEX) ? dataWriteConfig.getFunctionalIndexConfig().getIndexName() : partitionType.getPartitionPath(); + + dataMetaClient.getTableConfig().setMetadataPartitionState(dataMetaClient, partitionPath, true); // initialize the metadata reader again so the MDT partition can be read after initialization initMetadataReader(); long totalInitTime = partitionInitTimer.endTimer(); @@ -795,7 +796,7 @@ public void dropMetadataPartitions(List metadataPartition for (MetadataPartitionType partitionType : metadataPartitions) { String partitionPath = partitionType.getPartitionPath(); // first update table config - dataMetaClient.getTableConfig().setMetadataPartitionState(dataMetaClient, partitionType, false); + dataMetaClient.getTableConfig().setMetadataPartitionState(dataMetaClient, partitionPath, false); LOG.warn("Deleting Metadata Table partition: " + partitionPath); dataMetaClient.getFs().delete(new Path(metadataWriteConfig.getBasePath(), partitionPath), true); // delete corresponding pending indexing instant file in the timeline @@ -829,7 +830,7 @@ private static void deletePendingIndexingInstant(HoodieTableMetaClient metaClien protected static void checkNumDeltaCommits(HoodieTableMetaClient metaClient, int maxNumDeltaCommitsWhenPending) { final HoodieActiveTimeline activeTimeline = metaClient.reloadActiveTimeline(); Option lastCompaction = activeTimeline.filterCompletedInstants() - .filter(s -> s.getAction().equals(COMPACTION_ACTION)).lastInstant(); + .filter(s -> s.getAction().equals(COMMIT_ACTION)).lastInstant(); int numDeltaCommits = lastCompaction.isPresent() ? 
activeTimeline.getDeltaCommitTimeline().findInstantsAfter(lastCompaction.get().getTimestamp()).countInstants() : activeTimeline.getDeltaCommitTimeline().countInstants(); @@ -900,6 +901,7 @@ public void buildMetadataPartitions(HoodieEngineContext engineContext, List partitionPaths = new ArrayList<>(); List partitionTypes = new ArrayList<>(); indexPartitionInfos.forEach(indexPartitionInfo -> { String relativePartitionPath = indexPartitionInfo.getMetadataPartitionPath(); @@ -913,10 +915,11 @@ public void buildMetadataPartitions(HoodieEngineContext engineContext, List HoodieTableMetadataUtil.convertMetadataToRecords(engineContext, dataMetaClient, rollbackMetadata, instantTime)); - + // The deltacommit that will be rolled back + HoodieInstant deltaCommitInstant = new HoodieInstant(false, HoodieTimeline.DELTA_COMMIT_ACTION, commitToRollbackInstantTime); if (deltacommitsSinceCompaction.containsInstant(deltaCommitInstant)) { LOG.info("Rolling back MDT deltacommit " + commitToRollbackInstantTime); + String rollbackInstantTime = createRollbackTimestamp(instantTime); if (!getWriteClient().rollback(commitToRollbackInstantTime, rollbackInstantTime)) { throw new HoodieMetadataException("Failed to rollback deltacommit at " + commitToRollbackInstantTime); } @@ -1131,8 +1126,9 @@ protected void validateRollback( String commitToRollbackInstantTime, HoodieInstant compactionInstant, HoodieTimeline deltacommitsSinceCompaction) { - // The commit being rolled back should not be earlier than the latest compaction on the MDT. Compaction on MDT only occurs when all actions - // are completed on the dataset. Hence, this case implies a rollback of completed commit which should actually be handled using restore. + // The commit being rolled back should not be earlier than the latest compaction on the MDT because the latest file slice does not change after all. + // Compaction on MDT only occurs when all actions are completed on the dataset. + // Hence, this case implies a rollback of completed commit which should actually be handled using restore. if (compactionInstant.getAction().equals(HoodieTimeline.COMMIT_ACTION)) { final String compactionInstantTime = compactionInstant.getTimestamp(); if (commitToRollbackInstantTime.length() == compactionInstantTime.length() && HoodieTimeline.LESSER_THAN_OR_EQUALS.test(commitToRollbackInstantTime, compactionInstantTime)) { @@ -1316,9 +1312,8 @@ public void performTableServices(Option inFlightInstantTimestamp) { .getTimestamp(); LOG.info("Latest deltacommit time found is " + latestDeltacommitTime + ", running clean operations."); cleanIfNecessary(writeClient, latestDeltacommitTime); - // Do timeline validation before scheduling compaction/logCompaction operations. - if (validateTimelineBeforeSchedulingCompaction(inFlightInstantTimestamp, latestDeltacommitTime)) { + if (validateCompactionScheduling()) { compactIfNecessary(writeClient, latestDeltacommitTime); } writeClient.archive(); @@ -1355,10 +1350,12 @@ private void runPendingTableServicesOperations(BaseHoodieWriteClient writeClient * deltacommit. */ protected void compactIfNecessary(BaseHoodieWriteClient writeClient, String latestDeltacommitTime) { - // Trigger compaction with suffixes based on the same instant time. This ensures that any future - // delta commits synced over will not have an instant time lesser than the last completed instant on the - // metadata table. 
- final String compactionInstantTime = writeClient.createNewInstantTime(false); + // IMPORTANT: Trigger compaction with max instant time that is smaller than(or equals) the earliest pending instant from DT. + // The compaction planner will manage to filter out the log files that finished with greater completion time. + // see BaseHoodieCompactionPlanGenerator.generateCompactionPlan for more details. + final String compactionInstantTime = dataMetaClient.reloadActiveTimeline().filterInflightsAndRequested() + .findInstantsBeforeOrEquals(latestDeltacommitTime).firstInstant().map(HoodieInstant::getTimestamp) + .orElse(writeClient.createNewInstantTime(false)); // we need to avoid checking compaction w/ same instant again. // let's say we trigger compaction after C5 in MDT and so compaction completes with C4001. but C5 crashed before completing in MDT. @@ -1407,35 +1404,19 @@ protected void cleanIfNecessary(BaseHoodieWriteClient writeClient, String instan /** * Validates the timeline for both main and metadata tables to ensure compaction on MDT can be scheduled. */ - protected boolean validateTimelineBeforeSchedulingCompaction(Option inFlightInstantTimestamp, String latestDeltaCommitTimeInMetadataTable) { - // we need to find if there are any inflights in data table timeline before or equal to the latest delta commit in metadata table. - // Whenever you want to change this logic, please ensure all below scenarios are considered. - // a. There could be a chance that latest delta commit in MDT is committed in MDT, but failed in DT. And so findInstantsBeforeOrEquals() should be employed - // b. There could be DT inflights after latest delta commit in MDT and we are ok with it. bcoz, the contract is, the latest compaction instant time in MDT represents - // any instants before that is already synced with metadata table. - // c. Do consider out of order commits. For eg, c4 from DT could complete before c3. and we can't trigger compaction in MDT with c4 as base instant time, until every - // instant before c4 is synced with metadata table. - List pendingInstants = dataMetaClient.reloadActiveTimeline().filterInflightsAndRequested() - .findInstantsBeforeOrEquals(latestDeltaCommitTimeInMetadataTable).getInstants(); - - if (!pendingInstants.isEmpty()) { - checkNumDeltaCommits(metadataMetaClient, dataWriteConfig.getMetadataConfig().getMaxNumDeltacommitsWhenPending()); - LOG.info(String.format( - "Cannot compact metadata table as there are %d inflight instants in data table before latest deltacommit in metadata table: %s. Inflight instants in data table: %s", - pendingInstants.size(), latestDeltaCommitTimeInMetadataTable, Arrays.toString(pendingInstants.toArray()))); - return false; - } - - // Check if there are any pending compaction or log compaction instants in the timeline. - // If pending compact/logCompaction operations are found abort scheduling new compaction/logCompaction operations. 
- Option pendingLogCompactionInstant = - metadataMetaClient.getActiveTimeline().filterPendingLogCompactionTimeline().firstInstant(); - Option pendingCompactionInstant = - metadataMetaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant(); - if (pendingLogCompactionInstant.isPresent() || pendingCompactionInstant.isPresent()) { - LOG.warn(String.format("Not scheduling compaction or logCompaction, since a pending compaction instant %s or logCompaction %s instant is present", - pendingCompactionInstant, pendingLogCompactionInstant)); - return false; + protected boolean validateCompactionScheduling() { + // Under the log compaction scope, the sequence of the log-compaction and compaction needs to be ensured because metadata items such as RLI + // only has proc-time ordering semantics. For "ensured", it means the completion sequence of the log-compaction/compaction is the same as the start sequence. + if (metadataWriteConfig.isLogCompactionEnabled()) { + Option pendingLogCompactionInstant = + metadataMetaClient.getActiveTimeline().filterPendingLogCompactionTimeline().firstInstant(); + Option pendingCompactionInstant = + metadataMetaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant(); + if (pendingLogCompactionInstant.isPresent() || pendingCompactionInstant.isPresent()) { + LOG.warn(String.format("Not scheduling compaction or logCompaction, since a pending compaction instant %s or logCompaction %s instant is present", + pendingCompactionInstant, pendingLogCompactionInstant)); + return false; + } } return true; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index 7c42ccf50161..48cfb46b49f2 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -37,6 +37,7 @@ import org.apache.hudi.config.metrics.HoodieMetricsConfig; import org.apache.hudi.config.metrics.HoodieMetricsGraphiteConfig; import org.apache.hudi.config.metrics.HoodieMetricsJmxConfig; +import org.apache.hudi.config.metrics.HoodieMetricsM3Config; import org.apache.hudi.config.metrics.HoodieMetricsPrometheusConfig; import org.apache.hudi.config.metrics.HoodieMetricsDatadogConfig; import org.apache.hudi.exception.HoodieMetadataException; @@ -81,11 +82,31 @@ public static HoodieWriteConfig createMetadataWriteConfig( String tableName = writeConfig.getTableName() + METADATA_TABLE_NAME_SUFFIX; final long maxLogFileSizeBytes = writeConfig.getMetadataConfig().getMaxLogFileSize(); + // Borrow the cleaner policy from the main table and adjust the cleaner policy based on the main table's cleaner policy + HoodieCleaningPolicy dataTableCleaningPolicy = writeConfig.getCleanerPolicy(); + HoodieCleanConfig.Builder cleanConfigBuilder = HoodieCleanConfig.newBuilder() + .withAsyncClean(DEFAULT_METADATA_ASYNC_CLEAN) + .withAutoClean(false) + .withCleanerParallelism(MDT_DEFAULT_PARALLELISM) + .withFailedWritesCleaningPolicy(failedWritesCleaningPolicy) + .withCleanerPolicy(dataTableCleaningPolicy); + + if (HoodieCleaningPolicy.KEEP_LATEST_COMMITS.equals(dataTableCleaningPolicy)) { + int retainCommits = (int) Math.max(DEFAULT_METADATA_CLEANER_COMMITS_RETAINED, writeConfig.getCleanerCommitsRetained() * 1.2); + cleanConfigBuilder.retainCommits(retainCommits); + } else if 
(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS.equals(dataTableCleaningPolicy)) { + int retainFileVersions = (int) Math.ceil(writeConfig.getCleanerFileVersionsRetained() * 1.2); + cleanConfigBuilder.retainFileVersions(retainFileVersions); + } else if (HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS.equals(dataTableCleaningPolicy)) { + int numHoursRetained = (int) Math.ceil(writeConfig.getCleanerHoursRetained() * 1.2); + cleanConfigBuilder.cleanerNumHoursRetained(numHoursRetained); + } // Create the write config for the metadata table by borrowing options from the main write config. HoodieWriteConfig.Builder builder = HoodieWriteConfig.newBuilder() .withEngineType(writeConfig.getEngineType()) .withTimelineLayoutVersion(TimelineLayoutVersion.CURR_VERSION) + .withMergeAllowDuplicateOnInserts(false) .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder() .withConsistencyCheckEnabled(writeConfig.getConsistencyGuardConfig().isConsistencyCheckEnabled()) .withInitialConsistencyCheckIntervalMs(writeConfig.getConsistencyGuardConfig().getInitialConsistencyCheckIntervalMs()) @@ -103,14 +124,7 @@ public static HoodieWriteConfig createMetadataWriteConfig( .withSchema(HoodieMetadataRecord.getClassSchema().toString()) .forTable(tableName) // we will trigger cleaning manually, to control the instant times - .withCleanConfig(HoodieCleanConfig.newBuilder() - .withAsyncClean(DEFAULT_METADATA_ASYNC_CLEAN) - .withAutoClean(false) - .withCleanerParallelism(MDT_DEFAULT_PARALLELISM) - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) - .withFailedWritesCleaningPolicy(failedWritesCleaningPolicy) - .retainCommits(DEFAULT_METADATA_CLEANER_COMMITS_RETAINED) - .build()) + .withCleanConfig(cleanConfigBuilder.build()) // we will trigger archive manually, to ensure only regular writer invokes it .withArchivalConfig(HoodieArchivalConfig.newBuilder() .archiveCommitsWith( @@ -182,6 +196,15 @@ public static HoodieWriteConfig createMetadataWriteConfig( .withPushgatewayPortNum(writeConfig.getPushGatewayPort()).build(); builder.withProperties(prometheusConfig.getProps()); break; + case M3: + HoodieMetricsM3Config m3Config = HoodieMetricsM3Config.newBuilder() + .onM3Port(writeConfig.getM3ServerPort()) + .toM3Host(writeConfig.getM3ServerHost()) + .useM3Tags(writeConfig.getM3Tags()) + .useM3Service(writeConfig.getM3Service()) + .useM3Env(writeConfig.getM3Env()).build(); + builder.withProperties(m3Config.getProps()); + break; case DATADOG: HoodieMetricsDatadogConfig.Builder datadogConfig = HoodieMetricsDatadogConfig.newBuilder() .withDatadogApiKey(writeConfig.getDatadogApiKey()) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/MetricsReporterFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/MetricsReporterFactory.java index 27034735a040..0d20337fa5c5 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/MetricsReporterFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/MetricsReporterFactory.java @@ -27,6 +27,7 @@ import org.apache.hudi.metrics.cloudwatch.CloudWatchMetricsReporter; import org.apache.hudi.metrics.custom.CustomizableMetricsReporter; import org.apache.hudi.metrics.datadog.DatadogMetricsReporter; +import org.apache.hudi.metrics.m3.M3MetricsReporter; import org.apache.hudi.metrics.prometheus.PrometheusReporter; import org.apache.hudi.metrics.prometheus.PushGatewayMetricsReporter; @@ -89,6 +90,9 @@ public static Option createReporter(HoodieWriteConfig config, M case CLOUDWATCH: 
reporter = new CloudWatchMetricsReporter(config, registry); break; + case M3: + reporter = new M3MetricsReporter(config, registry); + break; default: LOG.error("Reporter type[" + type + "] is not supported."); break; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/MetricsReporterType.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/MetricsReporterType.java index 3c8600159287..6d05e443e6b9 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/MetricsReporterType.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/MetricsReporterType.java @@ -22,5 +22,5 @@ * Types of the reporter supported, hudi also supports user defined reporter. */ public enum MetricsReporterType { - GRAPHITE, INMEMORY, JMX, DATADOG, CONSOLE, PROMETHEUS_PUSHGATEWAY, PROMETHEUS, CLOUDWATCH + GRAPHITE, INMEMORY, JMX, DATADOG, CONSOLE, PROMETHEUS_PUSHGATEWAY, PROMETHEUS, CLOUDWATCH, M3 } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/m3/M3MetricsReporter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/m3/M3MetricsReporter.java new file mode 100644 index 000000000000..a658476ef754 --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/m3/M3MetricsReporter.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.metrics.m3; + +import com.codahale.metrics.MetricRegistry; +import com.uber.m3.tally.m3.M3Reporter; +import com.uber.m3.util.Duration; +import com.uber.m3.util.ImmutableMap; +import com.uber.m3.tally.RootScopeBuilder; +import com.uber.m3.tally.Scope; +import java.net.InetSocketAddress; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.metrics.MetricsReporter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Implementation of M3 Metrics reporter, which can report metrics to a https://m3db.io/ service + */ +public class M3MetricsReporter extends MetricsReporter { + + private static final Logger LOG = LoggerFactory.getLogger(M3MetricsReporter.class); + private final HoodieWriteConfig config; + private final MetricRegistry registry; + private final ImmutableMap tags; + + public M3MetricsReporter(HoodieWriteConfig config, MetricRegistry registry) { + this.config = config; + this.registry = registry; + + ImmutableMap.Builder tagBuilder = new ImmutableMap.Builder<>(); + tagBuilder.putAll(parseOptionalTags(config.getM3Tags())); + tagBuilder.put("service", config.getM3Service()); + tagBuilder.put("env", config.getM3Env()); + this.tags = tagBuilder.build(); + LOG.info(String.format("Building M3 Reporter with M3 tags mapping: %s", tags)); + } + + private static Map parseOptionalTags(String tagValueString) { + Map parsedTags = new HashMap(); + if (!tagValueString.isEmpty()) { + Arrays.stream(tagValueString.split(",")).forEach((tagValuePair) -> { + String[] parsedTagValuePair = Arrays.stream(tagValuePair.split("=")) + .map((tagOrValue) -> tagOrValue.trim()).filter((tagOrValue) -> !tagOrValue.isEmpty()) + .toArray(String[]::new); + if (parsedTagValuePair.length != 2) { + throw new RuntimeException(String.format( + "M3 Reporter tags cannot be initialized with tags [%s] due to not being in format `tag=value, . . .`.", + tagValuePair)); + } + parsedTags.put(parsedTagValuePair[0], parsedTagValuePair[1]); + }); + } + return parsedTags; + } + + @Override + public void start() {} + + @Override + public void report() { + /* + Although com.uber.m3.tally.Scope supports automatically submitting metrics in an interval + via a background task, it does not seem to support + - an API for explicitly flushing/emitting all metrics + - Taking in an external com.codahale.metrics.MetricRegistry metrics registry and automatically + adding any new counters/gauges whenever they are added to the registry + Due to this, this implementation emits metrics by creating a Scope, adding all metrics from + the HUDI metircs registry as counters/gauges to the scope, and then closing the Scope. Since + closing this Scope will implicitly flush all M3 metrics, the reporting intervals + are configured to be Integer.MAX_VALUE. 
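As a rough illustration (not part of this patch) of switching the new reporter type on: the generic hoodie.metrics switches are the pre-existing metrics configs, the hoodie.metrics.m3.* keys are the ones defined in HoodieMetricsM3Config above, and the host, env, tag and path values are placeholders.

    Properties props = new Properties();
    props.setProperty("hoodie.metrics.on", "true");
    props.setProperty("hoodie.metrics.reporter.type", "M3");
    props.setProperty("hoodie.metrics.m3.host", "m3-agent.internal");     // placeholder host
    props.setProperty("hoodie.metrics.m3.port", "9052");
    props.setProperty("hoodie.metrics.m3.env", "staging");
    props.setProperty("hoodie.metrics.m3.tags", "team=data,pipeline=ingest");
    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
        .withPath("/tmp/hudi_table")                                      // placeholder base path
        .withProperties(props)
        .build();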
+ */ + synchronized (this) { + try (Scope scope = new RootScopeBuilder() + .reporter(new M3Reporter.Builder( + new InetSocketAddress(config.getM3ServerHost(), config.getM3ServerPort())) + .includeHost(true).commonTags(tags) + .build()) + .reportEvery(Duration.ofSeconds(Integer.MAX_VALUE)) + .tagged(tags)) { + + M3ScopeReporterAdaptor scopeReporter = new M3ScopeReporterAdaptor(registry, scope); + scopeReporter.start(Integer.MAX_VALUE, TimeUnit.SECONDS); + scopeReporter.report(); + scopeReporter.stop(); + } catch (Exception e) { + LOG.error(String.format("Error reporting metrics to M3: %s", e)); + } + } + } + + @Override + public void stop() {} +} + + + + + + diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/m3/M3ScopeReporterAdaptor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/m3/M3ScopeReporterAdaptor.java new file mode 100644 index 000000000000..ae66914400b9 --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/m3/M3ScopeReporterAdaptor.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.metrics.m3; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; +import com.codahale.metrics.Metered; +import com.codahale.metrics.MetricFilter; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.ScheduledReporter; +import com.codahale.metrics.Snapshot; +import com.codahale.metrics.Timer; +import com.uber.m3.tally.Scope; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.SortedMap; +import java.util.concurrent.TimeUnit; +import org.apache.hudi.common.util.collection.Pair; + +/** + * Implementation of com.codahale.metrics.ScheduledReporter, to emit metrics from + * com.codahale.metrics.MetricRegistry to M3 + */ +public class M3ScopeReporterAdaptor extends ScheduledReporter { + private final Scope scope; + + protected M3ScopeReporterAdaptor(MetricRegistry registry, Scope scope) { + super(registry, "hudi-m3-reporter", MetricFilter.ALL, TimeUnit.SECONDS, TimeUnit.SECONDS); + this.scope = scope; + } + + @Override + public void start(long period, TimeUnit unit) { + } + + @Override + public void stop() { + } + + @Override + public void report(SortedMap gauges, SortedMap counters, + SortedMap histograms, SortedMap meters, + SortedMap timers) { + /* + When reporting, process each com.codahale.metrics metric and add counters & gauges to + the passed-in com.uber.m3.tally.Scope with the same name and value. 
This is needed + for the Scope to register these metrics + */ + report(scope, + gauges, + counters, + histograms, + meters, + timers); + } + + private void report(Scope scope, + Map gauges, + Map counters, + Map histograms, + Map meters, + Map timers) { + + for (Entry entry : gauges.entrySet()) { + scope.gauge(entry.getKey()).update( + ((Number) entry.getValue().getValue()).doubleValue()); + } + + for (Entry entry : counters.entrySet()) { + scope.counter(entry.getKey()).inc( + ((Number) entry.getValue().getCount()).longValue()); + } + + for (Entry entry : histograms.entrySet()) { + scope.gauge(MetricRegistry.name(entry.getKey(), "count")).update( + entry.getValue().getCount()); + reportSnapshot(entry.getKey(), entry.getValue().getSnapshot()); + } + + for (Entry entry : meters.entrySet()) { + reportMetered(entry.getKey(), entry.getValue()); + } + + for (Entry entry : timers.entrySet()) { + reportTimer(entry.getKey(), entry.getValue()); + } + } + + private void reportMetered(String name, Metered meter) { + scope.counter(MetricRegistry.name(name, "count")).inc(meter.getCount()); + List> meterGauges = Arrays.asList( + Pair.of("m1_rate", meter.getOneMinuteRate()), + Pair.of("m5_rate", meter.getFiveMinuteRate()), + Pair.of("m15_rate", meter.getFifteenMinuteRate()), + Pair.of("mean_rate", meter.getMeanRate()) + ); + for (Pair pair : meterGauges) { + scope.gauge(MetricRegistry.name(name, pair.getLeft())).update(pair.getRight()); + } + } + + private void reportSnapshot(String name, Snapshot snapshot) { + List> snapshotGauges = Arrays.asList( + Pair.of("max", snapshot.getMax()), + Pair.of("mean", snapshot.getMean()), + Pair.of("min", snapshot.getMin()), + Pair.of("stddev", snapshot.getStdDev()), + Pair.of("p50", snapshot.getMedian()), + Pair.of("p75", snapshot.get75thPercentile()), + Pair.of("p95", snapshot.get95thPercentile()), + Pair.of("p98", snapshot.get98thPercentile()), + Pair.of("p99", snapshot.get99thPercentile()), + Pair.of("p999", snapshot.get999thPercentile()) + ); + for (Pair pair : snapshotGauges) { + scope.gauge(MetricRegistry.name(name, pair.getLeft())).update(pair.getRight().doubleValue()); + } + } + + private void reportTimer(String name, Timer timer) { + reportMetered(name, timer); + reportSnapshot(name, timer.getSnapshot()); + } + +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java index 080fe5f357d6..aadb0d486857 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java @@ -67,14 +67,17 @@ import org.apache.hudi.exception.HoodieInsertException; import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.exception.HoodieUpsertException; +import org.apache.hudi.exception.SchemaCompatibilityException; import org.apache.hudi.hadoop.fs.ConsistencyGuard; import org.apache.hudi.hadoop.fs.ConsistencyGuard.FileVisibility; import org.apache.hudi.index.HoodieIndex; +import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata; +import org.apache.hudi.table.action.commit.HoodieMergeHelper; import 
org.apache.hudi.table.marker.WriteMarkers; import org.apache.hudi.table.marker.WriteMarkersFactory; import org.apache.hudi.table.storage.HoodieLayoutFactory; @@ -121,13 +124,12 @@ * @param Type of outputs */ public abstract class HoodieTable implements Serializable { - private static final Logger LOG = LoggerFactory.getLogger(HoodieTable.class); protected final HoodieWriteConfig config; protected final HoodieTableMetaClient metaClient; protected final HoodieIndex index; - private SerializableConfiguration hadoopConfiguration; + private final SerializableConfiguration hadoopConfiguration; protected final TaskContextSupplier taskContextSupplier; private final HoodieTableMetadata metadata; private final HoodieStorageLayout storageLayout; @@ -146,7 +148,7 @@ protected HoodieTable(HoodieWriteConfig config, HoodieEngineContext context, Hoo .build(); this.metadata = HoodieTableMetadata.create(context, metadataConfig, config.getBasePath()); - this.viewManager = FileSystemViewManager.createViewManager(context, config.getMetadataConfig(), config.getViewStorageConfig(), config.getCommonConfig(), unused -> metadata); + this.viewManager = getViewManager(); this.metaClient = metaClient; this.index = getIndex(config, context); this.storageLayout = getStorageLayout(config); @@ -165,7 +167,7 @@ protected HoodieStorageLayout getStorageLayout(HoodieWriteConfig config) { private synchronized FileSystemViewManager getViewManager() { if (null == viewManager) { - viewManager = FileSystemViewManager.createViewManager(getContext(), config.getMetadataConfig(), config.getViewStorageConfig(), config.getCommonConfig(), unused -> metadata); + viewManager = FileSystemViewManager.createViewManager(getContext(), config.getViewStorageConfig(), config.getCommonConfig(), unused -> metadata); } return viewManager; } @@ -177,8 +179,7 @@ private synchronized FileSystemViewManager getViewManager() { * @param records hoodieRecords to upsert * @return HoodieWriteMetadata */ - public abstract HoodieWriteMetadata upsert(HoodieEngineContext context, String instantTime, - I records); + public abstract HoodieWriteMetadata upsert(HoodieEngineContext context, String instantTime, I records); /** * Insert a batch of new records into Hoodie table at the supplied instantTime. @@ -187,8 +188,7 @@ public abstract HoodieWriteMetadata upsert(HoodieEngineContext context, Strin * @param records hoodieRecords to upsert * @return HoodieWriteMetadata */ - public abstract HoodieWriteMetadata insert(HoodieEngineContext context, String instantTime, - I records); + public abstract HoodieWriteMetadata insert(HoodieEngineContext context, String instantTime, I records); /** * Bulk Insert a batch of new records into Hoodie table at the supplied instantTime. 
@@ -267,7 +267,7 @@ public abstract HoodieWriteMetadata insertPrepped(HoodieEngineContext context * @return HoodieWriteMetadata */ public abstract HoodieWriteMetadata bulkInsertPrepped(HoodieEngineContext context, String instantTime, - I preppedRecords, Option bulkInsertPartitioner); + I preppedRecords, Option bulkInsertPartitioner); /** * Replaces all the existing records and inserts the specified new records into Hoodie table at the supplied instantTime, @@ -291,6 +291,14 @@ public abstract HoodieWriteMetadata bulkInsertPrepped(HoodieEngineContext con */ public abstract HoodieWriteMetadata insertOverwriteTable(HoodieEngineContext context, String instantTime, I records); + /** + * Delete expired partition by config + * @param context HoodieEngineContext + * @param instantTime Instant Time for the action + * @return HoodieWriteMetadata + */ + public abstract HoodieWriteMetadata managePartitionTTL(HoodieEngineContext context, String instantTime); + public HoodieWriteConfig getConfig() { return config; } @@ -564,7 +572,9 @@ public abstract HoodieRollbackMetadata rollback(HoodieEngineContext context, * @param partitionsToIndex List of {@link MetadataPartitionType} that should be indexed. * @return HoodieIndexPlan containing metadata partitions and instant upto which they should be indexed. */ - public abstract Option scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List partitionsToIndex); + public abstract Option scheduleIndexing(HoodieEngineContext context, String indexInstantTime, + List partitionsToIndex, + List partitionPaths); /** * Execute requested index action. @@ -859,8 +869,10 @@ private void validateSchema() throws HoodieUpsertException, HoodieInsertExceptio Schema writerSchema = HoodieAvroUtils.createHoodieWriteSchema(config.getSchema()); Schema tableSchema = HoodieAvroUtils.createHoodieWriteSchema(existingTableSchema.get()); AvroSchemaUtils.checkSchemaCompatible(tableSchema, writerSchema, shouldValidate, allowProjection, getDropPartitionColNames()); + } catch (SchemaCompatibilityException e) { + throw e; } catch (Exception e) { - throw new HoodieException("Failed to read schema/check compatibility for base path " + metaClient.getBasePath(), e); + throw new SchemaCompatibilityException("Failed to read schema/check compatibility for base path " + metaClient.getBasePath(), e); } } @@ -984,8 +996,8 @@ public void deleteMetadataIndexIfNecessary() { if (shouldDeleteMetadataPartition(partitionType)) { try { LOG.info("Deleting metadata partition because it is disabled in writer: " + partitionType.name()); - if (metadataPartitionExists(metaClient.getBasePath(), context, partitionType)) { - deleteMetadataPartition(metaClient.getBasePath(), context, partitionType); + if (metadataPartitionExists(metaClient.getBasePath(), context, partitionType.getPartitionPath())) { + deleteMetadataPartition(metaClient.getBasePath(), context, partitionType.getPartitionPath()); } clearMetadataTablePartitionsConfig(Option.of(partitionType), false); } catch (HoodieMetadataException e) { @@ -1083,4 +1095,12 @@ private Set getDropPartitionColNames() { } return new HashSet<>(Arrays.asList(partitionFields.get())); } + + public void runMerge(HoodieMergeHandle upsertHandle, String instantTime, String fileId) throws IOException { + if (upsertHandle.getOldFilePath() == null) { + throw new HoodieUpsertException("Error in finding the old file path at commit " + instantTime + " for fileId: " + fileId); + } else { + HoodieMergeHelper.newInstance().runMerge(this, upsertHandle); + } + } } diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java index 61c0eeeffb0f..c7e294410e3d 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java @@ -234,7 +234,7 @@ private HoodieCleanMetadata runClean(HoodieTable table, HoodieInstan throw new HoodieIOException("Failed to clean up after commit", e); } finally { if (!skipLocking) { - this.txnManager.endTransaction(Option.of(inflightInstant)); + this.txnManager.endTransaction(Option.ofNullable(inflightInstant)); } } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java index 723a95bb2181..77c96b47f057 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java @@ -48,6 +48,7 @@ import java.util.Map; import java.util.stream.Collectors; +import static org.apache.hudi.client.utils.MetadataTableUtils.shouldUseBatchLookup; import static org.apache.hudi.common.util.MapUtils.nonEmpty; import static org.apache.hudi.table.action.clean.CleanPlanner.SAVEPOINTED_TIMESTAMPS; @@ -122,10 +123,15 @@ HoodieCleanerPlan requestClean(HoodieEngineContext context) { Map> cleanOps = new HashMap<>(); List partitionsToDelete = new ArrayList<>(); + boolean shouldUseBatchLookup = shouldUseBatchLookup(table.getMetaClient().getTableConfig(), config); for (int i = 0; i < partitionsToClean.size(); i += cleanerParallelism) { // Handles at most 'cleanerParallelism' number of partitions once at a time to avoid overlarge memory pressure to the timeline server // (remote or local embedded), thus to reduce the risk of an OOM exception. List subPartitionsToClean = partitionsToClean.subList(i, Math.min(i + cleanerParallelism, partitionsToClean.size())); + if (shouldUseBatchLookup) { + LOG.info("Load partitions and files into file system view in advance. Paths: {}", subPartitionsToClean); + table.getHoodieView().loadPartitions(subPartitionsToClean); + } Map>> cleanOpsWithPartitionMeta = context .map(subPartitionsToClean, partitionPathToClean -> Pair.of(partitionPathToClean, planner.getDeletePaths(partitionPathToClean, earliestInstant)), cleanerParallelism) .stream() diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java index 19cbe0f91a73..753f8c8253d5 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java @@ -64,8 +64,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.apache.hudi.client.utils.MetadataTableUtils.shouldUseBatchLookup; - /** * Cleaner is responsible for garbage collecting older files in a given partition path. Such that *

@@ -108,14 +106,9 @@ public CleanPlanner(HoodieEngineContext context, HoodieTable hoodieT .map(entry -> Pair.of(new HoodieFileGroupId(entry.getValue().getPartitionPath(), entry.getValue().getFileId()), entry.getValue())) .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); - // load all partitions in advance if necessary. - if (shouldUseBatchLookup(hoodieTable.getMetaClient().getTableConfig(), config)) { - LOG.info("Load all partitions and files into file system view in advance."); - fileSystemView.loadAllPartitions(); - } - // collect savepointed timestamps to be assist with incremental cleaning. For non-partitioned and metadata table, we may not need this. - this.savepointedTimestamps = hoodieTable.isMetadataTable() ? Collections.EMPTY_LIST : (hoodieTable.isPartitioned() ? hoodieTable.getSavepointTimestamps().stream().collect(Collectors.toList()) - : Collections.EMPTY_LIST); + // collect savepointed timestamps to assist with incremental cleaning. For non-partitioned and metadata table, we may not need this. + this.savepointedTimestamps = hoodieTable.isMetadataTable() ? Collections.emptyList() : (hoodieTable.isPartitioned() ? new ArrayList<>(hoodieTable.getSavepointTimestamps()) + : Collections.emptyList()); } /** @@ -234,8 +227,8 @@ private List getPartitionPathsForIncrementalCleaning(HoodieCleanMetadata } private List getPartitionsFromDeletedSavepoint(HoodieCleanMetadata cleanMetadata) { - List savepointedTimestampsFromLastClean = Arrays.stream(cleanMetadata.getExtraMetadata() - .getOrDefault(SAVEPOINTED_TIMESTAMPS, StringUtils.EMPTY_STRING).split(",")) + List savepointedTimestampsFromLastClean = cleanMetadata.getExtraMetadata() == null ? Collections.emptyList() + : Arrays.stream(cleanMetadata.getExtraMetadata().getOrDefault(SAVEPOINTED_TIMESTAMPS, StringUtils.EMPTY_STRING).split(",")) .filter(partition -> !StringUtils.isNullOrEmpty(partition)).collect(Collectors.toList()); if (savepointedTimestampsFromLastClean.isEmpty()) { return Collections.emptyList(); @@ -252,6 +245,7 @@ private List getPartitionsFromDeletedSavepoint(HoodieCleanMetadata clean Option instantOption = hoodieTable.getCompletedCommitsTimeline().filter(instant -> instant.getTimestamp().equals(savepointCommit)).firstInstant(); if (!instantOption.isPresent()) { LOG.warn("Skipping to process a commit for which savepoint was removed as the instant moved to archived timeline already"); + return Stream.empty(); } HoodieInstant instant = instantOption.get(); return getPartitionsForInstants(instant); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/ClusteringPlanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/ClusteringPlanActionExecutor.java index 18c98d377f6c..6cb8f023ba83 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/ClusteringPlanActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/ClusteringPlanActionExecutor.java @@ -57,7 +57,8 @@ public ClusteringPlanActionExecutor(HoodieEngineContext context, protected Option createClusteringPlan() { LOG.info("Checking if clustering needs to be run on " + config.getBasePath()); - Option lastClusteringInstant = table.getActiveTimeline().getLastClusterCommit(); + Option lastClusteringInstant = + table.getActiveTimeline().getLastClusteringInstant(); int commitsSinceLastClustering = table.getActiveTimeline().getCommitsTimeline().filterCompletedInstants() 
.findInstantsAfter(lastClusteringInstant.map(HoodieInstant::getTimestamp).orElse("0"), Integer.MAX_VALUE) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/ClusteringPlanStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/ClusteringPlanStrategy.java index 0d07bed531a4..a6894388f6d2 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/ClusteringPlanStrategy.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/ClusteringPlanStrategy.java @@ -54,7 +54,7 @@ public abstract class ClusteringPlanStrategy implements Serializable { public static final int CLUSTERING_PLAN_VERSION_1 = 1; - private final HoodieTable hoodieTable; + protected final HoodieTable hoodieTable; private final transient HoodieEngineContext engineContext; private final HoodieWriteConfig writeConfig; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java index c5f5273fbad9..36f75b6a5b07 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java @@ -263,6 +263,7 @@ protected abstract Iterator> handleUpdate(String partitionPath Iterator> recordItr) throws IOException; protected HoodieWriteMetadata> executeClustering(HoodieClusteringPlan clusteringPlan) { + context.setJobStatus(this.getClass().getSimpleName(), "Clustering records for " + config.getTableName()); HoodieInstant instant = HoodieTimeline.getReplaceCommitRequestedInstant(instantTime); // Mark instant as clustering inflight table.getActiveTimeline().transitionReplaceRequestedToInflight(instant, Option.empty()); @@ -285,6 +286,7 @@ protected HoodieWriteMetadata> executeClustering(HoodieC writeMetadata.setPartitionToReplaceFileIds(getPartitionToReplacedFileIds(clusteringPlan, writeMetadata)); commitOnAutoCommit(writeMetadata); if (!writeMetadata.getCommitMetadata().isPresent()) { + LOG.info("Found empty commit metadata for clustering with instant time " + instantTime); HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(writeMetadata.getWriteStats().get(), writeMetadata.getPartitionToReplaceFileIds(), extraMetadata, operationType, getSchemaToStoreInCommit(), getCommitActionType()); writeMetadata.setCommitMetadata(Option.of(commitMetadata)); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/ScheduleCompactionActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/ScheduleCompactionActionExecutor.java index 2aac6f0db8ee..941d93fd3506 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/ScheduleCompactionActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/ScheduleCompactionActionExecutor.java @@ -45,7 +45,6 @@ import javax.annotation.Nullable; import java.io.IOException; -import java.text.ParseException; import java.util.Map; import static org.apache.hudi.common.util.CollectionUtils.nonEmpty; @@ -211,12 +210,7 @@ private boolean needCompact(CompactionTriggerStrategy compactionTriggerStrategy) } private Long 
parsedToSeconds(String time) { - long timestamp; - try { - timestamp = HoodieActiveTimeline.parseDateFromInstantTime(time).getTime() / 1000; - } catch (ParseException e) { - throw new HoodieCompactionException(e.getMessage(), e); - } - return timestamp; + return HoodieActiveTimeline.parseDateFromInstantTimeSafely(time).orElseThrow(() -> new HoodieCompactionException("Failed to parse timestamp " + time)) + .getTime() / 1000; } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java index fc0c320b4406..94c4296e470e 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java @@ -51,7 +51,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.Locale; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -72,7 +71,6 @@ import static org.apache.hudi.common.table.timeline.HoodieTimeline.ROLLBACK_ACTION; import static org.apache.hudi.config.HoodieWriteConfig.WRITE_CONCURRENCY_MODE; import static org.apache.hudi.metadata.HoodieTableMetadata.getMetadataTableBasePath; -import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_FUNCTIONAL_INDEX_PREFIX; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightAndCompletedMetadataPartitions; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightMetadataPartitions; @@ -220,9 +218,8 @@ private void abort(HoodieInstant indexInstant, Set requestedPartitions) // delete metadata partition requestedPartitions.forEach(partition -> { - MetadataPartitionType partitionType = MetadataPartitionType.valueOf(partition.toUpperCase(Locale.ROOT)); - if (metadataPartitionExists(table.getMetaClient().getBasePathV2().toString(), context, partitionType)) { - deleteMetadataPartition(table.getMetaClient().getBasePathV2().toString(), context, partitionType); + if (metadataPartitionExists(table.getMetaClient().getBasePathV2().toString(), context, partition)) { + deleteMetadataPartition(table.getMetaClient().getBasePathV2().toString(), context, partition); } }); @@ -320,9 +317,7 @@ private static List getCompletedArchivedAndActiveInstantsAfter(St private void updateMetadataPartitionsTableConfig(HoodieTableMetaClient metaClient, Set metadataPartitions) { metadataPartitions.forEach(metadataPartition -> { - MetadataPartitionType partitionType = metadataPartition.startsWith(PARTITION_NAME_FUNCTIONAL_INDEX_PREFIX) ? 
MetadataPartitionType.FUNCTIONAL_INDEX : - MetadataPartitionType.valueOf(metadataPartition.toUpperCase(Locale.ROOT)); - metaClient.getTableConfig().setMetadataPartitionState(metaClient, partitionType, true); + metaClient.getTableConfig().setMetadataPartitionState(metaClient, metadataPartition, true); }); } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/ScheduleIndexActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/ScheduleIndexActionExecutor.java index 7b27d7ef6e1c..da85fc4d6340 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/ScheduleIndexActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/ScheduleIndexActionExecutor.java @@ -67,15 +67,20 @@ public class ScheduleIndexActionExecutor extends BaseActionExecutor< private static final Integer LATEST_INDEX_PLAN_VERSION = INDEX_PLAN_VERSION_1; private final List partitionIndexTypes; + + private final List partitionPaths; + private final TransactionManager txnManager; public ScheduleIndexActionExecutor(HoodieEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime, - List partitionIndexTypes) { + List partitionIndexTypes, + List partitionPaths) { super(context, config, table, instantTime); this.partitionIndexTypes = partitionIndexTypes; + this.partitionPaths = partitionPaths; this.txnManager = new TransactionManager(config, table.getMetaClient().getFs()); } @@ -84,8 +89,11 @@ public Option execute() { validateBeforeScheduling(); // make sure that it is idempotent, check with previously pending index operations. Set indexesInflightOrCompleted = getInflightAndCompletedMetadataPartitions(table.getMetaClient().getTableConfig()); + Set requestedPartitions = partitionIndexTypes.stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet()); + requestedPartitions.addAll(partitionPaths); requestedPartitions.removeAll(indexesInflightOrCompleted); + if (!requestedPartitions.isEmpty()) { LOG.warn(String.format("Following partitions already exist or inflight: %s. 
Going to schedule indexing of only these partitions: %s", indexesInflightOrCompleted, requestedPartitions)); @@ -142,8 +150,8 @@ private void validateBeforeScheduling() { private void abort(HoodieInstant indexInstant) { // delete metadata partition partitionIndexTypes.forEach(partitionType -> { - if (metadataPartitionExists(table.getMetaClient().getBasePath(), context, partitionType)) { - deleteMetadataPartition(table.getMetaClient().getBasePath(), context, partitionType); + if (metadataPartitionExists(table.getMetaClient().getBasePath(), context, partitionType.getPartitionPath())) { + deleteMetadataPartition(table.getMetaClient().getBasePath(), context, partitionType.getPartitionPath()); } }); // delete requested instant diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackActionExecutor.java index 077f956eb7be..51159d3d5c3a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackActionExecutor.java @@ -188,7 +188,7 @@ private void validateRollbackCommitSequence() { if (!instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION)) { return true; } - return !ClusteringUtils.isPendingClusteringInstant(table.getMetaClient(), instant); + return !ClusteringUtils.isClusteringInstant(table.getActiveTimeline(), instant); }).map(HoodieInstant::getTimestamp) .collect(Collectors.toList()); if ((instantTimeToRollback != null) && !inflights.isEmpty() diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RestorePlanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RestorePlanActionExecutor.java index b3ee11b9836e..2f9e96859ff6 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RestorePlanActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RestorePlanActionExecutor.java @@ -71,7 +71,7 @@ public Option execute() { // rollback pending clustering instants first before other instants (See HUDI-3362) List pendingClusteringInstantsToRollback = table.getActiveTimeline().filterPendingReplaceTimeline() // filter only clustering related replacecommits (Not insert_overwrite related commits) - .filter(instant -> ClusteringUtils.isPendingClusteringInstant(table.getMetaClient(), instant)) + .filter(instant -> ClusteringUtils.isClusteringInstant(table.getActiveTimeline(), instant)) .getReverseOrderedInstants() .filter(instant -> HoodieActiveTimeline.GREATER_THAN.test(instant.getTimestamp(), savepointToRestoreTimestamp)) .collect(Collectors.toList()); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/HoodiePartitionTTLStrategyFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/HoodiePartitionTTLStrategyFactory.java new file mode 100644 index 000000000000..26bcaa9fe51e --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/HoodiePartitionTTLStrategyFactory.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.action.ttl.strategy; + +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.util.ReflectionUtils; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.config.HoodieTTLConfig; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.table.HoodieTable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Locale; + +import static org.apache.hudi.config.HoodieTTLConfig.PARTITION_TTL_STRATEGY_CLASS_NAME; +import static org.apache.hudi.config.HoodieTTLConfig.PARTITION_TTL_STRATEGY_TYPE; + +/** + * Factory help to create {@link PartitionTTLStrategy}. + *

+ * This factory will try {@link HoodieTTLConfig#PARTITION_TTL_STRATEGY_CLASS_NAME} firstly, + * this ensures the class prop will not be overwritten by {@link PartitionTTLStrategyType} + */ +public class HoodiePartitionTTLStrategyFactory { + + private static final Logger LOG = LoggerFactory.getLogger(HoodiePartitionTTLStrategyFactory.class); + + public static PartitionTTLStrategy createStrategy(HoodieTable hoodieTable, TypedProperties props, String instantTime) throws IOException { + String strategyClassName = getPartitionTTLStrategyClassName(props); + try { + return (PartitionTTLStrategy) ReflectionUtils.loadClass(strategyClassName, + new Class[] {HoodieTable.class, String.class}, hoodieTable, instantTime); + } catch (Throwable e) { + throw new IOException("Could not load partition ttl management strategy class " + strategyClassName, e); + } + } + + private static String getPartitionTTLStrategyClassName(TypedProperties props) { + String strategyClassName = + props.getString(PARTITION_TTL_STRATEGY_CLASS_NAME.key(), null); + if (StringUtils.isNullOrEmpty(strategyClassName)) { + String strategyType = props.getString(PARTITION_TTL_STRATEGY_TYPE.key(), + PARTITION_TTL_STRATEGY_TYPE.defaultValue()); + PartitionTTLStrategyType strategyTypeEnum; + try { + strategyTypeEnum = PartitionTTLStrategyType.valueOf(strategyType.toUpperCase(Locale.ROOT)); + } catch (IllegalArgumentException e) { + throw new HoodieException("Unsupported PartitionTTLStrategy Type " + strategyType); + } + strategyClassName = getPartitionTTLStrategyFromType(strategyTypeEnum); + } + return strategyClassName; + } + + /** + * @param type {@link PartitionTTLStrategyType} enum. + * @return The partition ttl management strategy class name based on the {@link PartitionTTLStrategyType}. + */ + public static String getPartitionTTLStrategyFromType(PartitionTTLStrategyType type) { + switch (type) { + case KEEP_BY_TIME: + return KeepByTimeStrategy.class.getName(); + case KEEP_BY_CREATION_TIME: + return KeepByCreationTimeStrategy.class.getName(); + default: + throw new HoodieException("Unsupported PartitionTTLStrategy Type " + type); + } + } + +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/KeepByCreationTimeStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/KeepByCreationTimeStrategy.java new file mode 100644 index 000000000000..a350086f2dcb --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/KeepByCreationTimeStrategy.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.table.action.ttl.strategy; + +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodiePartitionMetadata; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.table.HoodieTable; + +import java.util.List; +import java.util.stream.Collectors; + +/** + * KeepByTimeStrategy will return expired partitions by their lastCommitTime. + */ +public class KeepByCreationTimeStrategy extends KeepByTimeStrategy { + + public KeepByCreationTimeStrategy(HoodieTable hoodieTable, String instantTime) { + super(hoodieTable, instantTime); + } + + @Override + protected List getExpiredPartitionsForTimeStrategy(List partitionPathsForTTL) { + HoodieTableMetaClient metaClient = hoodieTable.getMetaClient(); + return partitionPathsForTTL.stream().parallel().filter(part -> { + HoodiePartitionMetadata hoodiePartitionMetadata = + new HoodiePartitionMetadata(metaClient.getFs(), FSUtils.getPartitionPath(metaClient.getBasePath(), part)); + Option instantOption = hoodiePartitionMetadata.readPartitionCreatedCommitTime(); + if (instantOption.isPresent()) { + String instantTime = instantOption.get(); + return isPartitionExpired(instantTime); + } + return false; + }).collect(Collectors.toList()); + } + +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/KeepByTimeStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/KeepByTimeStrategy.java new file mode 100644 index 000000000000..b6d67bb9e8a3 --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/KeepByTimeStrategy.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.action.ttl.strategy; + +import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.util.HoodieTimer; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.table.HoodieTable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator.fixInstantTimeCompatibility; +import static org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator.instantTimePlusMillis; + +/** + * KeepByTimeStrategy will return expired partitions by their lastCommitTime. 
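Taken together, a minimal sketch of how these TTL classes are intended to be wired up (illustrative only, not code from this patch): hoodieTable and instantTime are placeholders for a real table handle and action instant, and createStrategy declares IOException.

TypedProperties props = new TypedProperties();
props.setProperty(HoodieTTLConfig.PARTITION_TTL_STRATEGY_TYPE.key(),
    PartitionTTLStrategyType.KEEP_BY_TIME.name());

// Resolves KeepByTimeStrategy via HoodiePartitionTTLStrategyFactory, honoring an
// explicit PARTITION_TTL_STRATEGY_CLASS_NAME first if one is configured.
PartitionTTLStrategy strategy =
    HoodiePartitionTTLStrategyFactory.createStrategy(hoodieTable, props, instantTime);
List<String> expiredPartitions = strategy.getExpiredPartitionPaths();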
+ */ +public class KeepByTimeStrategy extends PartitionTTLStrategy { + + private static final Logger LOG = LoggerFactory.getLogger(KeepByTimeStrategy.class); + + protected final long ttlInMilis; + + public KeepByTimeStrategy(HoodieTable hoodieTable, String instantTime) { + super(hoodieTable, instantTime); + this.ttlInMilis = writeConfig.getPartitionTTLStrategyDaysRetain() * 1000 * 3600 * 24; + } + + @Override + public List getExpiredPartitionPaths() { + Option lastCompletedInstant = hoodieTable.getActiveTimeline().filterCompletedInstants().lastInstant(); + if (!lastCompletedInstant.isPresent() || ttlInMilis <= 0 + || !hoodieTable.getMetaClient().getTableConfig().getPartitionFields().isPresent()) { + return Collections.emptyList(); + } + List expiredPartitions = getExpiredPartitionsForTimeStrategy(getPartitionPathsForTTL()); + int limit = writeConfig.getPartitionTTLMaxPartitionsToDelete(); + LOG.info("Total expired partitions count {}, limit {}", expiredPartitions.size(), limit); + return expiredPartitions.stream() + .limit(limit) // Avoid a single replace commit too large + .collect(Collectors.toList()); + } + + protected List getExpiredPartitionsForTimeStrategy(List partitionsForTTLManagement) { + HoodieTimer timer = HoodieTimer.start(); + Map> lastCommitTimeForPartitions = getLastCommitTimeForPartitions(partitionsForTTLManagement); + LOG.info("Collect last commit time for partitions cost {} ms", timer.endTimer()); + return lastCommitTimeForPartitions.entrySet() + .stream() + .filter(entry -> entry.getValue().isPresent()) + .filter(entry -> isPartitionExpired(entry.getValue().get())) + .map(Map.Entry::getKey) + .collect(Collectors.toList()); + } + + /** + * @param partitionPaths Partitions to collect stats. + */ + private Map> getLastCommitTimeForPartitions(List partitionPaths) { + int statsParallelism = Math.min(partitionPaths.size(), 200); + return hoodieTable.getContext().map(partitionPaths, partitionPath -> { + Option partitionLastModifiedTime = hoodieTable.getHoodieView() + .getLatestFileSlicesBeforeOrOn(partitionPath, instantTime, true) + .map(FileSlice::getBaseInstantTime) + .max(Comparator.naturalOrder()) + .map(Option::ofNullable) + .orElse(Option.empty()); + return Pair.of(partitionPath, partitionLastModifiedTime); + }, statsParallelism).stream().collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + } + + /** + * Determines if a partition's reference time has exceeded its time-to-live (TTL). + *

+ * This method checks if the current time has passed the TTL threshold based on a + * reference time, which could be the creation time or the last commit time of the partition. + * + * @param referenceTime last commit time or creation time for partition + */ + protected boolean isPartitionExpired(String referenceTime) { + String expiredTime = instantTimePlusMillis(fixInstantTimeCompatibility(referenceTime), ttlInMilis); + return fixInstantTimeCompatibility(instantTime).compareTo(expiredTime) > 0; + } + +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/PartitionTTLStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/PartitionTTLStrategy.java new file mode 100644 index 000000000000..477688709303 --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/PartitionTTLStrategy.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.action.ttl.strategy; + +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.util.HoodieTimer; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.table.HoodieTable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.List; + +/** + * Strategy for partition-level ttl management. + */ +public abstract class PartitionTTLStrategy implements TTLStrategy, Serializable { + + private static final Logger LOG = LoggerFactory.getLogger(PartitionTTLStrategy.class); + + protected final HoodieTable hoodieTable; + protected final HoodieWriteConfig writeConfig; + protected final String instantTime; + + public PartitionTTLStrategy(HoodieTable hoodieTable, String instantTime) { + this.writeConfig = hoodieTable.getConfig(); + this.hoodieTable = hoodieTable; + this.instantTime = instantTime; + } + + /** + * Get expired partition paths for a specific partition ttl strategy. + * + * @return Expired partition paths. + */ + public abstract List getExpiredPartitionPaths(); + + /** + * Scan and list all partitions for partition ttl management. + * + * @return all partitions paths for the dataset. 
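To make the expiry check in KeepByTimeStrategy.isPartitionExpired above concrete, a small worked sketch with made-up values (currentInstantTime stands in for the strategy's instantTime; the helpers are the statically imported ones above):

long ttlInMillis = 10L * 24 * 3600 * 1000;               // 10-day retention
String referenceTime = "20231201000000000";              // partition's last commit time
// Expiry boundary = reference time + TTL, i.e. roughly 20231211000000000 here.
String expiredTime = instantTimePlusMillis(fixInstantTimeCompatibility(referenceTime), ttlInMillis);
// The partition is expired once the current action's instant passes that boundary.
boolean expired = fixInstantTimeCompatibility(currentInstantTime).compareTo(expiredTime) > 0;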
+ */ + protected List getPartitionPathsForTTL() { + String partitionSelected = writeConfig.getClusteringPartitionSelected(); + HoodieTimer timer = HoodieTimer.start(); + List partitionsForTTL; + if (StringUtils.isNullOrEmpty(partitionSelected)) { + // Return All partition paths + partitionsForTTL = FSUtils.getAllPartitionPaths(hoodieTable.getContext(), writeConfig.getMetadataConfig(), writeConfig.getBasePath()); + } else { + partitionsForTTL = Arrays.asList(partitionSelected.split(",")); + } + LOG.info("Get partitions for ttl cost {} ms", timer.endTimer()); + return partitionsForTTL; + } + +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/PartitionTTLStrategyType.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/PartitionTTLStrategyType.java new file mode 100644 index 000000000000..6dfbcd6d0e59 --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/PartitionTTLStrategyType.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.action.ttl.strategy; + +import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.keygen.constant.KeyGeneratorType; + +import javax.annotation.Nullable; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.apache.hudi.config.HoodieTTLConfig.PARTITION_TTL_STRATEGY_CLASS_NAME; +import static org.apache.hudi.config.HoodieTTLConfig.PARTITION_TTL_STRATEGY_TYPE; + +/** + * Types of {@link PartitionTTLStrategy}. 
+ */ +public enum PartitionTTLStrategyType { + KEEP_BY_TIME("org.apache.hudi.table.action.ttl.strategy.KeepByTimeStrategy"), + KEEP_BY_CREATION_TIME("org.apache.hudi.table.action.ttl.strategy.KeepByCreationTimeStrategy"); + + private final String className; + + PartitionTTLStrategyType(String className) { + this.className = className; + } + + public String getClassName() { + return className; + } + + public static PartitionTTLStrategyType fromClassName(String className) { + for (PartitionTTLStrategyType type : PartitionTTLStrategyType.values()) { + if (type.getClassName().equals(className)) { + return type; + } + } + throw new IllegalArgumentException("No PartitionTTLStrategyType found for class name: " + className); + } + + public static List getPartitionTTLStrategyNames() { + List names = new ArrayList<>(PartitionTTLStrategyType.values().length); + Arrays.stream(PartitionTTLStrategyType.values()) + .forEach(x -> names.add(x.name())); + return names; + } + + @Nullable + public static String getPartitionTTLStrategyClassName(HoodieConfig config) { + if (config.contains(PARTITION_TTL_STRATEGY_CLASS_NAME)) { + return config.getString(PARTITION_TTL_STRATEGY_CLASS_NAME); + } else if (config.contains(PARTITION_TTL_STRATEGY_TYPE)) { + return KeyGeneratorType.valueOf(config.getString(PARTITION_TTL_STRATEGY_TYPE)).getClassName(); + } + return null; + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/exception/HoodieIncompatibleSchemaException.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/TTLStrategy.java similarity index 71% rename from hudi-common/src/main/java/org/apache/hudi/exception/HoodieIncompatibleSchemaException.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/TTLStrategy.java index a739af67909b..ad41f95fba27 100644 --- a/hudi-common/src/main/java/org/apache/hudi/exception/HoodieIncompatibleSchemaException.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/ttl/strategy/TTLStrategy.java @@ -16,18 +16,10 @@ * limitations under the License. */ -package org.apache.hudi.exception; +package org.apache.hudi.table.action.ttl.strategy; /** - * Exception for incompatible schema. + * Strategy for ttl management. 
*/ -public class HoodieIncompatibleSchemaException extends RuntimeException { - - public HoodieIncompatibleSchemaException(String msg, Throwable e) { - super(msg, e); - } - - public HoodieIncompatibleSchemaException(String msg) { - super(msg); - } +public interface TTLStrategy { } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToFourUpgradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToFourUpgradeHandler.java index c7cb544aec94..edc2d19cf4bc 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToFourUpgradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToFourUpgradeHandler.java @@ -49,7 +49,7 @@ public Map upgrade(HoodieWriteConfig config, HoodieEngin tablePropsToAdd.put(TABLE_CHECKSUM, String.valueOf(HoodieTableConfig.generateChecksum(config.getProps()))); // if metadata is enabled and files partition exist then update TABLE_METADATA_INDEX_COMPLETED // schema for the files partition is same between the two versions - if (config.isMetadataTableEnabled() && metadataPartitionExists(config.getBasePath(), context, MetadataPartitionType.FILES)) { + if (config.isMetadataTableEnabled() && metadataPartitionExists(config.getBasePath(), context, MetadataPartitionType.FILES.getPartitionPath())) { tablePropsToAdd.put(TABLE_METADATA_PARTITIONS, MetadataPartitionType.FILES.getPartitionPath()); } return tablePropsToAdd; diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java index edb3617ea9ef..296cb162a424 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java @@ -24,6 +24,7 @@ import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodiePartitionMetadata; import org.apache.hudi.common.model.HoodieWriteStat; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.testutils.InProcessTimeGenerator; @@ -111,7 +112,7 @@ public static void setupTimelineInFS( } public static String getBaseFilename(String instantTime, String fileId) { - return FSUtils.makeBaseFileName(instantTime, BASE_FILE_WRITE_TOKEN, fileId); + return FSUtils.makeBaseFileName(instantTime, BASE_FILE_WRITE_TOKEN, fileId, HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension()); } public static String getLogFilename(String instantTime, String fileId) { diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieArchivedTimeline.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieArchivedTimeline.java new file mode 100644 index 000000000000..664385f9ae06 --- /dev/null +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieArchivedTimeline.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.table.timeline; + +import org.apache.hudi.DummyActiveAction; +import org.apache.hudi.client.timeline.LSMTimelineWriter; +import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.engine.HoodieLocalEngineContext; +import org.apache.hudi.common.engine.LocalTaskContextSupplier; +import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.model.WriteOperationType; +import org.apache.hudi.common.testutils.HoodieCommonTestHarness; +import org.apache.hudi.common.testutils.HoodieTestTable; +import org.apache.hudi.config.HoodieIndexConfig; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.index.HoodieIndex; + +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + +/** + * Test cases for {@link HoodieArchivedTimeline}. + */ +public class TestHoodieArchivedTimeline extends HoodieCommonTestHarness { + + @BeforeEach + public void setUp() throws Exception { + initMetaClient(); + } + + @AfterEach + public void tearDown() throws Exception { + cleanMetaClient(); + } + + @Test + public void testLoadingInstantsIncrementally() throws Exception { + writeArchivedTimeline(10, 10000000); + // now we got 500 instants spread in 5 parquets. 
+ HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline("10000043"); + assertThat(archivedTimeline.firstInstant().map(HoodieInstant::getTimestamp).orElse(""), is("10000043")); + assertThat(archivedTimeline.lastInstant().map(HoodieInstant::getTimestamp).orElse(""), is("10000050")); + // load incrementally + archivedTimeline.reload("10000034"); + assertThat(archivedTimeline.firstInstant().map(HoodieInstant::getTimestamp).orElse(""), is("10000034")); + archivedTimeline.reload("10000011"); + assertThat(archivedTimeline.firstInstant().map(HoodieInstant::getTimestamp).orElse(""), is("10000011")); + } + + // ------------------------------------------------------------------------- + // Utilities + // ------------------------------------------------------------------------- + + private void writeArchivedTimeline(int batchSize, long startTs) throws Exception { + HoodieTestTable testTable = HoodieTestTable.of(this.metaClient); + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath(this.metaClient.getBasePathV2().toString()) + .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()) + .withMarkersType("DIRECT") + .build(); + HoodieEngineContext engineContext = new HoodieLocalEngineContext(new Configuration()); + LSMTimelineWriter writer = LSMTimelineWriter.getInstance(writeConfig, new LocalTaskContextSupplier(), metaClient); + List instantBuffer = new ArrayList<>(); + for (int i = 1; i <= 50; i++) { + long instantTimeTs = startTs + i; + String instantTime = String.valueOf(instantTimeTs); + String completionTime = String.valueOf(instantTimeTs + 10); + HoodieInstant instant = new HoodieInstant(HoodieInstant.State.COMPLETED, "commit", instantTime, completionTime); + HoodieCommitMetadata metadata = testTable.createCommitMetadata(instantTime, WriteOperationType.INSERT, Arrays.asList("par1", "par2"), 10, false); + byte[] serializedMetadata = TimelineMetadataUtils.serializeCommitMetadata(metadata).get(); + instantBuffer.add(new DummyActiveAction(instant, serializedMetadata)); + if (i % batchSize == 0) { + // archive 10 instants each time + writer.write(instantBuffer, org.apache.hudi.common.util.Option.empty(), org.apache.hudi.common.util.Option.empty()); + writer.compactAndClean(engineContext); + instantBuffer.clear(); + } + } + } +} diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/common/testutils/HoodieMetadataTestTable.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/common/testutils/HoodieMetadataTestTable.java index 3bcba72eb684..5024a9f59d2c 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/common/testutils/HoodieMetadataTestTable.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/common/testutils/HoodieMetadataTestTable.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.testutils; import org.apache.hudi.avro.model.HoodieCleanMetadata; +import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata; import org.apache.hudi.avro.model.HoodieRestoreMetadata; import org.apache.hudi.avro.model.HoodieRollbackMetadata; @@ -134,6 +135,16 @@ public HoodieCleanMetadata doClean(String commitTime, Map parti return cleanMetadata; } + @Override + public void repeatClean(String cleanCommitTime, + HoodieCleanerPlan cleanerPlan, + HoodieCleanMetadata cleanMetadata) throws IOException { + super.repeatClean(cleanCommitTime, cleanerPlan, cleanMetadata); + if (writer != null) { + 
writer.update(cleanMetadata, cleanCommitTime); + } + } + public HoodieTestTable addCompaction(String instantTime, HoodieCommitMetadata commitMetadata) throws Exception { super.addCompaction(instantTime, commitMetadata); if (writer != null) { @@ -148,7 +159,6 @@ public HoodieTestTable addRollback(String instantTime, HoodieRollbackMetadata ro if (writer != null) { writer.update(rollbackMetadata, instantTime); } - super.addRollbackCompleted(instantTime, rollbackMetadata, false); return this; } diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java index 5c93f924ecef..90fcfd4fd7ae 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java @@ -89,6 +89,7 @@ public void testPropertyLoading(boolean withAlternative) throws IOException { assertEquals(5, config.getMaxCommitsToKeep()); assertEquals(2, config.getMinCommitsToKeep()); assertTrue(config.shouldUseExternalSchemaTransformation()); + assertTrue(config.allowDuplicateInserts()); } @Test diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java new file mode 100644 index 000000000000..529d2ddfc7ff --- /dev/null +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.metadata; + +import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.model.HoodieCleaningPolicy; +import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; +import org.apache.hudi.config.HoodieCleanConfig; +import org.apache.hudi.config.HoodieWriteConfig; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + +public class TestHoodieMetadataWriteUtils { + + @Test + public void testCreateMetadataWriteConfigForCleaner() { + HoodieWriteConfig writeConfig1 = HoodieWriteConfig.newBuilder() + .withPath("/tmp") + .withCleanConfig(HoodieCleanConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .retainCommits(5).build()) + .build(); + + HoodieWriteConfig metadataWriteConfig1 = HoodieMetadataWriteUtils.createMetadataWriteConfig(writeConfig1, HoodieFailedWritesCleaningPolicy.EAGER); + assertEquals(HoodieFailedWritesCleaningPolicy.EAGER, metadataWriteConfig1.getFailedWritesCleanPolicy()); + assertEquals(HoodieCleaningPolicy.KEEP_LATEST_COMMITS, metadataWriteConfig1.getCleanerPolicy()); + // default value already greater than data cleaner commits retained * 1.2 + assertEquals(HoodieMetadataConfig.DEFAULT_METADATA_CLEANER_COMMITS_RETAINED, metadataWriteConfig1.getCleanerCommitsRetained()); + + assertNotEquals(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS, metadataWriteConfig1.getCleanerPolicy()); + assertNotEquals(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS, metadataWriteConfig1.getCleanerPolicy()); + + HoodieWriteConfig writeConfig2 = HoodieWriteConfig.newBuilder() + .withPath("/tmp") + .withCleanConfig(HoodieCleanConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .retainCommits(20).build()) + .build(); + HoodieWriteConfig metadataWriteConfig2 = HoodieMetadataWriteUtils.createMetadataWriteConfig(writeConfig2, HoodieFailedWritesCleaningPolicy.EAGER); + assertEquals(HoodieFailedWritesCleaningPolicy.EAGER, metadataWriteConfig2.getFailedWritesCleanPolicy()); + assertEquals(HoodieCleaningPolicy.KEEP_LATEST_COMMITS, metadataWriteConfig2.getCleanerPolicy()); + // data cleaner commits retained * 1.2 is greater than default + assertEquals(24, metadataWriteConfig2.getCleanerCommitsRetained()); + } +} diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/m3/TestM3Metrics.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/m3/TestM3Metrics.java new file mode 100644 index 000000000000..e7299d706b89 --- /dev/null +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/m3/TestM3Metrics.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.metrics.m3; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.when; + +import java.util.UUID; +import org.apache.hudi.common.testutils.NetworkTestUtils; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.metrics.HoodieMetrics; +import org.apache.hudi.metrics.Metrics; +import org.apache.hudi.metrics.MetricsReporterType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +@ExtendWith(MockitoExtension.class) +public class TestM3Metrics { + + @Mock + HoodieWriteConfig config; + HoodieMetrics hoodieMetrics; + Metrics metrics; + + @BeforeEach + public void start() { + when(config.isMetricsOn()).thenReturn(true); + when(config.getMetricsReporterType()).thenReturn(MetricsReporterType.M3); + when(config.getBasePath()).thenReturn("s3://test" + UUID.randomUUID()); + } + + @Test + public void testRegisterGauge() { + when(config.getM3ServerHost()).thenReturn("localhost"); + when(config.getM3ServerPort()).thenReturn(NetworkTestUtils.nextFreePort()); + when(config.getTableName()).thenReturn("raw_table"); + when(config.getM3Env()).thenReturn("dev"); + when(config.getM3Service()).thenReturn("hoodie"); + when(config.getM3Tags()).thenReturn("tag1=value1,tag2=value2"); + when(config.getMetricReporterMetricsNamePrefix()).thenReturn(""); + hoodieMetrics = new HoodieMetrics(config); + metrics = hoodieMetrics.getMetrics(); + metrics.registerGauge("metric1", 123L); + assertEquals("123", metrics.getRegistry().getGauges().get("metric1").getValue().toString()); + metrics.shutdown(); + } + + @Test + public void testEmptyM3Tags() { + when(config.getM3ServerHost()).thenReturn("localhost"); + when(config.getM3ServerPort()).thenReturn(NetworkTestUtils.nextFreePort()); + when(config.getTableName()).thenReturn("raw_table"); + when(config.getM3Env()).thenReturn("dev"); + when(config.getM3Service()).thenReturn("hoodie"); + when(config.getM3Tags()).thenReturn(""); + when(config.getMetricReporterMetricsNamePrefix()).thenReturn(""); + hoodieMetrics = new HoodieMetrics(config); + metrics = hoodieMetrics.getMetrics(); + metrics.registerGauge("metric1", 123L); + assertEquals("123", metrics.getRegistry().getGauges().get("metric1").getValue().toString()); + metrics.shutdown(); + } + + @Test + public void testInvalidM3Tags() { + when(config.getTableName()).thenReturn("raw_table"); + when(config.getMetricReporterMetricsNamePrefix()).thenReturn(""); + assertThrows(RuntimeException.class, () -> { + hoodieMetrics = new HoodieMetrics(config); + }); + } +} diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/TestCleanPlanner.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/TestCleanPlanner.java index 4268cc36d474..9989273b723f 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/TestCleanPlanner.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/TestCleanPlanner.java @@ -138,14 +138,14 @@ void testGetDeletePaths(HoodieWriteConfig config, String earliestInstant, List partitionsInLastClean, Map> savepointsTrackedInLastClean, Map> activeInstantsPartitions, - Map> savepoints, List 
expectedPartitions) throws IOException { + Map> savepoints, List expectedPartitions, boolean areCommitsForSavepointsRemoved) throws IOException { HoodieActiveTimeline activeTimeline = mock(HoodieActiveTimeline.class); when(mockHoodieTable.getActiveTimeline()).thenReturn(activeTimeline); // setup savepoint mocks Set savepointTimestamps = savepoints.keySet().stream().collect(Collectors.toSet()); when(mockHoodieTable.getSavepointTimestamps()).thenReturn(savepointTimestamps); if (!savepoints.isEmpty()) { - for (Map.Entry> entry: savepoints.entrySet()) { + for (Map.Entry> entry : savepoints.entrySet()) { Pair> savepointMetadataOptionPair = getSavepointMetadata(entry.getValue()); HoodieInstant instant = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, entry.getKey()); when(activeTimeline.getInstantDetails(instant)).thenReturn(savepointMetadataOptionPair.getRight()); @@ -156,7 +156,7 @@ void testPartitionsForIncrCleaning(HoodieWriteConfig config, String earliestInst Pair> cleanMetadataOptionPair = getCleanCommitMetadata(partitionsInLastClean, lastCleanInstant, earliestInstantsInLastClean, lastCompletedTimeInLastClean, savepointsTrackedInLastClean.keySet()); mockLastCleanCommit(mockHoodieTable, lastCleanInstant, earliestInstantsInLastClean, activeTimeline, cleanMetadataOptionPair); - mockFewActiveInstants(mockHoodieTable, activeInstantsPartitions, savepointsTrackedInLastClean); + mockFewActiveInstants(mockHoodieTable, activeInstantsPartitions, savepointsTrackedInLastClean, areCommitsForSavepointsRemoved); // Trigger clean and validate partitions to clean. CleanPlanner cleanPlanner = new CleanPlanner<>(context, mockHoodieTable, config); @@ -332,7 +332,7 @@ static Stream keepLatestByHoursOrCommitsArgs() { static Stream keepLatestByHoursOrCommitsArgsIncrCleanPartitions() { String earliestInstant = "20231204194919610"; - String earliestInstantPlusTwoDays = "20231206194919610"; + String earliestInstantPlusTwoDays = "20231206194919610"; String lastCleanInstant = earliestInstantPlusTwoDays; String earliestInstantMinusThreeDays = "20231201194919610"; String earliestInstantMinusFourDays = "20231130194919610"; @@ -340,9 +340,9 @@ static Stream keepLatestByHoursOrCommitsArgsIncrCleanPartitions() { String earliestInstantMinusSixDays = "20231128194919610"; String earliestInstantInLastClean = earliestInstantMinusSixDays; String lastCompletedInLastClean = earliestInstantMinusSixDays; - String earliestInstantMinusOneWeek = "20231127194919610"; + String earliestInstantMinusOneWeek = "20231127194919610"; String savepoint2 = earliestInstantMinusOneWeek; - String earliestInstantMinusOneMonth = "20231104194919610"; + String earliestInstantMinusOneMonth = "20231104194919610"; String savepoint3 = earliestInstantMinusOneMonth; List threePartitionsInActiveTimeline = Arrays.asList(PARTITION1, PARTITION2, PARTITION3); @@ -360,66 +360,74 @@ static Stream keepLatestByHoursOrCommitsArgsIncrCleanPartitions() { List arguments = new ArrayList<>(); // no savepoints tracked in last clean and no additional savepoints. 
all partitions in uncleaned instants should be expected - arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanParitionsCases( + arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanPartitionsCases( earliestInstant, lastCompletedInLastClean, lastCleanInstant, earliestInstantInLastClean, Collections.singletonList(PARTITION1), Collections.emptyMap(), - activeInstantsPartitionsMap3, Collections.emptyMap(), threePartitionsInActiveTimeline)); + activeInstantsPartitionsMap3, Collections.emptyMap(), threePartitionsInActiveTimeline, false)); // a new savepoint is added after last clean. but rest of uncleaned touches all partitions, and so all partitions are expected - arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanParitionsCases( + arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanPartitionsCases( earliestInstant, lastCompletedInLastClean, lastCleanInstant, earliestInstantInLastClean, Collections.singletonList(PARTITION1), Collections.emptyMap(), - activeInstantsPartitionsMap3, Collections.singletonMap(savepoint2, Collections.singletonList(PARTITION1)), threePartitionsInActiveTimeline)); + activeInstantsPartitionsMap3, Collections.singletonMap(savepoint2, Collections.singletonList(PARTITION1)), threePartitionsInActiveTimeline, false)); // previous clean tracks a savepoint which exists in timeline still. only 2 partitions are touched by uncleaned instants. only 2 partitions are expected - arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanParitionsCases( + arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanPartitionsCases( earliestInstant, lastCompletedInLastClean, lastCleanInstant, earliestInstantInLastClean, Collections.singletonList(PARTITION1), Collections.singletonMap(savepoint2, Collections.singletonList(PARTITION1)), - activeInstantsPartitionsMap2, Collections.singletonMap(savepoint2, Collections.singletonList(PARTITION1)), twoPartitionsInActiveTimeline)); + activeInstantsPartitionsMap2, Collections.singletonMap(savepoint2, Collections.singletonList(PARTITION1)), twoPartitionsInActiveTimeline, false)); // savepoint tracked in previous clean was removed(touching partition1). latest uncleaned touched 2 other partitions. So, in total 3 partitions are expected. - arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanParitionsCases( + arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanPartitionsCases( earliestInstant, lastCompletedInLastClean, lastCleanInstant, earliestInstantInLastClean, Collections.singletonList(PARTITION1), Collections.singletonMap(savepoint2, Collections.singletonList(PARTITION1)), - activeInstantsPartitionsMap2, Collections.emptyMap(), threePartitionsInActiveTimeline)); + activeInstantsPartitionsMap2, Collections.emptyMap(), threePartitionsInActiveTimeline, false)); // previous savepoint still exists and touches partition1. uncleaned touches only partition2 and partition3. expected partition2 and partition3. 
- arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanParitionsCases( + arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanPartitionsCases( earliestInstant, lastCompletedInLastClean, lastCleanInstant, earliestInstantInLastClean, Collections.singletonList(PARTITION1), Collections.singletonMap(savepoint2, Collections.singletonList(PARTITION1)), - activeInstantsPartitionsMap2, Collections.singletonMap(savepoint2, Collections.singletonList(PARTITION1)), twoPartitionsInActiveTimeline)); + activeInstantsPartitionsMap2, Collections.singletonMap(savepoint2, Collections.singletonList(PARTITION1)), twoPartitionsInActiveTimeline, false)); // a new savepoint was added compared to previous clean. all 2 partitions are expected since uncleaned commits touched just 2 partitions. Map> latestSavepoints = new HashMap<>(); latestSavepoints.put(savepoint2, Collections.singletonList(PARTITION1)); latestSavepoints.put(savepoint3, Collections.singletonList(PARTITION1)); - arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanParitionsCases( + arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanPartitionsCases( earliestInstant, lastCompletedInLastClean, lastCleanInstant, earliestInstantInLastClean, Collections.singletonList(PARTITION1), Collections.singletonMap(savepoint2, Collections.singletonList(PARTITION1)), - activeInstantsPartitionsMap2, latestSavepoints, twoPartitionsInActiveTimeline)); + activeInstantsPartitionsMap2, latestSavepoints, twoPartitionsInActiveTimeline, false)); // 2 savepoints were tracked in previous clean. one of them is removed in latest. A partition which was part of the removed savepoint should be added in final // list of partitions to clean Map> previousSavepoints = new HashMap<>(); latestSavepoints.put(savepoint2, Collections.singletonList(PARTITION1)); latestSavepoints.put(savepoint3, Collections.singletonList(PARTITION2)); - arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanParitionsCases( + arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanPartitionsCases( earliestInstant, lastCompletedInLastClean, lastCleanInstant, earliestInstantInLastClean, Collections.singletonList(PARTITION1), - previousSavepoints, activeInstantsPartitionsMap2, Collections.singletonMap(savepoint3, Collections.singletonList(PARTITION2)), twoPartitionsInActiveTimeline)); + previousSavepoints, activeInstantsPartitionsMap2, Collections.singletonMap(savepoint3, Collections.singletonList(PARTITION2)), twoPartitionsInActiveTimeline, false)); // 2 savepoints were tracked in previous clean. one of them is removed in latest. But a partition part of removed savepoint is already touched by uncleaned commits. // so we expect all 3 partitions to be in final list. - arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanParitionsCases( + arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanPartitionsCases( earliestInstant, lastCompletedInLastClean, lastCleanInstant, earliestInstantInLastClean, Collections.singletonList(PARTITION1), - previousSavepoints, activeInstantsPartitionsMap3, Collections.singletonMap(savepoint3, Collections.singletonList(PARTITION2)), threePartitionsInActiveTimeline)); + previousSavepoints, activeInstantsPartitionsMap3, Collections.singletonMap(savepoint3, Collections.singletonList(PARTITION2)), threePartitionsInActiveTimeline, false)); // unpartitioned test case. savepoint removed. 
List unPartitionsInActiveTimeline = Arrays.asList(StringUtils.EMPTY_STRING); Map> activeInstantsUnPartitionsMap = new HashMap<>(); activeInstantsUnPartitionsMap.put(earliestInstantMinusThreeDays, unPartitionsInActiveTimeline); - arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanParitionsCases( + arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanPartitionsCases( earliestInstant, lastCompletedInLastClean, lastCleanInstant, earliestInstantInLastClean, Collections.singletonList(StringUtils.EMPTY_STRING), Collections.singletonMap(savepoint2, Collections.singletonList(StringUtils.EMPTY_STRING)), - activeInstantsUnPartitionsMap, Collections.emptyMap(), unPartitionsInActiveTimeline)); + activeInstantsUnPartitionsMap, Collections.emptyMap(), unPartitionsInActiveTimeline, false)); + + // savepoint tracked in previous clean was removed(touching partition1). active instants does not have the instant corresponding to the savepoint. + // latest uncleaned touched 2 other partitions. So, in total 2 partitions are expected. + activeInstantsPartitionsMap2.remove(earliestInstantMinusOneWeek); + arguments.addAll(buildArgumentsForCleanByHoursAndCommitsIncrCleanPartitionsCases( + earliestInstant, lastCompletedInLastClean, lastCleanInstant, earliestInstantInLastClean, Collections.singletonList(PARTITION1), + Collections.singletonMap(savepoint2, Collections.singletonList(PARTITION1)), + activeInstantsPartitionsMap2, Collections.emptyMap(), twoPartitionsInActiveTimeline, true)); return arguments.stream(); } @@ -450,19 +458,20 @@ private static List buildArgumentsForCleanByHoursAndCommitsCases(Stri } // helper to build common cases for the two policies - private static List buildArgumentsForCleanByHoursAndCommitsIncrCleanParitionsCases(String earliestInstant, - String latestCompletedInLastClean, - String lastKnownCleanInstantTime, - String earliestInstantInLastClean, - List partitionsInLastClean, - Map> savepointsTrackedInLastClean, - Map> activeInstantsToPartitionsMap, - Map> savepoints, - List expectedPartitions) { + private static List buildArgumentsForCleanByHoursAndCommitsIncrCleanPartitionsCases(String earliestInstant, + String latestCompletedInLastClean, + String lastKnownCleanInstantTime, + String earliestInstantInLastClean, + List partitionsInLastClean, + Map> savepointsTrackedInLastClean, + Map> activeInstantsToPartitionsMap, + Map> savepoints, + List expectedPartitions, + boolean areCommitsForSavepointsRemoved) { return Arrays.asList(Arguments.of(getCleanByHoursConfig(), earliestInstant, latestCompletedInLastClean, lastKnownCleanInstantTime, - earliestInstantInLastClean, partitionsInLastClean, savepointsTrackedInLastClean, activeInstantsToPartitionsMap, savepoints, expectedPartitions), + earliestInstantInLastClean, partitionsInLastClean, savepointsTrackedInLastClean, activeInstantsToPartitionsMap, savepoints, expectedPartitions, areCommitsForSavepointsRemoved), Arguments.of(getCleanByCommitsConfig(), earliestInstant, latestCompletedInLastClean, lastKnownCleanInstantTime, - earliestInstantInLastClean, partitionsInLastClean, savepointsTrackedInLastClean, activeInstantsToPartitionsMap, savepoints, expectedPartitions)); + earliestInstantInLastClean, partitionsInLastClean, savepointsTrackedInLastClean, activeInstantsToPartitionsMap, savepoints, expectedPartitions, areCommitsForSavepointsRemoved)); } private static HoodieFileGroup buildFileGroup(List baseFileCommitTimes) { @@ -507,7 +516,7 @@ private static Pair> getCleanCommitMetadata( extraMetadata.put(SAVEPOINTED_TIMESTAMPS, 
savepointsToTrack.stream().collect(Collectors.joining(","))); } HoodieCleanMetadata cleanMetadata = new HoodieCleanMetadata(instantTime, 100L, 10, earliestCommitToRetain, lastCompletedTime, partitionMetadata, - CLEAN_METADATA_VERSION_2, Collections.EMPTY_MAP, extraMetadata); + CLEAN_METADATA_VERSION_2, Collections.EMPTY_MAP, extraMetadata.isEmpty() ? null : extraMetadata); return Pair.of(cleanMetadata, TimelineMetadataUtils.serializeCleanMetadata(cleanMetadata)); } catch (IOException ex) { throw new UncheckedIOException(ex); @@ -548,14 +557,16 @@ private static void mockLastCleanCommit(HoodieTable hoodieTable, String timestam } private static void mockFewActiveInstants(HoodieTable hoodieTable, Map> activeInstantsToPartitions, - Map> savepointedCommitsToAdd) + Map> savepointedCommitsToAdd, boolean areCommitsForSavepointsRemoved) throws IOException { HoodieDefaultTimeline commitsTimeline = new HoodieDefaultTimeline(); List instants = new ArrayList<>(); Map> instantstoProcess = new HashMap<>(); instantstoProcess.putAll(activeInstantsToPartitions); - instantstoProcess.putAll(savepointedCommitsToAdd); - instantstoProcess.forEach((k,v) -> { + if (!areCommitsForSavepointsRemoved) { + instantstoProcess.putAll(savepointedCommitsToAdd); + } + instantstoProcess.forEach((k, v) -> { HoodieInstant hoodieInstant = new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, k); instants.add(hoodieInstant); Map> partitionToWriteStats = new HashMap<>(); diff --git a/hudi-client/hudi-flink-client/pom.xml b/hudi-client/hudi-flink-client/pom.xml index 99a66498c15b..d774078d5d68 100644 --- a/hudi-client/hudi-flink-client/pom.xml +++ b/hudi-client/hudi-flink-client/pom.xml @@ -262,9 +262,6 @@ src/main/resources - - src/test/resources - diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/FlinkSizeBasedClusteringPlanStrategyRecently.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/FlinkSizeBasedClusteringPlanStrategyRecently.java new file mode 100644 index 000000000000..234bd7a90908 --- /dev/null +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/FlinkSizeBasedClusteringPlanStrategyRecently.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
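The TestCleanPlanner changes above exercise the incremental clean path: the partitions to clean are those touched by commits since the last clean, plus the partitions of any savepoint that was tracked by the previous clean but has since been removed; the new areCommitsForSavepointsRemoved flag additionally covers the case where the savepointed commit itself is gone from the active timeline. A small sketch of that selection rule, under the simplifying assumption that the inputs are already resolved to partition lists (the real planner derives them from clean and savepoint metadata):

import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class IncrementalCleanPartitionsSketch {

  // Union of partitions written by uncleaned commits with partitions of savepoints that the
  // previous clean tracked but that no longer exist (their data becomes cleanable again).
  static Set<String> partitionsToClean(Map<String, List<String>> uncleanedInstantToPartitions,
                                       Map<String, List<String>> savepointsTrackedInLastClean,
                                       Set<String> currentSavepoints) {
    Set<String> partitions = new HashSet<>();
    uncleanedInstantToPartitions.values().forEach(partitions::addAll);
    savepointsTrackedInLastClean.forEach((savepointTime, savepointPartitions) -> {
      if (!currentSavepoints.contains(savepointTime)) {
        partitions.addAll(savepointPartitions);
      }
    });
    return partitions;
  }
}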
+ */ + +package org.apache.hudi.client.clustering.plan.strategy; + +import org.apache.hudi.avro.model.HoodieClusteringGroup; +import org.apache.hudi.avro.model.HoodieClusteringPlan; +import org.apache.hudi.avro.model.HoodieClusteringStrategy; +import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.util.CollectionUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.table.HoodieTable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.stream.Collectors; + +import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION; + +/** + * Only take care of partitions related to active timeline, instead of do full partition listing. + */ +public class FlinkSizeBasedClusteringPlanStrategyRecently extends FlinkSizeBasedClusteringPlanStrategy { + private static final Logger LOG = LoggerFactory.getLogger(FlinkSizeBasedClusteringPlanStrategy.class); + public FlinkSizeBasedClusteringPlanStrategyRecently(HoodieTable table, + HoodieEngineContext engineContext, + HoodieWriteConfig writeConfig) { + super(table, engineContext, writeConfig); + if (!table.getConfig().getTableType().equals(HoodieTableType.COPY_ON_WRITE)) { + throw new UnsupportedOperationException("FlinkSizeBasedClusteringPlanStrategyRecently only support cow table for now."); + } + } + + @Override + public Option generateClusteringPlan() { + if (!checkPrecondition()) { + return Option.empty(); + } + + HoodieTableMetaClient metaClient = getHoodieTable().getMetaClient(); + LOG.info("Scheduling clustering for " + metaClient.getBasePath()); + + List partitionPaths = getPartitionPathInActiveTimeline(hoodieTable); + + partitionPaths = filterPartitionPaths(partitionPaths); + + if (partitionPaths.isEmpty()) { + // In case no partitions could be picked, return no clustering plan + return Option.empty(); + } + + List clusteringGroups = getEngineContext() + .flatMap( + partitionPaths, partitionPath -> { + List fileSlicesEligible = getFileSlicesEligibleForClustering(partitionPath).collect(Collectors.toList()); + return buildClusteringGroupsForPartition(partitionPath, fileSlicesEligible).limit(getWriteConfig().getClusteringMaxNumGroups()); + }, + partitionPaths.size()) + .stream() + .limit(getWriteConfig().getClusteringMaxNumGroups()) + .collect(Collectors.toList()); + + if (clusteringGroups.isEmpty()) { + LOG.info("No data available to cluster"); + return Option.empty(); + } + + HoodieClusteringStrategy strategy = HoodieClusteringStrategy.newBuilder() + .setStrategyClassName(getWriteConfig().getClusteringExecutionStrategyClass()) + .setStrategyParams(getStrategyParams()) + .build(); + + return Option.of(HoodieClusteringPlan.newBuilder() + .setStrategy(strategy) + .setInputGroups(clusteringGroups) + .setExtraMetadata(getExtraMetadata()) + .setVersion(getPlanVersion()) + .setPreserveHoodieMetadata(true) + .build()); + } + + /** + * Only take care of partitions related to active 
timeline, instead of doing a full partition listing. + @param hoodieTable + @return + */ + private List getPartitionPathInActiveTimeline(HoodieTable>, List, List> hoodieTable) { + HashSet partitions = new HashSet<>(); + HoodieTimeline cowCommitTimeline = hoodieTable.getActiveTimeline().getTimelineOfActions(CollectionUtils.createSet(COMMIT_ACTION)).filterCompletedInstants(); + cowCommitTimeline.getInstants().forEach(instant -> { + try { + HoodieCommitMetadata metadata = + HoodieCommitMetadata.fromBytes(cowCommitTimeline.getInstantDetails(instant).get(), HoodieCommitMetadata.class); + partitions.addAll(metadata.getWritePartitionPaths()); + } catch (IOException e) { + // ignore Exception here + LOG.warn("Exception while getting instant details from commit metadata.", e); + } + }); + + LOG.info("Partitions related to active timeline: " + partitions); + return new ArrayList<>(partitions); + } +} \ No newline at end of file diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java index 9ce17cbb8c68..94a127ae9263 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java @@ -32,7 +32,6 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; -import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.collection.Pair; @@ -172,24 +171,6 @@ protected void commitInternal(String instantTime, Map m.updateSizeMetrics(metadataMetaClient, metadata, dataMetaClient.getTableConfig().getMetadataPartitions())); } - /** - * Validates the timeline for both main and metadata tables to ensure compaction on MDT can be scheduled. - */ - @Override - protected boolean validateTimelineBeforeSchedulingCompaction(Option inFlightInstantTimestamp, String latestDeltaCommitTimeInMetadataTable) { - // Allows compaction of the metadata table to run regardless of inflight instants - return true; - } - - @Override - protected void validateRollback(String commitToRollbackInstantTime, HoodieInstant compactionInstant, HoodieTimeline deltacommitsSinceCompaction) { - // ignore, flink has more radical compression strategy, it is very probably that - // the latest compaction instant has greater timestamp than the instant to roll back. - - // The limitation can be relaxed because the log reader of MDT only accepts valid instants - // based on the DT timeline, so the base file of MDT does not include un-committed instants.
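The new FlinkSizeBasedClusteringPlanStrategyRecently above narrows clustering candidates to partitions written by completed commits on the active timeline instead of listing every partition under the base path. Stripped of the Hudi types, getPartitionPathInActiveTimeline boils down to a union over the write partition paths recorded in each completed commit's metadata, roughly:

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Function;

public class ActiveTimelinePartitionsSketch {

  // completedCommits: metadata of completed COMMIT actions on the active timeline.
  // writePartitionPaths: extracts the partitions each commit wrote to
  // (HoodieCommitMetadata#getWritePartitionPaths in the patch).
  static <M> List<String> partitionsInActiveTimeline(List<M> completedCommits,
                                                     Function<M, List<String>> writePartitionPaths) {
    Set<String> partitions = new HashSet<>();
    for (M commitMetadata : completedCommits) {
      partitions.addAll(writePartitionPaths.apply(commitMetadata));
    }
    return new ArrayList<>(partitions);
  }
}

The trade-off is that only partitions with recent write activity are ever considered, which is exactly what the class-level comment promises.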
- } - @Override public void deletePartitions(String instantTime, List partitions) { throw new HoodieNotSupportedException("Dropping metadata index not supported for Flink metadata table yet."); diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java index 0f73b0bce05d..05779339780c 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java @@ -41,7 +41,6 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieNotSupportedException; -import org.apache.hudi.exception.HoodieUpsertException; import org.apache.hudi.io.HoodieCreateHandle; import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.io.HoodieMergeHandleFactory; @@ -64,7 +63,6 @@ import org.apache.hudi.table.action.commit.FlinkInsertPreppedCommitActionExecutor; import org.apache.hudi.table.action.commit.FlinkUpsertCommitActionExecutor; import org.apache.hudi.table.action.commit.FlinkUpsertPreppedCommitActionExecutor; -import org.apache.hudi.table.action.commit.HoodieMergeHelper; import org.apache.hudi.table.action.rollback.BaseRollbackPlanActionExecutor; import org.apache.hudi.table.action.rollback.CopyOnWriteRollbackActionExecutor; import org.slf4j.Logger; @@ -378,7 +376,7 @@ public HoodieRollbackMetadata rollback(HoodieEngineContext context, String rollb } @Override - public Option scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List partitionsToIndex) { + public Option scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List partitionsToIndex, List partitionPaths) { throw new HoodieNotSupportedException("Metadata indexing is not supported for a Flink table yet."); } @@ -397,6 +395,11 @@ public Option scheduleRestore(HoodieEngineContext context, St throw new HoodieNotSupportedException("Restore is not supported yet"); } + @Override + public HoodieWriteMetadata> managePartitionTTL(HoodieEngineContext context, String instantTime) { + throw new HoodieNotSupportedException("Manage partition ttl is not supported yet"); + } + @Override public HoodieRestoreMetadata restore(HoodieEngineContext context, String restoreInstantTimestamp, String savepointToRestoreTimestamp) { throw new HoodieNotSupportedException("Savepoint and restore is not supported yet"); @@ -416,20 +419,8 @@ public Iterator> handleUpdate( protected Iterator> handleUpdateInternal(HoodieMergeHandle upsertHandle, String instantTime, String fileId) throws IOException { - if (upsertHandle.getOldFilePath() == null) { - throw new HoodieUpsertException( - "Error in finding the old file path at commit " + instantTime + " for fileId: " + fileId); - } else { - HoodieMergeHelper.newInstance().runMerge(this, upsertHandle); - } - - // TODO(vc): This needs to be revisited - if (upsertHandle.getPartitionPath() == null) { - LOG.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() + ", " - + upsertHandle.writeStatuses()); - } - - return Collections.singletonList(upsertHandle.writeStatuses()).iterator(); + runMerge(upsertHandle, instantTime, fileId); + return upsertHandle.getWriteStatusesAsIterator(); } protected HoodieMergeHandle getUpdateHandle(String instantTime, String partitionPath, String fileId, diff --git 
a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java index 732832ae9112..750ef71bc37b 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java @@ -184,20 +184,8 @@ public Iterator> handleUpdate(String partitionPath, String fil protected Iterator> handleUpdateInternal(HoodieMergeHandle upsertHandle, String fileId) throws IOException { - if (upsertHandle.getOldFilePath() == null) { - throw new HoodieUpsertException( - "Error in finding the old file path at commit " + instantTime + " for fileId: " + fileId); - } else { - HoodieMergeHelper.newInstance().runMerge(table, upsertHandle); - } - - // TODO(vc): This needs to be revisited - if (upsertHandle.getPartitionPath() == null) { - LOG.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() + ", " - + upsertHandle.writeStatuses()); - } - - return Collections.singletonList(upsertHandle.writeStatuses()).iterator(); + table.runMerge(upsertHandle, instantTime, fileId); + return upsertHandle.getWriteStatusesAsIterator(); } @Override diff --git a/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestFlinkSizeBasedClusteringPlanStrategy.java b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestFlinkSizeBasedClusteringPlanStrategy.java index 97f12abf322b..50a3233bf370 100644 --- a/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestFlinkSizeBasedClusteringPlanStrategy.java +++ b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestFlinkSizeBasedClusteringPlanStrategy.java @@ -25,6 +25,7 @@ import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieFileGroupId; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.config.HoodieClusteringConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieFlinkCopyOnWriteTable; @@ -90,7 +91,8 @@ public void testBuildClusteringGroupsForPartitionOnlyOneFile() { private FileSlice generateFileSlice(String partitionPath, String fileId, String baseInstant) { FileSlice fs = new FileSlice(new HoodieFileGroupId(partitionPath, fileId), baseInstant); - fs.setBaseFile(new HoodieBaseFile(FSUtils.makeBaseFileName(baseInstant, "1-0-1", fileId))); + fs.setBaseFile(new HoodieBaseFile(FSUtils.makeBaseFileName(baseInstant, "1-0-1", fileId, + HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension()))); return fs; } } diff --git a/hudi-client/hudi-java-client/pom.xml b/hudi-client/hudi-java-client/pom.xml index 37e4ecbec36d..3fb62e2fa504 100644 --- a/hudi-client/hudi-java-client/pom.xml +++ b/hudi-client/hudi-java-client/pom.xml @@ -151,9 +151,6 @@ src/main/resources - - src/test/resources - diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java index af503e15c608..9a906c7e7e00 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java +++ 
b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java @@ -208,28 +208,6 @@ public List deletePrepped(List> preppedRecords, fin return postWrite(result, instantTime, table); } - @Override - public List postWrite(HoodieWriteMetadata> result, - String instantTime, - HoodieTable hoodieTable) { - if (result.getIndexLookupDuration().isPresent()) { - metrics.updateIndexMetrics(getOperationType().name(), result.getIndexUpdateDuration().get().toMillis()); - } - if (result.isCommitted()) { - // Perform post commit operations. - if (result.getFinalizeDuration().isPresent()) { - metrics.updateFinalizeWriteMetrics(result.getFinalizeDuration().get().toMillis(), - result.getWriteStats().get().size()); - } - - postCommit(hoodieTable, result.getCommitMetadata().get(), instantTime, Option.empty()); - mayBeCleanAndArchive(hoodieTable); - - emitCommitMetrics(instantTime, result.getCommitMetadata().get(), hoodieTable.getMetaClient().getCommitActionType()); - } - return result.getWriteStatuses(); - } - @Override protected void initMetadataTable(Option instantTime) { // Initialize Metadata Table to make sure it's bootstrapped _before_ the operation, diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java index 4c080f2f6635..6e111f67da27 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java @@ -42,7 +42,6 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieNotSupportedException; -import org.apache.hudi.exception.HoodieUpsertException; import org.apache.hudi.io.HoodieCreateHandle; import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.io.HoodieMergeHandleFactory; @@ -55,7 +54,6 @@ import org.apache.hudi.table.action.clean.CleanPlanActionExecutor; import org.apache.hudi.table.action.cluster.ClusteringPlanActionExecutor; import org.apache.hudi.table.action.cluster.JavaExecuteClusteringCommitActionExecutor; -import org.apache.hudi.table.action.commit.HoodieMergeHelper; import org.apache.hudi.table.action.commit.JavaBulkInsertCommitActionExecutor; import org.apache.hudi.table.action.commit.JavaBulkInsertPreppedCommitActionExecutor; import org.apache.hudi.table.action.commit.JavaDeleteCommitActionExecutor; @@ -178,6 +176,11 @@ public HoodieWriteMetadata> insertOverwriteTable(HoodieEngineC context, config, this, instantTime, records).execute(); } + @Override + public HoodieWriteMetadata> managePartitionTTL(HoodieEngineContext context, String instantTime) { + throw new HoodieNotSupportedException("Manage partition ttl is not supported yet"); + } + @Override public Option scheduleCompaction(HoodieEngineContext context, String instantTime, @@ -242,8 +245,8 @@ public HoodieRollbackMetadata rollback(HoodieEngineContext context, } @Override - public Option scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List partitionsToIndex) { - return new ScheduleIndexActionExecutor<>(context, config, this, indexInstantTime, partitionsToIndex).execute(); + public Option scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List partitionsToIndex, List partitionPaths) { + return new ScheduleIndexActionExecutor<>(context, config, this, indexInstantTime, 
partitionsToIndex, partitionPaths).execute(); } @Override @@ -285,20 +288,8 @@ public Iterator> handleUpdate( protected Iterator> handleUpdateInternal(HoodieMergeHandle upsertHandle, String instantTime, String fileId) throws IOException { - if (upsertHandle.getOldFilePath() == null) { - throw new HoodieUpsertException( - "Error in finding the old file path at commit " + instantTime + " for fileId: " + fileId); - } else { - HoodieMergeHelper.newInstance().runMerge(this, upsertHandle); - } - - // TODO(yihua): This needs to be revisited - if (upsertHandle.getPartitionPath() == null) { - LOG.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() + ", " - + upsertHandle.writeStatuses()); - } - - return Collections.singletonList(upsertHandle.writeStatuses()).iterator(); + runMerge(upsertHandle, instantTime, fileId); + return upsertHandle.getWriteStatusesAsIterator(); } protected HoodieMergeHandle getUpdateHandle(String instantTime, String partitionPath, String fileId, diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java index 5ce6bcccef57..84e4316d164d 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java @@ -243,18 +243,8 @@ public Iterator> handleUpdate(String partitionPath, String fil protected Iterator> handleUpdateInternal(HoodieMergeHandle upsertHandle, String fileId) throws IOException { - if (upsertHandle.getOldFilePath() == null) { - throw new HoodieUpsertException( - "Error in finding the old file path at commit " + instantTime + " for fileId: " + fileId); - } else { - HoodieMergeHelper.newInstance().runMerge(table, upsertHandle); - } - - List statuses = upsertHandle.writeStatuses(); - if (upsertHandle.getPartitionPath() == null) { - LOG.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() + ", " + statuses); - } - return Collections.singletonList(statuses).iterator(); + table.runMerge(upsertHandle, instantTime, fileId); + return upsertHandle.getWriteStatusesAsIterator(); } protected HoodieMergeHandle getUpdateHandle(String partitionPath, String fileId, Iterator> recordItr) { diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java index d4df65270a86..cc0478d73f53 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java @@ -59,6 +59,7 @@ import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; @@ -69,6 +70,7 @@ import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestTable; import 
org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.common.testutils.InProcessTimeGenerator; import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.JsonUtils; import org.apache.hudi.common.util.Option; @@ -110,7 +112,6 @@ import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.util.Time; import org.apache.parquet.avro.AvroSchemaConverter; import org.apache.parquet.schema.MessageType; import org.junit.jupiter.api.AfterEach; @@ -167,7 +168,6 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -578,8 +578,8 @@ public void testMetadataTableCompactionWithPendingInstants() throws Exception { doWriteOperation(testTable, "0000007", INSERT); tableMetadata = metadata(writeConfig, context); - // verify that compaction of metadata table does not kick in. - assertFalse(tableMetadata.getLatestCompactionTime().isPresent()); + // verify that compaction of metadata table should kick in. + assertTrue(tableMetadata.getLatestCompactionTime().isPresent(), "Compaction of metadata table does not kick in"); // move inflight to completed testTable.moveInflightCommitToComplete("0000003", inflightCommitMeta); @@ -1051,7 +1051,6 @@ public void testRollbackOperationsNonPartitioned() throws Exception { */ @Test public void testManualRollbacks() throws Exception { - boolean populateMateFields = false; init(COPY_ON_WRITE, false); // Setting to archive more aggressively on the Metadata Table than the Dataset final int maxDeltaCommitsBeforeCompaction = 4; @@ -1082,23 +1081,17 @@ public void testManualRollbacks() throws Exception { } validateMetadata(testTable); - // We can only rollback those commits whose deltacommit have not been archived yet. - int numRollbacks = 0; - boolean exceptionRaised = false; + // We can only roll back those commits whose deltacommit have not been archived yet. List allInstants = metaClient.reloadActiveTimeline().getCommitsTimeline().getReverseOrderedInstants().collect(Collectors.toList()); for (HoodieInstant instantToRollback : allInstants) { try { - testTable.doRollback(instantToRollback.getTimestamp(), String.valueOf(Time.now())); + testTable.doRollback(instantToRollback.getTimestamp(), metaClient.createNewInstantTime()); validateMetadata(testTable); - ++numRollbacks; } catch (HoodieMetadataException e) { // This is expected since we are rolling back commits that are older than the latest compaction on the MDT break; } } - // Since each rollback also creates a deltacommit, we can only support rolling back of half of the original - // instants present before rollback started. 
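One detail in testManualRollbacks above: String.valueOf(Time.now()) is replaced with metaClient.createNewInstantTime() because Hudi instant times are formatted timestamp strings rather than raw epoch millis, and generating them through the meta client keeps them comparable with the rest of the timeline. A rough sketch of the difference, assuming the yyyyMMddHHmmssSSS format seen in the timestamps elsewhere in this patch (e.g. 20231204194919610); the real generator also guards against collisions:

import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;

public class InstantTimeSketch {

  // Assumed instant format, matching the 17-digit timestamps used in the tests above.
  private static final DateTimeFormatter INSTANT_FORMAT =
      DateTimeFormatter.ofPattern("yyyyMMddHHmmssSSS");

  static String newInstantTime() {
    return LocalDateTime.now().format(INSTANT_FORMAT);
  }

  public static void main(String[] args) {
    System.out.println(newInstantTime());            // e.g. 20231204194919610
    System.out.println(System.currentTimeMillis());  // epoch millis, not a valid instant time
  }
}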
- // assertTrue(numRollbacks >= minArchiveCommitsDataset / 2, "Rollbacks of non archived instants should work"); } /** @@ -1178,7 +1171,7 @@ public void testMetadataBootstrapLargeCommitList(HoodieTableType tableType, bool doCompaction(testTable, instantTime5, nonPartitionedDataset); } // added 60s to commitTime6 to make sure it is greater than compaction instant triggered by previous commit - String commitTime6 = metaClient.createNewInstantTime() + + 60000L; + String commitTime6 = HoodieInstantTimeGenerator.instantTimePlusMillis(InProcessTimeGenerator.createNewInstantTime(), 60000L); doWriteOperation(testTable, commitTime6, UPSERT, nonPartitionedDataset); String instantTime7 = metaClient.createNewInstantTime(); doRollback(testTable, commitTime6, instantTime7); @@ -2301,22 +2294,21 @@ public void testErrorCases() throws Exception { @Test public void testMetadataTableWithLongLog() throws Exception { init(COPY_ON_WRITE, false); - final int maxNumDeltacommits = 3; + final int maxNumDeltaCommits = 3; writeConfig = getWriteConfigBuilder(true, true, false) .withMetadataConfig(HoodieMetadataConfig.newBuilder() .enable(true) .enableMetrics(false) - .withMaxNumDeltaCommitsBeforeCompaction(maxNumDeltacommits + 100) - .withMaxNumDeltacommitsWhenPending(maxNumDeltacommits) + .withMaxNumDeltaCommitsBeforeCompaction(maxNumDeltaCommits + 100) + .withMaxNumDeltacommitsWhenPending(maxNumDeltaCommits) .build()).build(); initWriteConfigAndMetatableWriter(writeConfig, true); testTable.addRequestedCommit(String.format("%016d", 0)); - for (int i = 1; i <= maxNumDeltacommits; i++) { + for (int i = 1; i <= maxNumDeltaCommits; i++) { doWriteOperation(testTable, String.format("%016d", i)); } - int instant = maxNumDeltacommits + 1; - Throwable t = assertThrows(HoodieMetadataException.class, () -> doWriteOperation(testTable, String.format("%016d", instant))); - assertTrue(t.getMessage().startsWith(String.format("Metadata table's deltacommits exceeded %d: ", maxNumDeltacommits))); + int instant = maxNumDeltaCommits + 1; + assertDoesNotThrow(() -> doWriteOperation(testTable, String.format("%016d", instant))); } @Test diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/functional/TestHoodieJavaClientOnCopyOnWriteStorage.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/functional/TestHoodieJavaClientOnCopyOnWriteStorage.java index a591134517fc..9749eb55e1ea 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/functional/TestHoodieJavaClientOnCopyOnWriteStorage.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/functional/TestHoodieJavaClientOnCopyOnWriteStorage.java @@ -50,6 +50,7 @@ import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.model.IOType; import org.apache.hudi.common.model.WriteConcurrencyMode; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.marker.MarkerType; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; @@ -1526,7 +1527,7 @@ private Pair> testConsistencyCheck(HoodieTableMetaClient Option markerFilePath = WriteMarkersFactory.get( cfg.getMarkersType(), getHoodieTable(metaClient, cfg), instantTime) .create(partitionPath, - FSUtils.makeBaseFileName(instantTime, "1-0-1", UUID.randomUUID().toString()), + FSUtils.makeBaseFileName(instantTime, "1-0-1", UUID.randomUUID().toString(), HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension()), 
IOType.MERGE); if (!enableOptimisticConsistencyGuard) { Exception e = assertThrows(HoodieCommitException.class, () -> { diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestJavaCopyOnWriteActionExecutor.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestJavaCopyOnWriteActionExecutor.java index bda362931c7d..87e690bfca0c 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestJavaCopyOnWriteActionExecutor.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestJavaCopyOnWriteActionExecutor.java @@ -28,6 +28,7 @@ import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestUtils; @@ -105,7 +106,7 @@ public void testMakeNewPath() { }).collect(Collectors.toList()).get(0); assertEquals(newPathWithWriteToken.getKey().toString(), Paths.get(this.basePath, partitionPath, - FSUtils.makeBaseFileName(instantTime, newPathWithWriteToken.getRight(), fileName)).toString()); + FSUtils.makeBaseFileName(instantTime, newPathWithWriteToken.getRight(), fileName, HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension())).toString()); } private HoodieWriteConfig makeHoodieClientConfig() { diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/TestHoodieMetadataBase.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/TestHoodieMetadataBase.java index 336309deb235..8de67be6bf7d 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/TestHoodieMetadataBase.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/TestHoodieMetadataBase.java @@ -107,7 +107,7 @@ public void init(HoodieTableType tableType, Option writeConfi initWriteConfigAndMetatableWriter(this.writeConfig, enableMetadataTable); } - protected void initWriteConfigAndMetatableWriter(HoodieWriteConfig writeConfig, boolean enableMetadataTable) throws IOException { + protected void initWriteConfigAndMetatableWriter(HoodieWriteConfig writeConfig, boolean enableMetadataTable) { this.writeConfig = writeConfig; if (enableMetadataTable) { metadataWriter = JavaHoodieBackedTableMetadataWriter.create(hadoopConf, writeConfig, context, Option.empty()); diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml index 34b5ab6e06e6..4e7361d651a8 100644 --- a/hudi-client/hudi-spark-client/pom.xml +++ b/hudi-client/hudi-spark-client/pom.xml @@ -259,9 +259,6 @@ src/main/resources - - src/test/resources - diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java index 6fdfee16bbe0..0308649dbf61 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java @@ -265,6 +265,14 @@ public HoodieWriteResult deletePartitions(List partitions, String instan return new HoodieWriteResult(postWrite(resultRDD, instantTime, table), result.getPartitionToReplaceFileIds()); } + public HoodieWriteResult 
managePartitionTTL(String instantTime) { + HoodieTable>, HoodieData, HoodieData> table = initTable(WriteOperationType.DELETE_PARTITION, Option.ofNullable(instantTime)); + preWrite(instantTime, WriteOperationType.DELETE_PARTITION, table.getMetaClient()); + HoodieWriteMetadata> result = table.managePartitionTTL(context, instantTime); + HoodieWriteMetadata> resultRDD = result.clone(HoodieJavaRDD.getJavaRDD(result.getWriteStatuses())); + return new HoodieWriteResult(postWrite(resultRDD, instantTime, table), result.getPartitionToReplaceFileIds()); + } + @Override protected void initMetadataTable(Option instantTime) { // Initialize Metadata Table to make sure it's bootstrapped _before_ the operation, diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/SparkHoodieHBaseIndex.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/SparkHoodieHBaseIndex.java index 097e3decc2fb..acbdbd2413c0 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/SparkHoodieHBaseIndex.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/SparkHoodieHBaseIndex.java @@ -551,7 +551,7 @@ public int getBatchSize(int numRegionServersForTable, int maxQpsPerRegionServer, int maxReqPerSec = getMaxReqPerSec(numRSAlive, maxQpsPerRegionServer, qpsFraction); int numTasks = numTasksDuringPut; int maxParallelPutsTask = Math.max(1, Math.min(numTasks, maxExecutors)); - int multiPutBatchSizePerSecPerTask = Math.max(1, (int) Math.ceil(maxReqPerSec / maxParallelPutsTask)); + int multiPutBatchSizePerSecPerTask = Math.max(1, (int) Math.ceil((double) maxReqPerSec / maxParallelPutsTask)); LOG.info("HbaseIndexThrottling: qpsFraction :" + qpsFraction); LOG.info("HbaseIndexThrottling: numRSAlive :" + numRSAlive); LOG.info("HbaseIndexThrottling: maxReqPerSec :" + maxReqPerSec); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileReaderFactory.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileReaderFactory.java index 83fe4e88737b..d06b69139059 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileReaderFactory.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileReaderFactory.java @@ -18,6 +18,7 @@ package org.apache.hudi.io.storage; +import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieIOException; @@ -30,6 +31,7 @@ public class HoodieSparkFileReaderFactory extends HoodieFileReaderFactory { + @Override public HoodieFileReader newParquetFileReader(Configuration conf, Path path) { conf.setIfUnset(SQLConf.PARQUET_BINARY_AS_STRING().key(), SQLConf.PARQUET_BINARY_AS_STRING().defaultValueString()); conf.setIfUnset(SQLConf.PARQUET_INT96_AS_TIMESTAMP().key(), SQLConf.PARQUET_INT96_AS_TIMESTAMP().defaultValueString()); @@ -42,12 +44,15 @@ public HoodieFileReader newParquetFileReader(Configuration conf, Path path) { return new HoodieSparkParquetReader(conf, path); } - protected HoodieFileReader newHFileFileReader(Configuration conf, + @Override + protected HoodieFileReader newHFileFileReader(HoodieConfig hoodieConfig, + Configuration conf, Path path, Option schemaOption) throws IOException { throw new HoodieIOException("Not support read HFile"); } + @Override protected HoodieFileReader newOrcFileReader(Configuration conf, Path path) { throw new HoodieIOException("Not support read orc 
file"); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileWriterFactory.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileWriterFactory.java index 5feefa3bee2b..7091c2b240f8 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileWriterFactory.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileWriterFactory.java @@ -34,6 +34,7 @@ import org.apache.hadoop.fs.Path; import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.spark.sql.HoodieInternalRowUtils; +import org.apache.spark.sql.types.StructType; import java.io.IOException; @@ -44,15 +45,13 @@ protected HoodieFileWriter newParquetFileWriter( String instantTime, Path path, Configuration conf, HoodieConfig config, Schema schema, TaskContextSupplier taskContextSupplier) throws IOException { boolean populateMetaFields = config.getBooleanOrDefault(HoodieTableConfig.POPULATE_META_FIELDS); - Option filter = enableBloomFilter(populateMetaFields, config) ? Option.of(createBloomFilter(config)) : Option.empty(); String compressionCodecName = config.getStringOrDefault(HoodieStorageConfig.PARQUET_COMPRESSION_CODEC_NAME); // Support PARQUET_COMPRESSION_CODEC_NAME is "" if (compressionCodecName.isEmpty()) { compressionCodecName = null; } - HoodieRowParquetWriteSupport writeSupport = new HoodieRowParquetWriteSupport(conf, - HoodieInternalRowUtils.getCachedSchema(schema), filter, - HoodieStorageConfig.newBuilder().fromProperties(config.getProps()).build()); + HoodieRowParquetWriteSupport writeSupport = getHoodieRowParquetWriteSupport(conf, schema, + config, enableBloomFilter(populateMetaFields, config)); HoodieRowParquetConfig parquetConfig = new HoodieRowParquetConfig(writeSupport, CompressionCodecName.fromConf(compressionCodecName), config.getIntOrDefault(HoodieStorageConfig.PARQUET_BLOCK_SIZE), @@ -69,10 +68,7 @@ protected HoodieFileWriter newParquetFileWriter( protected HoodieFileWriter newParquetFileWriter( FSDataOutputStream outputStream, Configuration conf, HoodieConfig config, Schema schema) throws IOException { boolean enableBloomFilter = false; - Option filter = enableBloomFilter ? Option.of(createBloomFilter(config)) : Option.empty(); - HoodieRowParquetWriteSupport writeSupport = new HoodieRowParquetWriteSupport(conf, - HoodieInternalRowUtils.getCachedSchema(schema), filter, - HoodieStorageConfig.newBuilder().fromProperties(config.getProps()).build()); + HoodieRowParquetWriteSupport writeSupport = getHoodieRowParquetWriteSupport(conf, schema, config, enableBloomFilter); String compressionCodecName = config.getStringOrDefault(HoodieStorageConfig.PARQUET_COMPRESSION_CODEC_NAME); // Support PARQUET_COMPRESSION_CODEC_NAME is "" if (compressionCodecName.isEmpty()) { @@ -100,4 +96,11 @@ protected HoodieFileWriter newOrcFileWriter(String instantTime, Path path, Confi TaskContextSupplier taskContextSupplier) throws IOException { throw new HoodieIOException("Not support write to Orc file"); } + + private static HoodieRowParquetWriteSupport getHoodieRowParquetWriteSupport(Configuration conf, Schema schema, + HoodieConfig config, boolean enableBloomFilter) { + Option filter = enableBloomFilter ? 
Option.of(createBloomFilter(config)) : Option.empty(); + StructType structType = HoodieInternalRowUtils.getCachedSchema(schema); + return HoodieRowParquetWriteSupport.getHoodieRowParquetWriteSupport(conf, structType, filter, config); + } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowFileWriterFactory.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowFileWriterFactory.java index 8a61c7c44d90..ad362d170142 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowFileWriterFactory.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowFileWriterFactory.java @@ -66,8 +66,8 @@ private static HoodieInternalRowFileWriter newParquetInternalRowFileWriter(Path Option bloomFilterOpt ) throws IOException { - HoodieRowParquetWriteSupport writeSupport = - new HoodieRowParquetWriteSupport(table.getHadoopConf(), structType, bloomFilterOpt, writeConfig.getStorageConfig()); + HoodieRowParquetWriteSupport writeSupport = HoodieRowParquetWriteSupport + .getHoodieRowParquetWriteSupport(table.getHadoopConf(), structType, bloomFilterOpt, writeConfig); return new HoodieInternalRowParquetWriter( path, diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java index 3a1b6d000bec..99102c309223 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java @@ -21,8 +21,11 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hudi.avro.HoodieBloomFilterWriteSupport; import org.apache.hudi.common.bloom.BloomFilter; +import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ReflectionUtils; + import org.apache.parquet.hadoop.api.WriteSupport; import org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport; import org.apache.spark.sql.types.StructType; @@ -41,11 +44,11 @@ public class HoodieRowParquetWriteSupport extends ParquetWriteSupport { private final Configuration hadoopConf; private final Option> bloomFilterWriteSupportOpt; - public HoodieRowParquetWriteSupport(Configuration conf, StructType structType, Option bloomFilterOpt, HoodieStorageConfig config) { + public HoodieRowParquetWriteSupport(Configuration conf, StructType structType, Option bloomFilterOpt, HoodieConfig config) { Configuration hadoopConf = new Configuration(conf); - hadoopConf.set("spark.sql.parquet.writeLegacyFormat", config.getString(HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED)); - hadoopConf.set("spark.sql.parquet.outputTimestampType", config.getString(HoodieStorageConfig.PARQUET_OUTPUT_TIMESTAMP_TYPE)); - hadoopConf.set("spark.sql.parquet.fieldId.write.enabled", config.getString(PARQUET_FIELD_ID_WRITE_ENABLED)); + hadoopConf.set("spark.sql.parquet.writeLegacyFormat", config.getStringOrDefault(HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED)); + hadoopConf.set("spark.sql.parquet.outputTimestampType", config.getStringOrDefault(HoodieStorageConfig.PARQUET_OUTPUT_TIMESTAMP_TYPE)); + 
hadoopConf.set("spark.sql.parquet.fieldId.write.enabled", config.getStringOrDefault(PARQUET_FIELD_ID_WRITE_ENABLED)); setSchema(structType, hadoopConf); this.hadoopConf = hadoopConf; @@ -89,4 +92,12 @@ protected UTF8String dereference(UTF8String key) { } } + public static HoodieRowParquetWriteSupport getHoodieRowParquetWriteSupport(Configuration conf, StructType structType, + Option bloomFilterOpt, HoodieConfig config) { + return (HoodieRowParquetWriteSupport) ReflectionUtils.loadClass( + config.getStringOrDefault(HoodieStorageConfig.HOODIE_PARQUET_SPARK_ROW_WRITE_SUPPORT_CLASS), + new Class[] {Configuration.class, StructType.class, Option.class, HoodieConfig.class}, + conf, structType, bloomFilterOpt, config); + } + } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/factory/HoodieSparkKeyGeneratorFactory.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/factory/HoodieSparkKeyGeneratorFactory.java index 34d22000fb2b..1ea5adcd6b49 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/factory/HoodieSparkKeyGeneratorFactory.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/factory/HoodieSparkKeyGeneratorFactory.java @@ -79,7 +79,7 @@ public class HoodieSparkKeyGeneratorFactory { public static KeyGenerator createKeyGenerator(TypedProperties props) throws IOException { String keyGeneratorClass = getKeyGeneratorClassName(props); - boolean autoRecordKeyGen = KeyGenUtils.enableAutoGenerateRecordKeys(props) + boolean autoRecordKeyGen = KeyGenUtils.isAutoGeneratedRecordKeysEnabled(props) //Need to prevent overwriting the keygen for spark sql merge into because we need to extract //the recordkey from the meta cols if it exists. Sql keygen will use pkless keygen if needed. 
&& !props.getBoolean(SPARK_SQL_MERGE_INTO_PREPPED_KEY, false); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java index e9d21350c212..0a533e659125 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java @@ -30,7 +30,6 @@ import org.apache.hudi.avro.model.HoodieRollbackPlan; import org.apache.hudi.avro.model.HoodieSavepointMetadata; import org.apache.hudi.client.WriteStatus; -import org.apache.hudi.client.utils.SparkPartitionUtils; import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.data.HoodieData; @@ -47,7 +46,6 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.exception.HoodieNotSupportedException; -import org.apache.hudi.exception.HoodieUpsertException; import org.apache.hudi.io.HoodieCreateHandle; import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.io.HoodieMergeHandleFactory; @@ -61,7 +59,6 @@ import org.apache.hudi.table.action.clean.CleanPlanActionExecutor; import org.apache.hudi.table.action.cluster.ClusteringPlanActionExecutor; import org.apache.hudi.table.action.cluster.SparkExecuteClusteringCommitActionExecutor; -import org.apache.hudi.table.action.commit.HoodieMergeHelper; import org.apache.hudi.table.action.commit.SparkBulkInsertCommitActionExecutor; import org.apache.hudi.table.action.commit.SparkBulkInsertPreppedCommitActionExecutor; import org.apache.hudi.table.action.commit.SparkDeleteCommitActionExecutor; @@ -71,6 +68,7 @@ import org.apache.hudi.table.action.commit.SparkInsertOverwriteCommitActionExecutor; import org.apache.hudi.table.action.commit.SparkInsertOverwriteTableCommitActionExecutor; import org.apache.hudi.table.action.commit.SparkInsertPreppedCommitActionExecutor; +import org.apache.hudi.table.action.commit.SparkPartitionTTLActionExecutor; import org.apache.hudi.table.action.commit.SparkUpsertCommitActionExecutor; import org.apache.hudi.table.action.commit.SparkUpsertPreppedCommitActionExecutor; import org.apache.hudi.table.action.index.RunIndexActionExecutor; @@ -226,6 +224,16 @@ public Option scheduleRollback(HoodieEngineContext context, shouldRollbackUsingMarkers, isRestore).execute(); } + /** + * Delete expired partition by config + * @param context HoodieEngineContext + * @param instantTime Instant Time for the action + * @return HoodieWriteMetadata + */ + public HoodieWriteMetadata> managePartitionTTL(HoodieEngineContext context, String instantTime) { + return new SparkPartitionTTLActionExecutor<>(context, config, this, instantTime).execute(); + } + @Override public Iterator> handleUpdate( String instantTime, String partitionPath, String fileId, @@ -237,28 +245,8 @@ public Iterator> handleUpdate( protected Iterator> handleUpdateInternal(HoodieMergeHandle upsertHandle, String instantTime, String fileId) throws IOException { - if (upsertHandle.getOldFilePath() == null) { - throw new HoodieUpsertException( - "Error in finding the old file path at commit " + instantTime + " for fileId: " + fileId); - } else { - if (upsertHandle.baseFileForMerge().getBootstrapBaseFile().isPresent()) { - Option partitionFields = 
getMetaClient().getTableConfig().getPartitionFields(); - Object[] partitionValues = SparkPartitionUtils.getPartitionFieldVals(partitionFields, upsertHandle.getPartitionPath(), - getMetaClient().getTableConfig().getBootstrapBasePath().get(), - upsertHandle.getWriterSchema(), getHadoopConf()); - upsertHandle.setPartitionFields(partitionFields); - upsertHandle.setPartitionValues(partitionValues); - } - HoodieMergeHelper.newInstance().runMerge(this, upsertHandle); - } - - // TODO(vc): This needs to be revisited - if (upsertHandle.getPartitionPath() == null) { - LOG.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() + ", " - + upsertHandle.writeStatuses()); - } - - return Collections.singletonList(upsertHandle.writeStatuses()).iterator(); + runMerge(upsertHandle, instantTime, fileId); + return upsertHandle.getWriteStatusesAsIterator(); } protected HoodieMergeHandle getUpdateHandle(String instantTime, String partitionPath, String fileId, @@ -299,8 +287,8 @@ public HoodieRollbackMetadata rollback(HoodieEngineContext context, String rollb } @Override - public Option scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List partitionsToIndex) { - return new ScheduleIndexActionExecutor<>(context, config, this, indexInstantTime, partitionsToIndex).execute(); + public Option scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List partitionsToIndex, List partitionPaths) { + return new ScheduleIndexActionExecutor<>(context, config, this, indexInstantTime, partitionsToIndex, partitionPaths).execute(); } @Override @@ -322,4 +310,5 @@ public HoodieRestoreMetadata restore(HoodieEngineContext context, String restore public Option scheduleRestore(HoodieEngineContext context, String restoreInstantTimestamp, String savepointToRestoreTimestamp) { return new RestorePlanActionExecutor<>(context, config, this, restoreInstantTimestamp, savepointToRestoreTimestamp).execute(); } + } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java index 08d8a88ae1bf..fc3aba63740e 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java @@ -19,6 +19,7 @@ package org.apache.hudi.table; import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.client.utils.SparkPartitionUtils; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; @@ -30,12 +31,15 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieMetadataException; +import org.apache.hudi.exception.HoodieUpsertException; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.index.SparkHoodieIndexFactory; +import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; import org.apache.hadoop.fs.Path; +import org.apache.hudi.table.action.commit.HoodieMergeHelper; import org.apache.spark.TaskContext; import org.apache.spark.TaskContext$; @@ -125,4 +129,22 @@ public Runnable getPreExecuteRunnable() { final TaskContext taskContext = TaskContext.get(); return () 
-> TaskContext$.MODULE$.setTaskContext(taskContext); } + + @Override + public void runMerge(HoodieMergeHandle upsertHandle, String instantTime, String fileId) throws IOException { + if (upsertHandle.getOldFilePath() == null) { + throw new HoodieUpsertException("Error in finding the old file path at commit " + instantTime + " for fileId: " + fileId); + } else { + if (upsertHandle.baseFileForMerge().getBootstrapBaseFile().isPresent()) { + Option partitionFields = getMetaClient().getTableConfig().getPartitionFields(); + Object[] partitionValues = SparkPartitionUtils.getPartitionFieldVals(partitionFields, upsertHandle.getPartitionPath(), + getMetaClient().getTableConfig().getBootstrapBasePath().get(), + upsertHandle.getWriterSchema(), getHadoopConf()); + upsertHandle.setPartitionFields(partitionFields); + upsertHandle.setPartitionValues(partitionValues); + } + HoodieMergeHelper.newInstance().runMerge(this, upsertHandle); + } + } + } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/BaseBootstrapMetadataHandler.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/BaseBootstrapMetadataHandler.java index 4d6d07c9e498..ffda89d5b7fd 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/BaseBootstrapMetadataHandler.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/BaseBootstrapMetadataHandler.java @@ -70,7 +70,7 @@ public BootstrapWriteStatus runMetadataBootstrap(String srcPartitionPath, String throw new HoodieException(e.getMessage(), e); } - BootstrapWriteStatus writeStatus = (BootstrapWriteStatus) bootstrapHandle.writeStatuses().get(0); + BootstrapWriteStatus writeStatus = (BootstrapWriteStatus) bootstrapHandle.getWriteStatuses().get(0); BootstrapFileMapping bootstrapFileMapping = new BootstrapFileMapping( config.getBootstrapSourceBasePath(), srcPartitionPath, partitionPath, srcFileStatus, writeStatus.getFileId()); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java index 5e379d3e9561..0fcb2359cdfc 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java @@ -20,7 +20,6 @@ import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.clustering.update.strategy.SparkAllowUpdateStrategy; -import org.apache.hudi.client.utils.SparkPartitionUtils; import org.apache.hudi.client.utils.SparkValidatorUtils; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.data.HoodieData.HoodieDataCacheKey; @@ -349,29 +348,8 @@ public Iterator> handleUpdate(String partitionPath, String fil protected Iterator> handleUpdateInternal(HoodieMergeHandle upsertHandle, String fileId) throws IOException { - if (upsertHandle.getOldFilePath() == null) { - throw new HoodieUpsertException( - "Error in finding the old file path at commit " + instantTime + " for fileId: " + fileId); - } else { - if (upsertHandle.baseFileForMerge().getBootstrapBaseFile().isPresent()) { - Option partitionFields = table.getMetaClient().getTableConfig().getPartitionFields(); - Object[] partitionValues = SparkPartitionUtils.getPartitionFieldVals(partitionFields, 
upsertHandle.getPartitionPath(), - table.getMetaClient().getTableConfig().getBootstrapBasePath().get(), - upsertHandle.getWriterSchema(), table.getHadoopConf()); - upsertHandle.setPartitionFields(partitionFields); - upsertHandle.setPartitionValues(partitionValues); - } - - HoodieMergeHelper.newInstance().runMerge(table, upsertHandle); - } - - // TODO(vc): This needs to be revisited - if (upsertHandle.getPartitionPath() == null) { - LOG.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() + ", " - + upsertHandle.writeStatuses()); - } - - return Collections.singletonList(upsertHandle.writeStatuses()).iterator(); + table.runMerge(upsertHandle, instantTime, fileId); + return upsertHandle.getWriteStatusesAsIterator(); } protected HoodieMergeHandle getUpdateHandle(String partitionPath, String fileId, Iterator> recordItr) { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkPartitionTTLActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkPartitionTTLActionExecutor.java new file mode 100644 index 000000000000..166fc3672fa0 --- /dev/null +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkPartitionTTLActionExecutor.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.table.action.commit; + +import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.data.HoodieData; +import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.model.WriteOperationType; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.table.HoodieTable; +import org.apache.hudi.table.action.HoodieWriteMetadata; +import org.apache.hudi.table.action.ttl.strategy.HoodiePartitionTTLStrategyFactory; +import org.apache.hudi.table.action.ttl.strategy.PartitionTTLStrategy; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.List; + +public class SparkPartitionTTLActionExecutor + extends BaseSparkCommitActionExecutor { + + private static final Logger LOG = LoggerFactory.getLogger(ConsistentBucketBulkInsertDataInternalWriterHelper.class); + + public SparkPartitionTTLActionExecutor(HoodieEngineContext context, HoodieWriteConfig config, HoodieTable table, + String instantTime) { + super(context, config, table, instantTime, WriteOperationType.DELETE_PARTITION); + } + + @Override + public HoodieWriteMetadata> execute() { + try { + PartitionTTLStrategy strategy = HoodiePartitionTTLStrategyFactory.createStrategy(table, config.getProps(), instantTime); + List expiredPartitions = strategy.getExpiredPartitionPaths(); + LOG.info("Partition ttl find the following expired partitions to delete: " + String.join(",", expiredPartitions)); + return new SparkDeletePartitionCommitActionExecutor<>(context, config, table, instantTime, expiredPartitions).execute(); + } catch (IOException e) { + throw new HoodieIOException("Error executing hoodie partition ttl: ", e); + } + } +} diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java index edd6d981d185..2b78df96765e 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java @@ -97,7 +97,8 @@ public UpsertPartitioner(WorkloadProfile profile, HoodieEngineContext context, H assignUpdates(profile); assignInserts(profile, context); - LOG.info("Total Buckets: " + totalBuckets); + LOG.info("Total Buckets: {}, bucketInfoMap size: {}, partitionPathToInsertBucketInfos size: {}, updateLocationToBucket size: {}", + totalBuckets, bucketInfoMap.size(), partitionPathToInsertBucketInfos.size(), updateLocationToBucket.size()); if (LOG.isDebugEnabled()) { LOG.debug("Buckets info => " + bucketInfoMap + ", \n" + "Partition to insert buckets => " + partitionPathToInsertBucketInfos + ", \n" @@ -189,6 +190,7 @@ private void assignInserts(WorkloadProfile profile, HoodieEngineContext context) this.smallFiles.addAll(smallFiles); + LOG.info("For partitionPath : " + partitionPath + " Total Small Files => " + smallFiles.size()); LOG.debug("For partitionPath : " + partitionPath + " Small Files => " + smallFiles); long totalUnassignedInserts = pStat.getNumInserts(); @@ -230,7 +232,7 @@ private void assignInserts(WorkloadProfile profile, HoodieEngineContext context) } int insertBuckets = (int) Math.ceil((1.0 * totalUnassignedInserts) / insertRecordsPerBucket); - LOG.debug("After small file assignment: unassignedInserts => " + totalUnassignedInserts + LOG.info("After 
small file assignment: unassignedInserts => " + totalUnassignedInserts + ", totalInsertBuckets => " + insertBuckets + ", recordsPerBucket => " + insertRecordsPerBucket); for (int b = 0; b < insertBuckets; b++) { bucketNumbers.add(totalBuckets); @@ -258,7 +260,7 @@ private void assignInserts(WorkloadProfile profile, HoodieEngineContext context) currentCumulativeWeight += bkt.weight; insertBuckets.add(new InsertBucketCumulativeWeightPair(bkt, currentCumulativeWeight)); } - LOG.debug("Total insert buckets for partition path " + partitionPath + " => " + insertBuckets); + LOG.info("Total insert buckets for partition path " + partitionPath + " => " + insertBuckets); partitionPathToInsertBucketInfos.put(partitionPath, insertBuckets); } if (profile.hasOutputWorkLoadStats()) { diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala index 55877938f8cb..95962d1ca443 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala @@ -23,6 +23,7 @@ import org.apache.avro.generic.GenericRecord import org.apache.avro.{JsonProperties, Schema} import org.apache.hudi.HoodieSparkUtils.sparkAdapter import org.apache.hudi.avro.AvroSchemaUtils +import org.apache.hudi.exception.SchemaCompatibilityException import org.apache.hudi.internal.schema.HoodieSchemaException import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow @@ -58,9 +59,16 @@ object AvroConversionUtils { */ def createInternalRowToAvroConverter(rootCatalystType: StructType, rootAvroType: Schema, nullable: Boolean): InternalRow => GenericRecord = { val serializer = sparkAdapter.createAvroSerializer(rootCatalystType, rootAvroType, nullable) - row => serializer - .serialize(row) - .asInstanceOf[GenericRecord] + row => { + try { + serializer + .serialize(row) + .asInstanceOf[GenericRecord] + } catch { + case e: HoodieSchemaException => throw e + case e => throw new SchemaCompatibilityException("Failed to convert spark record into avro record", e) + } + } } /** diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieDatasetBulkInsertHelper.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieDatasetBulkInsertHelper.scala index 0214b0a10302..3c30d825ebf8 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieDatasetBulkInsertHelper.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieDatasetBulkInsertHelper.scala @@ -17,6 +17,7 @@ package org.apache.hudi +import org.apache.hudi.HoodieSparkUtils.injectSQLConf import org.apache.hudi.client.WriteStatus import org.apache.hudi.client.model.HoodieInternalRow import org.apache.hudi.common.config.TypedProperties @@ -25,6 +26,7 @@ import org.apache.hudi.common.engine.TaskContextSupplier import org.apache.hudi.common.model.HoodieRecord import org.apache.hudi.common.util.ReflectionUtils import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.data.HoodieJavaRDD import org.apache.hudi.exception.HoodieException import org.apache.hudi.index.HoodieIndex.BucketIndexEngineType import org.apache.hudi.index.{HoodieIndex, SparkHoodieIndexFactory} @@ -40,11 +42,14 @@ import org.apache.spark.sql.HoodieUnsafeUtils.getNumPartitions import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Alias, 
Literal} import org.apache.spark.sql.catalyst.plans.logical.Project +import org.apache.spark.sql.execution.SQLConfInjectingRDD +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.sql.{DataFrame, Dataset, HoodieUnsafeUtils, Row} import org.apache.spark.unsafe.types.UTF8String import scala.collection.JavaConverters.{asScalaBufferConverter, seqAsJavaListConverter} +import scala.reflect.ClassTag object HoodieDatasetBulkInsertHelper extends ParallelismHelper[DataFrame](toJavaSerializableFunctionUnchecked(df => getNumPartitions(df))) with Logging { @@ -65,7 +70,7 @@ object HoodieDatasetBulkInsertHelper instantTime: String): Dataset[Row] = { val populateMetaFields = config.populateMetaFields() val schema = df.schema - val autoGenerateRecordKeys = KeyGenUtils.enableAutoGenerateRecordKeys(config.getProps) + val autoGenerateRecordKeys = KeyGenUtils.isAutoGeneratedRecordKeysEnabled(config.getProps) val metaFields = Seq( StructField(HoodieRecord.COMMIT_TIME_METADATA_FIELD, StringType), @@ -83,8 +88,8 @@ object HoodieDatasetBulkInsertHelper val keyGeneratorClassName = config.getStringOrThrow(HoodieWriteConfig.KEYGENERATOR_CLASS_NAME, "Key-generator class name is required") - val prependedRdd: RDD[InternalRow] = - df.queryExecution.toRdd.mapPartitions { iter => + val prependedRdd: RDD[InternalRow] = { + injectSQLConf(df.queryExecution.toRdd.mapPartitions { iter => val typedProps = new TypedProperties(config.getProps) if (autoGenerateRecordKeys) { typedProps.setProperty(KeyGenUtils.RECORD_KEY_GEN_PARTITION_ID_CONFIG, String.valueOf(TaskContext.getPartitionId())) @@ -110,7 +115,8 @@ object HoodieDatasetBulkInsertHelper // TODO use mutable row, avoid re-allocating new HoodieInternalRow(commitTimestamp, commitSeqNo, recordKey, partitionPath, filename, row, false) } - } + }, SQLConf.get) + } val dedupedRdd = if (config.shouldCombineBeforeInsert) { dedupeRows(prependedRdd, updatedSchema, config.getPreCombineField, SparkHoodieIndexFactory.isGlobalIndex(config), targetParallelism) @@ -144,53 +150,53 @@ object HoodieDatasetBulkInsertHelper arePartitionRecordsSorted: Boolean, shouldPreserveHoodieMetadata: Boolean): HoodieData[WriteStatus] = { val schema = dataset.schema - val writeStatuses = dataset.queryExecution.toRdd.mapPartitions(iter => { - val taskContextSupplier: TaskContextSupplier = table.getTaskContextSupplier - val taskPartitionId = taskContextSupplier.getPartitionIdSupplier.get - val taskId = taskContextSupplier.getStageIdSupplier.get.toLong - val taskEpochId = taskContextSupplier.getAttemptIdSupplier.get - - val writer = writeConfig.getIndexType match { - case HoodieIndex.IndexType.BUCKET if writeConfig.getBucketIndexEngineType - == BucketIndexEngineType.CONSISTENT_HASHING => - new ConsistentBucketBulkInsertDataInternalWriterHelper( - table, - writeConfig, - instantTime, - taskPartitionId, - taskId, - taskEpochId, - schema, - writeConfig.populateMetaFields, - arePartitionRecordsSorted, - shouldPreserveHoodieMetadata) - case _ => - new BulkInsertDataInternalWriterHelper( - table, - writeConfig, - instantTime, - taskPartitionId, - taskId, - taskEpochId, - schema, - writeConfig.populateMetaFields, - arePartitionRecordsSorted, - shouldPreserveHoodieMetadata) - } + HoodieJavaRDD.of( + injectSQLConf(dataset.queryExecution.toRdd.mapPartitions(iter => { + val taskContextSupplier: TaskContextSupplier = table.getTaskContextSupplier + val taskPartitionId = taskContextSupplier.getPartitionIdSupplier.get + val taskId = 
taskContextSupplier.getStageIdSupplier.get.toLong + val taskEpochId = taskContextSupplier.getAttemptIdSupplier.get - try { - iter.foreach(writer.write) - } catch { - case t: Throwable => - writer.abort() - throw t - } finally { - writer.close() - } + val writer = writeConfig.getIndexType match { + case HoodieIndex.IndexType.BUCKET if writeConfig.getBucketIndexEngineType + == BucketIndexEngineType.CONSISTENT_HASHING => + new ConsistentBucketBulkInsertDataInternalWriterHelper( + table, + writeConfig, + instantTime, + taskPartitionId, + taskId, + taskEpochId, + schema, + writeConfig.populateMetaFields, + arePartitionRecordsSorted, + shouldPreserveHoodieMetadata) + case _ => + new BulkInsertDataInternalWriterHelper( + table, + writeConfig, + instantTime, + taskPartitionId, + taskId, + taskEpochId, + schema, + writeConfig.populateMetaFields, + arePartitionRecordsSorted, + shouldPreserveHoodieMetadata) + } + + try { + iter.foreach(writer.write) + } catch { + case t: Throwable => + writer.abort() + throw t + } finally { + writer.close() + } - writer.getWriteStatuses.asScala.iterator - }).collect() - table.getContext.parallelize(writeStatuses.toList.asJava) + writer.getWriteStatuses.asScala.iterator + }), SQLConf.get).toJavaRDD()) } private def dedupeRows(rdd: RDD[InternalRow], schema: StructType, preCombineFieldRef: String, isGlobalIndex: Boolean, targetParallelism: Int): RDD[InternalRow] = { diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala index 975135c13d58..3393da6bd83c 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala @@ -18,25 +18,25 @@ package org.apache.hudi +import org.apache.avro.Schema +import org.apache.avro.generic.GenericRecord +import org.apache.hadoop.fs.Path import org.apache.hudi.HoodieConversionUtils.toScalaOption import org.apache.hudi.avro.{AvroSchemaUtils, HoodieAvroUtils} import org.apache.hudi.client.utils.SparkRowSerDe import org.apache.hudi.common.model.HoodieRecord import org.apache.hudi.hadoop.fs.CachingPath - -import org.apache.avro.Schema -import org.apache.avro.generic.GenericRecord -import org.apache.hadoop.fs.Path +import org.apache.hudi.util.ExceptionWrappingIterator import org.apache.spark.SPARK_VERSION import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD -import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.DateTimeUtils.getTimeZone import org.apache.spark.sql.execution.SQLConfInjectingRDD import org.apache.spark.sql.execution.datasources.SparkParsePartitionUtil import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{StringType, StructField, StructType} +import org.apache.spark.sql.{DataFrame, HoodieUnsafeUtils} import org.apache.spark.unsafe.types.UTF8String import scala.collection.JavaConverters._ @@ -128,9 +128,19 @@ object HoodieSparkUtils extends SparkAdapterSupport with SparkVersionsSupport wi }, SQLConf.get) } - private def injectSQLConf[T: ClassTag](rdd: RDD[T], conf: SQLConf): RDD[T] = + def injectSQLConf[T: ClassTag](rdd: RDD[T], conf: SQLConf): RDD[T] = new SQLConfInjectingRDD(rdd, conf) + def maybeWrapDataFrameWithException(df: DataFrame, exceptionClass: String, msg: String, shouldWrap: Boolean): DataFrame = { + if (shouldWrap) { + 
HoodieUnsafeUtils.createDataFrameFromRDD(df.sparkSession, injectSQLConf(df.queryExecution.toRdd.mapPartitions { + rows => new ExceptionWrappingIterator[InternalRow](rows, exceptionClass, msg) + }, SQLConf.get), df.schema) + } else { + df + } + } + def safeCreateRDD(df: DataFrame, structName: String, recordNamespace: String, reconcileToLatestSchema: Boolean, latestTableSchema: org.apache.hudi.common.util.Option[Schema] = org.apache.hudi.common.util.Option.empty()): Tuple2[RDD[GenericRecord], RDD[String]] = { @@ -213,7 +223,7 @@ object HoodieSparkUtils extends SparkAdapterSupport with SparkVersionsSupport wi val transform: GenericRecord => Either[GenericRecord, String] = record => try { Left(HoodieAvroUtils.rewriteRecordDeep(record, schema, true)) } catch { - case _: Throwable => Right(HoodieAvroUtils.avroToJsonString(record, false)) + case _: Throwable => Right(HoodieAvroUtils.safeAvroToJsonString(record)) } recs.map(transform) } diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/util/ExceptionWrappingIterator.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/util/ExceptionWrappingIterator.scala new file mode 100644 index 000000000000..994e6f0eea2d --- /dev/null +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/util/ExceptionWrappingIterator.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.util + +import org.apache.hudi.common.util.ReflectionUtils + +/** + * Used to catch exceptions from an iterator + * @param in iterator to catch exceptions from + * @param exceptionClass name of exception class to throw when an exception is thrown during iteration + * @param msg message the thrown exception should have + */ +class ExceptionWrappingIterator[T](val in: Iterator[T], val exceptionClass: String, val msg: String) extends Iterator[T] { + override def hasNext: Boolean = try in.hasNext + catch { + case e: Throwable => throw createException(e) + } + + override def next: T = try in.next + catch { + case e: Throwable => throw createException(e) + } + + private def createException(e: Throwable): Throwable = { + ReflectionUtils.loadClass(exceptionClass, Array(classOf[String], classOf[Throwable]).asInstanceOf[Array[Class[_]]], msg, e).asInstanceOf[Throwable] + } +} diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystPlansUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystPlansUtils.scala index b9110f1ed93b..40e62ddd0efc 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystPlansUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystPlansUtils.scala @@ -144,7 +144,7 @@ trait HoodieCatalystPlansUtils { def createMITJoin(left: LogicalPlan, right: LogicalPlan, joinType: JoinType, condition: Option[Expression], hint: String): LogicalPlan /** - * true if both plans produce the same attributes in the the same order + * true if both plans produce the same attributes in the same order */ def produceSameOutput(a: LogicalPlan, b: LogicalPlan): Boolean } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestPartitionTTLManagement.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestPartitionTTLManagement.java new file mode 100644 index 000000000000..46677c9aaa75 --- /dev/null +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestPartitionTTLManagement.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.client; + +import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.config.HoodieStorageConfig; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; +import org.apache.hudi.common.table.view.FileSystemViewStorageType; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.config.HoodieCompactionConfig; +import org.apache.hudi.config.HoodieIndexConfig; +import org.apache.hudi.config.HoodieTTLConfig; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.index.HoodieIndex; +import org.apache.hudi.table.action.ttl.strategy.PartitionTTLStrategyType; +import org.apache.hudi.testutils.HoodieClientTestBase; +import org.apache.hudi.testutils.HoodieMergeOnReadTestUtils; + +import com.github.davidmoten.guavamini.Sets; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.mapred.JobConf; +import org.apache.spark.api.java.JavaRDD; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.getCommitTimeAtUTC; + +/** + * Test Cases for partition ttl management. + */ +public class TestPartitionTTLManagement extends HoodieClientTestBase { + + protected HoodieWriteConfig.Builder getConfigBuilder(Boolean autoCommit) { + return HoodieWriteConfig.newBuilder().withPath(basePath) + .withSchema(TRIP_EXAMPLE_SCHEMA) + .withParallelism(2, 2) + .withAutoCommit(autoCommit) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().build()) + .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024 * 1024) + .withInlineCompaction(false).withMaxNumDeltaCommitsBeforeCompaction(1).build()) + .withStorageConfig(HoodieStorageConfig.newBuilder() + .hfileMaxFileSize(1024 * 1024 * 1024).parquetMaxFileSize(1024 * 1024 * 1024).orcMaxFileSize(1024 * 1024 * 1024).build()) + .forTable("test-trip-table") + .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) + .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() + .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()); + } + + @Test + public void testKeepByCreationTime() { + final HoodieWriteConfig cfg = getConfigBuilder(true) + .withPath(metaClient.getBasePathV2().toString()) + .withTTLConfig(HoodieTTLConfig + .newBuilder() + .withTTLDaysRetain(10) + .withTTLStrategyType(PartitionTTLStrategyType.KEEP_BY_CREATION_TIME) + .build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().build()) + .build(); + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(0xDEED); + try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) { + String partitionPath0 = dataGen.getPartitionPaths()[0]; + String instant0 = getCommitTimeAtUTC(0); + writeRecordsForPartition(client, dataGen, partitionPath0, instant0); + + String instant1 = getCommitTimeAtUTC(1000); + String partitionPath1 = dataGen.getPartitionPaths()[1]; + writeRecordsForPartition(client, dataGen, partitionPath1, instant1); + + String currentInstant = client.createNewInstantTime(); + String partitionPath2 = dataGen.getPartitionPaths()[2]; + 
writeRecordsForPartition(client, dataGen, partitionPath2, currentInstant); + + HoodieWriteResult result = client.managePartitionTTL(client.createNewInstantTime()); + + Assertions.assertEquals(Sets.newHashSet(partitionPath0, partitionPath1), result.getPartitionToReplaceFileIds().keySet()); + Assertions.assertEquals(10, readRecords(new String[] {partitionPath0, partitionPath1, partitionPath2}).size()); + } + } + + @Test + public void testKeepByTime() { + final HoodieWriteConfig cfg = getConfigBuilder(true) + .withPath(metaClient.getBasePathV2().toString()) + .withTTLConfig(HoodieTTLConfig + .newBuilder() + .withTTLDaysRetain(10) + .withTTLStrategyType(PartitionTTLStrategyType.KEEP_BY_TIME) + .build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().build()) + .build(); + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(0xDEED); + try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) { + String partitionPath0 = dataGen.getPartitionPaths()[0]; + String instant0 = getCommitTimeAtUTC(0); + writeRecordsForPartition(client, dataGen, partitionPath0, instant0); + + String instant1 = getCommitTimeAtUTC(1000); + String partitionPath1 = dataGen.getPartitionPaths()[1]; + writeRecordsForPartition(client, dataGen, partitionPath1, instant1); + + String currentInstant = client.createNewInstantTime(); + String partitionPath2 = dataGen.getPartitionPaths()[2]; + writeRecordsForPartition(client, dataGen, partitionPath2, currentInstant); + + HoodieWriteResult result = client.managePartitionTTL(client.createNewInstantTime()); + + Assertions.assertEquals(Sets.newHashSet(partitionPath0, partitionPath1), result.getPartitionToReplaceFileIds().keySet()); + + // remain 10 rows + Assertions.assertEquals(10, readRecords(new String[] {partitionPath0, partitionPath1, partitionPath2}).size()); + } + } + + @Test + public void testInlinePartitionTTL() { + final HoodieWriteConfig cfg = getConfigBuilder(true) + .withPath(metaClient.getBasePathV2().toString()) + .withTTLConfig(HoodieTTLConfig + .newBuilder() + .withTTLDaysRetain(10) + .withTTLStrategyType(PartitionTTLStrategyType.KEEP_BY_TIME) + .enableInlinePartitionTTL(true) + .build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().build()) + .build(); + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(0xDEED); + try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) { + String partitionPath0 = dataGen.getPartitionPaths()[0]; + String instant0 = getCommitTimeAtUTC(0); + writeRecordsForPartition(client, dataGen, partitionPath0, instant0); + + // All records will be deleted + Assertions.assertEquals(0, readRecords(new String[] {partitionPath0}).size()); + + String instant1 = getCommitTimeAtUTC(1000); + String partitionPath1 = dataGen.getPartitionPaths()[1]; + writeRecordsForPartition(client, dataGen, partitionPath1, instant1); + + // All records will be deleted + Assertions.assertEquals(0, readRecords(new String[] {partitionPath1}).size()); + + String currentInstant = client.createNewInstantTime(); + String partitionPath2 = dataGen.getPartitionPaths()[2]; + writeRecordsForPartition(client, dataGen, partitionPath2, currentInstant); + + // remain 10 rows + Assertions.assertEquals(10, readRecords(new String[] {partitionPath2}).size()); + } + } + + private void writeRecordsForPartition(SparkRDDWriteClient client, HoodieTestDataGenerator dataGen, String partition, String instantTime) { + List records = dataGen.generateInsertsForPartition(instantTime, 10, partition); + client.startCommitWithTime(instantTime); + JavaRDD 
writeStatuses = client.insert(jsc.parallelize(records, 1), instantTime); + client.commit(instantTime, writeStatuses); + } + + private List readRecords(String[] partitions) { + return HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf, + Arrays.stream(partitions).map(p -> Paths.get(basePath, p).toString()).collect(Collectors.toList()), + basePath, new JobConf(hadoopConf), true, false); + } + +} diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java index 32264bbf35fa..a5e4e6e2ee96 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java @@ -62,6 +62,7 @@ import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; @@ -119,7 +120,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.util.Time; import org.apache.parquet.avro.AvroSchemaConverter; import org.apache.parquet.schema.MessageType; import org.apache.spark.api.java.JavaRDD; @@ -167,6 +167,7 @@ import static org.apache.hudi.common.model.WriteOperationType.INSERT; import static org.apache.hudi.common.model.WriteOperationType.UPSERT; import static org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME; +import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION; import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_EXTENSION; import static org.apache.hudi.common.table.timeline.HoodieTimeline.DELTA_COMMIT_EXTENSION; import static org.apache.hudi.common.table.timeline.HoodieTimeline.INFLIGHT_EXTENSION; @@ -824,6 +825,43 @@ public void testVirtualKeysInBaseFiles() throws Exception { }); } + /** + * Test MDT compaction with delayed pending instants on DT(induced by multi-writer or async table services). + * + *

A demo: + *

+   *   Time t1
+   *
+   *   Main                 Metadata
+   *   c1.commit    ->   c1.deltacommit
+   *   c2.commit.   ->   c2.deltacommit
+   *   c3.inflight  ->
+   *   c4.commit    ->   c4.deltacommit
+   *
+   *   c5.requested ->   c6.compaction
+   *
+   *
+   *   MDT files:
+   *   F1.c1 (base) -> log(c2) -> log(c4)
+   *   F1.c6 (base)
+   *
+   *
+   *   Time t2 (Now c3 is completed)
+   *
+   *   Main                 Metadata
+   *   c1.commit    ->   c1.deltacommit
+   *   c2.commit.   ->   c2.deltacommit
+   *   c3.commit    ->   c3.deltacommit
+   *   c4.commit    ->   c4.deltacommit
+   *
+   *   c5.requested -> c6.compaction
+   *
+   *
+   *   MDT files:
+   *   F1.c1 (base) -> log(c2) -> log(c4)
+   *   F1.c6 (base) -> log(c3)
+   * 
+ */ @Test public void testMetadataTableCompactionWithPendingInstants() throws Exception { init(COPY_ON_WRITE, false); @@ -853,8 +891,8 @@ public void testMetadataTableCompactionWithPendingInstants() throws Exception { doWriteOperation(testTable, "0000007", INSERT); tableMetadata = metadata(writeConfig, context); - // verify that compaction of metadata table does not kick in. - assertFalse(tableMetadata.getLatestCompactionTime().isPresent()); + // verify that compaction of metadata table should kick in. + assertTrue(tableMetadata.getLatestCompactionTime().isPresent(), "Compaction of metadata table should kick in"); // move inflight to completed testTable.moveInflightCommitToComplete("0000003", inflightCommitMeta); @@ -1098,7 +1136,7 @@ private void revertTableToInflightState(HoodieWriteConfig writeConfig) throws IO // Transition the second init commit for record_index partition to inflight in MDT deleteMetaFile(metaClient.getFs(), mdtBasePath, mdtInitCommit2, DELTA_COMMIT_EXTENSION); metaClient.getTableConfig().setMetadataPartitionState( - metaClient, MetadataPartitionType.RECORD_INDEX, false); + metaClient, MetadataPartitionType.RECORD_INDEX.getPartitionPath(), false); metaClient.getTableConfig().setMetadataPartitionsInflight( metaClient, MetadataPartitionType.RECORD_INDEX); timeline = metaClient.getActiveTimeline().reload(); @@ -1445,7 +1483,6 @@ public void testRollbackOperationsNonPartitioned() throws Exception { */ @Test public void testManualRollbacks() throws Exception { - boolean populateMateFields = false; init(COPY_ON_WRITE, false); // Setting to archive more aggressively on the Metadata Table than the Dataset final int maxDeltaCommitsBeforeCompaction = 4; @@ -1476,23 +1513,17 @@ public void testManualRollbacks() throws Exception { } validateMetadata(testTable); - // We can only rollback those commits whose deltacommit have not been archived yet. - int numRollbacks = 0; - boolean exceptionRaised = false; + // We can only roll back those commits whose deltacommit have not been archived yet. List allInstants = metaClient.reloadActiveTimeline().getCommitsTimeline().getReverseOrderedInstants().collect(Collectors.toList()); for (HoodieInstant instantToRollback : allInstants) { try { - testTable.doRollback(instantToRollback.getTimestamp(), String.valueOf(Time.now())); + testTable.doRollback(instantToRollback.getTimestamp(), metaClient.createNewInstantTime()); validateMetadata(testTable); - ++numRollbacks; } catch (HoodieMetadataException e) { // This is expected since we are rolling back commits that are older than the latest compaction on the MDT break; } } - // Since each rollback also creates a deltacommit, we can only support rolling back of half of the original - // instants present before rollback started. 
- // assertTrue(numRollbacks >= minArchiveCommitsDataset / 2, "Rollbacks of non archived instants should work"); } /** @@ -1559,23 +1590,18 @@ public void testMetadataBootstrapLargeCommitList(HoodieTableType tableType, bool testTable.setNonPartitioned(); } for (int i = 1; i < 25; i += 7) { - long commitTime1 = Long.parseLong(InProcessTimeGenerator.createNewInstantTime()); - long commitTime2 = Long.parseLong(InProcessTimeGenerator.createNewInstantTime()); - long commitTime3 = Long.parseLong(InProcessTimeGenerator.createNewInstantTime()); - long commitTime4 = Long.parseLong(InProcessTimeGenerator.createNewInstantTime()); - doWriteOperation(testTable, Long.toString(commitTime1), INSERT, nonPartitionedDataset); - doWriteOperation(testTable, Long.toString(commitTime2), UPSERT, nonPartitionedDataset); - doClean(testTable, Long.toString(commitTime3), Arrays.asList(Long.toString(commitTime1))); - doWriteOperation(testTable, Long.toString(commitTime4), UPSERT, nonPartitionedDataset); + String commitTime1 = InProcessTimeGenerator.createNewInstantTime(); + doWriteOperation(testTable, commitTime1, INSERT, nonPartitionedDataset); + doWriteOperation(testTable, InProcessTimeGenerator.createNewInstantTime(), UPSERT, nonPartitionedDataset); + doClean(testTable, InProcessTimeGenerator.createNewInstantTime(), Collections.singletonList(commitTime1)); + doWriteOperation(testTable, InProcessTimeGenerator.createNewInstantTime(), UPSERT, nonPartitionedDataset); if (tableType == MERGE_ON_READ) { - long commitTime5 = Long.parseLong(InProcessTimeGenerator.createNewInstantTime()); - doCompaction(testTable, Long.toString(commitTime5), nonPartitionedDataset); + doCompaction(testTable, InProcessTimeGenerator.createNewInstantTime(), nonPartitionedDataset); } // added 60s to commitTime6 to make sure it is greater than compaction instant triggered by previous commit - long commitTime6 = Long.parseLong(InProcessTimeGenerator.createNewInstantTime()) + 60000L; - doWriteOperation(testTable, Long.toString(commitTime6), UPSERT, nonPartitionedDataset); - long commitTime7 = Long.parseLong(InProcessTimeGenerator.createNewInstantTime()); - doRollback(testTable, Long.toString(commitTime6), Long.toString(commitTime7)); + String commitTime6 = HoodieInstantTimeGenerator.instantTimePlusMillis(InProcessTimeGenerator.createNewInstantTime(), 60000L); + doWriteOperation(testTable, commitTime6, UPSERT, nonPartitionedDataset); + doRollback(testTable, commitTime6, InProcessTimeGenerator.createNewInstantTime()); } validateMetadata(testTable, emptyList(), nonPartitionedDataset); } @@ -2883,22 +2909,54 @@ public void testErrorCases() throws Exception { @Test public void testMetadataTableWithLongLog() throws Exception { init(COPY_ON_WRITE, false); - final int maxNumDeltacommits = 3; + final int maxNumDeltaCommits = 3; writeConfig = getWriteConfigBuilder(true, true, false) .withMetadataConfig(HoodieMetadataConfig.newBuilder() .enable(true) .enableMetrics(false) - .withMaxNumDeltaCommitsBeforeCompaction(maxNumDeltacommits + 100) - .withMaxNumDeltacommitsWhenPending(maxNumDeltacommits) + .withMaxNumDeltaCommitsBeforeCompaction(maxNumDeltaCommits + 100) + .withMaxNumDeltacommitsWhenPending(maxNumDeltaCommits) .build()).build(); initWriteConfigAndMetatableWriter(writeConfig, true); testTable.addRequestedCommit(String.format("%016d", 0)); - for (int i = 1; i <= maxNumDeltacommits; i++) { + for (int i = 1; i <= maxNumDeltaCommits; i++) { doWriteOperation(testTable, String.format("%016d", i)); } - int instant = maxNumDeltacommits + 1; - Throwable t = 
assertThrows(HoodieMetadataException.class, () -> doWriteOperation(testTable, String.format("%016d", instant))); - assertTrue(t.getMessage().startsWith(String.format("Metadata table's deltacommits exceeded %d: ", maxNumDeltacommits))); + int instant = maxNumDeltaCommits + 1; + assertDoesNotThrow(() -> doWriteOperation(testTable, String.format("%016d", instant))); + } + + @Test + public void testMORCheckNumDeltaCommits() throws Exception { + init(MERGE_ON_READ, true); + final int maxNumDeltaCommits = 3; + writeConfig = getWriteConfigBuilder(true, true, false) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .enable(true) + .enableMetrics(false) + .withMaxNumDeltaCommitsBeforeCompaction(maxNumDeltaCommits - 1) + .withMaxNumDeltacommitsWhenPending(maxNumDeltaCommits) + .build()) + .build(); + initWriteConfigAndMetatableWriter(writeConfig, true); + // write deltacommits to data-table and do compaction in metadata-table (with commit-instant) + doWriteOperation(testTable, InProcessTimeGenerator.createNewInstantTime(1)); + doWriteOperation(testTable, InProcessTimeGenerator.createNewInstantTime(1)); + // ensure the compaction is triggered and executed + try (HoodieBackedTableMetadata metadata = new HoodieBackedTableMetadata(context, writeConfig.getMetadataConfig(), writeConfig.getBasePath(), true)) { + HoodieTableMetaClient metadataMetaClient = metadata.getMetadataMetaClient(); + final HoodieActiveTimeline activeTimeline = metadataMetaClient.reloadActiveTimeline(); + Option lastCompaction = activeTimeline.filterCompletedInstants() + .filter(s -> s.getAction().equals(COMMIT_ACTION)).lastInstant(); + assertTrue(lastCompaction.isPresent()); + // create pending instant in data table + testTable.addRequestedCommit(InProcessTimeGenerator.createNewInstantTime(1)); + // continue writing + for (int i = 0; i <= maxNumDeltaCommits; i++) { + doWriteOperation(testTable, InProcessTimeGenerator.createNewInstantTime(1)); + } + assertDoesNotThrow(() -> doWriteOperation(testTable, InProcessTimeGenerator.createNewInstantTime(1))); + } } @Test diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java index 1a268675ac75..8ca0d4e16a96 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java @@ -19,10 +19,14 @@ package org.apache.hudi.client.functional; import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.avro.model.HoodieCleanMetadata; +import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.engine.HoodieLocalEngineContext; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieBaseFile; +import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; @@ -32,8 +36,12 @@ import org.apache.hudi.common.table.log.HoodieLogFormat; import org.apache.hudi.common.table.log.block.HoodieDataBlock; import org.apache.hudi.common.table.log.block.HoodieLogBlock; +import 
org.apache.hudi.common.table.timeline.HoodieActiveTimeline; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.view.TableFileSystemView; import org.apache.hudi.common.testutils.HoodieTestTable; +import org.apache.hudi.common.util.CleanerUtils; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.collection.ExternalSpillableMap; import org.apache.hudi.config.HoodieWriteConfig; @@ -43,7 +51,6 @@ import org.apache.hudi.metadata.HoodieMetadataLogRecordReader; import org.apache.hudi.metadata.HoodieMetadataPayload; import org.apache.hudi.metadata.HoodieTableMetadataKeyGenerator; -import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; @@ -66,6 +73,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -76,8 +84,12 @@ import static java.util.Arrays.asList; import static java.util.Collections.emptyList; +import static org.apache.hudi.common.model.WriteOperationType.BULK_INSERT; +import static org.apache.hudi.common.model.WriteOperationType.COMPACT; import static org.apache.hudi.common.model.WriteOperationType.INSERT; import static org.apache.hudi.common.model.WriteOperationType.UPSERT; +import static org.apache.hudi.common.table.timeline.HoodieTimeline.CLEAN_ACTION; +import static org.apache.hudi.metadata.MetadataPartitionType.FILES; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -285,6 +297,112 @@ public void testMetadataRecordKeyExcludeFromPayload(final HoodieTableType tableT validateMetadata(testTable); } + /** + * This tests the case where the two clean actions delete the same file and commit + * to the metadata table. The metadata table should not contain the deleted file afterwards. + * A new cleaner plan may contain the same file to delete if the previous cleaner + * plan has not been successfully executed before the new one is scheduled. 
+   */
+  @ParameterizedTest
+  @EnumSource(HoodieTableType.class)
+  public void testRepeatedCleanActionsWithMetadataTableEnabled(final HoodieTableType tableType) throws Exception {
+    initPath();
+    writeConfig = getWriteConfigBuilder(true, true, false)
+        .withMetadataConfig(HoodieMetadataConfig.newBuilder()
+            .enable(true)
+            .withMaxNumDeltaCommitsBeforeCompaction(4)
+            .build())
+        .build();
+    init(tableType, writeConfig);
+    String partition = "p1";
+    // Simulate two bulk insert operations adding two data files in partition "p1"
+    String instant1 = metaClient.createNewInstantTime();
+    HoodieCommitMetadata commitMetadata1 =
+        testTable.doWriteOperation(instant1, BULK_INSERT, emptyList(), asList(partition), 1);
+    String instant2 = metaClient.createNewInstantTime();
+    HoodieCommitMetadata commitMetadata2 =
+        testTable.doWriteOperation(instant2, BULK_INSERT, emptyList(), asList(partition), 1);
+
+    final HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder()
+        .setConf(hadoopConf)
+        .setBasePath(metadataTableBasePath)
+        .build();
+    while (getNumCompactions(metadataMetaClient) == 0) {
+      // Write until the compaction happens in the metadata table
+      testTable.doWriteOperation(
+          metaClient.createNewInstantTime(), BULK_INSERT, emptyList(), asList(partition), 1);
+      metadataMetaClient.reloadActiveTimeline();
+    }
+
+    assertEquals(1, getNumCompactions(metadataMetaClient));
+
+    List<String> fileIdsToReplace = new ArrayList<>();
+    fileIdsToReplace.addAll(commitMetadata1.getFileIdAndRelativePaths().keySet());
+    fileIdsToReplace.addAll(commitMetadata2.getFileIdAndRelativePaths().keySet());
+    // Simulate clustering operation replacing two data files with a new data file
+    testTable.doCluster(
+        metaClient.createNewInstantTime(),
+        Collections.singletonMap(partition, fileIdsToReplace), asList(partition), 1);
+    Set<String> fileSetBeforeCleaning = getFilePathsInPartition(partition);
+
+    // Simulate two clean actions deleting the same set of data files
+    // based on the first two commits
+    String cleanInstant = metaClient.createNewInstantTime();
+    HoodieCleanMetadata cleanMetadata = testTable.doCleanBasedOnCommits(cleanInstant, asList(instant1, instant2));
+    List<String> deleteFileList = cleanMetadata.getPartitionMetadata().get(partition).getDeletePathPatterns();
+    assertTrue(deleteFileList.size() > 0);
+
+    Set<String> fileSetAfterFirstCleaning = getFilePathsInPartition(partition);
+    validateFilesAfterCleaning(deleteFileList, fileSetBeforeCleaning, fileSetAfterFirstCleaning);
+
+    metaClient.reloadActiveTimeline();
+    HoodieCleanerPlan cleanerPlan = CleanerUtils.getCleanerPlan(
+        metaClient, new HoodieInstant(HoodieInstant.State.REQUESTED, CLEAN_ACTION, cleanInstant));
+    testTable.repeatClean(metaClient.createNewInstantTime(), cleanerPlan, cleanMetadata);
+
+    // Compaction should not happen after the first compaction in this test case
+    assertEquals(1, getNumCompactions(metadataMetaClient));
+    Set<String> fileSetAfterSecondCleaning = getFilePathsInPartition(partition);
+    validateFilesAfterCleaning(deleteFileList, fileSetBeforeCleaning, fileSetAfterSecondCleaning);
+  }
+
+  private int getNumCompactions(HoodieTableMetaClient metaClient) {
+    HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
+    return timeline
+        .filter(s -> {
+          try {
+            return s.getAction().equals(HoodieTimeline.COMMIT_ACTION)
+                && HoodieCommitMetadata.fromBytes(
+                    timeline.getInstantDetails(s).get(), HoodieCommitMetadata.class)
+                .getOperationType().equals(COMPACT);
+          } catch (IOException e) {
+            throw new RuntimeException(e);
+          }
+        })
+        .countInstants();
+  }
+
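The repeated-clean scenario above hinges on file deletions in the metadata table's FILES partition being idempotent: replaying a cleaner plan that deletes already-deleted files must leave the listing unchanged. The following is a minimal, illustrative sketch of that invariant using plain collections; it models only the semantics the test verifies and does not use Hudi's actual HoodieMetadataPayload merge logic.

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class IdempotentCleanSketch {
  // Removing an absent element from a set is a no-op, so applying the same
  // delete list twice converges on the same listing.
  static Set<String> applyClean(Set<String> filesInPartition, List<String> deletePathPatterns) {
    Set<String> result = new HashSet<>(filesInPartition);
    result.removeAll(deletePathPatterns);
    return result;
  }

  public static void main(String[] args) {
    Set<String> before = new HashSet<>(Arrays.asList("f1.parquet", "f2.parquet", "f3.parquet"));
    List<String> deletes = Arrays.asList("f1.parquet", "f2.parquet");
    Set<String> afterFirstClean = applyClean(before, deletes);
    Set<String> afterSecondClean = applyClean(afterFirstClean, deletes);
    // Both cleans leave only f3.parquet, mirroring validateFilesAfterCleaning.
    System.out.println(afterFirstClean.equals(afterSecondClean)); // true
  }
}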
+ private Set getFilePathsInPartition(String partition) throws IOException { + HoodieBackedTableMetadata tableMetadata = new HoodieBackedTableMetadata( + new HoodieLocalEngineContext(hadoopConf), + HoodieMetadataConfig.newBuilder().enable(true).build(), + basePath); + return Arrays.stream(tableMetadata.getAllFilesInPartition(new Path(basePath, partition))) + .map(status -> status.getPath().getName()).collect(Collectors.toSet()); + } + + private void validateFilesAfterCleaning(List deleteFileList, + Set fileSetBeforeCleaning, + Set fileSetAfterCleaning) { + assertEquals(deleteFileList.size(), fileSetBeforeCleaning.size() - fileSetAfterCleaning.size()); + for (String deleteFile : deleteFileList) { + assertFalse(fileSetAfterCleaning.contains(deleteFile)); + } + for (String file : fileSetAfterCleaning) { + assertTrue(fileSetBeforeCleaning.contains(file)); + } + } + /** * Verify the metadata table log files for the record field correctness. On disk format * should be based on meta fields and key deduplication config. And the in-memory merged @@ -302,7 +420,7 @@ private void verifyMetadataRecordKeyExcludeFromPayloadLogFiles(HoodieTable table // Compaction should not be triggered yet. Let's verify no base file // and few log files available. List fileSlices = table.getSliceView() - .getLatestFileSlices(MetadataPartitionType.FILES.getPartitionPath()).collect(Collectors.toList()); + .getLatestFileSlices(FILES.getPartitionPath()).collect(Collectors.toList()); if (fileSlices.isEmpty()) { throw new IllegalStateException("LogFile slices are not available!"); } @@ -377,7 +495,7 @@ private void verifyMetadataMergedRecords(HoodieTableMetaClient metadataMetaClien .withBasePath(metadataMetaClient.getBasePath()) .withLogFilePaths(logFilePaths) .withLatestInstantTime(latestCommitTimestamp) - .withPartition(MetadataPartitionType.FILES.getPartitionPath()) + .withPartition(FILES.getPartitionPath()) .withReaderSchema(schema) .withMaxMemorySizeInBytes(100000L) .withBufferSize(4096) @@ -401,7 +519,7 @@ private void verifyMetadataMergedRecords(HoodieTableMetaClient metadataMetaClien private void verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(HoodieTable table) throws IOException { table.getHoodieView().sync(); List fileSlices = table.getSliceView() - .getLatestFileSlices(MetadataPartitionType.FILES.getPartitionPath()).collect(Collectors.toList()); + .getLatestFileSlices(FILES.getPartitionPath()).collect(Collectors.toList()); if (!fileSlices.get(0).getBaseFile().isPresent()) { throw new IllegalStateException("Base file not available!"); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java index 44105a419834..cff783ebd53c 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java @@ -204,7 +204,6 @@ private static Stream populateMetaFieldsParams() { private static Stream rollbackFailedCommitsParams() { return Stream.of( Arguments.of(HoodieFailedWritesCleaningPolicy.LAZY, true), - Arguments.of(HoodieFailedWritesCleaningPolicy.LAZY, false), Arguments.of(HoodieFailedWritesCleaningPolicy.NEVER, true), Arguments.of(HoodieFailedWritesCleaningPolicy.NEVER, false) ); @@ -240,10 +239,9 @@ public void 
testAutoCommitOnInsert(boolean populateMetaFields) throws Exception /** * Test Auto Commit behavior for HoodieWriteClient insertPrepped API. */ - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void testAutoCommitOnInsertPrepped(boolean populateMetaFields) throws Exception { - testAutoCommit(SparkRDDWriteClient::insertPreppedRecords, true, populateMetaFields); + @Test + public void testAutoCommitOnInsertPrepped() throws Exception { + testAutoCommit(SparkRDDWriteClient::insertPreppedRecords, true, true); } /** @@ -276,11 +274,10 @@ public void testAutoCommitOnBulkInsert(boolean populateMetaFields) throws Except /** * Test Auto Commit behavior for HoodieWriteClient bulk-insert prepped API. */ - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void testAutoCommitOnBulkInsertPrepped(boolean populateMetaFields) throws Exception { + @Test + public void testAutoCommitOnBulkInsertPrepped() throws Exception { testAutoCommit((writeClient, recordRDD, instantTime) -> writeClient.bulkInsertPreppedRecords(recordRDD, instantTime, - Option.empty()), true, populateMetaFields); + Option.empty()), true, true); } /** @@ -440,10 +437,9 @@ public void testDeduplicationOnBulkInsert(boolean populateMetaFields) throws Exc /** * Test De-duplication behavior for HoodieWriteClient upsert API. */ - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void testDeduplicationOnUpsert(boolean populateMetaFields) throws Exception { - testDeduplication(SparkRDDWriteClient::upsert, populateMetaFields); + @Test + public void testDeduplicationOnUpsert() throws Exception { + testDeduplication(SparkRDDWriteClient::upsert, true); } /** @@ -598,11 +594,10 @@ public void testUpserts(boolean populateMetaFields) throws Exception { /** * Test UpsertPrepped API. */ - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void testUpsertsPrepped(boolean populateMetaFields) throws Exception { + @Test + public void testUpsertsPrepped() throws Exception { HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder().withRollbackUsingMarkers(true); - addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields); + addConfigsForPopulateMetaFields(cfgBuilder, true); testUpsertsInternal(cfgBuilder.build(), SparkRDDWriteClient::upsertPreppedRecords, true); } @@ -837,11 +832,10 @@ public void testInsertsWithHoodieConcatHandle(boolean populateMetaFields) throws /** * Test InsertPrepped API for HoodieConcatHandle. */ - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void testInsertsPreppedWithHoodieConcatHandle(boolean populateMetaFields) throws Exception { + @Test + public void testInsertsPreppedWithHoodieConcatHandle() throws Exception { HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(); - addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields); + addConfigsForPopulateMetaFields(cfgBuilder, true); testHoodieConcatHandle(cfgBuilder.build(), true); } @@ -994,11 +988,10 @@ public void testPendingRestore() throws IOException { /** * Tests deletion of records. 
*/ - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void testDeletes(boolean populateMetaFields) throws Exception { + @Test + public void testDeletes() throws Exception { HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(HoodieFailedWritesCleaningPolicy.LAZY); - addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields); + addConfigsForPopulateMetaFields(cfgBuilder, true); SparkRDDWriteClient client = getHoodieWriteClient(cfgBuilder.build()); /** * Write 1 (inserts and deletes) Write actual 200 insert records and ignore 100 delete records @@ -1019,7 +1012,7 @@ public void testDeletes(boolean populateMetaFields) throws Exception { writeBatch(client, newCommitTime, initCommitTime, Option.empty(), initCommitTime, // unused as genFn uses hard-coded number of inserts/updates/deletes -1, recordGenFunction, SparkRDDWriteClient::upsert, true, 200, 200, 1, false, - populateMetaFields); + true); /** * Write 2 (deletes+writes). @@ -1037,7 +1030,7 @@ public void testDeletes(boolean populateMetaFields) throws Exception { }; writeBatch(client, newCommitTime, prevCommitTime, Option.empty(), initCommitTime, 100, recordGenFunction, SparkRDDWriteClient::upsert, true, 50, 150, 2, false, - populateMetaFields); + true); } /** @@ -1046,11 +1039,10 @@ public void testDeletes(boolean populateMetaFields) throws Exception { * * @throws Exception */ - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void testDeletesForInsertsInSameBatch(boolean populateMetaFields) throws Exception { + @Test + public void testDeletesForInsertsInSameBatch() throws Exception { HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(HoodieFailedWritesCleaningPolicy.LAZY); - addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields); + addConfigsForPopulateMetaFields(cfgBuilder, true); SparkRDDWriteClient client = getHoodieWriteClient(cfgBuilder.build()); /** * Write 200 inserts and issue deletes to a subset(50) of inserts. @@ -1071,7 +1063,7 @@ public void testDeletesForInsertsInSameBatch(boolean populateMetaFields) throws writeBatch(client, newCommitTime, initCommitTime, Option.empty(), initCommitTime, -1, recordGenFunction, SparkRDDWriteClient::upsert, true, 150, 150, 1, false, - populateMetaFields); + true); } private void assertPartitionPathRecordKeys(List> expectedPartitionPathRecKeyPairs, String[] fullPartitionPaths) { @@ -1898,19 +1890,17 @@ public void testInsertOverwritePartitionHandlingWithMoreRecords(boolean populate /** * Test scenario of writing fewer file groups than existing number of file groups in partition. */ - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void testInsertOverwritePartitionHandlingWithFewerRecords(boolean populateMetaFields) throws Exception { - verifyInsertOverwritePartitionHandling(3000, 1000, populateMetaFields); + @Test + public void testInsertOverwritePartitionHandlingWithFewerRecords() throws Exception { + verifyInsertOverwritePartitionHandling(3000, 1000, true); } /** * Test scenario of writing similar number file groups in partition. 
*/ - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void testInsertOverwritePartitionHandlingWithSimilarNumberOfRecords(boolean populateMetaFields) throws Exception { - verifyInsertOverwritePartitionHandling(3000, 3000, populateMetaFields); + @Test + public void testInsertOverwritePartitionHandlingWithSimilarNumberOfRecords() throws Exception { + verifyInsertOverwritePartitionHandling(3000, 3000, true); } /** @@ -1963,19 +1953,17 @@ public void verifyDeletePartitionsHandlingWithFewerRecordsFirstPartition(boolean /** * Test scenario of writing similar number file groups in partition. */ - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void verifyDeletePartitionsHandlingWithSimilarNumberOfRecords(boolean populateMetaFields) throws Exception { - verifyDeletePartitionsHandling(3000, 3000, 3000, populateMetaFields); + @Test + public void verifyDeletePartitionsHandlingWithSimilarNumberOfRecords() throws Exception { + verifyDeletePartitionsHandling(3000, 3000, 3000, true); } /** * Test scenario of writing more file groups for first partition than second and third partition. */ - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void verifyDeletePartitionsHandlingHandlingWithFewerRecordsSecondThirdPartition(boolean populateMetaFields) throws Exception { - verifyDeletePartitionsHandling(3000, 1000, 1000, populateMetaFields); + @Test + public void verifyDeletePartitionsHandlingHandlingWithFewerRecordsSecondThirdPartition() throws Exception { + verifyDeletePartitionsHandling(3000, 1000, 1000, true); } private Set insertPartitionRecordsWithCommit(SparkRDDWriteClient client, int recordsCount, String commitTime1, String partitionPath) throws IOException { @@ -2217,11 +2205,11 @@ public void testDeletesWithoutInserts(boolean populateMetaFields) { /** * Test to ensure commit metadata points to valid files. 
*/ - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void testCommitWritesRelativePaths(boolean populateMetaFields) throws Exception { + @Test + public void testCommitWritesRelativePaths() throws Exception { + HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder().withAutoCommit(false); - addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields); + addConfigsForPopulateMetaFields(cfgBuilder, true); try (SparkRDDWriteClient client = getHoodieWriteClient(cfgBuilder.build())) { HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build(); HoodieSparkTable table = HoodieSparkTable.create(cfgBuilder.build(), context, metaClient); @@ -2393,9 +2381,9 @@ private void testRollbackAfterConsistencyCheckFailureUsingFileList(boolean rollb } @ParameterizedTest - @MethodSource("rollbackAfterConsistencyCheckFailureParams") - public void testRollbackAfterConsistencyCheckFailureUsingFileList(boolean enableOptimisticConsistencyGuard, boolean populateMetCols) throws Exception { - testRollbackAfterConsistencyCheckFailureUsingFileList(false, enableOptimisticConsistencyGuard, populateMetCols); + @ValueSource(booleans = {true, false}) + public void testRollbackAfterConsistencyCheckFailureUsingFileList(boolean enableOptimisticConsistencyGuard) throws Exception { + testRollbackAfterConsistencyCheckFailureUsingFileList(false, enableOptimisticConsistencyGuard, true); } @ParameterizedTest @@ -2486,9 +2474,9 @@ public void testRollbackFailedCommits() throws Exception { } } - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void testRollbackFailedCommitsToggleCleaningPolicy(boolean populateMetaFields) throws Exception { + @Test + public void testRollbackFailedCommitsToggleCleaningPolicy() throws Exception { + boolean populateMetaFields = true; HoodieTestUtils.init(hadoopConf, basePath); HoodieFailedWritesCleaningPolicy cleaningPolicy = EAGER; SparkRDDWriteClient client = new SparkRDDWriteClient(context, getParallelWritingWriteConfig(cleaningPolicy, populateMetaFields)); @@ -2641,7 +2629,7 @@ private Pair> testConsistencyCheck(HoodieTableMetaCli Option markerFilePath = WriteMarkersFactory.get( cfg.getMarkersType(), getHoodieTable(metaClient, cfg), instantTime) .create(partitionPath, - FSUtils.makeBaseFileName(instantTime, "1-0-1", UUID.randomUUID().toString()), + FSUtils.makeBaseFileName(instantTime, "1-0-1", UUID.randomUUID().toString(), BASE_FILE_EXTENSION), IOType.MERGE); if (!enableOptimisticConsistencyGuard) { Exception e = assertThrows(HoodieCommitException.class, () -> { @@ -2655,12 +2643,11 @@ private Pair> testConsistencyCheck(HoodieTableMetaCli return Pair.of(markerFilePath.get(), result); } - @ParameterizedTest - @MethodSource("populateMetaFieldsParams") - public void testMultiOperationsPerCommit(boolean populateMetaFields) throws IOException { + @Test + public void testMultiOperationsPerCommit() throws IOException { HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder().withAutoCommit(false) .withAllowMultiWriteOnSameInstant(true); - addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields); + addConfigsForPopulateMetaFields(cfgBuilder, true); HoodieWriteConfig cfg = cfgBuilder.build(); SparkRDDWriteClient client = getHoodieWriteClient(cfg); String firstInstantTime = "0000"; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java index 
709572325f8c..557905ae85a8 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java @@ -329,11 +329,11 @@ public void testLookupIndexWithAndWithoutColumnStats() throws Exception { // check column_stats partition exists metaClient = HoodieTableMetaClient.reload(metaClient); - assertTrue(metadataPartitionExists(metaClient.getBasePath(), context, COLUMN_STATS)); + assertTrue(metadataPartitionExists(metaClient.getBasePath(), context, COLUMN_STATS.getPartitionPath())); assertTrue(metaClient.getTableConfig().getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath())); // delete the column_stats partition - deleteMetadataPartition(metaClient.getBasePath(), context, COLUMN_STATS); + deleteMetadataPartition(metaClient.getBasePath(), context, COLUMN_STATS.getPartitionPath()); // Now tagLocation for these records, they should be tagged correctly despite column_stats being enabled but not present hoodieTable = HoodieSparkTable.create(config, context, metaClient); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestRemoteFileSystemViewWithMetadataTable.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestRemoteFileSystemViewWithMetadataTable.java index 86cc078e9894..3d2e018c3a06 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestRemoteFileSystemViewWithMetadataTable.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestRemoteFileSystemViewWithMetadataTable.java @@ -116,7 +116,7 @@ public void initTimelineService() { .serverPort(config.getViewStorageConfig().getRemoteViewServerPort()).build(), FileSystem.get(new Configuration()), FileSystemViewManager.createViewManager( - context, config.getMetadataConfig(), config.getViewStorageConfig(), + context, config.getViewStorageConfig(), config.getCommonConfig(), metaClient -> new HoodieBackedTestDelayedTableMetadata( context, config.getMetadataConfig(), metaClient.getBasePathV2().toString(), true))); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestHBasePutBatchSizeCalculator.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestHBasePutBatchSizeCalculator.java index a6068e6a8f9c..b20bc979b12b 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestHBasePutBatchSizeCalculator.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestHBasePutBatchSizeCalculator.java @@ -34,28 +34,28 @@ public void testPutBatchSizeCalculation() { int putBatchSize = batchSizeCalculator.getBatchSize(10, 16667, 1200, 200, 0.1f); // Total puts that can be sent in 1 second = (10 * 16667 * 0.1) = 16,667 // Total puts per batch will be (16,667 / parallelism) = 83.335, where 200 is the maxExecutors - assertEquals(putBatchSize, 83); + assertEquals(putBatchSize, 84); // Number of Region Servers are halved, total requests sent in a second are also halved, so batchSize is also halved int putBatchSize2 = batchSizeCalculator.getBatchSize(5, 16667, 1200, 200, 0.1f); - assertEquals(putBatchSize2, 41); + assertEquals(putBatchSize2, 42); // If the parallelism is halved, batchSize has to double int putBatchSize3 = batchSizeCalculator.getBatchSize(10, 16667, 1200, 100, 0.1f); - assertEquals(putBatchSize3, 166); + assertEquals(putBatchSize3, 167); 
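// Illustrative note (not from the original test): the updated expected values are consistent with the
// calculator rounding the per-batch size up rather than truncating it. For the first case above:
//   total puts per second = 10 region servers * 16667 QPS * 0.1 fraction = 16667
//   per-batch size        = 16667 / 200 (parallelism)                    = 83.335, rounded up to 84
// A minimal sketch of that rounding, assuming ceiling is the only change to the formula:
//   assertEquals(84,  (int) Math.ceil(16667 / 200.0));
//   assertEquals(167, (int) Math.ceil(16667 / 100.0));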
// If the parallelism is halved, batchSize has to double. // This time parallelism is driven by numTasks rather than numExecutors int putBatchSize4 = batchSizeCalculator.getBatchSize(10, 16667, 100, 200, 0.1f); - assertEquals(putBatchSize4, 166); + assertEquals(putBatchSize4, 167); // If sleepTimeMs is halved, batchSize has to halve int putBatchSize5 = batchSizeCalculator.getBatchSize(10, 16667, 1200, 200, 0.05f); - assertEquals(putBatchSize5, 41); + assertEquals(putBatchSize5, 42); // If maxQPSPerRegionServer is doubled, batchSize also doubles int putBatchSize6 = batchSizeCalculator.getBatchSize(10, 33334, 1200, 200, 0.1f); - assertEquals(putBatchSize6, 166); + assertEquals(putBatchSize6, 167); } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java index aec9b6314000..b496db5165ba 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java @@ -58,6 +58,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.metrics.HoodieMetricsConfig; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.metadata.HoodieTableMetadataWriter; @@ -115,6 +116,8 @@ import static org.apache.hudi.metrics.HoodieMetrics.ARCHIVE_ACTION; import static org.apache.hudi.metrics.HoodieMetrics.DELETE_INSTANTS_NUM_STR; import static org.apache.hudi.metrics.HoodieMetrics.DURATION_STR; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -825,35 +828,48 @@ public void testPendingClusteringWillBlockArchival(boolean enableMetadata) throw public void testArchiveRollbacksTestTable(boolean enableMetadata) throws Exception { HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, 4, 5, 2); - for (int i = 1; i < 13; i += 2) { + List> instants = new ArrayList<>(); + boolean hasArchivedInstants = false; + for (int i = 1; i < 8; i += 3) { + String commitInstant1 = metaClient.createNewInstantTime(); + instants.add(Pair.of(commitInstant1, HoodieTimeline.COMMIT_ACTION)); testTable.doWriteOperation( - "000000" + String.format("%02d", i), + commitInstant1, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2); - testTable.doRollback( - "000000" + String.format("%02d", i), "000000" + String.format("%02d", i + 1)); + try { + String rollbackInstant = metaClient.createNewInstantTime(); + testTable.doRollback(commitInstant1, rollbackInstant); + instants.add(Pair.of(rollbackInstant, HoodieTimeline.ROLLBACK_ACTION)); + } catch (HoodieMetadataException e) { + // The instant that triggers compaction can not be rolled back + // because it's instant time is less than the latest compaction instant + // on the MDT timeline. + // ignore + } + + // we need enough delta commits to trigger archival on MDT. 
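// Sketch of the guard described in the catch block above (simplified and assumed; Hudi instant times are
// timestamps that compare lexicographically, so a rollback target older than the latest completed
// compaction on the MDT timeline is rejected):
//   String latestMdtCompactionTime = ...; // latest completed compaction instant on the MDT timeline
//   if (commitInstant1.compareTo(latestMdtCompactionTime) < 0) {
//     throw new HoodieMetadataException("Cannot rollback instant " + commitInstant1
//         + " earlier than the latest metadata table compaction " + latestMdtCompactionTime);
//   }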
+ String commitInstant2 = metaClient.createNewInstantTime(); + instants.add(Pair.of(commitInstant2, HoodieTimeline.COMMIT_ACTION)); + testTable.doWriteOperation(commitInstant2, WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2); // trigger archival Pair, List> commitsList = archiveAndGetCommitsList(writeConfig); List originalCommits = commitsList.getKey(); List commitsAfterArchival = commitsList.getValue(); - if (i != 11) { - assertEquals(originalCommits, commitsAfterArchival); - } else { - // only time when archival will kick in - List expectedArchivedInstants = new ArrayList<>(); - expectedArchivedInstants.addAll(getAllArchivedCommitInstants(Arrays.asList("00000001", "00000003"))); - expectedArchivedInstants.addAll(getAllArchivedCommitInstants(Collections.singletonList("00000002"), HoodieTimeline.ROLLBACK_ACTION)); - List expectedActiveInstants = new ArrayList<>(); - expectedActiveInstants.addAll(getActiveCommitInstants( - Arrays.asList("00000005", "00000007", "00000009", "00000011"))); - expectedActiveInstants.addAll(getActiveCommitInstants( - Arrays.asList("00000004", "00000006", "00000008", "00000010", "00000012"), HoodieTimeline.ROLLBACK_ACTION)); + int numArchivedInstants = originalCommits.size() - commitsAfterArchival.size(); + if ((originalCommits.size() - commitsAfterArchival.size()) > 0) { + hasArchivedInstants = true; + List expectedArchivedInstants = instants.subList(0, numArchivedInstants).stream() + .map(p -> new HoodieInstant(State.COMPLETED, p.getValue(), p.getKey())).collect(Collectors.toList()); + List expectedActiveInstants = instants.subList(numArchivedInstants, instants.size()).stream() + .map(p -> new HoodieInstant(State.COMPLETED, p.getValue(), p.getKey())).collect(Collectors.toList()); verifyArchival(expectedArchivedInstants, expectedActiveInstants, commitsAfterArchival); } } + assertTrue(hasArchivedInstants, "Some instants should be archived"); } @ParameterizedTest @@ -1071,37 +1087,53 @@ public void testArchiveRollbacksAndCleanTestTable() throws Exception { int maxArchiveCommits = 4; HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, minArchiveCommits, maxArchiveCommits, 2); + List> instants = new ArrayList<>(); + String instant1 = metaClient.createNewInstantTime(); + instants.add(Pair.of(instant1, HoodieTimeline.COMMIT_ACTION)); // trigger 1 commit to add a lot of files so that future cleans can clean them up - testTable.doWriteOperation(String.format("%08d", 1), WriteOperationType.UPSERT, Arrays.asList("p1", "p2"), Arrays.asList("p1", "p2"), 20); + testTable.doWriteOperation(instant1, WriteOperationType.UPSERT, Arrays.asList("p1", "p2"), Arrays.asList("p1", "p2"), 20); Map partitionToFileDeleteCount = new HashMap<>(); partitionToFileDeleteCount.put("p1", 1); partitionToFileDeleteCount.put("p2", 1); for (int i = 2; i < 5; i++) { - testTable.doClean(String.format("%08d", i), partitionToFileDeleteCount); + String cleanInstant = metaClient.createNewInstantTime(); + instants.add(Pair.of(cleanInstant, HoodieTimeline.CLEAN_ACTION)); + testTable.doClean(cleanInstant, partitionToFileDeleteCount); } - for (int i = 5; i <= 11; i += 2) { - testTable.doWriteOperation(String.format("%08d", i), WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2); - testTable.doRollback(String.format("%08d", i), String.format("%08d", i + 1)); + for (int i = 5; i <= 13; i += 3) { + String commitInstant1 = metaClient.createNewInstantTime(); + instants.add(Pair.of(commitInstant1, HoodieTimeline.COMMIT_ACTION)); + 
testTable.doWriteOperation(commitInstant1, WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2); + try { + String rollbackInstant = metaClient.createNewInstantTime(); + testTable.doRollback(commitInstant1, rollbackInstant); + instants.add(Pair.of(rollbackInstant, HoodieTimeline.ROLLBACK_ACTION)); + } catch (HoodieMetadataException e) { + // The instant that triggers compaction can not be rolled back + // because it's instant time is less than the latest compaction instant + // on the MDT timeline. + // ignore + } + // write more commits than rollback so that the MDT archival can be triggered, + // then the DT archival can be triggered. + String commitInstant2 = metaClient.createNewInstantTime(); + instants.add(Pair.of(commitInstant2, HoodieTimeline.COMMIT_ACTION)); + testTable.doWriteOperation(commitInstant2, WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2); } // trigger archival: - // clean: 2,3: after archival -> null - // write: 1,5,7,9,11: after archival -> 9,11 - // rollback: 6,8,10,12: after archival -> 8,10,12 Pair, List> commitsList = archiveAndGetCommitsList(writeConfig); + List allCommits = commitsList.getKey(); List commitsAfterArchival = commitsList.getValue(); + assertThat("The archived commits number is not as expected", allCommits.size() - commitsAfterArchival.size(), is(5)); - List expectedActiveInstants = new ArrayList<>(); - expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000008", "00000010", "00000012"), HoodieTimeline.ROLLBACK_ACTION)); - expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000009", "00000011"))); - - List expectedArchiveInstants = new ArrayList<>(); - expectedArchiveInstants.addAll(getAllArchivedCommitInstants(Arrays.asList("00000001", "00000005", "00000007"))); - expectedArchiveInstants.addAll(getAllArchivedCommitInstants(Arrays.asList("00000002", "00000003", "00000004"), HoodieTimeline.CLEAN_ACTION)); - expectedArchiveInstants.addAll(getAllArchivedCommitInstants(Collections.singletonList("00000006"), HoodieTimeline.ROLLBACK_ACTION)); + List expectedArchiveInstants = instants.subList(0, 5).stream() + .map(p -> new HoodieInstant(State.COMPLETED, p.getValue(), p.getKey())).collect(Collectors.toList()); + List expectedActiveInstants = instants.subList(5, instants.size()).stream() + .map(p -> new HoodieInstant(State.COMPLETED, p.getValue(), p.getKey())).collect(Collectors.toList()); verifyArchival(expectedArchiveInstants, expectedActiveInstants, commitsAfterArchival); } @@ -1469,7 +1501,7 @@ public void testArchivalAndCompactionInMetadataTable() throws Exception { .setLoadActiveTimelineOnLoad(true).build(); List instants = new ArrayList<>(); - for (int i = 1; i <= 18; i++) { + for (int i = 1; i <= 19; i++) { String instant = metaClient.createNewInstantTime(); instants.add(instant); if (i != 2) { @@ -1495,65 +1527,71 @@ public void testArchivalAndCompactionInMetadataTable() throws Exception { new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, SOLO_COMMIT_TIMESTAMP + "010"))); assertTrue(metadataTableInstants.contains( new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, instants.get(0)))); - } else if (i <= 8) { + } else if (i == 2) { + assertEquals(i - 1, metadataTableInstants.size()); + assertTrue(metadataTableInstants.contains( + new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, SOLO_COMMIT_TIMESTAMP + "010"))); + assertFalse(metadataTableInstants.contains( + new HoodieInstant(State.COMPLETED, 
HoodieTimeline.DELTA_COMMIT_ACTION, instants.get(1)))); + } else if (i <= 9) { // In the metadata table timeline, the first delta commit is "00000000000000" - // from metadata table init, delta commits 1 till 7 are added + // from metadata table init, delta commits 1 till 8 are added // later on without archival or compaction // rollback in DT will also trigger rollback in MDT - assertEquals(i, metadataTableInstants.size()); + assertEquals(i - 1, metadataTableInstants.size()); assertTrue(metadataTableInstants.contains( new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, SOLO_COMMIT_TIMESTAMP + "010"))); // rolled back commits may not be present in MDT timeline [1] - IntStream.range(2, i).forEach(j -> + IntStream.range(3, i).forEach(j -> assertTrue(metadataTableInstants.contains( new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, instants.get(j - 1))))); - } else if (i == 9) { - // i == 9 + } else if (i == 10) { + // i == 10 // The instant "00000000000010" was archived since it's less than // the earliest commit on the dataset active timeline, // the dataset active timeline has instants: - // 2.rollback, 7.commit, 8.commit + // [7.commit, 8.commit, 9.commit, 10.commit] assertEquals(9, metadataTableInstants.size()); - // mdt timeline 2, 3,..., 8, a completed compaction commit, 9 - IntStream.range(2, i).forEach(j -> + // mdt timeline 3,..., 10, a completed compaction commit + IntStream.range(3, i).forEach(j -> assertTrue(metadataTableInstants.contains( new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, instants.get(j - 1))))); - } else if (i <= 12) { - // In the metadata table timeline, the first delta commit is 6 + } else if (i <= 13) { + // In the metadata table timeline, the first delta commit is 7 // because it equals with the earliest commit on the dataset timeline, after archival, - // delta commits 6 till 10 are added later on without archival or compaction - // mdt timeline [6, 7, 8, a completed compaction commit, 9, 10] for i = 10 - assertEquals(i - 4, metadataTableInstants.size()); + // delta commits 11 till 12 are added later on without archival or compaction + // mdt timeline [7, 8, 9, 10, a completed compaction commit] for i = 10 + assertEquals(i - 5, metadataTableInstants.size()); assertEquals(1, metadataTableMetaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants().countInstants()); - IntStream.range(6, i).forEach(j -> + IntStream.range(7, i).forEach(j -> assertTrue(metadataTableInstants.contains( new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, instants.get(j - 1))))); - } else if (i <= 16) { - // In the metadata table timeline, the first delta commit is a compaction commit - // from metadata table compaction, after archival, delta commits 9 - // till 16 are added later on without archival or compaction - // mdt timeline: [a completed compaction commit, 9, ... 13] - assertEquals(i - 7, metadataTableInstants.size()); + } else if (i <= 17) { + // In the metadata table timeline, the second commit is a compaction commit + // from metadata table compaction, after archival, delta commits 14 + // till 17 are added later on without archival or compaction + // mdt timeline: [10, a completed compaction commit, 11, ... 14, 15, ... 
17] + assertEquals(i - 8, metadataTableInstants.size()); assertEquals(1, metadataTableMetaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants().countInstants()); - IntStream.range(9, i).forEach(j -> + IntStream.range(10, i).forEach(j -> assertTrue(metadataTableInstants.contains( new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, instants.get(j - 1))))); - } else if (i == 17) { - // i == 17 - // commits in MDT [a completed compaction commit, 9, ... 16, 17, a completed compaction commit] - // another compaction is triggered by this commit so everything upto 16 is compacted. + } else if (i == 18) { + // i == 18 + // commits in MDT [10, a completed compaction commit, 11, ... 17, 18, a completed compaction commit] + // another compaction is triggered by this commit so everything upto 18 is compacted. assertEquals(11, metadataTableInstants.size()); assertEquals(2, metadataTableMetaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants().countInstants()); - IntStream.range(9, i).forEach(j -> + IntStream.range(10, i).forEach(j -> assertTrue(metadataTableInstants.contains( new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, instants.get(j - 1))))); } else { - // i == 18 + // i == 19 // compaction happened in last commit, and archival is triggered with latest compaction retained plus maxInstantToKeep = 6 // commits in MDT [14, .... 17, a completed compaction commit, 18] assertEquals(6, metadataTableInstants.size()); assertTrue(metadata(writeConfig, context).getLatestCompactionTime().isPresent()); - IntStream.range(14, i).forEach(j -> + IntStream.range(15, i).forEach(j -> assertTrue(metadataTableInstants.contains( new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, instants.get(j - 1))))); } @@ -1582,6 +1620,39 @@ public void testPendingClusteringAfterArchiveCommit(boolean enableMetadata) thro "Since we have a pending clustering instant at 00000002, we should never archive any commit after 00000000"); } + @Test + public void testRetryArchivalAfterPreviousFailedDeletion() throws Exception { + HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 4, 2); + for (int i = 0; i <= 5; i++) { + testTable.doWriteOperation("10" + i, WriteOperationType.UPSERT, Arrays.asList("p1", "p2"), 1); + } + HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table); + + HoodieTimeline timeline = metaClient.getActiveTimeline().getWriteTimeline(); + assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match"); + assertTrue(archiver.archiveIfRequired(context) > 0); + // Simulate archival failing to delete by re-adding the .commit instant files + // (101.commit, 102.commit, and 103.commit instant files) + HoodieTestDataGenerator.createOnlyCompletedCommitFile(basePath, "101_1001", wrapperFs.getConf()); + HoodieTestDataGenerator.createOnlyCompletedCommitFile(basePath, "102_1021", wrapperFs.getConf()); + HoodieTestDataGenerator.createOnlyCompletedCommitFile(basePath, "103_1031", wrapperFs.getConf()); + timeline = metaClient.getActiveTimeline().reload().getWriteTimeline(); + assertEquals(5, timeline.countInstants(), "Due to simulating partial archival deletion, there should" + + "be 5 instants (as instant times 101-103 .commit files should remain in timeline)"); + // Re-running archival again should archive and delete the 101.commit, 102.commit, and 103.commit instant files + 
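// The timeline is reloaded and the table/archiver are re-created below so that the second archival run
// sees the re-added completed-commit files; otherwise it would operate on the stale in-memory timeline
// captured before the simulated deletion failure.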
table.getMetaClient().reloadActiveTimeline(); + table = HoodieSparkTable.create(writeConfig, context, metaClient); + archiver = new HoodieTimelineArchiver(writeConfig, table); + assertTrue(archiver.archiveIfRequired(context) > 0); + timeline = metaClient.getActiveTimeline().reload().getWriteTimeline(); + assertEquals(2, timeline.countInstants(), "The instants from prior archival should " + + "be deleted now"); + } + + /** + * IMPORTANT: this method is only suitable for one time trigger of archival validation. + */ private Pair, List> archiveAndGetCommitsList(HoodieWriteConfig writeConfig) throws IOException { return archiveAndGetCommitsList(writeConfig, false); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestSparkBuildClusteringGroupsForPartition.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestSparkBuildClusteringGroupsForPartition.java index cb2fd4eebb5b..ada5f4954ab1 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestSparkBuildClusteringGroupsForPartition.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestSparkBuildClusteringGroupsForPartition.java @@ -25,6 +25,7 @@ import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieFileGroupId; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.config.HoodieClusteringConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieSparkCopyOnWriteTable; @@ -41,6 +42,9 @@ import static org.junit.jupiter.api.Assertions.assertEquals; public class TestSparkBuildClusteringGroupsForPartition { + + protected static final String BASE_FILE_EXTENSION = HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension(); + @Mock HoodieSparkCopyOnWriteTable table; @Mock @@ -109,13 +113,13 @@ public void testBuildClusteringGroupsWithLimitScan() { private FileSlice generateFileSlice(String partitionPath, String fileId, String baseInstant) { FileSlice fs = new FileSlice(new HoodieFileGroupId(partitionPath, fileId), baseInstant); - fs.setBaseFile(new HoodieBaseFile(FSUtils.makeBaseFileName(baseInstant, "1-0-1", fileId))); + fs.setBaseFile(new HoodieBaseFile(FSUtils.makeBaseFileName(baseInstant, "1-0-1", fileId, BASE_FILE_EXTENSION))); return fs; } private FileSlice generateFileSliceWithLen(String partitionPath, String fileId, String baseInstant, long fileLen) { FileSlice fs = new FileSlice(new HoodieFileGroupId(partitionPath, fileId), baseInstant); - HoodieBaseFile hoodieBaseFile = new HoodieBaseFile(FSUtils.makeBaseFileName(baseInstant, "1-0-1", fileId)); + HoodieBaseFile hoodieBaseFile = new HoodieBaseFile(FSUtils.makeBaseFileName(baseInstant, "1-0-1", fileId, BASE_FILE_EXTENSION)); hoodieBaseFile.setFileLen(fileLen); fs.setBaseFile(hoodieBaseFile); return fs; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java index 4574b34393d5..f40ba3be02e7 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java @@ -130,7 +130,7 @@ public 
void testMakeNewPath() { }).collect().get(0); assertEquals(newPathWithWriteToken.getKey().toString(), Paths.get(this.basePath, partitionPath, - FSUtils.makeBaseFileName(instantTime, newPathWithWriteToken.getRight(), fileName)).toString()); + FSUtils.makeBaseFileName(instantTime, newPathWithWriteToken.getRight(), fileName, BASE_FILE_EXTENSION)).toString()); } private HoodieWriteConfig makeHoodieClientConfig() { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/CompactionTestBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/CompactionTestBase.java index 98e94b1dd5e9..e425c4c4352f 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/CompactionTestBase.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/CompactionTestBase.java @@ -24,6 +24,7 @@ import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieFileGroupId; @@ -40,7 +41,6 @@ import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieIndexConfig; -import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.table.HoodieTable; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestTimelineServerBasedWriteMarkers.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestTimelineServerBasedWriteMarkers.java index b27f40e2addd..367229b18da4 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestTimelineServerBasedWriteMarkers.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestTimelineServerBasedWriteMarkers.java @@ -20,7 +20,6 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.config.HoodieCommonConfig; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.engine.HoodieLocalEngineContext; import org.apache.hudi.common.table.marker.MarkerType; import org.apache.hudi.common.table.view.FileSystemViewManager; @@ -66,15 +65,13 @@ public void setup() throws IOException { FileSystemViewStorageConfig storageConf = FileSystemViewStorageConfig.newBuilder().withStorageType(FileSystemViewStorageType.SPILLABLE_DISK).build(); - HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().build(); HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf()); try { timelineService = new TimelineService(localEngineContext, new Configuration(), TimelineService.Config.builder().serverPort(0).enableMarkerRequests(true).build(), FileSystem.get(new Configuration()), - FileSystemViewManager.createViewManager( - localEngineContext, metadataConfig, storageConf, HoodieCommonConfig.newBuilder().build())); + FileSystemViewManager.createViewManager(localEngineContext, storageConf, HoodieCommonConfig.newBuilder().build())); timelineService.startService(); } catch (Exception ex) { throw new RuntimeException(ex); diff --git 
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java index ca26cad75d10..0fa28f2ef422 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java @@ -540,7 +540,7 @@ public void testDowngradeSixToFiveShouldDeleteRecordIndexPartition() throws Exce .withEnableRecordIndex(true).build()) .build(); for (MetadataPartitionType partitionType : MetadataPartitionType.values()) { - metaClient.getTableConfig().setMetadataPartitionState(metaClient, partitionType, true); + metaClient.getTableConfig().setMetadataPartitionState(metaClient, partitionType.getPartitionPath(), true); } metaClient.getTableConfig().setMetadataPartitionsInflight(metaClient, MetadataPartitionType.values()); String metadataTableBasePath = Paths.get(basePath, METADATA_TABLE_FOLDER_PATH).toString(); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java index b59b1ea8d670..57a2793f0f66 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java @@ -87,7 +87,7 @@ public class HoodieClientTestUtils { */ public static SparkConf getSparkConfForTest(String appName) { SparkConf sparkConf = new SparkConf().setAppName(appName) - .setMaster("local[4]") + .setMaster("local[8]") .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") .set("spark.kryo.registrator", "org.apache.spark.HoodieSparkKryoRegistrar") .set("spark.sql.shuffle.partitions", "4") @@ -281,8 +281,7 @@ public static TimelineService initTimelineService( TimelineService.Config.builder().enableMarkerRequests(true) .serverPort(config.getViewStorageConfig().getRemoteViewServerPort()).build(), FileSystem.get(new Configuration()), - FileSystemViewManager.createViewManager(context, config.getMetadataConfig(), - config.getViewStorageConfig(), config.getCommonConfig())); + FileSystemViewManager.createViewManager(context, config.getViewStorageConfig(), config.getCommonConfig())); timelineService.startService(); LOG.info("Timeline service server port: " + timelineServicePort); return timelineService; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/providers/SparkProvider.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/providers/SparkProvider.java index 3a8bb1a300f1..91045034e5f3 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/providers/SparkProvider.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/providers/SparkProvider.java @@ -38,7 +38,7 @@ public interface SparkProvider extends org.apache.hudi.testutils.providers.Hoodi default SparkConf conf(Map overwritingConfigs) { SparkConf sparkConf = new SparkConf(); sparkConf.set("spark.app.name", getClass().getName()); - sparkConf.set("spark.master", "local[*]"); + sparkConf.set("spark.master", "local[8]"); sparkConf.set("spark.default.parallelism", "4"); sparkConf.set("spark.sql.shuffle.partitions", "4"); sparkConf.set("spark.driver.maxResultSize", "2g"); diff --git 
a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaCompatibility.java b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaCompatibility.java index 63629b31dddc..b7bf072b218b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaCompatibility.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaCompatibility.java @@ -36,6 +36,7 @@ import java.util.Collections; import java.util.Deque; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; @@ -283,6 +284,35 @@ private SchemaCompatibilityResult getCompatibility(final Schema reader, return result; } + private static String getLocationName(final Deque locations, Type readerType) { + StringBuilder sb = new StringBuilder(); + Iterator locationInfoIterator = locations.iterator(); + boolean addDot = false; + while (locationInfoIterator.hasNext()) { + if (addDot) { + sb.append("."); + } else { + addDot = true; + } + LocationInfo next = locationInfoIterator.next(); + sb.append(next.name); + //we check the reader type if we are at the last location. This is because + //if the type is array/map, that means the problem is that the field type + //of the writer is not array/map. If the type is something else, the problem + //is between the array element/map value of the reader and writer schemas + if (next.type.equals(Type.MAP)) { + if (locationInfoIterator.hasNext() || !readerType.equals(Type.MAP)) { + sb.append(".value"); + } + } else if (next.type.equals(Type.ARRAY)) { + if (locationInfoIterator.hasNext() || !readerType.equals(Type.ARRAY)) { + sb.append(".element"); + } + } + } + return sb.toString(); + } + /** * Calculates the compatibility of a reader/writer schema pair. * @@ -335,7 +365,7 @@ private SchemaCompatibilityResult calculateCompatibility(final Schema reader, fi for (final Schema writerBranch : writer.getTypes()) { SchemaCompatibilityResult compatibility = getCompatibility(reader, writerBranch, locations); if (compatibility.getCompatibility() == SchemaCompatibilityType.INCOMPATIBLE) { - String message = String.format("reader union lacking writer type: %s", writerBranch.getType()); + String message = String.format("reader union lacking writer type: %s for field: '%s'", writerBranch.getType(), getLocationName(locations, reader.getType())); result = result.mergedWith(SchemaCompatibilityResult.incompatible( SchemaIncompatibilityType.MISSING_UNION_BRANCH, reader, writer, message, asList(locations))); } @@ -407,7 +437,7 @@ private SchemaCompatibilityResult calculateCompatibility(final Schema reader, fi } // No branch in the reader union has been found compatible with the writer // schema: - String message = String.format("reader union lacking writer type: %s", writer.getType()); + String message = String.format("reader union lacking writer type: %s for field: '%s'", writer.getType(), getLocationName(locations, reader.getType())); return result.mergedWith(SchemaCompatibilityResult .incompatible(SchemaIncompatibilityType.MISSING_UNION_BRANCH, reader, writer, message, asList(locations))); } @@ -433,9 +463,10 @@ private SchemaCompatibilityResult checkReaderWriterRecordFields(final Schema rea // reader field must have a default value. 
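// Example of the improved diagnostic (illustrative; the exact rendering depends on the schemas involved):
// a reader field added without a default now produces a message roughly of the form
//   "Field '<dotted.location.path>.<fieldName>' has no default value"
// where the dotted prefix comes from getLocationName(...), instead of reporting only the bare field name.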
if (defaultValueAccessor.getDefaultValue(readerField) == null) { // reader field has no default value + String message = String.format("Field '%s.%s' has no default value", getLocationName(locations, readerField.schema().getType()), readerField.name()); result = result.mergedWith( SchemaCompatibilityResult.incompatible(SchemaIncompatibilityType.READER_FIELD_MISSING_DEFAULT_VALUE, - reader, writer, readerField.name(), asList(locations))); + reader, writer, message, asList(locations))); } } else { locations.addLast(new LocationInfo(readerField.name(), readerField.schema().getType())); @@ -482,8 +513,9 @@ private SchemaCompatibilityResult checkReaderEnumContainsAllWriterEnumSymbols(fi final Set symbols = new TreeSet<>(writer.getEnumSymbols()); symbols.removeAll(reader.getEnumSymbols()); if (!symbols.isEmpty()) { + String message = String.format("Field '%s' missing enum symbols: %s", getLocationName(locations, reader.getType()), symbols); result = SchemaCompatibilityResult.incompatible(SchemaIncompatibilityType.MISSING_ENUM_SYMBOLS, reader, - writer, symbols.toString(), asList(locations)); + writer, message, asList(locations)); } return result; } @@ -494,7 +526,7 @@ private SchemaCompatibilityResult checkFixedSize(final Schema reader, final Sche int actual = reader.getFixedSize(); int expected = writer.getFixedSize(); if (actual != expected) { - String message = String.format("expected: %d, found: %d", expected, actual); + String message = String.format("Fixed size field '%s' expected: %d, found: %d", getLocationName(locations, reader.getType()), expected, actual); result = SchemaCompatibilityResult.incompatible(SchemaIncompatibilityType.FIXED_SIZE_MISMATCH, reader, writer, message, asList(locations)); } @@ -511,7 +543,7 @@ private SchemaCompatibilityResult checkSchemaNames(final Schema reader, final Sc boolean shouldCheckNames = checkNaming && (locations.size() == 1 || locations.peekLast().type == Type.UNION); SchemaCompatibilityResult result = SchemaCompatibilityResult.compatible(); if (shouldCheckNames && !Objects.equals(reader.getFullName(), writer.getFullName())) { - String message = String.format("expected: %s", writer.getFullName()); + String message = String.format("Reader schema name: '%s' is not compatible with writer schema name: '%s'", reader.getFullName(), writer.getFullName()); result = SchemaCompatibilityResult.incompatible(SchemaIncompatibilityType.NAME_MISMATCH, reader, writer, message, asList(locations)); } @@ -520,8 +552,8 @@ private SchemaCompatibilityResult checkSchemaNames(final Schema reader, final Sc private SchemaCompatibilityResult typeMismatch(final Schema reader, final Schema writer, final Deque locations) { - String message = String.format("reader type: %s not compatible with writer type: %s", reader.getType(), - writer.getType()); + String message = String.format("reader type '%s' not compatible with writer type '%s' for field '%s'", reader.getType(), + writer.getType(), getLocationName(locations, reader.getType())); return SchemaCompatibilityResult.incompatible(SchemaIncompatibilityType.TYPE_MISMATCH, reader, writer, message, asList(locations)); } diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java index b0489d75ae01..ba747a63cbc0 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java @@ -19,14 +19,19 @@ package org.apache.hudi.avro; import 
org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieAvroSchemaException; +import org.apache.hudi.exception.InvalidUnionTypeException; +import org.apache.hudi.exception.MissingSchemaFieldException; +import org.apache.hudi.exception.SchemaBackwardsCompatibilityException; import org.apache.hudi.exception.SchemaCompatibilityException; -import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; import org.apache.avro.SchemaCompatibility; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; +import java.util.Deque; import java.util.List; import java.util.Objects; import java.util.Set; @@ -59,7 +64,7 @@ public static boolean isSchemaCompatible(Schema prevSchema, Schema newSchema, bo /** * Establishes whether {@code newSchema} is compatible w/ {@code prevSchema}, as * defined by Avro's {@link AvroSchemaCompatibility}. - * From avro's compatability standpoint, prevSchema is writer schema and new schema is reader schema. + * From avro's compatibility standpoint, prevSchema is writer schema and new schema is reader schema. * {@code newSchema} is considered compatible to {@code prevSchema}, iff data written using {@code prevSchema} * could be read by {@code newSchema} * @@ -92,20 +97,20 @@ public static boolean isSchemaCompatible(Schema prevSchema, Schema newSchema, bo * @return true if prev schema is a projection of new schema. */ public static boolean canProject(Schema prevSchema, Schema newSchema) { - return canProject(prevSchema, newSchema, Collections.emptySet()); + return findMissingFields(prevSchema, newSchema, Collections.emptySet()).isEmpty(); } /** - * Check that each field in the prevSchema can be populated in the newSchema except specified columns + * Check that each top level field in the prevSchema can be populated in the newSchema except specified columns * @param prevSchema prev schema. * @param newSchema new schema - * @return true if prev schema is a projection of new schema. + * @return List of fields that should be in the new schema */ - public static boolean canProject(Schema prevSchema, Schema newSchema, Set exceptCols) { + private static List findMissingFields(Schema prevSchema, Schema newSchema, Set exceptCols) { return prevSchema.getFields().stream() .filter(f -> !exceptCols.contains(f.name())) - .map(oldSchemaField -> SchemaCompatibility.lookupWriterField(newSchema, oldSchemaField)) - .noneMatch(Objects::isNull); + .filter(oldSchemaField -> SchemaCompatibility.lookupWriterField(newSchema, oldSchemaField) == null) + .collect(Collectors.toList()); } /** @@ -121,31 +126,6 @@ public static String getAvroRecordQualifiedName(String tableName) { return "hoodie." + sanitizedTableName + "." + sanitizedTableName + "_record"; } - /** - * Validate whether the {@code targetSchema} is a valid evolution of {@code sourceSchema}. - * Basically {@link #isCompatibleProjectionOf(Schema, Schema)} but type promotion in the - * opposite direction - */ - public static boolean isValidEvolutionOf(Schema sourceSchema, Schema targetSchema) { - return (sourceSchema.getType() == Schema.Type.NULL) || isProjectionOfInternal(sourceSchema, targetSchema, - AvroSchemaUtils::isAtomicSchemasCompatibleEvolution); - } - - /** - * Establishes whether {@code newReaderSchema} is compatible w/ {@code prevWriterSchema}, as - * defined by Avro's {@link AvroSchemaCompatibility}. 
- * {@code newReaderSchema} is considered compatible to {@code prevWriterSchema}, iff data written using {@code prevWriterSchema} - * could be read by {@code newReaderSchema} - * @param newReaderSchema new reader schema instance. - * @param prevWriterSchema prev writer schema instance. - * @return true if its compatible. else false. - */ - private static boolean isAtomicSchemasCompatibleEvolution(Schema newReaderSchema, Schema prevWriterSchema) { - // NOTE: Checking for compatibility of atomic types, we should ignore their - // corresponding fully-qualified names (as irrelevant) - return isSchemaCompatible(prevWriterSchema, newReaderSchema, false, true); - } - /** * Validate whether the {@code targetSchema} is a "compatible" projection of {@code sourceSchema}. * Only difference of this method from {@link #isStrictProjectionOf(Schema, Schema)} is @@ -337,7 +317,7 @@ public static Schema resolveUnionSchema(Schema schema, String fieldSchemaFullNam .orElse(null); if (nonNullType == null) { - throw new AvroRuntimeException( + throw new HoodieAvroSchemaException( String.format("Unsupported Avro UNION type %s: Only UNION of a null type and a non-null type is supported", schema)); } @@ -369,14 +349,14 @@ public static Schema resolveNullableSchema(Schema schema) { List innerTypes = schema.getTypes(); if (innerTypes.size() != 2) { - throw new AvroRuntimeException( + throw new HoodieAvroSchemaException( String.format("Unsupported Avro UNION type %s: Only UNION of a null type and a non-null type is supported", schema)); } Schema firstInnerType = innerTypes.get(0); Schema secondInnerType = innerTypes.get(1); if ((firstInnerType.getType() != Schema.Type.NULL && secondInnerType.getType() != Schema.Type.NULL) || (firstInnerType.getType() == Schema.Type.NULL && secondInnerType.getType() == Schema.Type.NULL)) { - throw new AvroRuntimeException( + throw new HoodieAvroSchemaException( String.format("Unsupported Avro UNION type %s: Only UNION of a null type and a non-null type is supported", schema)); } return firstInnerType.getType() == Schema.Type.NULL ? 
secondInnerType : firstInnerType; @@ -428,25 +408,118 @@ public static void checkSchemaCompatible( boolean allowProjection, Set dropPartitionColNames) throws SchemaCompatibilityException { - String errorMessage = null; - - if (!allowProjection && !canProject(tableSchema, writerSchema, dropPartitionColNames)) { - errorMessage = "Column dropping is not allowed"; + if (!allowProjection) { + List missingFields = findMissingFields(tableSchema, writerSchema, dropPartitionColNames); + if (!missingFields.isEmpty()) { + throw new MissingSchemaFieldException(missingFields.stream().map(Schema.Field::name).collect(Collectors.toList()), writerSchema, tableSchema); + } } // TODO(HUDI-4772) re-enable validations in case partition columns // being dropped from the data-file after fixing the write schema - if (dropPartitionColNames.isEmpty() && shouldValidate && !isSchemaCompatible(tableSchema, writerSchema)) { - errorMessage = "Failed schema compatibility check"; + if (dropPartitionColNames.isEmpty() && shouldValidate) { + AvroSchemaCompatibility.SchemaPairCompatibility result = + AvroSchemaCompatibility.checkReaderWriterCompatibility(writerSchema, tableSchema, true); + if (result.getType() != AvroSchemaCompatibility.SchemaCompatibilityType.COMPATIBLE) { + throw new SchemaBackwardsCompatibilityException(result, writerSchema, tableSchema); + } } + } - if (errorMessage != null) { - String errorDetails = String.format( - "%s\nwriterSchema: %s\ntableSchema: %s", - errorMessage, - writerSchema, - tableSchema); - throw new SchemaCompatibilityException(errorDetails); + /** + * Validate whether the {@code incomingSchema} is a valid evolution of {@code tableSchema}. + * + * @param incomingSchema schema of the incoming dataset + * @param tableSchema latest table schema + */ + public static void checkValidEvolution(Schema incomingSchema, Schema tableSchema) { + if (incomingSchema.getType() == Schema.Type.NULL) { + return; } + + //not really needed for `hoodie.write.set.null.for.missing.columns` but good to check anyway + List missingFields = new ArrayList<>(); + findAnyMissingFields(incomingSchema, tableSchema, new ArrayDeque<>(), missingFields); + if (!missingFields.isEmpty()) { + throw new MissingSchemaFieldException(missingFields, incomingSchema, tableSchema); + } + + //make sure that the table schema can be read using the incoming schema + AvroSchemaCompatibility.SchemaPairCompatibility result = + AvroSchemaCompatibility.checkReaderWriterCompatibility(incomingSchema, tableSchema, false); + if (result.getType() != AvroSchemaCompatibility.SchemaCompatibilityType.COMPATIBLE) { + throw new SchemaBackwardsCompatibilityException(result, incomingSchema, tableSchema); + } + } + + /** + * Find all fields in the latest table schema that are not in + * the incoming schema. 
+ */ + private static void findAnyMissingFields(Schema incomingSchema, + Schema latestTableSchema, + Deque visited, + List missingFields) { + findAnyMissingFieldsRec(incomingSchema, latestTableSchema, visited, + missingFields, incomingSchema, latestTableSchema); + } + + /** + * We want to pass the full schemas so that the error message has the entire schema to print from + */ + private static void findAnyMissingFieldsRec(Schema incomingSchema, + Schema latestTableSchema, + Deque visited, + List missingFields, + Schema fullIncomingSchema, + Schema fullTableSchema) { + if (incomingSchema.getType() == latestTableSchema.getType()) { + if (incomingSchema.getType() == Schema.Type.RECORD) { + visited.addLast(latestTableSchema.getName()); + for (Schema.Field targetField : latestTableSchema.getFields()) { + visited.addLast(targetField.name()); + Schema.Field sourceField = incomingSchema.getField(targetField.name()); + if (sourceField == null) { + missingFields.add(String.join(".", visited)); + } else { + findAnyMissingFieldsRec(sourceField.schema(), targetField.schema(), visited, + missingFields, fullIncomingSchema, fullTableSchema); + } + visited.removeLast(); + } + visited.removeLast(); + } else if (incomingSchema.getType() == Schema.Type.ARRAY) { + visited.addLast("element"); + findAnyMissingFieldsRec(incomingSchema.getElementType(), latestTableSchema.getElementType(), + visited, missingFields, fullIncomingSchema, fullTableSchema); + visited.removeLast(); + } else if (incomingSchema.getType() == Schema.Type.MAP) { + visited.addLast("value"); + findAnyMissingFieldsRec(incomingSchema.getValueType(), latestTableSchema.getValueType(), + visited, missingFields, fullIncomingSchema, fullTableSchema); + visited.removeLast(); + } else if (incomingSchema.getType() == Schema.Type.UNION) { + List incomingNestedSchemas = incomingSchema.getTypes(); + List latestTableNestedSchemas = latestTableSchema.getTypes(); + if (incomingNestedSchemas.size() != latestTableNestedSchemas.size()) { + throw new InvalidUnionTypeException(createSchemaErrorString( + String.format("Incoming batch field '%s' has union with %d types, while the table schema has %d types", + String.join(".", visited), incomingNestedSchemas.size(), latestTableNestedSchemas.size()), fullIncomingSchema, fullTableSchema)); + } + if (incomingNestedSchemas.size() > 2) { + throw new InvalidUnionTypeException(createSchemaErrorString( + String.format("Union for incoming batch field '%s' should not have more than 2 types but has %d", + String.join(".", visited), incomingNestedSchemas.size()), fullIncomingSchema, fullTableSchema)); + } + for (int i = 0; i < incomingNestedSchemas.size(); ++i) { + findAnyMissingFieldsRec(incomingNestedSchemas.get(i), latestTableNestedSchemas.get(i), visited, + missingFields, fullIncomingSchema, fullTableSchema); + } + } + } + } + + public static String createSchemaErrorString(String errorMessage, Schema writerSchema, Schema tableSchema) { + return String.format("%s\nwriterSchema: %s\ntableSchema: %s", errorMessage, writerSchema, tableSchema); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index b352099cb1e1..2172c7b1ae0f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -36,6 +36,7 @@ import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.ValidationUtils; import 
org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieAvroSchemaException; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.SchemaCompatibilityException; @@ -196,6 +197,20 @@ public static String avroToJsonString(GenericRecord record, boolean pretty) thro return avroToJsonHelper(record, pretty).toString(); } + /** + * Convert a given avro record to a JSON string. If the record contents are invalid, return the record.toString(). + * Use this method over {@link HoodieAvroUtils#avroToJsonString} when simply trying to print the record contents without any guarantees around their correctness. + * @param record The GenericRecord to convert + * @return a JSON string + */ + public static String safeAvroToJsonString(GenericRecord record) { + try { + return avroToJsonString(record, false); + } catch (Exception e) { + return record.toString(); + } + } + /** * Convert a given avro record to json and return the encoded bytes. * @@ -931,7 +946,9 @@ private static Object rewriteRecordWithNewSchema(Object oldRecord, Schema oldAvr private static Object rewriteRecordWithNewSchemaInternal(Object oldRecord, Schema oldSchema, Schema newSchema, Map renameCols, Deque fieldNames) { switch (newSchema.getType()) { case RECORD: - ValidationUtils.checkArgument(oldRecord instanceof IndexedRecord, "cannot rewrite record with different type"); + if (!(oldRecord instanceof IndexedRecord)) { + throw new SchemaCompatibilityException("cannot rewrite record with different type"); + } IndexedRecord indexedRecord = (IndexedRecord) oldRecord; List fields = newSchema.getFields(); GenericData.Record newRecord = new GenericData.Record(newSchema); @@ -963,15 +980,17 @@ private static Object rewriteRecordWithNewSchemaInternal(Object oldRecord, Schem } return newRecord; case ENUM: - ValidationUtils.checkArgument( - oldSchema.getType() == Schema.Type.STRING || oldSchema.getType() == Schema.Type.ENUM, - "Only ENUM or STRING type can be converted ENUM type"); + if (oldSchema.getType() != Schema.Type.STRING && oldSchema.getType() != Schema.Type.ENUM) { + throw new SchemaCompatibilityException(String.format("Only ENUM or STRING type can be converted ENUM type. 
Schema type was %s", oldSchema.getType().getName())); + } if (oldSchema.getType() == Schema.Type.STRING) { return new GenericData.EnumSymbol(newSchema, oldRecord); } return oldRecord; case ARRAY: - ValidationUtils.checkArgument(oldRecord instanceof Collection, "cannot rewrite record with different type"); + if (!(oldRecord instanceof Collection)) { + throw new SchemaCompatibilityException(String.format("Cannot rewrite %s as an array", oldRecord.getClass().getName())); + } Collection array = (Collection) oldRecord; List newArray = new ArrayList<>(array.size()); fieldNames.push("element"); @@ -981,7 +1000,9 @@ private static Object rewriteRecordWithNewSchemaInternal(Object oldRecord, Schem fieldNames.pop(); return newArray; case MAP: - ValidationUtils.checkArgument(oldRecord instanceof Map, "cannot rewrite record with different type"); + if (!(oldRecord instanceof Map)) { + throw new SchemaCompatibilityException(String.format("Cannot rewrite %s as a map", oldRecord.getClass().getName())); + } Map map = (Map) oldRecord; Map newMap = new HashMap<>(map.size(), 1.0f); fieldNames.push("value"); @@ -1029,7 +1050,7 @@ private static Object rewritePrimaryType(Object oldValue, Schema oldSchema, Sche BigDecimal bd = new BigDecimal(new BigInteger(bytes), decimal.getScale()).setScale(((Decimal) newSchema.getLogicalType()).getScale()); return DECIMAL_CONVERSION.toFixed(bd, newSchema, newSchema.getLogicalType()); } else { - throw new UnsupportedOperationException("Fixed type size change is not currently supported"); + throw new HoodieAvroSchemaException("Fixed type size change is not currently supported"); } } @@ -1045,7 +1066,7 @@ private static Object rewritePrimaryType(Object oldValue, Schema oldSchema, Sche } default: - throw new AvroRuntimeException("Unknown schema type: " + newSchema.getType()); + throw new HoodieAvroSchemaException("Unknown schema type: " + newSchema.getType()); } } else { return rewritePrimaryTypeWithDiffSchemaType(oldValue, oldSchema, newSchema); @@ -1130,7 +1151,7 @@ private static Object rewritePrimaryTypeWithDiffSchemaType(Object oldValue, Sche break; default: } - throw new AvroRuntimeException(String.format("cannot support rewrite value for schema type: %s since the old schema type is: %s", newSchema, oldSchema)); + throw new HoodieAvroSchemaException(String.format("cannot support rewrite value for schema type: %s since the old schema type is: %s", newSchema, oldSchema)); } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java index ac93de2d58fb..7ef766a2a3c5 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java @@ -199,7 +199,7 @@ public String toString() { } /** - * @return size of the the bloomfilter + * @return size of the bloomfilter */ public int getVectorSize() { return this.vectorSize; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/Key.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/Key.java index 37ae6e68f73a..f14d301ae3b3 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/Key.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/Key.java @@ -136,7 +136,7 @@ public int hashCode() { /** * Serialize the fields of this object to out. * - * @param out DataOuput to serialize this object into. + * @param out DataOutput to serialize this object into. 
* @throws IOException */ public void write(DataOutput out) throws IOException { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieCommonConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieCommonConfig.java index 8a1dbf04b3b5..1a4c2e317807 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieCommonConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieCommonConfig.java @@ -72,15 +72,6 @@ public class HoodieCommonConfig extends HoodieConfig { + "This enables us, to always extend the table's schema during evolution and never lose the data (when, for " + "ex, existing column is being dropped in a new batch)"); - public static final ConfigProperty MAKE_NEW_COLUMNS_NULLABLE = ConfigProperty - .key("hoodie.datasource.write.new.columns.nullable") - .defaultValue(false) - .markAdvanced() - .sinceVersion("0.14.0") - .withDocumentation("When a non-nullable column is added to datasource during a write operation, the write " - + " operation will fail schema compatibility check. Set this option to true will make the newly added " - + " column nullable to successfully complete the write operation."); - public static final ConfigProperty SET_NULL_FOR_MISSING_COLUMNS = ConfigProperty .key("hoodie.write.set.null.for.missing.columns") .defaultValue("false") diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieStorageConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieStorageConfig.java index d68b8326ca8c..f3ad183def43 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieStorageConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieStorageConfig.java @@ -226,6 +226,17 @@ public class HoodieStorageConfig extends HoodieConfig { + "and it is loaded at runtime. This is only required when trying to " + "override the existing write context."); + public static final ConfigProperty HOODIE_PARQUET_SPARK_ROW_WRITE_SUPPORT_CLASS = ConfigProperty + .key("hoodie.parquet.spark.row.write.support.class") + .defaultValue("org.apache.hudi.io.storage.row.HoodieRowParquetWriteSupport") + .markAdvanced() + .sinceVersion("0.15.0") + .withDocumentation("Provided write support class should extend HoodieRowParquetWriteSupport class " + + "and it is loaded at runtime. 
This is only required when trying to " + + "override the existing write context when `hoodie.datasource.write.row.writer.enable=true`."); + + + /** * @deprecated Use {@link #PARQUET_MAX_FILE_SIZE} and its methods instead */ diff --git a/hudi-common/src/main/java/org/apache/hudi/common/data/HoodieBaseListData.java b/hudi-common/src/main/java/org/apache/hudi/common/data/HoodieBaseListData.java index 7bc276b36e67..6f3dbfcef993 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/data/HoodieBaseListData.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/data/HoodieBaseListData.java @@ -53,7 +53,7 @@ protected Stream asStream() { protected boolean isEmpty() { if (lazy) { - return data.asLeft().findAny().isPresent(); + return !data.asLeft().findAny().isPresent(); } else { return data.asRight().isEmpty(); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieReaderContext.java b/hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieReaderContext.java index 9c556e84f82c..f230afed30f8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieReaderContext.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieReaderContext.java @@ -245,4 +245,11 @@ public long extractRecordPosition(T record, Schema schema, String fieldName, lon } return providedPositionIfNeeded; } + + /** + * Constructs engine specific delete record. + */ + public T constructRawDeleteRecord(Map metadata) { + return null; + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java index d84c677e3418..44e153dcce0a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java @@ -142,12 +142,6 @@ public static String makeWriteToken(int taskPartitionId, int stageId, long taskA return String.format("%d-%d-%d", taskPartitionId, stageId, taskAttemptId); } - // TODO: this should be removed - public static String makeBaseFileName(String instantTime, String writeToken, String fileId) { - return String.format("%s_%s_%s%s", fileId, writeToken, instantTime, - HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension()); - } - public static String makeBaseFileName(String instantTime, String writeToken, String fileId, String fileExtension) { return String.format("%s_%s_%s%s", fileId, writeToken, instantTime, fileExtension); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/SizeAwareDataOutputStream.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/SizeAwareDataOutputStream.java index 1cc3da6fe3cb..350665d2521c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/SizeAwareDataOutputStream.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/SizeAwareDataOutputStream.java @@ -55,7 +55,7 @@ public void write(byte[] v) throws IOException { } public void write(byte[] v, int offset, int len) throws IOException { - size.addAndGet(len + offset); + size.addAndGet((long) len + offset); outputStream.write(v, offset, len); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/DefaultHoodieRecordPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/DefaultHoodieRecordPayload.java index daa1dcb0207f..a3e6ce1f1331 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/DefaultHoodieRecordPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/DefaultHoodieRecordPayload.java @@ -37,9 
+37,11 @@ import java.util.concurrent.atomic.AtomicBoolean; /** + * Default payload. * {@link HoodieRecordPayload} impl that honors ordering field in both preCombine and combineAndGetUpdateValue. *

- * 1. preCombine - Picks the latest delta record for a key, based on an ordering field 2. combineAndGetUpdateValue/getInsertValue - Chooses the latest record based on ordering field value. + * 1. preCombine - Picks the latest delta record for a key, based on an ordering field + * 2. combineAndGetUpdateValue/getInsertValue - Chooses the latest record based on ordering field value. */ public class DefaultHoodieRecordPayload extends OverwriteWithLatestAvroPayload { public static final String METADATA_EVENT_TIME_KEY = "metadata.event_time.key"; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/FirstValueAvroPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/FirstValueAvroPayload.java new file mode 100644 index 000000000000..33da44e3bccd --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/FirstValueAvroPayload.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.model; + +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.util.ConfigUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.keygen.constant.KeyGeneratorOptions; + +import java.util.Properties; + +/** + * Payload clazz that is used for Hudi Table. + * + *

Simplified FirstValueAvroPayload Logic: + *

+ *
+ *  Illustration with simple data.
+ *  the ordering field is 'ts', the record key is 'id', and the schema is:
+ *  {
+ *    [
+ *      {"name":"id","type":"string"},
+ *      {"name":"ts","type":"long"},
+ *      {"name":"name","type":"string"},
+ *      {"name":"price","type":"string"}
+ *    ]
+ *  }
+ *
+ *  case 1
+ *  Current data:
+ *      id      ts      name    price
+ *      1       1       name_1  price_1
+ *  Insert data:
+ *      id      ts      name    price
+ *      1       1       name_2  price_2
+ *
+ *  Result data after #preCombine or #combineAndGetUpdateValue:
+ *      id      ts      name    price
+ *      1       1       name_1  price_1
+ *
+ *  If the ordering (precombine) values are equal, the first record is kept.
+ *
+ *  case 2
+ *  Current data:
+ *      id      ts      name    price
+ *      1       1       name_1  price_1
+ *  Insert data:
+ *      id      ts      name    price
+ *      1       2       name_2  price_2
+ *
+ *  Result data after #preCombine or #combineAndGetUpdateValue:
+ *      id      ts      name    price
+ *      1       2       name_2  price_2
+ *
+ *  All other behavior is inherited from DefaultHoodieRecordPayload.
+ * 
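+ *  A minimal sketch of the tie-breaking rule above, kept here for illustration only
+ *  (record1 and record2 are hypothetical GenericRecords, not part of this change):
+ *
+ *    FirstValueAvroPayload first  = new FirstValueAvroPayload(record1, 1L);
+ *    FirstValueAvroPayload second = new FirstValueAvroPayload(record2, 1L);
+ *    // equal ordering values: preCombine keeps the payload seen first
+ *    second.preCombine(first);   // returns 'first'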
+ */ +public class FirstValueAvroPayload extends DefaultHoodieRecordPayload { + + public FirstValueAvroPayload(GenericRecord record, Comparable orderingVal) { + super(record, orderingVal); + } + + public FirstValueAvroPayload(Option record) { + super(record); + } + + @Override + public OverwriteWithLatestAvroPayload preCombine(OverwriteWithLatestAvroPayload oldValue) { + if (oldValue.recordBytes.length == 0) { + // use natural order for delete record + return this; + } + if (oldValue.orderingVal.compareTo(orderingVal) >= 0) { + // pick the payload with greatest ordering value + return oldValue; + } else { + return this; + } + } + + @Override + protected boolean needUpdatingPersistedRecord(IndexedRecord currentValue, + IndexedRecord incomingRecord, Properties properties) { + /* + * Combining strategy here returns currentValue on disk if incoming record is older absolutely. + * The incoming record can be either a delete (sent as an upsert with _hoodie_is_deleted set to true) + * or an insert/update record. In any case, if it is older absolutely than the record in disk, the currentValue + * in disk is returned (to be rewritten with new commit time). + */ + String orderField = ConfigUtils.getOrderingField(properties); + if (orderField == null) { + return true; + } + boolean consistentLogicalTimestampEnabled = Boolean.parseBoolean(properties.getProperty( + KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(), + KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.defaultValue())); + Object persistedOrderingVal = HoodieAvroUtils.getNestedFieldVal((GenericRecord) currentValue, + orderField, + true, consistentLogicalTimestampEnabled); + Comparable incomingOrderingVal = (Comparable) HoodieAvroUtils.getNestedFieldVal((GenericRecord) incomingRecord, + orderField, + true, consistentLogicalTimestampEnabled); + return persistedOrderingVal == null || ((Comparable) persistedOrderingVal).compareTo(incomingOrderingVal) < 0; + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieDeltaWriteStat.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieDeltaWriteStat.java index 4fee7cdcb6ea..0593e280e6f9 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieDeltaWriteStat.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieDeltaWriteStat.java @@ -94,6 +94,20 @@ public Option>> getColumnStats return recordsStats; } + /** + * Make a new write status and copy basic fields from current object + * @return copy write status + */ + public HoodieDeltaWriteStat copy() { + HoodieDeltaWriteStat copy = new HoodieDeltaWriteStat(); + copy.setFileId(getFileId()); + copy.setPartitionPath(getPartitionPath()); + copy.setPrevCommit(getPrevCommit()); + copy.setBaseFile(getBaseFile()); + copy.setLogFiles(new ArrayList<>(getLogFiles())); + return copy; + } + private static Map> mergeRecordsStats( Map> stats1, Map> stats2) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java index d9fbd4cba05c..dac9b8288969 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java @@ -30,8 +30,6 @@ import java.util.Objects; /** - * Default payload. - * *
    *
  1. preCombine - Picks the latest delta record for a key, based on an ordering field; *
  2. combineAndGetUpdateValue/getInsertValue - Simply overwrites storage with latest delta record diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/RecordPayloadType.java b/hudi-common/src/main/java/org/apache/hudi/common/model/RecordPayloadType.java index d1eae004dc51..953a79348b7d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/RecordPayloadType.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/RecordPayloadType.java @@ -49,7 +49,7 @@ public enum RecordPayloadType { @EnumFieldDescription("Subclass of OVERWRITE_LATEST_AVRO used for delta streamer.") OVERWRITE_NON_DEF_LATEST_AVRO(OverwriteNonDefaultsWithLatestAvroPayload.class.getName()), - @EnumFieldDescription("Default payload used for delta streamer.") + @EnumFieldDescription("Honors ordering field in preCombine and overwrites storage with latest delta record in combineAndGetUpdateValue") OVERWRITE_LATEST_AVRO(OverwriteWithLatestAvroPayload.class.getName()), @EnumFieldDescription("Used for partial update to Hudi Table.") diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java index f70f9456a868..3cac820d4a81 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java @@ -24,12 +24,12 @@ import org.apache.hudi.common.config.OrderedProperties; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.BootstrapIndexType; +import org.apache.hudi.common.model.DefaultHoodieRecordPayload; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordMerger; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.HoodieTimelineTimeZone; -import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; import org.apache.hudi.common.model.RecordPayloadType; import org.apache.hudi.common.table.cdc.HoodieCDCSupplementalLoggingMode; import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator; @@ -167,14 +167,14 @@ public class HoodieTableConfig extends HoodieConfig { public static final ConfigProperty PAYLOAD_CLASS_NAME = ConfigProperty .key("hoodie.compaction.payload.class") - .defaultValue(OverwriteWithLatestAvroPayload.class.getName()) + .defaultValue(DefaultHoodieRecordPayload.class.getName()) .deprecatedAfter("1.0.0") .withDocumentation("Payload class to use for performing compactions, i.e merge delta logs with current base file and then " + " produce a new base file."); public static final ConfigProperty PAYLOAD_TYPE = ConfigProperty .key("hoodie.compaction.payload.type") - .defaultValue(RecordPayloadType.OVERWRITE_LATEST_AVRO.name()) + .defaultValue(RecordPayloadType.HOODIE_AVRO_DEFAULT.name()) .sinceVersion("1.0.0") .withDocumentation(RecordPayloadType.class); @@ -747,52 +747,52 @@ public boolean isMetadataPartitionAvailable(MetadataPartitionType partition) { /** * Enables or disables the specified metadata table partition. 
* - * @param partitionType The partition + * @param partitionPath The partition * @param enabled If true, the partition is enabled, else disabled */ - public void setMetadataPartitionState(HoodieTableMetaClient metaClient, MetadataPartitionType partitionType, boolean enabled) { - ValidationUtils.checkArgument(!partitionType.getPartitionPath().contains(CONFIG_VALUES_DELIMITER), - "Metadata Table partition path cannot contain a comma: " + partitionType.getPartitionPath()); + public void setMetadataPartitionState(HoodieTableMetaClient metaClient, String partitionPath, boolean enabled) { + ValidationUtils.checkArgument(!partitionPath.contains(CONFIG_VALUES_DELIMITER), + "Metadata Table partition path cannot contain a comma: " + partitionPath); Set partitions = getMetadataPartitions(); Set partitionsInflight = getMetadataPartitionsInflight(); if (enabled) { - partitions.add(partitionType.getPartitionPath()); - partitionsInflight.remove(partitionType.getPartitionPath()); - } else if (partitionType.equals(MetadataPartitionType.FILES)) { + partitions.add(partitionPath); + partitionsInflight.remove(partitionPath); + } else if (partitionPath.equals(MetadataPartitionType.FILES.getPartitionPath())) { // file listing partition is required for all other partitions to work // Disabling file partition will also disable all partitions partitions.clear(); partitionsInflight.clear(); } else { - partitions.remove(partitionType.getPartitionPath()); - partitionsInflight.remove(partitionType.getPartitionPath()); + partitions.remove(partitionPath); + partitionsInflight.remove(partitionPath); } setValue(TABLE_METADATA_PARTITIONS, partitions.stream().sorted().collect(Collectors.joining(CONFIG_VALUES_DELIMITER))); setValue(TABLE_METADATA_PARTITIONS_INFLIGHT, partitionsInflight.stream().sorted().collect(Collectors.joining(CONFIG_VALUES_DELIMITER))); update(metaClient.getFs(), new Path(metaClient.getMetaPath()), getProps()); - LOG.info(String.format("MDT %s partition %s has been %s", metaClient.getBasePathV2(), partitionType.name(), enabled ? "enabled" : "disabled")); + LOG.info(String.format("MDT %s partition %s has been %s", metaClient.getBasePathV2(), partitionPath, enabled ? "enabled" : "disabled")); } /** * Enables the specified metadata table partition as inflight. * - * @param partitionTypes The list of partitions to enable as inflight. + * @param partitionPaths The list of partitions to enable as inflight. 
*/ - public void setMetadataPartitionsInflight(HoodieTableMetaClient metaClient, List partitionTypes) { + public void setMetadataPartitionsInflight(HoodieTableMetaClient metaClient, List partitionPaths) { Set partitionsInflight = getMetadataPartitionsInflight(); - partitionTypes.forEach(t -> { - ValidationUtils.checkArgument(!t.getPartitionPath().contains(CONFIG_VALUES_DELIMITER), - "Metadata Table partition path cannot contain a comma: " + t.getPartitionPath()); - partitionsInflight.add(t.getPartitionPath()); + partitionPaths.forEach(partitionPath -> { + ValidationUtils.checkArgument(!partitionPath.contains(CONFIG_VALUES_DELIMITER), + "Metadata Table partition path cannot contain a comma: " + partitionPath); + partitionsInflight.add(partitionPath); }); setValue(TABLE_METADATA_PARTITIONS_INFLIGHT, partitionsInflight.stream().sorted().collect(Collectors.joining(CONFIG_VALUES_DELIMITER))); update(metaClient.getFs(), new Path(metaClient.getMetaPath()), getProps()); - LOG.info(String.format("MDT %s partitions %s have been set to inflight", metaClient.getBasePathV2(), partitionTypes)); + LOG.info(String.format("MDT %s partitions %s have been set to inflight", metaClient.getBasePathV2(), partitionPaths)); } public void setMetadataPartitionsInflight(HoodieTableMetaClient metaClient, MetadataPartitionType... partitionTypes) { - setMetadataPartitionsInflight(metaClient, Arrays.stream(partitionTypes).collect(Collectors.toList())); + setMetadataPartitionsInflight(metaClient, Arrays.stream(partitionTypes).map(MetadataPartitionType::getPartitionPath).collect(Collectors.toList())); } /** @@ -800,7 +800,7 @@ public void setMetadataPartitionsInflight(HoodieTableMetaClient metaClient, Meta * {@link HoodieTableConfig#TABLE_METADATA_PARTITIONS_INFLIGHT}. */ public void clearMetadataPartitions(HoodieTableMetaClient metaClient) { - setMetadataPartitionState(metaClient, MetadataPartitionType.FILES, false); + setMetadataPartitionState(metaClient, MetadataPartitionType.FILES.getPartitionPath(), false); } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java index a2455e08356b..6468d165568d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java @@ -636,11 +636,11 @@ private static void initTableMetaClient(Configuration hadoopConf, String basePat fs.mkdirs(auxiliaryFolder); } - initializeBootstrapDirsIfNotExists(hadoopConf, basePath, fs); + initializeBootstrapDirsIfNotExists(basePath, fs); HoodieTableConfig.create(fs, metaPathDir, props); } - public static void initializeBootstrapDirsIfNotExists(Configuration hadoopConf, String basePath, FileSystem fs) throws IOException { + public static void initializeBootstrapDirsIfNotExists(String basePath, FileSystem fs) throws IOException { // Create bootstrap index by partition folder if it does not exist final Path bootstrap_index_folder_by_partition = @@ -801,7 +801,7 @@ public String toString() { } public void initializeBootstrapDirsIfNotExists() throws IOException { - initializeBootstrapDirsIfNotExists(getHadoopConf(), basePath.toString(), getFs()); + initializeBootstrapDirsIfNotExists(basePath.toString(), getFs()); } private static HoodieTableMetaClient newMetaClient(Configuration conf, String basePath, boolean loadActiveTimelineOnLoad, diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java b/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java index a8f46c416f9d..9ab8ef52d19d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java @@ -37,8 +37,8 @@ import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.exception.HoodieIncompatibleSchemaException; import org.apache.hudi.exception.InvalidTableException; +import org.apache.hudi.internal.schema.HoodieSchemaException; import org.apache.hudi.internal.schema.InternalSchema; import org.apache.hudi.internal.schema.io.FileBasedInternalSchemaStorageManager; import org.apache.hudi.internal.schema.utils.SerDeHelper; @@ -571,7 +571,7 @@ public static Schema appendPartitionColumns(Schema dataSchema, Option boolean hasPartitionColNotInSchema = Arrays.stream(partitionFields.get()).anyMatch(pf -> !containsFieldInSchema(dataSchema, pf)); boolean hasPartitionColInSchema = Arrays.stream(partitionFields.get()).anyMatch(pf -> containsFieldInSchema(dataSchema, pf)); if (hasPartitionColNotInSchema && hasPartitionColInSchema) { - throw new HoodieIncompatibleSchemaException("Partition columns could not be partially contained w/in the data schema"); + throw new HoodieSchemaException("Partition columns could not be partially contained w/in the data schema"); } if (hasPartitionColNotInSchema) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieFileGroupRecordBuffer.java b/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieFileGroupRecordBuffer.java index ccc001e79c91..d9ba8bcd90eb 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieFileGroupRecordBuffer.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieFileGroupRecordBuffer.java @@ -34,8 +34,12 @@ public interface HoodieFileGroupRecordBuffer { enum BufferType { - KEY_BASED, - POSITION_BASED + // Merging based on record key. + KEY_BASED_MERGE, + // Merging based on record position. + POSITION_BASED_MERGE, + // No Merging at all. 
+ UNMERGED } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieKeyBasedFileGroupRecordBuffer.java b/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieKeyBasedFileGroupRecordBuffer.java index b4e32be8c656..0430a42e8639 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieKeyBasedFileGroupRecordBuffer.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieKeyBasedFileGroupRecordBuffer.java @@ -65,7 +65,7 @@ public HoodieKeyBasedFileGroupRecordBuffer(HoodieReaderContext readerContext, @Override public BufferType getBufferType() { - return BufferType.KEY_BASED; + return BufferType.KEY_BASED_MERGE; } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodiePositionBasedFileGroupRecordBuffer.java b/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodiePositionBasedFileGroupRecordBuffer.java index 4412713928ff..50e969343e15 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodiePositionBasedFileGroupRecordBuffer.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodiePositionBasedFileGroupRecordBuffer.java @@ -72,7 +72,7 @@ public HoodiePositionBasedFileGroupRecordBuffer(HoodieReaderContext readerCon @Override public BufferType getBufferType() { - return BufferType.POSITION_BASED; + return BufferType.POSITION_BASED_MERGE; } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieUnmergedFileGroupRecordBuffer.java b/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieUnmergedFileGroupRecordBuffer.java new file mode 100644 index 000000000000..76aa28308c44 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieUnmergedFileGroupRecordBuffer.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.common.table.read; + +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.engine.HoodieReaderContext; +import org.apache.hudi.common.model.DeleteRecord; +import org.apache.hudi.common.model.HoodieRecordMerger; +import org.apache.hudi.common.table.log.KeySpec; +import org.apache.hudi.common.table.log.block.HoodieDataBlock; +import org.apache.hudi.common.table.log.block.HoodieDeleteBlock; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.common.util.collection.ClosableIterator; +import org.apache.hudi.common.util.collection.ExternalSpillableMap; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; + +import org.apache.avro.Schema; + +import java.io.IOException; +import java.io.Serializable; +import java.util.Arrays; +import java.util.Iterator; +import java.util.Map; + +public class HoodieUnmergedFileGroupRecordBuffer extends HoodieBaseFileGroupRecordBuffer { + // Used to order the records in the record map. + private Long putIndex = 0L; + private Long getIndex = 0L; + + public HoodieUnmergedFileGroupRecordBuffer( + HoodieReaderContext readerContext, + Schema readerSchema, + Schema baseFileSchema, + Option partitionNameOverrideOpt, + Option partitionPathFieldOpt, + HoodieRecordMerger recordMerger, + TypedProperties payloadProps, + long maxMemorySizeInBytes, + String spillableMapBasePath, + ExternalSpillableMap.DiskMapType diskMapType, + boolean isBitCaskDiskMapCompressionEnabled) { + super(readerContext, readerSchema, baseFileSchema, partitionNameOverrideOpt, partitionPathFieldOpt, + recordMerger, payloadProps, maxMemorySizeInBytes, spillableMapBasePath, diskMapType, isBitCaskDiskMapCompressionEnabled); + } + + @Override + protected boolean doHasNext() throws IOException { + ValidationUtils.checkState(baseFileIterator != null, "Base file iterator has not been set yet"); + + // Output from base file first. + if (baseFileIterator.hasNext()) { + nextRecord = baseFileIterator.next(); + return true; + } + + // Output records based on the index to preserve the order. 
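+ // Descriptive note: log records were buffered under a monotonically increasing putIndex
+ // (see processNextDataRecord/processNextDeletedRecord), so draining the map by getIndex
+ // below replays them in log order without any merging.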
+ if (!records.isEmpty()) { + Pair, Map> nextRecordInfo = records.remove(getIndex++); + + if (nextRecordInfo == null) { + throw new HoodieException("Row index should be continuous!"); + } + + if (nextRecordInfo.getLeft().isPresent()) { + nextRecord = nextRecordInfo.getKey().get(); + } else { + nextRecord = readerContext.constructRawDeleteRecord(nextRecordInfo.getRight()); + } + return true; + } + + return false; + } + + @Override + public Iterator, Map>> getLogRecordIterator() { + return records.values().iterator(); + } + + @Override + public BufferType getBufferType() { + return BufferType.UNMERGED; + } + + @Override + public void processDataBlock(HoodieDataBlock dataBlock, Option keySpecOpt) { + Pair, Schema> recordsIteratorSchemaPair = + getRecordsIterator(dataBlock, keySpecOpt); + if (dataBlock.containsPartialUpdates()) { + throw new HoodieException("Partial update is not supported for unmerged record read"); + } + + try (ClosableIterator recordIterator = recordsIteratorSchemaPair.getLeft()) { + while (recordIterator.hasNext()) { + T nextRecord = recordIterator.next(); + Map metadata = readerContext.generateMetadataForRecord( + nextRecord, recordsIteratorSchemaPair.getRight()); + processNextDataRecord(nextRecord, metadata, putIndex++); + } + } + } + + @Override + public void processNextDataRecord(T record, Map metadata, Serializable index) { + records.put(index, Pair.of(Option.ofNullable(readerContext.seal(record)), metadata)); + } + + @Override + public void processDeleteBlock(HoodieDeleteBlock deleteBlock) { + Iterator it = Arrays.stream(deleteBlock.getRecordsToDelete()).iterator(); + while (it.hasNext()) { + DeleteRecord record = it.next(); + processNextDeletedRecord(record, putIndex++); + } + } + + @Override + public void processNextDeletedRecord(DeleteRecord deleteRecord, Serializable index) { + records.put(index, Pair.of(Option.empty(), readerContext.generateMetadataForRecord( + deleteRecord.getRecordKey(), deleteRecord.getPartitionPath(), deleteRecord.getOrderingValue()))); + } + + @Override + public boolean containsLogRecord(String recordKey) { + return records.containsKey(recordKey); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/read/IncrementalQueryAnalyzer.java b/hudi-common/src/main/java/org/apache/hudi/common/table/read/IncrementalQueryAnalyzer.java index 03596354bb1c..ca8ae575898f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/read/IncrementalQueryAnalyzer.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/read/IncrementalQueryAnalyzer.java @@ -111,6 +111,7 @@ public class IncrementalQueryAnalyzer { private final InstantRange.RangeType rangeType; private final boolean skipCompaction; private final boolean skipClustering; + private final boolean skipInsertOverwrite; private final int limit; private IncrementalQueryAnalyzer( @@ -120,6 +121,7 @@ private IncrementalQueryAnalyzer( InstantRange.RangeType rangeType, boolean skipCompaction, boolean skipClustering, + boolean skipInsertOverwrite, int limit) { this.metaClient = metaClient; this.startTime = Option.ofNullable(startTime); @@ -127,6 +129,7 @@ private IncrementalQueryAnalyzer( this.rangeType = rangeType; this.skipCompaction = skipCompaction; this.skipClustering = skipClustering; + this.skipInsertOverwrite = skipInsertOverwrite; this.limit = limit; } @@ -206,13 +209,13 @@ private static Pair, List> splitInstantByActiveness(List !instant.getAction().equals(HoodieTimeline.COMMIT_ACTION)); } if (skipClustering) { - timeline = timeline.filter(instant -> 
!ClusteringUtils.isClusteringInstant(instant, oriTimeline)); + timeline = timeline.filter(instant -> !ClusteringUtils.isCompletedClusteringInstant(instant, oriTimeline)); + } + if (skipInsertOverwrite) { + timeline = timeline.filter(instant -> !ClusteringUtils.isInsertOverwriteInstant(instant, oriTimeline)); } return timeline; } @@ -254,6 +260,7 @@ public static class Builder { private HoodieTableMetaClient metaClient; private boolean skipCompaction = false; private boolean skipClustering = false; + private boolean skipInsertOverwrite = false; /** * Maximum number of instants to read per run. */ @@ -292,6 +299,11 @@ public Builder skipClustering(boolean skipClustering) { return this; } + public Builder skipInsertOverwrite(boolean skipInsertOverwrite) { + this.skipInsertOverwrite = skipInsertOverwrite; + return this; + } + public Builder limit(int limit) { this.limit = limit; return this; @@ -299,7 +311,7 @@ public Builder limit(int limit) { public IncrementalQueryAnalyzer build() { return new IncrementalQueryAnalyzer(Objects.requireNonNull(this.metaClient), this.startTime, this.endTime, - Objects.requireNonNull(this.rangeType), this.skipCompaction, this.skipClustering, this.limit); + Objects.requireNonNull(this.rangeType), this.skipCompaction, this.skipClustering, this.skipInsertOverwrite, this.limit); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java index c9aba9ebc0dd..d6167ea61e7d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java @@ -72,6 +72,13 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline { private static final Logger LOG = LoggerFactory.getLogger(HoodieArchivedTimeline.class); + /** + * Used for loading the archived timeline incrementally, the earliest loaded instant time get memorized + * each time the timeline is loaded. The instant time is then used as the end boundary + * of the next loading. + */ + private String cursorInstant; + /** * Loads all the archived instants. * Note that there is no lazy loading, so this may not work if the archived timeline range is really long. 
@@ -80,6 +87,7 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline { public HoodieArchivedTimeline(HoodieTableMetaClient metaClient) { this.metaClient = metaClient; setInstants(this.loadInstants()); + this.cursorInstant = firstInstant().map(HoodieInstant::getTimestamp).orElse(null); // multiple casts will make this lambda serializable - // http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16 this.details = (Function> & Serializable) this::getInstantDetails; @@ -92,6 +100,7 @@ public HoodieArchivedTimeline(HoodieTableMetaClient metaClient) { public HoodieArchivedTimeline(HoodieTableMetaClient metaClient, String startTs) { this.metaClient = metaClient; setInstants(loadInstants(new StartTsFilter(startTs), LoadMode.METADATA)); + this.cursorInstant = startTs; // multiple casts will make this lambda serializable - // http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16 this.details = (Function> & Serializable) this::getInstantDetails; @@ -152,6 +161,26 @@ public HoodieArchivedTimeline reload() { return new HoodieArchivedTimeline(metaClient); } + /** + * Reloads the archived timeline incrementally with given beginning timestamp {@code startTs}. + * This method is not thread safe. + * + *

    IMPORTANT: this is for multiple loading of one static snapshot of the timeline, if there is new instants got archived, + * use {@link #reload()} instead. + */ + public HoodieArchivedTimeline reload(String startTs) { + if (this.cursorInstant != null) { + if (HoodieTimeline.compareTimestamps(startTs, LESSER_THAN, this.cursorInstant)) { + appendInstants(loadInstants(new ClosedOpenTimeRangeFilter(startTs, this.cursorInstant), LoadMode.METADATA)); + this.cursorInstant = startTs; + } + return this; + } else { + // a null cursor instant indicates an empty timeline + return new HoodieArchivedTimeline(metaClient, startTs); + } + } + private HoodieInstant readCommit(String instantTime, GenericRecord record, Option> instantDetailsConsumer) { final String action = record.get(ACTION_ARCHIVED_META_FIELD).toString(); final String completionTime = record.get(COMPLETION_TIME_ARCHIVED_META_FIELD).toString(); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java index d08194f266c9..9b231b4ee855 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java @@ -18,23 +18,22 @@ package org.apache.hudi.common.table.timeline; -import org.apache.hudi.common.model.HoodieCommitMetadata; -import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.timeline.HoodieInstant.State; import org.apache.hudi.common.util.ClusteringUtils; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.exception.HoodieException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.io.Serializable; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Set; @@ -75,15 +74,23 @@ public HoodieDefaultTimeline(Stream instants, Function instants) { this.instants = instants; - final MessageDigest md; - try { - md = MessageDigest.getInstance(HASHING_ALGORITHM); - this.instants.forEach(i -> md - .update(getUTF8Bytes(StringUtils.joinUsingDelim("_", i.getTimestamp(), i.getAction(), i.getState().name())))); - } catch (NoSuchAlgorithmException nse) { - throw new HoodieException(nse); + this.timelineHash = computeTimelineHash(this.instants); + clearState(); + } + + public void appendInstants(List newInstants) { + if (newInstants.isEmpty()) { + // the new instants is empty, nothing to do. + return; + } + if (this.instants.isEmpty()) { + // the existing instants is empty, set up the new ones directly. 
+ setInstants(newInstants); + return; } - this.timelineHash = StringUtils.toHexString(md.digest()); + this.instants = mergeInstants(newInstants, this.instants); + this.timelineHash = computeTimelineHash(this.instants); + clearState(); } /** @@ -502,25 +509,18 @@ public Option getFirstNonSavepointCommit() { } @Override - public Option getLastClusterCommit() { - return Option.fromJavaOptional(getCommitsTimeline().filter(s -> s.getAction().equalsIgnoreCase(HoodieTimeline.REPLACE_COMMIT_ACTION)) + public Option getLastClusteringInstant() { + return Option.fromJavaOptional(getCommitsTimeline().filter(s -> s.getAction().equalsIgnoreCase(HoodieTimeline.REPLACE_COMMIT_ACTION)) .getReverseOrderedInstants() - .filter(i -> { - try { - HoodieCommitMetadata metadata = TimelineUtils.getCommitMetadata(i, this); - return metadata.getOperationType().equals(WriteOperationType.CLUSTER); - } catch (IOException e) { - LOG.warn("Unable to read commit metadata for " + i + " due to " + e.getMessage()); - return false; - } - }).findFirst()); + .filter(i -> ClusteringUtils.isClusteringInstant(this, i)) + .findFirst()); } @Override public Option getLastPendingClusterInstant() { return Option.fromJavaOptional(filterPendingReplaceTimeline() .getReverseOrderedInstants() - .filter(i -> ClusteringUtils.isPendingClusteringInstant(this, i)).findFirst()); + .filter(i -> ClusteringUtils.isClusteringInstant(this, i)).findFirst()); } @Override @@ -567,6 +567,11 @@ private static Option findFirstNonSavepointCommit(List instants) { + final MessageDigest md; + try { + md = MessageDigest.getInstance(HASHING_ALGORITHM); + instants.forEach(i -> md + .update(getUTF8Bytes(StringUtils.joinUsingDelim("_", i.getTimestamp(), i.getAction(), i.getState().name())))); + } catch (NoSuchAlgorithmException nse) { + throw new HoodieException(nse); + } + return StringUtils.toHexString(md.digest()); + } + + /** + * Merges the given instant list into one and keep the sequence. + */ + private static List mergeInstants(List instants1, List instants2) { + ValidationUtils.checkArgument(!instants1.isEmpty() && !instants2.isEmpty(), "The instants to merge can not be empty"); + // some optimizations are based on the assumption all the instant lists are already sorted. + // skip when one list contains all the instants of the other one. 
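+ // Caller-side sketch (assumed usage, not part of this patch; `metaClient` and the
+ // timestamps are placeholders): the incremental reload introduced above only reads the
+ // archived instants in [newStartTs, previousStartTs) and appends them to the already
+ // loaded ones, instead of re-reading the whole archived timeline.
+ //   HoodieArchivedTimeline archived = new HoodieArchivedTimeline(metaClient, "20240201000000000");
+ //   archived = archived.reload("20240101000000000"); // loads only the older slice and moves the cursor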
+ final List merged; + if (HoodieTimeline.compareTimestamps(instants1.get(instants1.size() - 1).getTimestamp(), LESSER_THAN_OR_EQUALS, instants2.get(0).getTimestamp())) { + merged = new ArrayList<>(instants1); + merged.addAll(instants2); + } else if (HoodieTimeline.compareTimestamps(instants2.get(instants2.size() - 1).getTimestamp(), LESSER_THAN_OR_EQUALS, instants1.get(0).getTimestamp())) { + merged = new ArrayList<>(instants2); + merged.addAll(instants1); + } else { + merged = new ArrayList<>(instants1); + merged.addAll(instants2); + // sort the instants explicitly + Collections.sort(merged); + } + return merged; + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstantTimeGenerator.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstantTimeGenerator.java index a4f4b2cdf24f..efa9c6f120a8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstantTimeGenerator.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstantTimeGenerator.java @@ -120,7 +120,7 @@ public static String instantTimeMinusMillis(String timestamp, long milliseconds) } } - private static String fixInstantTimeCompatibility(String instantTime) { + public static String fixInstantTimeCompatibility(String instantTime) { // Enables backwards compatibility with non-millisecond granularity instants if (isSecondGranularity(instantTime)) { // Add milliseconds to the instant in order to parse successfully diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java index ac77e1eb6060..b02e797f23d5 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java @@ -397,9 +397,8 @@ public interface HoodieTimeline extends Serializable { /** * get the most recent cluster commit if present - * */ - public Option getLastClusterCommit(); + public Option getLastClusteringInstant(); /** * get the most recent pending cluster commit if present diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimeGeneratorBase.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimeGeneratorBase.java index 4acb8d2af54b..c8e56c6b6d87 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimeGeneratorBase.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimeGeneratorBase.java @@ -50,15 +50,7 @@ public abstract class TimeGeneratorBase implements TimeGenerator, Serializable { /** * The lock provider. */ - private volatile LockProvider lockProvider; - /** - * The maximum times to retry in case there are failures. - */ - private final int maxRetries; - /** - * The maximum time to wait for each time generation to resolve the clock skew issue on distributed hosts. - */ - private final long maxWaitTimeInMs; + private volatile LockProvider lockProvider; /** * The maximum time to block for acquiring a lock. */ @@ -79,24 +71,26 @@ public TimeGeneratorBase(HoodieTimeGeneratorConfig config, SerializableConfigura this.lockConfiguration = config.getLockConfiguration(); this.hadoopConf = hadoopConf; - maxRetries = lockConfiguration.getConfig().getInteger(LOCK_ACQUIRE_CLIENT_NUM_RETRIES_PROP_KEY, + // The maximum times to retry in case there are failures. 
+ int maxRetries = lockConfiguration.getConfig().getInteger(LOCK_ACQUIRE_CLIENT_NUM_RETRIES_PROP_KEY, Integer.parseInt(DEFAULT_LOCK_ACQUIRE_NUM_RETRIES)); lockAcquireWaitTimeInMs = lockConfiguration.getConfig().getInteger(LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, DEFAULT_LOCK_ACQUIRE_WAIT_TIMEOUT_MS); - maxWaitTimeInMs = lockConfiguration.getConfig().getLong(LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS_PROP_KEY, + // The maximum time to wait for each time generation to resolve the clock skew issue on distributed hosts. + long maxWaitTimeInMs = lockConfiguration.getConfig().getLong(LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS_PROP_KEY, Long.parseLong(DEFAULT_LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS)); lockRetryHelper = new RetryHelper<>(maxWaitTimeInMs, maxRetries, maxWaitTimeInMs, Arrays.asList(HoodieLockException.class, InterruptedException.class), "acquire timeGenerator lock"); } - protected LockProvider getLockProvider() { + protected LockProvider getLockProvider() { // Perform lazy initialization of lock provider only if needed if (lockProvider == null) { synchronized (this) { if (lockProvider == null) { String lockProviderClass = lockConfiguration.getConfig().getString("hoodie.write.lock.provider"); LOG.info("LockProvider for TimeGenerator: " + lockProviderClass); - lockProvider = (LockProvider) ReflectionUtils.loadClass(lockProviderClass, + lockProvider = (LockProvider) ReflectionUtils.loadClass(lockProviderClass, lockConfiguration, hadoopConf.get()); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimeGenerators.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimeGenerators.java index 3f394a47a6d9..4ae2f1054a03 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimeGenerators.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimeGenerators.java @@ -29,7 +29,7 @@ import static org.apache.hudi.common.config.HoodieCommonConfig.BASE_PATH; /** - * Holds all different {@link TimeGenerator} implementations, use {@link HoodieCommonConfig.BASE_PATH} + * Holds all different {@link TimeGenerator} implementations, use {@link org.apache.hudi.common.config.HoodieCommonConfig#BASE_PATH} * to cache the existing instances. */ public class TimeGenerators { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineDiffHelper.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineDiffHelper.java index aa7e2a30754d..a98b71aa5711 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineDiffHelper.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineDiffHelper.java @@ -37,8 +37,11 @@ public class TimelineDiffHelper { private static final Logger LOG = LoggerFactory.getLogger(TimelineDiffHelper.class); + private TimelineDiffHelper() { + } + public static TimelineDiffResult getNewInstantsForIncrementalSync(HoodieTimeline oldTimeline, - HoodieTimeline newTimeline) { + HoodieTimeline newTimeline) { HoodieTimeline oldT = oldTimeline.filterCompletedAndCompactionInstants(); HoodieTimeline newT = newTimeline.filterCompletedAndCompactionInstants(); @@ -57,14 +60,14 @@ public static TimelineDiffResult getNewInstantsForIncrementalSync(HoodieTimeline List newInstants = new ArrayList<>(); // Check If any pending compaction is lost. 
If so, do not allow incremental timeline sync - List> compactionInstants = getPendingCompactionTransitions(oldT, newT); + List> compactionInstants = getPendingActionTransitions(oldT.filterPendingCompactionTimeline(), + newT, HoodieTimeline.COMMIT_ACTION, HoodieTimeline.COMPACTION_ACTION); List lostPendingCompactions = compactionInstants.stream() .filter(instantPair -> instantPair.getValue() == null).map(Pair::getKey).collect(Collectors.toList()); if (!lostPendingCompactions.isEmpty()) { // If a compaction is unscheduled, fall back to complete refresh of fs view since some log files could have been // moved. Its unsafe to incrementally sync in that case. - LOG.warn("Some pending compactions are no longer in new timeline (unscheduled ?). They are :" - + lostPendingCompactions); + LOG.warn("Some pending compactions are no longer in new timeline (unscheduled ?). They are: {}", lostPendingCompactions); return TimelineDiffResult.UNSAFE_SYNC_RESULT; } List finishedCompactionInstants = compactionInstants.stream() @@ -74,7 +77,8 @@ public static TimelineDiffResult getNewInstantsForIncrementalSync(HoodieTimeline newTimeline.getInstantsAsStream().filter(instant -> !oldTimelineInstants.contains(instant)).forEach(newInstants::add); - List> logCompactionInstants = getPendingLogCompactionTransitions(oldTimeline, newTimeline); + List> logCompactionInstants = getPendingActionTransitions(oldTimeline.filterPendingLogCompactionTimeline(), + newTimeline, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.LOG_COMPACTION_ACTION); List finishedOrRemovedLogCompactionInstants = logCompactionInstants.stream() .filter(instantPair -> !instantPair.getKey().isCompleted() && (instantPair.getValue() == null || instantPair.getValue().isCompleted())) @@ -87,52 +91,24 @@ public static TimelineDiffResult getNewInstantsForIncrementalSync(HoodieTimeline } } - /** - * Getting pending log compaction transitions. - */ - private static List> getPendingLogCompactionTransitions(HoodieTimeline oldTimeline, - HoodieTimeline newTimeline) { - Set newTimelineInstants = newTimeline.getInstantsAsStream().collect(Collectors.toSet()); - - return oldTimeline.filterPendingLogCompactionTimeline().getInstantsAsStream().map(instant -> { - if (newTimelineInstants.contains(instant)) { - return Pair.of(instant, instant); - } else { - HoodieInstant logCompacted = - new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, instant.getTimestamp()); - if (newTimelineInstants.contains(logCompacted)) { - return Pair.of(instant, logCompacted); - } - HoodieInstant inflightLogCompacted = - new HoodieInstant(State.INFLIGHT, HoodieTimeline.LOG_COMPACTION_ACTION, instant.getTimestamp()); - if (newTimelineInstants.contains(inflightLogCompacted)) { - return Pair.of(instant, inflightLogCompacted); - } - return Pair.of(instant, null); - } - }).collect(Collectors.toList()); - } - - /** - * Getting pending compaction transitions. 
- */ - private static List> getPendingCompactionTransitions(HoodieTimeline oldTimeline, - HoodieTimeline newTimeline) { + private static List> getPendingActionTransitions(HoodieTimeline pendingActionTimelineFromOld, + HoodieTimeline newTimeline, + String completedAction, String pendingAction) { Set newTimelineInstants = newTimeline.getInstantsAsStream().collect(Collectors.toSet()); - return oldTimeline.filterPendingCompactionTimeline().getInstantsAsStream().map(instant -> { + return pendingActionTimelineFromOld.getInstantsAsStream().map(instant -> { if (newTimelineInstants.contains(instant)) { return Pair.of(instant, instant); } else { - HoodieInstant compacted = - new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, instant.getTimestamp()); - if (newTimelineInstants.contains(compacted)) { - return Pair.of(instant, compacted); + HoodieInstant completedInstant = + new HoodieInstant(State.COMPLETED, completedAction, instant.getTimestamp()); + if (newTimelineInstants.contains(completedInstant)) { + return Pair.of(instant, completedInstant); } - HoodieInstant inflightCompacted = - new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, instant.getTimestamp()); - if (newTimelineInstants.contains(inflightCompacted)) { - return Pair.of(instant, inflightCompacted); + HoodieInstant inflightInstant = + new HoodieInstant(State.INFLIGHT, pendingAction, instant.getTimestamp()); + if (newTimelineInstants.contains(inflightInstant)) { + return Pair.of(instant, inflightInstant); } return Pair.of(instant, null); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineUtils.java index 5e710800d6f4..dbe8f83fdbe1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineUtils.java @@ -81,13 +81,15 @@ public static List getWrittenPartitions(HoodieTimeline timeline) { } /** - * Returns partitions that have been deleted or marked for deletion in the given timeline. + * Returns partitions that have been deleted or marked for deletion in the timeline between given commit time range. * Does not include internal operations such as clean in the timeline. */ - public static List getDroppedPartitions(HoodieTimeline timeline) { + public static List getDroppedPartitions(HoodieTableMetaClient metaClient, Option lastCommitTimeSynced, Option lastCommitCompletionTimeSynced) { + HoodieTimeline timeline = lastCommitTimeSynced.isPresent() + ? 
TimelineUtils.getCommitsTimelineAfter(metaClient, lastCommitTimeSynced.get(), lastCommitCompletionTimeSynced) + : metaClient.getActiveTimeline(); HoodieTimeline completedTimeline = timeline.getWriteTimeline().filterCompletedInstants(); HoodieTimeline replaceCommitTimeline = completedTimeline.getCompletedReplaceTimeline(); - Map partitionToLatestDeleteTimestamp = replaceCommitTimeline.getInstantsAsStream() .map(instant -> { try { @@ -102,6 +104,21 @@ public static List getDroppedPartitions(HoodieTimeline timeline) { .flatMap(pair -> pair.getRight().getPartitionToReplaceFileIds().keySet().stream() .map(partition -> new AbstractMap.SimpleEntry<>(partition, pair.getLeft().getTimestamp())) ).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (existing, replace) -> replace)); + // cleaner could delete a partition when there are no active filegroups in the partition + HoodieTimeline cleanerTimeline = metaClient.getActiveTimeline().getCleanerTimeline().filterCompletedInstants(); + cleanerTimeline.getInstantsAsStream() + .forEach(instant -> { + try { + HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils.deserializeHoodieCleanMetadata(cleanerTimeline.getInstantDetails(instant).get()); + cleanMetadata.getPartitionMetadata().forEach((partition, partitionMetadata) -> { + if (partitionMetadata.getIsPartitionDeleted()) { + partitionToLatestDeleteTimestamp.put(partition, instant.getTimestamp()); + } + }); + } catch (IOException e) { + throw new HoodieIOException("Failed to get partitions cleaned at " + instant, e); + } + }); if (partitionToLatestDeleteTimestamp.isEmpty()) { // There is no dropped partitions @@ -244,7 +261,7 @@ public static boolean isClusteringCommit(HoodieTableMetaClient metaClient, Hoodi return false; } catch (IOException e) { - throw new HoodieIOException("Unable to read instant information: " + instant + " for " + metaClient.getBasePath(), e); + throw new HoodieIOException("Unable to read instant information: " + instant + " for " + metaClient.getBasePathV2().toString(), e); } } @@ -440,7 +457,7 @@ public static HoodieTimeline handleHollowCommitIfNeeded(HoodieTimeline completed } public enum HollowCommitHandling { - FAIL, BLOCK, USE_TRANSITION_TIME; + FAIL, BLOCK, USE_TRANSITION_TIME } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java b/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java index 76750171fb55..cdac0eeeb200 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java @@ -801,11 +801,20 @@ public final Stream getLatestBaseFilesInRange(List commi } @Override - public Void loadAllPartitions() { + public void loadAllPartitions() { try { readLock.lock(); ensureAllPartitionsLoadedCorrectly(); - return null; + } finally { + readLock.unlock(); + } + } + + @Override + public void loadPartitions(List partitionPaths) { + try { + readLock.lock(); + ensurePartitionsLoadedCorrectly(partitionPaths); } finally { readLock.unlock(); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewManager.java b/hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewManager.java index d5697e83eeba..172b5e41af77 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewManager.java +++ 
b/hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewManager.java @@ -31,7 +31,6 @@ import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.metadata.HoodieMetadataFileSystemView; import org.apache.hudi.metadata.HoodieTableMetadata; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,17 +65,19 @@ public class FileSystemViewManager { private final SerializableConfiguration conf; // The View Storage config used to store file-system views private final FileSystemViewStorageConfig viewStorageConfig; - // Map from Base-Path to View - private final ConcurrentHashMap globalViewMap; // Factory Map to create file-system views private final Function2 viewCreator; + // Map from Base-Path to View + private final ConcurrentHashMap globalViewMap; - private FileSystemViewManager(HoodieEngineContext context, FileSystemViewStorageConfig viewStorageConfig, + private FileSystemViewManager( + HoodieEngineContext context, + FileSystemViewStorageConfig viewStorageConfig, Function2 viewCreator) { this.conf = context.getHadoopConf(); this.viewStorageConfig = viewStorageConfig; - this.globalViewMap = new ConcurrentHashMap<>(); this.viewCreator = viewCreator; + this.globalViewMap = new ConcurrentHashMap<>(); } /** @@ -95,7 +96,7 @@ public void clearFileSystemView(String basePath) { * Main API to get the file-system view for the base-path. * * @param basePath Hoodie table base path - * @return + * @return {@link SyncableFileSystemView} */ public SyncableFileSystemView getFileSystemView(String basePath) { return globalViewMap.computeIfAbsent(basePath, (path) -> { @@ -108,10 +109,10 @@ public SyncableFileSystemView getFileSystemView(String basePath) { * Main API to get the file-system view for the base-path. * * @param metaClient HoodieTableMetaClient - * @return + * @return {@link SyncableFileSystemView} */ public SyncableFileSystemView getFileSystemView(HoodieTableMetaClient metaClient) { - return globalViewMap.computeIfAbsent(metaClient.getBasePath(), + return globalViewMap.computeIfAbsent(metaClient.getBasePathV2().toString(), (path) -> viewCreator.apply(metaClient, viewStorageConfig)); } @@ -130,12 +131,12 @@ public void close() { /** * Create RocksDB based file System view for a table. * - * @param viewConf View Storage Configuration + * @param viewConf View Storage Configuration * @param metaClient HoodieTableMetaClient - * @return + * @return {@link RocksDbBasedFileSystemView} */ private static RocksDbBasedFileSystemView createRocksDBBasedFileSystemView(FileSystemViewStorageConfig viewConf, - HoodieTableMetaClient metaClient) { + HoodieTableMetaClient metaClient) { HoodieTimeline timeline = metaClient.getActiveTimeline().filterCompletedAndCompactionInstants(); return new RocksDbBasedFileSystemView(metaClient, timeline, viewConf); } @@ -143,24 +144,25 @@ private static RocksDbBasedFileSystemView createRocksDBBasedFileSystemView(FileS /** * Create a spillable Map based file System view for a table. 
* - * @param viewConf View Storage Configuration + * @param viewConf View Storage Configuration * @param metaClient HoodieTableMetaClient - * @return + * @return {@link SpillableMapBasedFileSystemView} */ - private static SpillableMapBasedFileSystemView createSpillableMapBasedFileSystemView(FileSystemViewStorageConfig viewConf, - HoodieTableMetaClient metaClient, HoodieCommonConfig commonConfig) { - LOG.info("Creating SpillableMap based view for basePath " + metaClient.getBasePath()); + private static SpillableMapBasedFileSystemView createSpillableMapBasedFileSystemView( + FileSystemViewStorageConfig viewConf, HoodieTableMetaClient metaClient, HoodieCommonConfig commonConfig) { + LOG.info("Creating SpillableMap based view for basePath {}.", metaClient.getBasePathV2()); HoodieTimeline timeline = metaClient.getActiveTimeline().filterCompletedAndCompactionInstants(); return new SpillableMapBasedFileSystemView(metaClient, timeline, viewConf, commonConfig); } /** * Create an in-memory file System view for a table. - * */ - private static HoodieTableFileSystemView createInMemoryFileSystemView(HoodieMetadataConfig metadataConfig, FileSystemViewStorageConfig viewConf, - HoodieTableMetaClient metaClient, SerializableFunctionUnchecked metadataCreator) { - LOG.info("Creating InMemory based view for basePath " + metaClient.getBasePathV2()); + private static HoodieTableFileSystemView createInMemoryFileSystemView( + FileSystemViewStorageConfig viewConf, + HoodieTableMetaClient metaClient, + SerializableFunctionUnchecked metadataCreator) { + LOG.info("Creating InMemory based view for basePath {}.", metaClient.getBasePathV2()); HoodieTimeline timeline = metaClient.getActiveTimeline().filterCompletedAndCompactionInstants(); if (metaClient.getTableConfig().isMetadataTableAvailable()) { ValidationUtils.checkArgument(metadataCreator != null, "Metadata supplier is null. 
Cannot instantiate metadata file system view"); @@ -168,31 +170,30 @@ private static HoodieTableFileSystemView createInMemoryFileSystemView(HoodieMeta } if (metaClient.getMetaserverConfig().isMetaserverEnabled()) { return (HoodieTableFileSystemView) ReflectionUtils.loadClass(HOODIE_METASERVER_FILE_SYSTEM_VIEW_CLASS, - new Class[] {HoodieTableMetaClient.class, HoodieTimeline.class, HoodieMetaserverConfig.class}, + new Class[]{HoodieTableMetaClient.class, HoodieTimeline.class, HoodieMetaserverConfig.class}, metaClient, timeline, metaClient.getMetaserverConfig()); } return new HoodieTableFileSystemView(metaClient, timeline, viewConf.isIncrementalTimelineSyncEnabled()); } - public static HoodieTableFileSystemView createInMemoryFileSystemView(HoodieEngineContext engineContext, HoodieTableMetaClient metaClient, - HoodieMetadataConfig metadataConfig) { - + public static HoodieTableFileSystemView createInMemoryFileSystemView( + HoodieEngineContext engineContext, HoodieTableMetaClient metaClient, HoodieMetadataConfig metadataConfig) { return createInMemoryFileSystemViewWithTimeline(engineContext, metaClient, metadataConfig, metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants()); - } - public static HoodieTableFileSystemView createInMemoryFileSystemViewWithTimeline(HoodieEngineContext engineContext, - HoodieTableMetaClient metaClient, - HoodieMetadataConfig metadataConfig, - HoodieTimeline timeline) { - LOG.info("Creating InMemory based view for basePath " + metaClient.getBasePath()); + public static HoodieTableFileSystemView createInMemoryFileSystemViewWithTimeline( + HoodieEngineContext engineContext, + HoodieTableMetaClient metaClient, + HoodieMetadataConfig metadataConfig, + HoodieTimeline timeline) { + LOG.info("Creating InMemory based view for basePath {}.", metaClient.getBasePathV2()); if (metaClient.getTableConfig().isMetadataTableAvailable()) { return new HoodieMetadataFileSystemView(engineContext, metaClient, timeline, metadataConfig); } if (metaClient.getMetaserverConfig().isMetaserverEnabled()) { return (HoodieTableFileSystemView) ReflectionUtils.loadClass(HOODIE_METASERVER_FILE_SYSTEM_VIEW_CLASS, - new Class[] {HoodieTableMetaClient.class, HoodieTimeline.class, HoodieMetadataConfig.class}, + new Class[]{HoodieTableMetaClient.class, HoodieTimeline.class, HoodieMetadataConfig.class}, metaClient, timeline, metaClient.getMetaserverConfig()); } return new HoodieTableFileSystemView(metaClient, timeline); @@ -201,43 +202,40 @@ public static HoodieTableFileSystemView createInMemoryFileSystemViewWithTimeline /** * Create a remote file System view for a table. * - * @param viewConf View Storage Configuration + * @param viewConf View Storage Configuration * @param metaClient Hoodie Table MetaClient for the table. - * @return + * @return {@link RemoteHoodieTableFileSystemView} */ private static RemoteHoodieTableFileSystemView createRemoteFileSystemView(FileSystemViewStorageConfig viewConf, - HoodieTableMetaClient metaClient) { - LOG.info("Creating remote view for basePath " + metaClient.getBasePath() + ". Server=" - + viewConf.getRemoteViewServerHost() + ":" + viewConf.getRemoteViewServerPort() + ", Timeout=" - + viewConf.getRemoteTimelineClientTimeoutSecs()); + HoodieTableMetaClient metaClient) { + LOG.info("Creating remote view for basePath {}. 
Server={}:{}, Timeout={}", metaClient.getBasePathV2(), + viewConf.getRemoteViewServerHost(), viewConf.getRemoteViewServerPort(), viewConf.getRemoteTimelineClientTimeoutSecs()); return new RemoteHoodieTableFileSystemView(metaClient, viewConf); } + public static FileSystemViewManager createViewManagerWithTableMetadata( + final HoodieEngineContext context, + final HoodieMetadataConfig metadataConfig, + final FileSystemViewStorageConfig config, + final HoodieCommonConfig commonConfig) { + return createViewManager(context, config, commonConfig, + metaClient -> HoodieTableMetadata.create(context, metadataConfig, metaClient.getBasePathV2().toString(), true)); + } + public static FileSystemViewManager createViewManager(final HoodieEngineContext context, - final HoodieMetadataConfig metadataConfig, final FileSystemViewStorageConfig config, final HoodieCommonConfig commonConfig) { - return createViewManager(context, metadataConfig, config, commonConfig, null); - } - - public static FileSystemViewManager createViewManagerWithTableMetadata(final HoodieEngineContext context, - final HoodieMetadataConfig metadataConfig, - final FileSystemViewStorageConfig config, - final HoodieCommonConfig commonConfig) { - return createViewManager(context, metadataConfig, config, commonConfig, - metaClient -> HoodieTableMetadata.create(context, metadataConfig, metaClient.getBasePathV2().toString(), true)); + return createViewManager(context, config, commonConfig, null); } /** * Main Factory method for building file-system views. - * */ public static FileSystemViewManager createViewManager(final HoodieEngineContext context, - final HoodieMetadataConfig metadataConfig, final FileSystemViewStorageConfig config, final HoodieCommonConfig commonConfig, final SerializableFunctionUnchecked metadataCreator) { - LOG.info("Creating View Manager with storage type :" + config.getStorageType()); + LOG.info("Creating View Manager with storage type {}.", config.getStorageType()); switch (config.getStorageType()) { case EMBEDDED_KV_STORE: LOG.info("Creating embedded rocks-db based Table View"); @@ -250,7 +248,7 @@ public static FileSystemViewManager createViewManager(final HoodieEngineContext case MEMORY: LOG.info("Creating in-memory based Table View"); return new FileSystemViewManager(context, config, - (metaClient, viewConfig) -> createInMemoryFileSystemView(metadataConfig, viewConfig, metaClient, metadataCreator)); + (metaClient, viewConfig) -> createInMemoryFileSystemView(viewConfig, metaClient, metadataCreator)); case REMOTE_ONLY: LOG.info("Creating remote only table view"); return new FileSystemViewManager(context, config, (metaClient, viewConfig) -> createRemoteFileSystemView(viewConfig, @@ -263,7 +261,7 @@ public static FileSystemViewManager createViewManager(final HoodieEngineContext SyncableFileSystemView secondaryView; switch (viewConfig.getSecondaryStorageType()) { case MEMORY: - secondaryView = createInMemoryFileSystemView(metadataConfig, viewConfig, metaClient, metadataCreator); + secondaryView = createInMemoryFileSystemView(viewConfig, metaClient, metadataCreator); break; case EMBEDDED_KV_STORE: secondaryView = createRocksDBBasedFileSystemView(viewConfig, metaClient); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/view/PriorityBasedFileSystemView.java b/hudi-common/src/main/java/org/apache/hudi/common/table/view/PriorityBasedFileSystemView.java index 56d7c7cc25cf..1e4b1852d1b2 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/view/PriorityBasedFileSystemView.java +++ 
b/hudi-common/src/main/java/org/apache/hudi/common/table/view/PriorityBasedFileSystemView.java @@ -168,8 +168,29 @@ public Stream getLatestBaseFilesInRange(List commitsToRe } @Override - public Void loadAllPartitions() { - return execute(preferredView::loadAllPartitions, secondaryView::loadAllPartitions); + public void loadAllPartitions() { + execute( + () -> { + preferredView.loadAllPartitions(); + return null; + }, + () -> { + secondaryView.loadAllPartitions(); + return null; + }); + } + + @Override + public void loadPartitions(List partitionPaths) { + execute( + () -> { + preferredView.loadPartitions(partitionPaths); + return null; + }, + () -> { + secondaryView.loadPartitions(partitionPaths); + return null; + }); } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/view/RemoteHoodieTableFileSystemView.java b/hudi-common/src/main/java/org/apache/hudi/common/table/view/RemoteHoodieTableFileSystemView.java index 4363a7daf271..61c90c6eb020 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/view/RemoteHoodieTableFileSystemView.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/view/RemoteHoodieTableFileSystemView.java @@ -127,8 +127,10 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView, // POST Requests public static final String REFRESH_TABLE = String.format("%s/%s", BASE_URL, "refresh/"); public static final String LOAD_ALL_PARTITIONS_URL = String.format("%s/%s", BASE_URL, "loadallpartitions/"); + public static final String LOAD_PARTITIONS_URL = String.format("%s/%s", BASE_URL, "loadpartitions/"); public static final String PARTITION_PARAM = "partition"; + public static final String PARTITIONS_PARAM = "partitions"; public static final String BASEPATH_PARAM = "basepath"; public static final String INSTANT_PARAM = "instant"; public static final String MAX_INSTANT_PARAM = "maxinstant"; @@ -526,11 +528,21 @@ public boolean refresh() { } @Override - public Void loadAllPartitions() { + public void loadAllPartitions() { Map paramsMap = getParams(); try { executeRequest(LOAD_ALL_PARTITIONS_URL, paramsMap, BOOLEAN_TYPE_REFERENCE, RequestMethod.POST); - return null; + } catch (IOException e) { + throw new HoodieRemoteException(e); + } + } + + @Override + public void loadPartitions(List partitionPaths) { + try { + Map paramsMap = getParams(); + paramsMap.put(PARTITIONS_PARAM, OBJECT_MAPPER.writeValueAsString(partitionPaths)); + executeRequest(LOAD_PARTITIONS_URL, paramsMap, BOOLEAN_TYPE_REFERENCE, RequestMethod.POST); } catch (IOException e) { throw new HoodieRemoteException(e); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/view/TableFileSystemView.java b/hudi-common/src/main/java/org/apache/hudi/common/table/view/TableFileSystemView.java index 1bcd1de61bc5..87b3db142e67 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/view/TableFileSystemView.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/view/TableFileSystemView.java @@ -246,5 +246,11 @@ interface SliceView extends SliceViewWithLatestSlice { /** * Load all partition and file slices into view */ - Void loadAllPartitions(); + void loadAllPartitions(); + + /** + * Load all partition and file slices into view for the provided partition paths + * @param partitionPaths List of partition paths to load + */ + void loadPartitions(List partitionPaths); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java 
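+ // Illustrative sketch (assumed usage, not part of this patch; `fsView` is a placeholder
+ // SyncableFileSystemView and the partition paths are examples): the new loadPartitions
+ // API warms only the requested partitions instead of eagerly loading every partition.
+ //   fsView.loadPartitions(Arrays.asList("2024/01/01", "2024/01/02"));
+ //   fsView.loadAllPartitions(); // still available when the full view is really needed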
b/hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java index e9ae9cfbdfb6..64eb27453b3b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java @@ -75,15 +75,22 @@ public static Stream> getAllPendingClu } /** - * Checks if the replacecommit is clustering commit. + * Checks if the requested, inflight, or completed instant of replacecommit action + * is a clustering operation, by checking whether the requested instant contains + * a clustering plan. + * + * @param timeline Hudi timeline. + * @param replaceInstant the instant of replacecommit action to check. + * @return whether the instant is a clustering operation. */ - public static boolean isClusteringCommit(HoodieTableMetaClient metaClient, HoodieInstant pendingReplaceInstant) { - return getClusteringPlan(metaClient, pendingReplaceInstant).isPresent(); + public static boolean isClusteringInstant(HoodieTimeline timeline, HoodieInstant replaceInstant) { + return getClusteringPlan(timeline, replaceInstant).isPresent(); } /** * Get requested replace metadata from timeline. - * @param timeline used to get the bytes stored in the requested replace instant in the timeline + * + * @param timeline used to get the bytes stored in the requested replace instant in the timeline * @param pendingReplaceInstant can be in any state, because it will always be converted to requested state * @return option of the replace metadata if present, else empty * @throws IOException @@ -238,16 +245,8 @@ private static Map buildMetrics(List fileSlices) { public static List getPendingClusteringInstantTimes(HoodieTableMetaClient metaClient) { return metaClient.getActiveTimeline().filterPendingReplaceTimeline().getInstantsAsStream() - .filter(instant -> isPendingClusteringInstant(metaClient, instant)) - .collect(Collectors.toList()); - } - - public static boolean isPendingClusteringInstant(HoodieTableMetaClient metaClient, HoodieInstant instant) { - return getClusteringPlan(metaClient, instant).isPresent(); - } - - public static boolean isPendingClusteringInstant(HoodieTimeline timeline, HoodieInstant instant) { - return getClusteringPlan(timeline, instant).isPresent(); + .filter(instant -> isClusteringInstant(metaClient.getActiveTimeline(), instant)) + .collect(Collectors.toList()); } /** @@ -301,9 +300,11 @@ public static Option getEarliestInstantToRetainForClustering( } /** - * Returns whether the given instant {@code instant} is with clustering operation. + * @param instant Hudi instant to check. + * @param timeline Hudi timeline. + * @return whether the given {@code instant} is a completed clustering operation. */ - public static boolean isClusteringInstant(HoodieInstant instant, HoodieTimeline timeline) { + public static boolean isCompletedClusteringInstant(HoodieInstant instant, HoodieTimeline timeline) { if (!instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION)) { return false; } @@ -313,4 +314,19 @@ public static boolean isClusteringInstant(HoodieInstant instant, HoodieTimeline throw new HoodieException("Resolve replace commit metadata error for instant: " + instant, e); } } + + /** + * Returns whether the given instant {@code instant} is with insert overwrite operation. 
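+ // Illustrative sketch (assumed usage, not part of this patch; `timeline` and `instant`
+ // are placeholders): after the rename, the plan-based check and the completed-metadata
+ // check are distinct helpers with the argument orders shown in this patch.
+ //   boolean isClustering = ClusteringUtils.isClusteringInstant(timeline, instant);                    // any state, via clustering plan
+ //   boolean isCompleted = ClusteringUtils.isCompletedClusteringInstant(instant, timeline);            // completed replacecommit with CLUSTER op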
+ */ + public static boolean isInsertOverwriteInstant(HoodieInstant instant, HoodieTimeline timeline) { + if (!instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION)) { + return false; + } + try { + WriteOperationType opType = TimelineUtils.getCommitMetadata(instant, timeline).getOperationType(); + return opType.equals(WriteOperationType.INSERT_OVERWRITE) || opType.equals(WriteOperationType.INSERT_OVERWRITE_TABLE); + } catch (IOException e) { + throw new HoodieException("Resolve replace commit metadata error for instant: " + instant, e); + } + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/collection/ExternalSpillableMap.java b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/ExternalSpillableMap.java index 5df8f97d4b96..44b7a8020357 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/collection/ExternalSpillableMap.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/ExternalSpillableMap.java @@ -284,7 +284,10 @@ public Collection values() { } List result = new ArrayList<>(inMemoryMap.size() + diskBasedMap.size()); result.addAll(inMemoryMap.values()); - result.addAll(diskBasedMap.values()); + Iterator iterator = diskBasedMap.iterator(); + while (iterator.hasNext()) { + result.add(iterator.next()); + } return result; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BaseHoodieQueueBasedExecutor.java b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BaseHoodieQueueBasedExecutor.java index 20b9c802f605..f2843c56b031 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BaseHoodieQueueBasedExecutor.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BaseHoodieQueueBasedExecutor.java @@ -215,7 +215,7 @@ public E execute() { // to be interrupted as well Thread.currentThread().interrupt(); } - // throw if we have any other exception seen already. There is a chance that cancellation/closing of producers with CompeletableFuture wins before the actual exception + // throw if we have any other exception seen already. There is a chance that cancellation/closing of producers with CompletableFuture wins before the actual exception // is thrown. if (this.queue.getThrowable() != null) { throw new HoodieException(queue.getThrowable()); diff --git a/hudi-common/src/main/java/org/apache/hudi/exception/HoodieAvroSchemaException.java b/hudi-common/src/main/java/org/apache/hudi/exception/HoodieAvroSchemaException.java new file mode 100644 index 000000000000..c19c88c15c8b --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/exception/HoodieAvroSchemaException.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.exception; + +/** + * Thrown when we detect in Hudi code that a record schema + * violates Avro rules. This can happen even when using Spark + * because we use Avro schema internally + */ +public class HoodieAvroSchemaException extends SchemaCompatibilityException { + public HoodieAvroSchemaException(String message) { + super(message); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/exception/HoodieNullSchemaTypeException.java b/hudi-common/src/main/java/org/apache/hudi/exception/HoodieNullSchemaTypeException.java new file mode 100644 index 000000000000..ff4abadcde9e --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/exception/HoodieNullSchemaTypeException.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.exception; + +import org.apache.hudi.internal.schema.HoodieSchemaException; + +/** + * Thrown if a schema is null or empty. Or if a field has type null + * (null is ok if it is in a union with 1 (one) other type) + */ +public class HoodieNullSchemaTypeException extends HoodieSchemaException { + public HoodieNullSchemaTypeException(String message) { + super(message); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/exception/HoodieRecordCreationException.java b/hudi-common/src/main/java/org/apache/hudi/exception/HoodieRecordCreationException.java new file mode 100644 index 000000000000..dec70b369dae --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/exception/HoodieRecordCreationException.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.exception; + +/** + * Exception thrown during HoodieRecord construction for any failure + * that is not a KeyGeneration failure. An example of a failure would be if the + * record is malformed. 
+ */ +public class HoodieRecordCreationException extends HoodieException { + + public HoodieRecordCreationException(String message, Throwable t) { + super(message, t); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/exception/InvalidUnionTypeException.java b/hudi-common/src/main/java/org/apache/hudi/exception/InvalidUnionTypeException.java new file mode 100644 index 000000000000..370ad9438cc4 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/exception/InvalidUnionTypeException.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.exception; + +/** + * Thrown when a field is a union and at least one of the following is true: + *

+ * <ul>
+ *   <li>the incoming union and the latest table union have differing numbers of types</li>
+ *   <li>the incoming union has more than two types</li>
+ * </ul>
    + */ +public class InvalidUnionTypeException extends SchemaCompatibilityException { + public InvalidUnionTypeException(String message) { + super(message); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/exception/MissingSchemaFieldException.java b/hudi-common/src/main/java/org/apache/hudi/exception/MissingSchemaFieldException.java new file mode 100644 index 000000000000..4727ff814f10 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/exception/MissingSchemaFieldException.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.exception; + +import org.apache.hudi.avro.AvroSchemaUtils; + +import org.apache.avro.Schema; + +import java.util.List; + +/** + * Thrown when the schema of the incoming data is missing fields that are in the table schema. + */ +public class MissingSchemaFieldException extends SchemaCompatibilityException { + + public MissingSchemaFieldException(List missingFields, Schema writerSchema, Schema tableSchema) { + super(constructExceptionMessage(missingFields, writerSchema, tableSchema)); + } + + private static String constructExceptionMessage(List missingFields, Schema writerSchema, Schema tableSchema) { + return AvroSchemaUtils.createSchemaErrorString( + "Schema validation failed due to missing field. Fields missing from incoming schema: {" + + String.join(", ", missingFields) + "}", writerSchema, tableSchema); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/exception/SchemaBackwardsCompatibilityException.java b/hudi-common/src/main/java/org/apache/hudi/exception/SchemaBackwardsCompatibilityException.java new file mode 100644 index 000000000000..c38d13c9e292 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/exception/SchemaBackwardsCompatibilityException.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.exception; + +import org.apache.hudi.avro.AvroSchemaCompatibility; +import org.apache.hudi.avro.AvroSchemaUtils; + +import org.apache.avro.Schema; + +import java.util.stream.Collectors; + +/** + * Thrown when there is a backwards compatibility issue with the incoming schema. + * i.e. when the incoming schema cannot be used to read older data files + */ +public class SchemaBackwardsCompatibilityException extends SchemaCompatibilityException { + + public SchemaBackwardsCompatibilityException(AvroSchemaCompatibility.SchemaPairCompatibility compatibility, Schema writerSchema, Schema tableSchema) { + super(constructExceptionMessage(compatibility, writerSchema, tableSchema)); + } + + private static String constructExceptionMessage(AvroSchemaCompatibility.SchemaPairCompatibility compatibility, Schema writerSchema, Schema tableSchema) { + return AvroSchemaUtils.createSchemaErrorString("Schema validation backwards compatibility check failed with the following issues: {" + + compatibility.getResult().getIncompatibilities().stream() + .map(incompatibility -> incompatibility.getType().name() + ": " + incompatibility.getMessage()) + .collect(Collectors.joining(", ")) + "}", writerSchema, tableSchema); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/exception/SchemaCompatibilityException.java b/hudi-common/src/main/java/org/apache/hudi/exception/SchemaCompatibilityException.java index 478ec0d42697..92d2f6744c14 100644 --- a/hudi-common/src/main/java/org/apache/hudi/exception/SchemaCompatibilityException.java +++ b/hudi-common/src/main/java/org/apache/hudi/exception/SchemaCompatibilityException.java @@ -18,10 +18,12 @@ package org.apache.hudi.exception; +import org.apache.hudi.internal.schema.HoodieSchemaException; + /** * An exception thrown when schema has compatibility problems. */ -public class SchemaCompatibilityException extends HoodieException { +public class SchemaCompatibilityException extends HoodieSchemaException { public SchemaCompatibilityException(String message) { super(message); diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/convert/AvroInternalSchemaConverter.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/convert/AvroInternalSchemaConverter.java index 69977563e85f..0f8d1606f2ad 100644 --- a/hudi-common/src/main/java/org/apache/hudi/internal/schema/convert/AvroInternalSchemaConverter.java +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/convert/AvroInternalSchemaConverter.java @@ -19,6 +19,7 @@ package org.apache.hudi.internal.schema.convert; import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieNullSchemaTypeException; import org.apache.hudi.internal.schema.HoodieSchemaException; import org.apache.hudi.internal.schema.InternalSchema; import org.apache.hudi.internal.schema.Type; @@ -32,6 +33,7 @@ import java.util.ArrayList; import java.util.Deque; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -79,7 +81,7 @@ public static Schema convert(InternalSchema internalSchema, String name) { * but for the metadata table HoodieMetadata.avsc uses a trick where we have a bunch of * different types wrapped in record for col stats. * - * @param Schema avro schema. + * @param schema avro schema. * @return an avro Schema where null is the first. 
*/ public static Schema fixNullOrdering(Schema schema) { @@ -156,6 +158,29 @@ public static Type buildTypeFromAvroSchema(Schema schema) { return visitAvroSchemaToBuildType(schema, visited, true, nextId); } + private static void checkNullType(Type fieldType, String fieldName, Deque visited) { + if (fieldType == null) { + StringBuilder sb = new StringBuilder(); + sb.append("Field '"); + Iterator visitedIterator = visited.descendingIterator(); + while (visitedIterator.hasNext()) { + sb.append(visitedIterator.next()); + sb.append("."); + } + sb.append(fieldName); + sb.append("' has type null"); + throw new HoodieNullSchemaTypeException(sb.toString()); + } else if (fieldType.typeId() == Type.TypeID.ARRAY) { + visited.push(fieldName); + checkNullType(((Types.ArrayType) fieldType).elementType(), "element", visited); + visited.pop(); + } else if (fieldType.typeId() == Type.TypeID.MAP) { + visited.push(fieldName); + checkNullType(((Types.MapType) fieldType).valueType(), "value", visited); + visited.pop(); + } + } + /** * Converts an avro schema into hudi type. * @@ -182,7 +207,9 @@ private static Type visitAvroSchemaToBuildType(Schema schema, Deque visi } nextId.set(nextAssignId + fields.size()); fields.stream().forEach(field -> { - fieldTypes.add(visitAvroSchemaToBuildType(field.schema(), visited, false, nextId)); + Type fieldType = visitAvroSchemaToBuildType(field.schema(), visited, false, nextId); + checkNullType(fieldType, field.name(), visited); + fieldTypes.add(fieldType); }); visited.pop(); List internalFields = new ArrayList<>(fields.size()); diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/AvroSchemaEvolutionUtils.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/AvroSchemaEvolutionUtils.java index e714d99f0e0e..cf7e87f457b8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/AvroSchemaEvolutionUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/AvroSchemaEvolutionUtils.java @@ -25,11 +25,9 @@ import java.util.ArrayList; import java.util.List; -import java.util.Map; import java.util.TreeMap; import java.util.stream.Collectors; -import static org.apache.hudi.common.config.HoodieCommonConfig.MAKE_NEW_COLUMNS_NULLABLE; import static org.apache.hudi.common.util.CollectionUtils.reduce; import static org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter.convert; @@ -136,10 +134,9 @@ public static Schema reconcileSchema(Schema incomingSchema, Schema oldTableSchem * * @param sourceSchema source schema that needs reconciliation * @param targetSchema target schema that source schema will be reconciled against - * @param opts config options * @return schema (based off {@code source} one) that has nullability constraints and datatypes reconciled */ - public static Schema reconcileSchemaRequirements(Schema sourceSchema, Schema targetSchema, Map opts) { + public static Schema reconcileSchemaRequirements(Schema sourceSchema, Schema targetSchema) { if (targetSchema.getType() == Schema.Type.NULL || targetSchema.getFields().isEmpty()) { return sourceSchema; } @@ -153,14 +150,12 @@ public static Schema reconcileSchemaRequirements(Schema sourceSchema, Schema tar List colNamesSourceSchema = sourceInternalSchema.getAllColsFullName(); List colNamesTargetSchema = targetInternalSchema.getAllColsFullName(); - boolean makeNewColsNullable = "true".equals(opts.get(MAKE_NEW_COLUMNS_NULLABLE.key())); List nullableUpdateColsInSource = new ArrayList<>(); List typeUpdateColsInSource = new ArrayList<>(); 
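+ // Illustrative sketch (assumed usage, not part of this patch; `incomingSchema` and
+ // `tableSchema` are placeholder Avro Schemas): the reconciliation entry point now
+ // takes only the two schemas, with no options map.
+ //   Schema reconciled = AvroSchemaEvolutionUtils.reconcileSchemaRequirements(incomingSchema, tableSchema);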
colNamesSourceSchema.forEach(field -> { // handle columns that needs to be made nullable - if ((makeNewColsNullable && !colNamesTargetSchema.contains(field)) - || colNamesTargetSchema.contains(field) && sourceInternalSchema.findField(field).isOptional() != targetInternalSchema.findField(field).isOptional()) { + if (colNamesTargetSchema.contains(field) && sourceInternalSchema.findField(field).isOptional() != targetInternalSchema.findField(field).isOptional()) { nullableUpdateColsInSource.add(field); } // handle columns that needs type to be updated diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReaderFactory.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReaderFactory.java index 0a511d10b031..84aed905a4d1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReaderFactory.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReaderFactory.java @@ -18,6 +18,7 @@ package org.apache.hudi.io.storage; +import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.util.Option; import org.apache.avro.Schema; @@ -29,15 +30,18 @@ import java.io.IOException; public class HoodieAvroFileReaderFactory extends HoodieFileReaderFactory { + + @Override protected HoodieFileReader newParquetFileReader(Configuration conf, Path path) { return new HoodieAvroParquetReader(conf, path); } - protected HoodieFileReader newHFileFileReader(boolean useNativeHFileReader, + @Override + protected HoodieFileReader newHFileFileReader(HoodieConfig hoodieConfig, Configuration conf, Path path, Option schemaOption) throws IOException { - if (useNativeHFileReader) { + if (isUseNativeHFileReaderEnabled(hoodieConfig)) { return new HoodieNativeAvroHFileReader(conf, path, schemaOption); } CacheConfig cacheConfig = new CacheConfig(conf); @@ -47,14 +51,15 @@ protected HoodieFileReader newHFileFileReader(boolean useNativeHFileReader, return new HoodieHBaseAvroHFileReader(conf, path, cacheConfig); } - protected HoodieFileReader newHFileFileReader(boolean useNativeHFileReader, + @Override + protected HoodieFileReader newHFileFileReader(HoodieConfig hoodieConfig, Configuration conf, Path path, FileSystem fs, byte[] content, Option schemaOption) throws IOException { - if (useNativeHFileReader) { + if (isUseNativeHFileReaderEnabled(hoodieConfig)) { return new HoodieNativeAvroHFileReader(conf, content, schemaOption); } CacheConfig cacheConfig = new CacheConfig(conf); diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java index f4b4bedc468b..ac2736f8829a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java @@ -84,11 +84,9 @@ public HoodieFileReader getFileReader(HoodieConfig hoodieConfig, Option schemaOption) throws IOException { switch (format) { case PARQUET: - return this.newParquetFileReader(conf, path); + return newParquetFileReader(conf, path); case HFILE: - boolean useNativeHFileReader = - hoodieConfig.getBooleanOrDefault(HoodieReaderConfig.USE_NATIVE_HFILE_READER); - return newHFileFileReader(useNativeHFileReader, conf, path, schemaOption); + return newHFileFileReader(hoodieConfig, conf, path, schemaOption); case ORC: return newOrcFileReader(conf, path); default: @@ -96,15 +94,13 @@ public HoodieFileReader getFileReader(HoodieConfig hoodieConfig, } } - public 
HoodieFileReader getContentReader(HoodieConfig config, + public HoodieFileReader getContentReader(HoodieConfig hoodieConfig, Configuration conf, Path path, HoodieFileFormat format, FileSystem fs, byte[] content, Option schemaOption) throws IOException { switch (format) { case HFILE: - boolean useNativeHFileReader = - config.getBooleanOrDefault(HoodieReaderConfig.USE_NATIVE_HFILE_READER); - return newHFileFileReader(useNativeHFileReader, conf, path, fs, content, schemaOption); + return newHFileFileReader(hoodieConfig, conf, path, fs, content, schemaOption); default: throw new UnsupportedOperationException(format + " format not supported yet."); } @@ -114,13 +110,13 @@ protected HoodieFileReader newParquetFileReader(Configuration conf, Path path) { throw new UnsupportedOperationException(); } - protected HoodieFileReader newHFileFileReader(boolean useNativeHFileReader, + protected HoodieFileReader newHFileFileReader(HoodieConfig hoodieConfig, Configuration conf, Path path, Option schemaOption) throws IOException { throw new UnsupportedOperationException(); } - protected HoodieFileReader newHFileFileReader(boolean useNativeHFileReader, + protected HoodieFileReader newHFileFileReader(HoodieConfig hoodieConfig, Configuration conf, Path path, FileSystem fs, byte[] content, Option schemaOption) @@ -138,4 +134,8 @@ public HoodieFileReader newBootstrapFileReader(HoodieFileReader skeletonFileRead Object[] partitionValues) { throw new UnsupportedOperationException(); } + + protected static boolean isUseNativeHFileReaderEnabled(HoodieConfig hoodieConfig) { + return hoodieConfig.getBooleanOrDefault(HoodieReaderConfig.USE_NATIVE_HFILE_READER); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java index 10c094fdfb68..4d0b8a5d662f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java @@ -49,6 +49,8 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import javax.annotation.Nullable; @@ -99,7 +101,7 @@ * During compaction on the table, the deletions are merged with additions and hence records are pruned. */ public class HoodieMetadataPayload implements HoodieRecordPayload { - + private static final Logger LOG = LoggerFactory.getLogger(HoodieMetadataPayload.class); /** * Type of the record. 
This can be an enum in the schema but Avro1.8 * has a bug - https://issues.apache.org/jira/browse/AVRO-1810 @@ -217,7 +219,7 @@ public HoodieMetadataPayload(Option recordOpt) { // Otherwise, it has to be present or the record would be considered invalid if (bloomFilterRecord == null) { checkArgument(record.getSchema().getField(SCHEMA_FIELD_ID_BLOOM_FILTER) == null, - String.format("Valid %s record expected for type: %s", SCHEMA_FIELD_ID_BLOOM_FILTER, METADATA_TYPE_COLUMN_STATS)); + String.format("Valid %s record expected for type: %s", SCHEMA_FIELD_ID_BLOOM_FILTER, METADATA_TYPE_BLOOM_FILTER)); } else { bloomFilterMetadata = new HoodieMetadataBloomFilter( (String) bloomFilterRecord.get(BLOOM_FILTER_FIELD_TYPE), @@ -550,27 +552,34 @@ private Map combineFileSystemMetadata(HoodieMeta // - First we merge records from all of the delta log-files // - Then we merge records from base-files with the delta ones (coming as a result // of the previous step) - (oldFileInfo, newFileInfo) -> - // NOTE: We can’t assume that MT update records will be ordered the same way as actual - // FS operations (since they are not atomic), therefore MT record merging should be a - // _commutative_ & _associative_ operation (ie one that would work even in case records - // will get re-ordered), which is - // - Possible for file-sizes (since file-sizes will ever grow, we can simply - // take max of the old and new records) - // - Not possible for is-deleted flags* - // - // *However, we’re assuming that the case of concurrent write and deletion of the same - // file is _impossible_ -- it would only be possible with concurrent upsert and - // rollback operation (affecting the same log-file), which is implausible, b/c either - // of the following have to be true: - // - We’re appending to failed log-file (then the other writer is trying to - // rollback it concurrently, before it’s own write) - // - Rollback (of completed instant) is running concurrently with append (meaning - // that restore is running concurrently with a write, which is also nut supported - // currently) - newFileInfo.getIsDeleted() - ? 
null - : new HoodieMetadataFileInfo(Math.max(newFileInfo.getSize(), oldFileInfo.getSize()), false)); + (oldFileInfo, newFileInfo) -> { + // NOTE: We can’t assume that MT update records will be ordered the same way as actual + // FS operations (since they are not atomic), therefore MT record merging should be a + // _commutative_ & _associative_ operation (ie one that would work even in case records + // will get re-ordered), which is + // - Possible for file-sizes (since file-sizes will ever grow, we can simply + // take max of the old and new records) + // - Not possible for is-deleted flags* + // + // *However, we’re assuming that the case of concurrent write and deletion of the same + // file is _impossible_ -- it would only be possible with concurrent upsert and + // rollback operation (affecting the same log-file), which is implausible, b/c either + // of the following have to be true: + // - We’re appending to failed log-file (then the other writer is trying to + // rollback it concurrently, before its own write) + // - Rollback (of completed instant) is running concurrently with append (meaning + // that restore is running concurrently with a write, which is also not supported + // currently) + if (newFileInfo.getIsDeleted()) { + if (oldFileInfo.getIsDeleted()) { + LOG.warn("A file is repeatedly deleted in the files partition of the metadata table: " + key); + return newFileInfo; + } + return null; + } + return new HoodieMetadataFileInfo( + Math.max(newFileInfo.getSize(), oldFileInfo.getSize()), false); + }); }); } diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java index e19570443054..e81cd63f66e8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java @@ -68,6 +68,7 @@ import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.util.CollectionUtils; +import org.apache.hudi.common.util.ExternalFilePathUtil; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.HoodieRecordUtils; import org.apache.hudi.common.util.Option; @@ -328,11 +329,11 @@ public static void deleteMetadataTable(String basePath, HoodieEngineContext cont * * @param basePath - base path of the dataset * @param context - instance of {@link HoodieEngineContext} - * @param partitionType - {@link MetadataPartitionType} of the partition to delete + * @param partitionPath - Partition path of the partition to delete */ - public static void deleteMetadataPartition(String basePath, HoodieEngineContext context, MetadataPartitionType partitionType) { + public static void deleteMetadataPartition(String basePath, HoodieEngineContext context, String partitionPath) { HoodieTableMetaClient dataMetaClient = HoodieTableMetaClient.builder().setBasePath(basePath).setConf(context.getHadoopConf().get()).build(); - deleteMetadataTablePartition(dataMetaClient, context, partitionType, false); + deleteMetadataTablePartition(dataMetaClient, context, partitionPath, false); } /** @@ -341,13 +342,13 @@ public static void deleteMetadataPartition(String basePath, HoodieEngineContext * @param basePath base path of the dataset * @param context instance of {@link HoodieEngineContext}. 
*/ - public static boolean metadataPartitionExists(String basePath, HoodieEngineContext context, MetadataPartitionType partitionType) { + public static boolean metadataPartitionExists(String basePath, HoodieEngineContext context, String partitionPath) { final String metadataTablePath = HoodieTableMetadata.getMetadataTableBasePath(basePath); FileSystem fs = HadoopFSUtils.getFs(metadataTablePath, context.getHadoopConf().get()); try { - return fs.exists(new Path(metadataTablePath, partitionType.getPartitionPath())); + return fs.exists(new Path(metadataTablePath, partitionPath)); } catch (Exception e) { - throw new HoodieIOException(String.format("Failed to check metadata partition %s exists.", partitionType.getPartitionPath())); + throw new HoodieIOException(String.format("Failed to check metadata partition %s exists.", partitionPath)); } } @@ -455,6 +456,19 @@ private static List getPartitionsAdded(HoodieCommitMetadata commitMetada .collect(Collectors.toList()); } + /** + * Returns all the incremental write partition paths as a set with the given commits metadata. + * + * @param metadataList The commits metadata + * @return the partition path set + */ + public static Set getWritePartitionPaths(List metadataList) { + return metadataList.stream() + .map(HoodieCommitMetadata::getWritePartitionPaths) + .flatMap(Collection::stream) + .collect(Collectors.toSet()); + } + /** * Convert commit action metadata to bloom filter records. * @@ -684,7 +698,7 @@ public static HoodieData convertMetadataToColumnStatsRecords(Hoodi String partitionPath = deleteFileInfoPair.getLeft(); String filePath = deleteFileInfoPair.getRight(); - if (filePath.endsWith(HoodieFileFormat.PARQUET.getFileExtension())) { + if (filePath.endsWith(HoodieFileFormat.PARQUET.getFileExtension()) || ExternalFilePathUtil.isExternallyCreatedFile(filePath)) { return getColumnStatsRecords(partitionPath, filePath, dataTableMetaClient, columnsToIndex, true).iterator(); } return Collections.emptyListIterator(); @@ -1500,41 +1514,41 @@ public static String deleteMetadataTable(HoodieTableMetaClient dataMetaClient, H * @param context instance of {@code HoodieEngineContext}. * @param backup Whether metadata table should be backed up before deletion. If true, the table is backed up to the * directory with name metadata_. 
- * @param partitionType The partition to delete + * @param partitionPath The partition to delete * @return The backup directory if backup was requested, null otherwise */ public static String deleteMetadataTablePartition(HoodieTableMetaClient dataMetaClient, HoodieEngineContext context, - MetadataPartitionType partitionType, boolean backup) { - if (partitionType.equals(MetadataPartitionType.FILES)) { + String partitionPath, boolean backup) { + if (partitionPath.equals(MetadataPartitionType.FILES.getPartitionPath())) { return deleteMetadataTable(dataMetaClient, context, backup); } - final Path metadataTablePartitionPath = new Path(HoodieTableMetadata.getMetadataTableBasePath(dataMetaClient.getBasePath()), partitionType.getPartitionPath()); + final Path metadataTablePartitionPath = new Path(HoodieTableMetadata.getMetadataTableBasePath(dataMetaClient.getBasePath()), partitionPath); FileSystem fs = HadoopFSUtils.getFs(metadataTablePartitionPath.toString(), context.getHadoopConf().get()); - dataMetaClient.getTableConfig().setMetadataPartitionState(dataMetaClient, partitionType, false); + dataMetaClient.getTableConfig().setMetadataPartitionState(dataMetaClient, partitionPath, false); try { if (!fs.exists(metadataTablePartitionPath)) { return null; } } catch (FileNotFoundException e) { // Ignoring exception as metadata table already does not exist - LOG.debug("Metadata table partition " + partitionType + " not found at path " + metadataTablePartitionPath); + LOG.debug("Metadata table partition " + partitionPath + " not found at path " + metadataTablePartitionPath); return null; } catch (Exception e) { - throw new HoodieMetadataException(String.format("Failed to check existence of MDT partition %s at path %s: ", partitionType, metadataTablePartitionPath), e); + throw new HoodieMetadataException(String.format("Failed to check existence of MDT partition %s at path %s: ", partitionPath, metadataTablePartitionPath), e); } if (backup) { final Path metadataPartitionBackupPath = new Path(metadataTablePartitionPath.getParent().getParent(), - String.format(".metadata_%s_%s", partitionType.getPartitionPath(), dataMetaClient.createNewInstantTime(false))); - LOG.info(String.format("Backing up MDT partition %s to %s before deletion", partitionType, metadataPartitionBackupPath)); + String.format(".metadata_%s_%s", partitionPath, dataMetaClient.createNewInstantTime(false))); + LOG.info(String.format("Backing up MDT partition %s to %s before deletion", partitionPath, metadataPartitionBackupPath)); try { if (fs.rename(metadataTablePartitionPath, metadataPartitionBackupPath)) { return metadataPartitionBackupPath.toString(); } } catch (Exception e) { // If rename fails, we will try to delete the table instead - LOG.error(String.format("Failed to backup MDT partition %s using rename", partitionType), e); + LOG.error(String.format("Failed to backup MDT partition %s using rename", partitionPath), e); } } else { LOG.info("Deleting metadata table partition from " + metadataTablePartitionPath); @@ -1640,7 +1654,7 @@ public static String createLogCompactionTimestamp(String timestamp) { * * @param partitionType Type of the partition for which the file group count is to be estimated. * @param recordCount The number of records expected to be written. - * @param averageRecordSize Average size of each record to be writen. + * @param averageRecordSize Average size of each record to be written. * @param minFileGroupCount Minimum number of file groups to use. * @param maxFileGroupCount Maximum number of file groups to use. 
* @param growthFactor By what factor are the records (recordCount) expected to grow? diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java index 6d8fa651e51b..1417f2f67dcc 100644 --- a/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java @@ -18,6 +18,7 @@ package org.apache.hudi.avro; +import org.apache.hudi.exception.SchemaBackwardsCompatibilityException; import org.apache.hudi.exception.SchemaCompatibilityException; import org.apache.avro.Schema; @@ -231,6 +232,30 @@ public void testIsCompatiblePartitionDropCols(boolean shouldValidate) { AvroSchemaUtils.checkSchemaCompatible(FULL_SCHEMA, SHORT_SCHEMA, shouldValidate, false, Collections.singleton("c")); } + private static final Schema BROKEN_SCHEMA = new Schema.Parser().parse("{\n" + + " \"type\" : \"record\",\n" + + " \"name\" : \"broken\",\n" + + " \"fields\" : [ {\n" + + " \"name\" : \"a\",\n" + + " \"type\" : [ \"null\", \"int\" ],\n" + + " \"default\" : null\n" + + " }, {\n" + + " \"name\" : \"b\",\n" + + " \"type\" : [ \"null\", \"int\" ],\n" + + " \"default\" : null\n" + + " }, {\n" + + " \"name\" : \"c\",\n" + + " \"type\" : [ \"null\", \"boolean\" ],\n" + + " \"default\" : null\n" + + " } ]\n" + + "}"); + + @Test + public void testBrokenSchema() { + assertThrows(SchemaBackwardsCompatibilityException.class, + () -> AvroSchemaUtils.checkSchemaCompatible(FULL_SCHEMA, BROKEN_SCHEMA, true, false, Collections.emptySet())); + } + /* [HUDI-7045] should uncomment this test @Test public void testAppendFieldsToSchemaDedupNested() { diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java index eb20081475ff..f1e5f606602c 100644 --- a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java @@ -629,4 +629,27 @@ public void testAddMetadataFields() { assertEquals("custom_schema_property_value", schemaWithMetadata.getProp("custom_schema_property")); assertEquals("value", originalFieldsInUpdatedSchema.get(0).getProp("custom_field_property")); } + + @Test + void testSafeAvroToJsonStringMissingRequiredField() { + Schema schema = new Schema.Parser().parse(EXAMPLE_SCHEMA); + GenericRecord record = new GenericData.Record(schema); + record.put("non_pii_col", "val1"); + record.put("pii_col", "val2"); + record.put("timestamp", 3.5); + String jsonString = HoodieAvroUtils.safeAvroToJsonString(record); + assertEquals("{\"timestamp\": 3.5, \"_row_key\": null, \"non_pii_col\": \"val1\", \"pii_col\": \"val2\"}", jsonString); + } + + @Test + void testSafeAvroToJsonStringBadDataType() { + Schema schema = new Schema.Parser().parse(EXAMPLE_SCHEMA); + GenericRecord record = new GenericData.Record(schema); + record.put("non_pii_col", "val1"); + record.put("_row_key", "key"); + record.put("pii_col", "val2"); + record.put("timestamp", "foo"); + String jsonString = HoodieAvroUtils.safeAvroToJsonString(record); + assertEquals("{\"timestamp\": \"foo\", \"_row_key\": \"key\", \"non_pii_col\": \"val1\", \"pii_col\": \"val2\"}", jsonString); + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/client/transaction/lock/TestInProcessLockProvider.java b/hudi-common/src/test/java/org/apache/hudi/client/transaction/lock/TestInProcessLockProvider.java index 60f74bb79963..6e7dcd7e3fa2 
100644 --- a/hudi-common/src/test/java/org/apache/hudi/client/transaction/lock/TestInProcessLockProvider.java +++ b/hudi-common/src/test/java/org/apache/hudi/client/transaction/lock/TestInProcessLockProvider.java @@ -166,6 +166,9 @@ public void testLockIdentity() throws InterruptedException { Assertions.assertTrue(writer3Completed.get()); Assertions.assertEquals(lockProviderList.get(0).getLock(), lockProviderList.get(1).getLock()); Assertions.assertEquals(lockProviderList.get(1).getLock(), lockProviderList.get(2).getLock()); + + writer2.interrupt(); + writer3.interrupt(); } @Test @@ -254,6 +257,8 @@ public void run() { // } Assertions.assertTrue(writer2Completed.get()); + + writer2.interrupt(); } @Test @@ -317,6 +322,9 @@ public void run() { } Assertions.assertTrue(writer2Stream1Completed.get()); Assertions.assertTrue(writer2Stream2Completed.get()); + + writer2Stream1.interrupt(); + writer2Stream2.interrupt(); } @Test @@ -373,6 +381,8 @@ public void testTryLockReAcquisitionByDifferentThread() { assertDoesNotThrow(() -> { inProcessLockProvider.unlock(); }); + + writer2.interrupt(); } @Test @@ -414,6 +424,9 @@ public void testTryUnLockByDifferentThread() { // unlock by main thread should succeed. inProcessLockProvider.unlock(); }); + + writer2.interrupt(); + writer3.interrupt(); } @Test @@ -472,6 +485,9 @@ public void testTryLockAcquisitionBeforeTimeOutFromTwoThreads() { // Make sure both writers actually completed good Assertions.assertTrue(writer1Completed.get()); Assertions.assertTrue(writer2Completed.get()); + + writer1.interrupt(); + writer2.interrupt(); } @Test diff --git a/hudi-common/src/test/java/org/apache/hudi/common/data/TestHoodieListData.java b/hudi-common/src/test/java/org/apache/hudi/common/data/TestHoodieListData.java index ea19f128d1a9..795318f5e01b 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/data/TestHoodieListData.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/data/TestHoodieListData.java @@ -27,12 +27,15 @@ import org.junit.jupiter.params.provider.MethodSource; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; class TestHoodieListData { @@ -72,4 +75,23 @@ public void testGetNumPartitions() { IntStream.rangeClosed(0, 100).boxed().collect(Collectors.toList())); assertEquals(1, listData.getNumPartitions()); } + + @Test + public void testIsEmpty() { + // HoodieListData bearing eager execution semantic + HoodieData listData = HoodieListData.eager( + IntStream.rangeClosed(0, 100).boxed().collect(Collectors.toList())); + assertFalse(listData.isEmpty()); + + HoodieData emptyListData = HoodieListData.eager(Collections.emptyList()); + assertTrue(emptyListData.isEmpty()); + + // HoodieListData bearing lazy execution semantic + listData = HoodieListData.lazy( + IntStream.rangeClosed(0, 100).boxed().collect(Collectors.toList())); + assertFalse(listData.isEmpty()); + + emptyListData = HoodieListData.lazy(Collections.emptyList()); + assertTrue(emptyListData.isEmpty()); + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java index 350d1a02072c..7d5e1de93a9b 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java +++ 
b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java @@ -21,7 +21,6 @@ import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.engine.HoodieLocalEngineContext; import org.apache.hudi.common.model.HoodieLogFile; -import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.cdc.HoodieCDCUtils; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; @@ -75,7 +74,6 @@ public class TestFSUtils extends HoodieCommonTestHarness { private static final String TEST_WRITE_TOKEN = "1-0-1"; - private static final String BASE_FILE_EXTENSION = HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension(); @Rule public final EnvironmentVariables environmentVariables = new EnvironmentVariables(); @@ -94,7 +92,8 @@ public void tearDown() throws Exception { public void testMakeDataFileName() { String instantTime = HoodieActiveTimeline.formatDate(new Date()); String fileName = UUID.randomUUID().toString(); - assertEquals(FSUtils.makeBaseFileName(instantTime, TEST_WRITE_TOKEN, fileName), fileName + "_" + TEST_WRITE_TOKEN + "_" + instantTime + BASE_FILE_EXTENSION); + assertEquals(FSUtils.makeBaseFileName(instantTime, TEST_WRITE_TOKEN, fileName, BASE_FILE_EXTENSION), + fileName + "_" + TEST_WRITE_TOKEN + "_" + instantTime + BASE_FILE_EXTENSION); } @Test @@ -169,7 +168,7 @@ public void testProcessFiles() throws Exception { public void testGetCommitTime() { String instantTime = HoodieActiveTimeline.formatDate(new Date()); String fileName = UUID.randomUUID().toString(); - String fullFileName = FSUtils.makeBaseFileName(instantTime, TEST_WRITE_TOKEN, fileName); + String fullFileName = FSUtils.makeBaseFileName(instantTime, TEST_WRITE_TOKEN, fileName, BASE_FILE_EXTENSION); assertEquals(instantTime, FSUtils.getCommitTime(fullFileName)); // test log file name fullFileName = FSUtils.makeLogFileName(fileName, HOODIE_LOG.getFileExtension(), instantTime, 1, TEST_WRITE_TOKEN); @@ -180,7 +179,7 @@ public void testGetCommitTime() { public void testGetFileNameWithoutMeta() { String instantTime = HoodieActiveTimeline.formatDate(new Date()); String fileName = UUID.randomUUID().toString(); - String fullFileName = FSUtils.makeBaseFileName(instantTime, TEST_WRITE_TOKEN, fileName); + String fullFileName = FSUtils.makeBaseFileName(instantTime, TEST_WRITE_TOKEN, fileName, BASE_FILE_EXTENSION); assertEquals(fileName, FSUtils.getFileId(fullFileName)); } @@ -375,7 +374,7 @@ public void testFileNameRelatedFunctions() throws Exception { final String LOG_EXTENSION = "." 
+ LOG_STR; // data file name - String dataFileName = FSUtils.makeBaseFileName(instantTime, writeToken, fileId); + String dataFileName = FSUtils.makeBaseFileName(instantTime, writeToken, fileId, BASE_FILE_EXTENSION); assertEquals(instantTime, FSUtils.getCommitTime(dataFileName)); assertEquals(fileId, FSUtils.getFileId(dataFileName)); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java index 7adcb1052375..e718bc21ed6b 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java @@ -56,6 +56,7 @@ public void testStorageSchemes() { assertFalse(StorageSchemes.isAtomicCreationSupported("jfs")); assertFalse(StorageSchemes.isAtomicCreationSupported("bos")); assertFalse(StorageSchemes.isAtomicCreationSupported("ks3")); + assertFalse(StorageSchemes.isAtomicCreationSupported("nos")); assertFalse(StorageSchemes.isAtomicCreationSupported("ofs")); assertFalse(StorageSchemes.isAtomicCreationSupported("oci")); assertFalse(StorageSchemes.isAtomicCreationSupported("tos")); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestFirstValueAvroPayload.java b/hudi-common/src/test/java/org/apache/hudi/common/model/TestFirstValueAvroPayload.java new file mode 100644 index 000000000000..a0b7eb86b488 --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/common/model/TestFirstValueAvroPayload.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.common.model; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.apache.hudi.common.testutils.PreCombineTestUtils; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Properties; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestFirstValueAvroPayload { + + private Schema schema; + private Properties props; + + @BeforeEach + public void setUp() throws Exception { + schema = Schema.createRecord(Arrays.asList( + new Schema.Field("id", Schema.create(Schema.Type.STRING), "", null), + new Schema.Field("partition", Schema.create(Schema.Type.STRING), "", null), + new Schema.Field("ts", Schema.create(Schema.Type.LONG), "", null), + new Schema.Field("_hoodie_is_deleted", Schema.create(Schema.Type.BOOLEAN), "", false) + )); + props = new Properties(); + props.setProperty(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY, "ts"); + props.setProperty(HoodiePayloadProps.PAYLOAD_EVENT_TIME_FIELD_PROP_KEY, "ts"); + } + + @ParameterizedTest + @MethodSource("org.apache.hudi.common.testutils.PreCombineTestUtils#configurePreCombine") + public void testActiveRecordsForFirstValueAvroPayload(String key) throws IOException { + PreCombineTestUtils.setPreCombineConfig(props, key, "ts"); + GenericRecord record1 = new GenericData.Record(schema); + record1.put("id", "0"); + record1.put("partition", "partition0"); + record1.put("ts", 0L); + record1.put("_hoodie_is_deleted", false); + + GenericRecord record2 = new GenericData.Record(schema); + record2.put("id", "0"); + record2.put("partition", "partition0"); + record2.put("ts", 0L); + record2.put("_hoodie_is_deleted", false); + + DefaultHoodieRecordPayload payload1 = new FirstValueAvroPayload(record1, 1); + DefaultHoodieRecordPayload payload2 = new FirstValueAvroPayload(record2, 1); + assertEquals(payload1.preCombine(payload2, props), payload2); + assertEquals(payload2.preCombine(payload1, props), payload1); + + assertEquals(record1, payload1.getInsertValue(schema, props).get()); + assertEquals(record2, payload2.getInsertValue(schema, props).get()); + + assertEquals(payload1.combineAndGetUpdateValue(record2, schema, props).get(), record2); + assertEquals(payload2.combineAndGetUpdateValue(record1, schema, props).get(), record1); + } +} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieDeltaWriteStat.java b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieDeltaWriteStat.java index b774e06cea6d..a09bf539febc 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieDeltaWriteStat.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieDeltaWriteStat.java @@ -23,6 +23,8 @@ import org.junit.jupiter.api.Test; import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -49,4 +51,27 @@ public void testBaseFileAndLogFiles() { writeStat.setLogFiles(new ArrayList<>()); assertTrue(writeStat.getLogFiles().isEmpty()); } + + @Test + void testGetHoodieDeltaWriteStatFromPreviousStat() { + HoodieDeltaWriteStat prevStat = createDeltaWriteStat("part", "fileId", "888", + "base", Collections.singletonList("log1")); + HoodieDeltaWriteStat stat = 
prevStat.copy(); + assertEquals(prevStat.getPartitionPath(), stat.getPartitionPath()); + assertEquals(prevStat.getFileId(), stat.getFileId()); + assertEquals(prevStat.getPrevCommit(), stat.getPrevCommit()); + assertEquals(prevStat.getBaseFile(), stat.getBaseFile()); + assertEquals(1, stat.getLogFiles().size()); + assertEquals(prevStat.getLogFiles().get(0), stat.getLogFiles().get(0)); + } + + private HoodieDeltaWriteStat createDeltaWriteStat(String partition, String fileId, String prevCommit, String baseFile, List logFiles) { + HoodieDeltaWriteStat writeStat1 = new HoodieDeltaWriteStat(); + writeStat1.setPartitionPath(partition); + writeStat1.setFileId(fileId); + writeStat1.setPrevCommit(prevCommit); + writeStat1.setBaseFile(baseFile); + writeStat1.setLogFiles(logFiles); + return writeStat1; + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieWriteStat.java b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieWriteStat.java index e8a7205f769e..d6c3cf7fbb02 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieWriteStat.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieWriteStat.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.model; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hadoop.fs.Path; @@ -46,7 +47,8 @@ public void testSetPaths() { Path basePath = new Path(basePathString); Path partitionPath = new Path(basePath, partitionPathString); - Path finalizeFilePath = new Path(partitionPath, FSUtils.makeBaseFileName(instantTime, writeToken, fileName)); + Path finalizeFilePath = new Path(partitionPath, FSUtils.makeBaseFileName(instantTime, writeToken, fileName, + HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension())); HoodieWriteStat writeStat = new HoodieWriteStat(); writeStat.setPath(basePath, finalizeFilePath); assertEquals(finalizeFilePath, new Path(basePath, writeStat.getPath())); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java b/hudi-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java index 3ac42b9d3b7c..b7f0ba8eba77 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java @@ -21,7 +21,7 @@ import org.apache.hudi.avro.AvroSchemaUtils; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.Option; -import org.apache.hudi.exception.HoodieIncompatibleSchemaException; +import org.apache.hudi.internal.schema.HoodieSchemaException; import org.apache.avro.Schema; import org.junit.jupiter.api.Test; @@ -61,7 +61,7 @@ public void testRecreateSchemaWhenDropPartitionColumns() { String[] pts4 = {"user_partition", "partition_path"}; try { TableSchemaResolver.appendPartitionColumns(originSchema, Option.of(pts3)); - } catch (HoodieIncompatibleSchemaException e) { + } catch (HoodieSchemaException e) { assertTrue(e.getMessage().contains("Partial partition fields are still in the schema")); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java index c81a05b4c20a..d258753c3a85 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java +++ 
b/hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java @@ -158,7 +158,7 @@ public void testGetPartitions() throws IOException { HoodieInstant cleanInstant = new HoodieInstant(true, CLEAN_ACTION, ts); activeTimeline.createNewInstant(cleanInstant); - activeTimeline.saveAsComplete(cleanInstant, getCleanMetadata(olderPartition, ts)); + activeTimeline.saveAsComplete(cleanInstant, getCleanMetadata(olderPartition, ts, false)); } metaClient.reloadActiveTimeline(); @@ -197,7 +197,7 @@ public void testGetPartitionsUnPartitioned() throws IOException { HoodieInstant cleanInstant = new HoodieInstant(true, CLEAN_ACTION, ts); activeTimeline.createNewInstant(cleanInstant); - activeTimeline.saveAsComplete(cleanInstant, getCleanMetadata(partitionPath, ts)); + activeTimeline.saveAsComplete(cleanInstant, getCleanMetadata(partitionPath, ts, false)); } metaClient.reloadActiveTimeline(); @@ -553,7 +553,7 @@ private byte[] getReplaceCommitMetadata(String basePath, String commitTs, String return serializeCommitMetadata(commit).get(); } - private Option getCleanMetadata(String partition, String time) throws IOException { + private Option getCleanMetadata(String partition, String time, boolean isPartitionDeleted) throws IOException { Map partitionToFilesCleaned = new HashMap<>(); List filesDeleted = new ArrayList<>(); filesDeleted.add("file-" + partition + "-" + time + "1"); @@ -564,6 +564,7 @@ private Option getCleanMetadata(String partition, String time) throws IO .setFailedDeleteFiles(Collections.emptyList()) .setDeletePathPatterns(Collections.emptyList()) .setSuccessDeleteFiles(filesDeleted) + .setIsPartitionDeleted(isPartitionDeleted) .build(); partitionToFilesCleaned.putIfAbsent(partition, partitionMetadata); HoodieCleanMetadata cleanMetadata = HoodieCleanMetadata.newBuilder() @@ -611,4 +612,43 @@ public void testHandleHollowCommitIfNeeded(HollowCommitHandling handlingMode) th fail("should cover all handling mode."); } } + + @Test + public void testGetDroppedPartitions() throws Exception { + HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline(); + HoodieTimeline activeCommitTimeline = activeTimeline.getCommitTimeline(); + assertTrue(activeCommitTimeline.empty()); + + String olderPartition = "p1"; // older partitions that will be deleted by clean commit + // first insert to the older partition + HoodieInstant instant1 = new HoodieInstant(true, COMMIT_ACTION, "00001"); + activeTimeline.createNewInstant(instant1); + activeTimeline.saveAsComplete(instant1, Option.of(getCommitMetadata(basePath, olderPartition, "00001", 2, Collections.emptyMap()))); + + metaClient.reloadActiveTimeline(); + List droppedPartitions = TimelineUtils.getDroppedPartitions(metaClient, Option.empty(), Option.empty()); + // no dropped partitions + assertEquals(0, droppedPartitions.size()); + + // another commit inserts to new partition + HoodieInstant instant2 = new HoodieInstant(true, COMMIT_ACTION, "00002"); + activeTimeline.createNewInstant(instant2); + activeTimeline.saveAsComplete(instant2, Option.of(getCommitMetadata(basePath, "p2", "00002", 2, Collections.emptyMap()))); + + metaClient.reloadActiveTimeline(); + droppedPartitions = TimelineUtils.getDroppedPartitions(metaClient, Option.empty(), Option.empty()); + // no dropped partitions + assertEquals(0, droppedPartitions.size()); + + // clean commit deletes older partition + HoodieInstant cleanInstant = new HoodieInstant(true, CLEAN_ACTION, "00003"); + activeTimeline.createNewInstant(cleanInstant); + activeTimeline.saveAsComplete(cleanInstant, 
getCleanMetadata(olderPartition, "00003", true)); + + metaClient.reloadActiveTimeline(); + droppedPartitions = TimelineUtils.getDroppedPartitions(metaClient, Option.empty(), Option.empty()); + // older partition is in the list dropped partitions + assertEquals(1, droppedPartitions.size()); + assertEquals(olderPartition, droppedPartitions.get(0)); + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/read/TestHoodieFileGroupReaderBase.java b/hudi-common/src/test/java/org/apache/hudi/common/table/read/TestHoodieFileGroupReaderBase.java index 5fc1895ef781..8001cbe45d37 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/read/TestHoodieFileGroupReaderBase.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/read/TestHoodieFileGroupReaderBase.java @@ -150,7 +150,7 @@ private void validateOutputFromFileGroupReader(Configuration hadoopConf, HoodieEngineContext engineContext = new HoodieLocalEngineContext(hadoopConf); HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().build(); FileSystemViewManager viewManager = FileSystemViewManager.createViewManager( - engineContext, metadataConfig, FileSystemViewStorageConfig.newBuilder().build(), + engineContext, FileSystemViewStorageConfig.newBuilder().build(), HoodieCommonConfig.newBuilder().build(), mc -> HoodieTableMetadata.create( engineContext, metadataConfig, mc.getBasePathV2().toString())); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFSViewWithClustering.java b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFSViewWithClustering.java index e9546f8f9d19..26f9c3f07611 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFSViewWithClustering.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFSViewWithClustering.java @@ -124,10 +124,10 @@ public void testReplaceFileIdIsExcludedInView() throws IOException { // Only one commit String commitTime1 = "1"; - String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1); - String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2); - String fileName3 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId3); - String fileName4 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId4); + String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION); + String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION); + String fileName3 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION); + String fileName4 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId4, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath1 + "/" + fileName1).createNewFile(); new File(basePath + "/" + partitionPath1 + "/" + fileName2).createNewFile(); new File(basePath + "/" + partitionPath2 + "/" + fileName3).createNewFile(); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java index 1ab824724aab..f5cee136d2e2 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java @@ -313,8 +313,8 @@ public void 
testViewForFileSlicesWithPartitionMetadataFile() throws Exception { String fileId1 = UUID.randomUUID().toString(); String fileId2 = UUID.randomUUID().toString(); String commitTime1 = "1"; - String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1); - String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2); + String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION); + String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile(); new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile(); HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline(); @@ -343,8 +343,8 @@ public void testViewForGetAllFileGroupsStateless() throws Exception { String fileId1 = UUID.randomUUID().toString(); String fileId2 = UUID.randomUUID().toString(); String commitTime1 = "1"; - String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1); - String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2); + String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION); + String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath1 + "/" + fileName1).createNewFile(); new File(basePath + "/" + partitionPath1 + "/" + fileName2).createNewFile(); @@ -356,8 +356,8 @@ public void testViewForGetAllFileGroupsStateless() throws Exception { String fileId3 = UUID.randomUUID().toString(); String fileId4 = UUID.randomUUID().toString(); String commitTime2 = "2"; - String fileName3 = FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId3); - String fileName4 = FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId4); + String fileName3 = FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION); + String fileName4 = FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId4, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath2 + "/" + fileName3).createNewFile(); new File(basePath + "/" + partitionPath2 + "/" + fileName4).createNewFile(); @@ -394,8 +394,8 @@ public void testViewForGetLatestFileSlicesStateless() throws Exception { String fileId1 = UUID.randomUUID().toString(); String fileId2 = UUID.randomUUID().toString(); String commitTime1 = "1"; - String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1); - String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2); + String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION); + String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath1 + "/" + fileName1).createNewFile(); new File(basePath + "/" + partitionPath1 + "/" + fileName2).createNewFile(); @@ -407,8 +407,8 @@ public void testViewForGetLatestFileSlicesStateless() throws Exception { String fileId3 = UUID.randomUUID().toString(); String fileId4 = UUID.randomUUID().toString(); String commitTime2 = "2"; - String fileName3 = FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId3); - String fileName4 = FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId4); + String fileName3 = FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId3, 
BASE_FILE_EXTENSION); + String fileName4 = FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId4, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath2 + "/" + fileName3).createNewFile(); new File(basePath + "/" + partitionPath2 + "/" + fileName4).createNewFile(); @@ -515,7 +515,7 @@ void testFileSlicingWithMultipleDeltaWriters() throws Exception { String deltaInstantTime2 = "30"; // 30 -> 50 String deltaInstantTime3 = "35"; // 35 -> 90 - String baseFile1 = FSUtils.makeBaseFileName(instantTime1, TEST_WRITE_TOKEN, fileId); + String baseFile1 = FSUtils.makeBaseFileName(instantTime1, TEST_WRITE_TOKEN, fileId, BASE_FILE_EXTENSION); String deltaFile1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, deltaInstantTime1, 0, TEST_WRITE_TOKEN); String deltaFile2 = @@ -556,7 +556,7 @@ void testFileSlicingWithMultipleDeltaWriters() throws Exception { // schedules a compaction String compactionInstantTime1 = metaClient.createNewInstantTime(); // 60 -> 80 - String compactionFile1 = FSUtils.makeBaseFileName(compactionInstantTime1, TEST_WRITE_TOKEN, fileId); + String compactionFile1 = FSUtils.makeBaseFileName(compactionInstantTime1, TEST_WRITE_TOKEN, fileId, BASE_FILE_EXTENSION); List> partitionFileSlicesPairs = new ArrayList<>(); partitionFileSlicesPairs.add(Pair.of(partitionPath, fileSlices.get(0))); HoodieCompactionPlan compactionPlan = @@ -633,6 +633,61 @@ void testFileSlicingWithMultipleDeltaWriters() throws Exception { assertEquals(deltaFile3, logFiles.get(0).getFileName(), "Log File Order check"); } + @Test + void testLoadPartitions_unPartitioned() throws Exception { + String partitionPath = ""; + Paths.get(basePath, partitionPath).toFile().mkdirs(); + String fileId = UUID.randomUUID().toString(); + + String instantTime1 = "1"; + String fileName1 = + FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN); + + Paths.get(basePath, partitionPath, fileName1).toFile().createNewFile(); + HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline(); + HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1); + + saveAsComplete(commitTimeline, instant1, Option.empty()); + refreshFsView(); + + // Assert that no base files are returned without the partitions being loaded + assertEquals(0, fsView.getLatestFileSliceInRange(Collections.singletonList("1")).count()); + // Assert that load does not fail for un-partitioned tables + fsView.loadPartitions(Collections.singletonList(partitionPath)); + // Assert that base files are returned after the empty-string partition is loaded + assertEquals(1, fsView.getLatestFileSliceInRange(Collections.singletonList("1")).count()); + } + + @Test + void testLoadPartitions_partitioned() throws Exception { + String partitionPath1 = "2016/05/01"; + String partitionPath2 = "2016/05/02"; + Paths.get(basePath, partitionPath1).toFile().mkdirs(); + Paths.get(basePath, partitionPath2).toFile().mkdirs(); + String fileId1 = UUID.randomUUID().toString(); + String fileId2 = UUID.randomUUID().toString(); + String instantTime1 = "1"; + String fileName1 = + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN); + String fileName2 = + FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN); + + Paths.get(basePath, partitionPath1, fileName1).toFile().createNewFile(); + Paths.get(basePath, partitionPath2, fileName2).toFile().createNewFile(); + HoodieActiveTimeline commitTimeline = 
metaClient.getActiveTimeline(); + HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1); + + saveAsComplete(commitTimeline, instant1, Option.empty()); + refreshFsView(); + + // Assert that no base files are returned without the partitions being loaded + assertEquals(0, fsView.getLatestFileSliceInRange(Collections.singletonList("1")).count()); + // Only load a single partition path + fsView.loadPartitions(Collections.singletonList(partitionPath1)); + // Assert that base file is returned for partitionPath1 only + assertEquals(1, fsView.getLatestFileSliceInRange(Collections.singletonList("1")).count()); + } + /** * Returns all file-slices including uncommitted ones. * @@ -726,7 +781,7 @@ protected void testViewForFileSlicesWithAsyncCompaction(boolean skipCreatingData String dataFileName = null; if (!skipCreatingDataFile) { - dataFileName = FSUtils.makeBaseFileName(instantTime1, TEST_WRITE_TOKEN, fileId); + dataFileName = FSUtils.makeBaseFileName(instantTime1, TEST_WRITE_TOKEN, fileId, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath + "/" + dataFileName).createNewFile(); } String fileName1 = @@ -765,7 +820,7 @@ protected void testViewForFileSlicesWithAsyncCompaction(boolean skipCreatingData checkExternalFile(srcFileStatus, fileSlice.getBaseFile().get().getBootstrapBaseFile(), testBootstrap); } String compactionRequestedTime = "4"; - String compactDataFileName = FSUtils.makeBaseFileName(compactionRequestedTime, TEST_WRITE_TOKEN, fileId); + String compactDataFileName = FSUtils.makeBaseFileName(compactionRequestedTime, TEST_WRITE_TOKEN, fileId, BASE_FILE_EXTENSION); List> partitionFileSlicesPairs = new ArrayList<>(); partitionFileSlicesPairs.add(Pair.of(partitionPath, fileSlices.get(0))); HoodieCompactionPlan compactionPlan = @@ -900,12 +955,12 @@ protected void testViewForFileSlicesWithAsyncCompaction(boolean skipCreatingData final String orphanFileId2 = UUID.randomUUID().toString(); final String invalidInstantId = "INVALIDTIME"; String inflightDeltaInstantTime = "7"; - String orphanDataFileName = FSUtils.makeBaseFileName(invalidInstantId, TEST_WRITE_TOKEN, orphanFileId1); + String orphanDataFileName = FSUtils.makeBaseFileName(invalidInstantId, TEST_WRITE_TOKEN, orphanFileId1, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath + "/" + orphanDataFileName).createNewFile(); String orphanLogFileName = FSUtils.makeLogFileName(orphanFileId2, HoodieLogFile.DELTA_EXTENSION, invalidInstantId, 0, TEST_WRITE_TOKEN); new File(basePath + "/" + partitionPath + "/" + orphanLogFileName).createNewFile(); - String inflightDataFileName = FSUtils.makeBaseFileName(inflightDeltaInstantTime, TEST_WRITE_TOKEN, inflightFileId1); + String inflightDataFileName = FSUtils.makeBaseFileName(inflightDeltaInstantTime, TEST_WRITE_TOKEN, inflightFileId1, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath + "/" + inflightDataFileName).createNewFile(); String inflightLogFileName = FSUtils.makeLogFileName(inflightFileId2, HoodieLogFile.DELTA_EXTENSION, inflightDeltaInstantTime, 0, TEST_WRITE_TOKEN); @@ -1060,7 +1115,7 @@ public void testGetLatestDataFilesForFileId() throws IOException { // Only one commit, but is not safe String commitTime1 = "1"; - String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId); + String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile(); refreshFsView(); 
assertFalse(roView.getLatestBaseFiles(partitionPath).anyMatch(dfile -> dfile.getFileId().equals(fileId)), @@ -1076,7 +1131,7 @@ public void testGetLatestDataFilesForFileId() throws IOException { // Do another commit, but not safe String commitTime2 = "2"; - String fileName2 = FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId); + String fileName2 = FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile(); refreshFsView(); assertEquals(fileName1, roView.getLatestBaseFiles(partitionPath) @@ -1110,22 +1165,22 @@ public void testStreamLatestVersionInPartition(boolean isLatestFileSliceOnly) th String fileId3 = UUID.randomUUID().toString(); String fileId4 = UUID.randomUUID().toString(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION)).createNewFile(); new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0, TEST_WRITE_TOKEN)) .createNewFile(); new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 1, TEST_WRITE_TOKEN)) .createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)).createNewFile(); new File(fullPartitionPath + FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0, TEST_WRITE_TOKEN)) .createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION)).createNewFile(); new File(fullPartitionPath + FSUtils.makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0, TEST_WRITE_TOKEN)) .createNewFile(); @@ -1178,9 +1233,9 @@ private void testStreamLatestVersionInPartition(boolean isLatestFileSliceOnly, S for (HoodieBaseFile status : dataFileList) { filenames.add(status.getFileName()); } - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1))); - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2))); - 
assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION))); filenames = new HashSet<>(); List logFilesList = rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime4, true) @@ -1207,12 +1262,12 @@ private void testStreamLatestVersionInPartition(boolean isLatestFileSliceOnly, S } if (!isLatestFileSliceOnly) { assertEquals(3, dataFiles.size()); - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1))); - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2))); - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION))); } else { assertEquals(1, dataFiles.size()); - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION))); } logFilesList = rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime3, true) @@ -1238,13 +1293,13 @@ protected void testStreamEveryVersionInPartition(boolean isLatestFileSliceOnly) String fileId2 = UUID.randomUUID().toString(); String fileId3 = UUID.randomUUID().toString(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3, 
BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION)).createNewFile(); new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); @@ -1269,22 +1324,22 @@ protected void testStreamEveryVersionInPartition(boolean isLatestFileSliceOnly) Set expFileNames = new HashSet<>(); if (fileId.equals(fileId1)) { if (!isLatestFileSliceOnly) { - expFileNames.add(FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)); + expFileNames.add(FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION)); } - expFileNames.add(FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)); + expFileNames.add(FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION)); assertEquals(expFileNames, filenames); } else if (fileId.equals(fileId2)) { if (!isLatestFileSliceOnly) { - expFileNames.add(FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)); - expFileNames.add(FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)); + expFileNames.add(FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)); + expFileNames.add(FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)); } - expFileNames.add(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)); + expFileNames.add(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)); assertEquals(expFileNames, filenames); } else { if (!isLatestFileSliceOnly) { - expFileNames.add(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)); + expFileNames.add(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION)); } - expFileNames.add(FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)); + expFileNames.add(FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION)); assertEquals(expFileNames, filenames); } } @@ -1307,21 +1362,21 @@ protected void testStreamLatestVersionInRange(boolean isLatestFileSliceOnly) thr String fileId2 = UUID.randomUUID().toString(); String fileId3 = UUID.randomUUID().toString(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION)).createNewFile(); new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0, TEST_WRITE_TOKEN)) .createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId1)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2, 
BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)).createNewFile(); new File(fullPartitionPath + FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0, TEST_WRITE_TOKEN)) .createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION)).createNewFile(); new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); @@ -1344,10 +1399,10 @@ protected void testStreamLatestVersionInRange(boolean isLatestFileSliceOnly) thr filenames.add(status.getFileName()); } - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId1))); - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION))); if (!isLatestFileSliceOnly) { - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION))); } List slices = @@ -1388,13 +1443,13 @@ protected void testStreamLatestVersionsBefore(boolean isLatestFileSliceOnly) thr String fileId2 = UUID.randomUUID().toString(); String fileId3 = UUID.randomUUID().toString(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile(); - new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + 
FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION)).createNewFile(); + new File(fullPartitionPath + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION)).createNewFile(); new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); @@ -1414,8 +1469,8 @@ protected void testStreamLatestVersionsBefore(boolean isLatestFileSliceOnly) thr for (HoodieBaseFile status : dataFiles) { filenames.add(status.getFileName()); } - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1))); - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION))); } else { assertEquals(0, dataFiles.size()); } @@ -1439,30 +1494,30 @@ protected void testStreamLatestVersions(boolean isLatestFileSliceOnly) throws IO String fileId2 = UUID.randomUUID().toString(); String fileId3 = UUID.randomUUID().toString(); - new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)) + new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION)) .createNewFile(); new File(fullPartitionPath + "/" + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0, TEST_WRITE_TOKEN)) .createNewFile(); - new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)) + new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION)) .createNewFile(); new File(fullPartitionPath + "/" + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0, TEST_WRITE_TOKEN)) .createNewFile(); - new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)) + new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)) .createNewFile(); - new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)) + new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)) .createNewFile(); new File(fullPartitionPath + "/" + FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0, TEST_WRITE_TOKEN)) .createNewFile(); - new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)) + new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION)) .createNewFile(); - new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)) + new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION)) .createNewFile(); - new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)) + new File(fullPartitionPath + "/" + FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION)) .createNewFile(); new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); @@ -1509,9 +1564,9 @@ protected void 
testStreamLatestVersions(boolean isLatestFileSliceOnly) throws IO for (HoodieBaseFile status : statuses1) { filenames.add(status.getFileName()); } - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1))); - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2))); - assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION))); + assertTrue(filenames.contains(FSUtils.makeBaseFileName(commitTime4, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION))); } @Test @@ -1532,15 +1587,15 @@ public void testPendingCompactionWithDuplicateFileIdsAcrossPartitions() throws E String deltaInstantTime2 = "4"; String fileId = UUID.randomUUID().toString(); - String dataFileName = FSUtils.makeBaseFileName(instantTime1, TEST_WRITE_TOKEN, fileId); + String dataFileName = FSUtils.makeBaseFileName(instantTime1, TEST_WRITE_TOKEN, fileId, BASE_FILE_EXTENSION); new File(fullPartitionPath1 + dataFileName).createNewFile(); String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN); new File(fullPartitionPath1 + fileName1).createNewFile(); - new File(fullPartitionPath2 + FSUtils.makeBaseFileName(instantTime1, TEST_WRITE_TOKEN, fileId)).createNewFile(); + new File(fullPartitionPath2 + FSUtils.makeBaseFileName(instantTime1, TEST_WRITE_TOKEN, fileId, BASE_FILE_EXTENSION)).createNewFile(); new File(fullPartitionPath2 + fileName1).createNewFile(); - new File(fullPartitionPath3 + FSUtils.makeBaseFileName(instantTime1, TEST_WRITE_TOKEN, fileId)).createNewFile(); + new File(fullPartitionPath3 + FSUtils.makeBaseFileName(instantTime1, TEST_WRITE_TOKEN, fileId, BASE_FILE_EXTENSION)).createNewFile(); new File(fullPartitionPath3 + fileName1).createNewFile(); HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline(); @@ -1579,7 +1634,7 @@ public void testPendingCompactionWithDuplicateFileIdsAcrossPartitions() throws E partitionFileSlicesPairs.add(Pair.of(partitionPath3, fileSlices.get(0))); String compactionRequestedTime = "2"; - String compactDataFileName = FSUtils.makeBaseFileName(compactionRequestedTime, TEST_WRITE_TOKEN, fileId); + String compactDataFileName = FSUtils.makeBaseFileName(compactionRequestedTime, TEST_WRITE_TOKEN, fileId, BASE_FILE_EXTENSION); HoodieCompactionPlan compactionPlan = CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs, Option.empty(), Option.empty()); @@ -1696,8 +1751,8 @@ public void testReplaceWithTimeTravel() throws IOException { "No commit, should not find any data file"); // Only one commit String commitTime1 = "1"; - String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1); - String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2); + String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION); + String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath1 + "/" + fileName1).createNewFile(); new File(basePath + "/" + partitionPath1 + "/" + fileName2).createNewFile(); @@ -1713,8 +1768,8 @@ public void testReplaceWithTimeTravel() throws IOException { // create commit2 - fileId1 is replaced. 
new file groups fileId3,fileId4 are created. String fileId3 = UUID.randomUUID().toString(); String fileId4 = UUID.randomUUID().toString(); - String fileName3 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId3); - String fileName4 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId4); + String fileName3 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION); + String fileName4 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId4, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath1 + "/" + fileName3).createNewFile(); new File(basePath + "/" + partitionPath1 + "/" + fileName4).createNewFile(); @@ -1795,10 +1850,10 @@ public void testReplaceFileIdIsExcludedInView() throws IOException { // Only one commit String commitTime1 = "1"; - String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1); - String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2); - String fileName3 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId3); - String fileName4 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId4); + String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION); + String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION); + String fileName3 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION); + String fileName4 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId4, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath1 + "/" + fileName1).createNewFile(); new File(basePath + "/" + partitionPath1 + "/" + fileName2).createNewFile(); new File(basePath + "/" + partitionPath2 + "/" + fileName3).createNewFile(); @@ -1858,9 +1913,9 @@ public void testPendingClusteringOperations() throws IOException { "No commit, should not find any data file"); // Only one commit String commitTime1 = "1"; - String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1); - String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2); - String fileName3 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId3); + String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION); + String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION); + String fileName3 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath1 + "/" + fileName1).createNewFile(); new File(basePath + "/" + partitionPath1 + "/" + fileName2).createNewFile(); new File(basePath + "/" + partitionPath1 + "/" + fileName3).createNewFile(); @@ -1972,8 +2027,8 @@ public void testHoodieTableFileSystemViewWithPendingClustering() throws IOExcept // first insert commit String commitTime1 = "1"; - String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1); - String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2); + String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId1, BASE_FILE_EXTENSION); + String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, fileId2, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile(); new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile(); @@ -1997,7 +2052,7 @@ public void 
testHoodieTableFileSystemViewWithPendingClustering() throws IOExcept // replace commit String commitTime2 = "2"; - String fileName3 = FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId3); + String fileName3 = FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, fileId3, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath + "/" + fileName3).createNewFile(); HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime2); @@ -2022,7 +2077,7 @@ public void testHoodieTableFileSystemViewWithPendingClustering() throws IOExcept // another insert commit String commitTime3 = "3"; - String fileName4 = FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId4); + String fileName4 = FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, fileId4, BASE_FILE_EXTENSION); new File(basePath + "/" + partitionPath + "/" + fileName4).createNewFile(); HoodieInstant instant3 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime3); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java index 852f916c1a4b..91fcad237206 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java @@ -967,7 +967,7 @@ private List> generateDataForInstant(String instan try { java.nio.file.Path filePath = Paths.get(basePath, p, deltaCommit ? FSUtils.makeLogFileName(f, ".log", instant, 0, TEST_WRITE_TOKEN) - : FSUtils.makeBaseFileName(instant, TEST_WRITE_TOKEN, f)); + : FSUtils.makeBaseFileName(instant, TEST_WRITE_TOKEN, f, BASE_FILE_EXTENSION)); Files.createFile(filePath); HoodieWriteStat w = new HoodieWriteStat(); w.setFileId(f); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestPriorityBasedFileSystemView.java b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestPriorityBasedFileSystemView.java index b297d320c7a6..1e2b8e0c35e5 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestPriorityBasedFileSystemView.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestPriorityBasedFileSystemView.java @@ -53,6 +53,9 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -698,6 +701,27 @@ public void testGetLatestFileSlice() { }); } + @Test + public void testLoadPartitions() { + String partitionPath = "/table2"; + + fsView.loadPartitions(Collections.singletonList(partitionPath)); + verify(primary, times(1)).loadPartitions(Collections.singletonList(partitionPath)); + verify(secondary, never()).loadPartitions(any()); + + resetMocks(); + doThrow(new RuntimeException()).when(primary).loadPartitions(Collections.singletonList(partitionPath)); + fsView.loadPartitions(Collections.singletonList(partitionPath)); + verify(primary, times(1)).loadPartitions(Collections.singletonList(partitionPath)); + verify(secondary, times(1)).loadPartitions(Collections.singletonList(partitionPath)); + + resetMocks(); + doThrow(new 
RuntimeException()).when(secondary).loadPartitions(Collections.singletonList(partitionPath)); + assertThrows(RuntimeException.class, () -> { + fsView.loadPartitions(Collections.singletonList(partitionPath)); + }); + } + @Test public void testGetPreferredView() { assertEquals(primary, fsView.getPreferredView()); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieCommonTestHarness.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieCommonTestHarness.java index a1a3864a6a98..bda5b38c5178 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieCommonTestHarness.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieCommonTestHarness.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.testutils; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; @@ -35,6 +36,8 @@ */ public class HoodieCommonTestHarness { + protected static final String BASE_FILE_EXTENSION = HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension(); + protected String tableName; protected String basePath; protected URI baseUri; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java index fe2583b87af3..5adbb57edfa8 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java @@ -91,6 +91,13 @@ */ public class HoodieTestDataGenerator implements AutoCloseable { + /** + * You may get a different result after upgrading to Spark 3.0: reading dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z from Parquet INT96 files can be ambiguous, + * as the files may be written by Spark 2.x or legacy versions of Hive, which use a legacy hybrid calendar that is different from the Proleptic Gregorian calendar in Spark 3.0+. + * See more details in SPARK-31404. + */ + private boolean makeDatesAmbiguous = false; + // based on examination of sample file, the schema produces the following per record size public static final int BYTES_PER_RECORD = (int) (1.2 * 1024); // with default bloom filter with 60,000 entries and 0.000000001 FPRate @@ -208,6 +215,11 @@ public HoodieTestDataGenerator() { this(DEFAULT_PARTITION_PATHS); } + public HoodieTestDataGenerator(boolean makeDatesAmbiguous) { + this(); + this.makeDatesAmbiguous = makeDatesAmbiguous; + } + @Deprecated public HoodieTestDataGenerator(String[] partitionPaths, Map keyPartitionMap) { // NOTE: This used as a workaround to make sure that new instantiations of the generator @@ -392,7 +404,8 @@ private void generateExtraSchemaValues(GenericRecord rec) { rec.put("nation", ByteBuffer.wrap(bytes)); long randomMillis = genRandomTimeMillis(rand); Instant instant = Instant.ofEpochMilli(randomMillis); - rec.put("current_date", (int) LocalDateTime.ofInstant(instant, ZoneOffset.UTC).toLocalDate().toEpochDay()); + rec.put("current_date", makeDatesAmbiguous ?
-1000000 : + (int) LocalDateTime.ofInstant(instant, ZoneOffset.UTC).toLocalDate().toEpochDay()); rec.put("current_ts", randomMillis); BigDecimal bigDecimal = new BigDecimal(String.format(Locale.ENGLISH, "%5f", rand.nextFloat())); @@ -524,6 +537,15 @@ private static void createCommitFile(String basePath, String instantTime, Config .forEach(f -> createMetadataFile(f, basePath, configuration, commitMetadata)); } + public static void createOnlyCompletedCommitFile(String basePath, String instantTime, Configuration configuration) { + HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); + createOnlyCompletedCommitFile(basePath, instantTime, configuration, commitMetadata); + } + + public static void createOnlyCompletedCommitFile(String basePath, String instantTime, Configuration configuration, HoodieCommitMetadata commitMetadata) { + createMetadataFile(HoodieTimeline.makeCommitFileName(instantTime), basePath, configuration, commitMetadata); + } + public static void createDeltaCommitFile(String basePath, String instantTime, Configuration configuration) { HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); createDeltaCommitFile(basePath, instantTime, configuration, commitMetadata); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java index ca13ff79c521..94060d999af2 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java @@ -120,6 +120,7 @@ import static org.apache.hudi.common.testutils.FileCreateUtils.createSavepointCommit; import static org.apache.hudi.common.testutils.FileCreateUtils.deleteSavepointCommit; import static org.apache.hudi.common.testutils.FileCreateUtils.logFileName; +import static org.apache.hudi.common.testutils.HoodieCommonTestHarness.BASE_FILE_EXTENSION; import static org.apache.hudi.common.util.CleanerUtils.convertCleanMetadata; import static org.apache.hudi.common.util.CommitUtils.buildMetadata; import static org.apache.hudi.common.util.CommitUtils.getCommitActionType; @@ -555,7 +556,7 @@ private Pair genera if (newFileId.isPresent() && !StringUtils.isNullOrEmpty(newFileId.get())) { HoodieWriteStat writeStat = new HoodieWriteStat(); writeStat.setPartitionPath(partition); - writeStat.setPath(partition + "/" + FSUtils.makeBaseFileName(instantTime, "1-0-1", newFileId.get())); + writeStat.setPath(partition + "/" + FSUtils.makeBaseFileName(instantTime, "1-0-1", newFileId.get(), BASE_FILE_EXTENSION)); writeStat.setFileId(newFileId.get()); writeStat.setTotalWriteBytes(1); writeStat.setFileSizeInBytes(1); @@ -962,6 +963,19 @@ public HoodieCleanMetadata doClean(String commitTime, Map parti return cleanerMeta.getValue(); } + /** + * Repeats the same cleaning based on the cleaner plan and clean commit metadata. + * + * @param cleanCommitTime new clean commit time to use. + * @param cleanerPlan cleaner plan to write to the metadata. + * @param cleanMetadata clean metadata in data table to use. 
+ */ + public void repeatClean(String cleanCommitTime, + HoodieCleanerPlan cleanerPlan, + HoodieCleanMetadata cleanMetadata) throws IOException { + addClean(cleanCommitTime, cleanerPlan, cleanMetadata); + } + public HoodieCleanMetadata doCleanBasedOnCommits(String cleanCommitTime, List commitsToClean) throws IOException { Map partitionFileCountsToDelete = new HashMap<>(); for (String commitTime : commitsToClean) { diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/reader/HoodieTestReaderContext.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/reader/HoodieTestReaderContext.java index 38108cf15784..92344ba39ab2 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/reader/HoodieTestReaderContext.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/reader/HoodieTestReaderContext.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.testutils.reader; +import org.apache.hudi.avro.model.HoodieDeleteRecord; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.HoodieReaderContext; import org.apache.hudi.common.model.DefaultHoodieRecordPayload; @@ -207,6 +208,14 @@ public UnaryOperator projectRecord(Schema from, Schema to) { }; } + @Override + public IndexedRecord constructRawDeleteRecord(Map metadata) { + return new HoodieDeleteRecord( + (String) metadata.get(INTERNAL_META_RECORD_KEY), + (String) metadata.get(INTERNAL_META_PARTITION_PATH), + metadata.get(INTERNAL_META_ORDERING_FIELD)); + } + private Object getFieldValueFromIndexedRecord( IndexedRecord record, Schema recordSchema, diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java index d3375fe5e8af..72630787c13f 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java @@ -133,7 +133,9 @@ public void testClusteringPlanInflight() throws Exception { String clusterTime1 = "1"; HoodieInstant requestedInstant = createRequestedReplaceInstant(partitionPath1, clusterTime1, fileIds1); HoodieInstant inflightInstant = metaClient.getActiveTimeline().transitionReplaceRequestedToInflight(requestedInstant, Option.empty()); + assertTrue(ClusteringUtils.isClusteringInstant(metaClient.getActiveTimeline(), requestedInstant)); HoodieClusteringPlan requestedClusteringPlan = ClusteringUtils.getClusteringPlan(metaClient, requestedInstant).get().getRight(); + assertTrue(ClusteringUtils.isClusteringInstant(metaClient.getActiveTimeline(), inflightInstant)); HoodieClusteringPlan inflightClusteringPlan = ClusteringUtils.getClusteringPlan(metaClient, inflightInstant).get().getRight(); assertEquals(requestedClusteringPlan, inflightClusteringPlan); } @@ -261,7 +263,7 @@ private HoodieInstant createRequestedReplaceInstant(String partitionPath1, Strin private FileSlice generateFileSlice(String partitionPath, String fileId, String baseInstant) { FileSlice fs = new FileSlice(new HoodieFileGroupId(partitionPath, fileId), baseInstant); - fs.setBaseFile(new HoodieBaseFile(FSUtils.makeBaseFileName(baseInstant, "1-0-1", fileId))); + fs.setBaseFile(new HoodieBaseFile(FSUtils.makeBaseFileName(baseInstant, "1-0-1", fileId, BASE_FILE_EXTENSION))); return fs; } diff --git a/hudi-common/src/test/java/org/apache/hudi/internal/schema/utils/TestAvroSchemaEvolutionUtils.java 
b/hudi-common/src/test/java/org/apache/hudi/internal/schema/utils/TestAvroSchemaEvolutionUtils.java index 0be0a5f89c52..4027bd28178f 100644 --- a/hudi-common/src/test/java/org/apache/hudi/internal/schema/utils/TestAvroSchemaEvolutionUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/internal/schema/utils/TestAvroSchemaEvolutionUtils.java @@ -20,6 +20,7 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.testutils.SchemaTestUtil; +import org.apache.hudi.exception.HoodieNullSchemaTypeException; import org.apache.hudi.internal.schema.InternalSchema; import org.apache.hudi.internal.schema.InternalSchemaBuilder; import org.apache.hudi.internal.schema.Type; @@ -46,6 +47,9 @@ import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + /** * Tests {@link AvroSchemaEvolutionUtils}. */ @@ -184,6 +188,37 @@ public void testComplexConvert() { Assertions.assertEquals(schema, AvroInternalSchemaConverter.convert(internalSchema, "newTableName")); } + @Test + public void testNullFieldType() { + Schema schema = create("t1", + new Schema.Field("nullField", Schema.create(Schema.Type.NULL), null, JsonProperties.NULL_VALUE)); + Throwable t = assertThrows(HoodieNullSchemaTypeException.class, + () -> AvroInternalSchemaConverter.convert(schema)); + assertTrue(t.getMessage().contains("'t1.nullField'")); + + Schema schemaArray = create("t2", + new Schema.Field("nullArray", Schema.createArray(Schema.create(Schema.Type.NULL)), null, null)); + t = assertThrows(HoodieNullSchemaTypeException.class, + () -> AvroInternalSchemaConverter.convert(schemaArray)); + assertTrue(t.getMessage().contains("'t2.nullArray.element'")); + + Schema schemaMap = create("t3", + new Schema.Field("nullMap", Schema.createMap(Schema.create(Schema.Type.NULL)), null, null)); + t = assertThrows(HoodieNullSchemaTypeException.class, + () -> AvroInternalSchemaConverter.convert(schemaMap)); + assertTrue(t.getMessage().contains("'t3.nullMap.value'")); + + + Schema schemaComplex = create("t4", + new Schema.Field("complexField", Schema.createMap( + create("nestedStruct", + new Schema.Field("nestedArray", Schema.createArray(Schema.createMap(Schema.create(Schema.Type.NULL))), + null, null))), null, null)); + t = assertThrows(HoodieNullSchemaTypeException.class, + () -> AvroInternalSchemaConverter.convert(schemaComplex)); + assertTrue(t.getMessage().contains("'t4.nestedStruct.nestedArray.element.value'")); + } + @Test public void testRefreshNewId() { Types.RecordType record = Types.RecordType.get(Types.Field.get(0, false, "id", Types.IntType.get()), diff --git a/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java b/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java index cde9341f5cdf..941587531a50 100644 --- a/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java +++ b/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java @@ -28,6 +28,7 @@ import org.junit.jupiter.api.Test; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; @@ -39,11 +40,10 @@ * Tests {@link HoodieMetadataPayload}. 
*/ public class TestHoodieMetadataPayload extends HoodieCommonTestHarness { + public static final String PARTITION_NAME = "2022/10/01"; @Test public void testFileSystemMetadataPayloadMerging() { - String partitionName = "2022/10/01"; - Map firstCommitAddedFiles = createImmutableMap( Pair.of("file1.parquet", 1000L), Pair.of("file2.parquet", 2000L), @@ -51,7 +51,7 @@ public void testFileSystemMetadataPayloadMerging() { ); HoodieRecord firstPartitionFilesRecord = - HoodieMetadataPayload.createPartitionFilesRecord(partitionName, firstCommitAddedFiles, Collections.emptyList()); + HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME, firstCommitAddedFiles, Collections.emptyList()); Map secondCommitAddedFiles = createImmutableMap( // NOTE: This is an append @@ -63,13 +63,13 @@ public void testFileSystemMetadataPayloadMerging() { List secondCommitDeletedFiles = Collections.singletonList("file1.parquet"); HoodieRecord secondPartitionFilesRecord = - HoodieMetadataPayload.createPartitionFilesRecord(partitionName, secondCommitAddedFiles, secondCommitDeletedFiles); + HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME, secondCommitAddedFiles, secondCommitDeletedFiles); HoodieMetadataPayload combinedPartitionFilesRecordPayload = secondPartitionFilesRecord.getData().preCombine(firstPartitionFilesRecord.getData()); HoodieMetadataPayload expectedCombinedPartitionedFilesRecordPayload = - HoodieMetadataPayload.createPartitionFilesRecord(partitionName, + HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME, createImmutableMap( Pair.of("file2.parquet", 2000L), Pair.of("file3.parquet", 3333L), @@ -82,9 +82,76 @@ public void testFileSystemMetadataPayloadMerging() { assertEquals(expectedCombinedPartitionedFilesRecordPayload, combinedPartitionFilesRecordPayload); } + @Test + public void testFileSystemMetadataPayloadMergingWithDeletions() { + Map addedFileMap = createImmutableMap( + Pair.of("file1.parquet", 1000L), + Pair.of("file2.parquet", 2000L), + Pair.of("file3.parquet", 3000L), + Pair.of("file4.parquet", 4000L) + ); + HoodieRecord additionRecord = + HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME, addedFileMap, Collections.emptyList()); + + List deletedFileList1 = new ArrayList<>(); + deletedFileList1.add("file1.parquet"); + deletedFileList1.add("file3.parquet"); + HoodieRecord deletionRecord1 = + HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME, Collections.emptyMap(), deletedFileList1); + + List deletedFileList2 = new ArrayList<>(); + deletedFileList2.add("file1.parquet"); + deletedFileList2.add("file4.parquet"); + HoodieRecord deletionRecord2 = + HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME, Collections.emptyMap(), deletedFileList2); + + assertEquals( + HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME, + createImmutableMap( + Pair.of("file2.parquet", 2000L), + Pair.of("file4.parquet", 4000L) + ), + Collections.emptyList() + ).getData(), + deletionRecord1.getData().preCombine(additionRecord.getData()) + ); + + List expectedDeleteFileList = new ArrayList<>(); + expectedDeleteFileList.add("file1.parquet"); + expectedDeleteFileList.add("file3.parquet"); + expectedDeleteFileList.add("file4.parquet"); + + assertEquals( + HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME, + Collections.emptyMap(), + expectedDeleteFileList + ).getData(), + deletionRecord2.getData().preCombine(deletionRecord1.getData()) + ); + + assertEquals( + HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME, + 
createImmutableMap( + Pair.of("file2.parquet", 2000L) + ), + Collections.emptyList() + ).getData(), + deletionRecord2.getData().preCombine(deletionRecord1.getData()).preCombine(additionRecord.getData()) + ); + + assertEquals( + HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME, + createImmutableMap( + Pair.of("file2.parquet", 2000L) + ), + Collections.singletonList("file1.parquet") + ).getData(), + deletionRecord2.getData().preCombine(deletionRecord1.getData().preCombine(additionRecord.getData())) + ); + } + @Test public void testColumnStatsPayloadMerging() throws IOException { - String partitionPath = "2022/10/01"; String fileName = "file.parquet"; String targetColName = "c1"; @@ -92,7 +159,7 @@ public void testColumnStatsPayloadMerging() throws IOException { HoodieColumnRangeMetadata.create(fileName, targetColName, 100, 1000, 5, 1000, 123456, 123456); HoodieRecord columnStatsRecord = - HoodieMetadataPayload.createColumnStatsRecords(partitionPath, Collections.singletonList(c1Metadata), false) + HoodieMetadataPayload.createColumnStatsRecords(PARTITION_NAME, Collections.singletonList(c1Metadata), false) .findFirst().get(); //////////////////////////////////////////////////////////////////////// @@ -105,7 +172,7 @@ public void testColumnStatsPayloadMerging() throws IOException { HoodieColumnRangeMetadata.create(fileName, targetColName, 0, 500, 0, 100, 12345, 12345); HoodieRecord updatedColumnStatsRecord = - HoodieMetadataPayload.createColumnStatsRecords(partitionPath, Collections.singletonList(c1AppendedBlockMetadata), false) + HoodieMetadataPayload.createColumnStatsRecords(PARTITION_NAME, Collections.singletonList(c1AppendedBlockMetadata), false) .findFirst().get(); HoodieMetadataPayload combinedMetadataPayload = @@ -115,7 +182,7 @@ public void testColumnStatsPayloadMerging() throws IOException { HoodieColumnRangeMetadata.create(fileName, targetColName, 0, 1000, 5, 1100, 135801, 135801); HoodieRecord expectedColumnStatsRecord = - HoodieMetadataPayload.createColumnStatsRecords(partitionPath, Collections.singletonList(expectedColumnRangeMetadata), false) + HoodieMetadataPayload.createColumnStatsRecords(PARTITION_NAME, Collections.singletonList(expectedColumnRangeMetadata), false) .findFirst().get(); // Assert combined payload @@ -135,7 +202,7 @@ public void testColumnStatsPayloadMerging() throws IOException { HoodieColumnRangeMetadata.stub(fileName, targetColName); HoodieRecord deletedColumnStatsRecord = - HoodieMetadataPayload.createColumnStatsRecords(partitionPath, Collections.singletonList(c1StubbedMetadata), true) + HoodieMetadataPayload.createColumnStatsRecords(PARTITION_NAME, Collections.singletonList(c1StubbedMetadata), true) .findFirst().get(); // NOTE: In this case, deleted (or tombstone) record will be therefore deleting diff --git a/hudi-examples/hudi-examples-flink/pom.xml b/hudi-examples/hudi-examples-flink/pom.xml index c2697372e2a9..884d8f161fc9 100644 --- a/hudi-examples/hudi-examples-flink/pom.xml +++ b/hudi-examples/hudi-examples-flink/pom.xml @@ -69,9 +69,6 @@ src/main/resources - - src/test/resources - diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java index a15ac0efe07a..7565ad153002 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java @@ -290,7 +290,7 
@@ private FlinkOptions() { + " log file records(combines the two records with same key for base and log file records), then read the left log file records"); @AdvancedConfig - public static final ConfigOption UTC_TIMEZONE = ConfigOptions + public static final ConfigOption READ_UTC_TIMEZONE = ConfigOptions .key("read.utc-timezone") .booleanType() .defaultValue(true) @@ -316,7 +316,7 @@ private FlinkOptions() { public static final ConfigOption READ_STREAMING_SKIP_COMPACT = ConfigOptions .key("read.streaming.skip_compaction") .booleanType() - .defaultValue(false)// default read as batch + .defaultValue(true) .withDescription("Whether to skip compaction instants and avoid reading compacted base files for streaming read to improve read performance.\n" + "This option can be used to avoid reading duplicates when changelog mode is enabled, it is a solution to keep data integrity\n"); @@ -325,10 +325,18 @@ private FlinkOptions() { public static final ConfigOption READ_STREAMING_SKIP_CLUSTERING = ConfigOptions .key("read.streaming.skip_clustering") .booleanType() - .defaultValue(false) + .defaultValue(true) .withDescription("Whether to skip clustering instants to avoid reading base files of clustering operations for streaming read " + "to improve read performance."); + // this option is experimental + public static final ConfigOption READ_STREAMING_SKIP_INSERT_OVERWRITE = ConfigOptions + .key("read.streaming.skip_insertoverwrite") + .booleanType() + .defaultValue(false) + .withDescription("Whether to skip insert overwrite instants to avoid reading base files of insert overwrite operations for streaming read. " + + "In streaming scenarios, insert overwrite is usually used to repair data; this option controls the visibility of insert overwrite results to downstream streaming reads."); + public static final String START_COMMIT_EARLIEST = "earliest"; public static final ConfigOption READ_START_COMMIT = ConfigOptions .key("read.start-commit") @@ -489,6 +497,15 @@ private FlinkOptions() { public static final String PARTITION_FORMAT_HOUR = "yyyyMMddHH"; public static final String PARTITION_FORMAT_DAY = "yyyyMMdd"; public static final String PARTITION_FORMAT_DASHED_DAY = "yyyy-MM-dd"; + + @AdvancedConfig + public static final ConfigOption WRITE_UTC_TIMEZONE = ConfigOptions + .key("write.utc-timezone") + .booleanType() + .defaultValue(true) + .withDescription("Use UTC timezone or local timezone for the conversion between epoch" + + " time and LocalDateTime. Default value is UTC timezone for forward compatibility."); + @AdvancedConfig public static final ConfigOption PARTITION_FORMAT = ConfigOptions .key("write.partition.format") diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java index ae1a86f36cc4..f73c5d28c5b9 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java @@ -77,6 +77,14 @@ public static boolean isInsertOperation(Configuration conf) { return operationType == WriteOperationType.INSERT; } + /** + * Returns whether the table operation is 'upsert'.
+ */ + public static boolean isUpsertOperation(Configuration conf) { + WriteOperationType operationType = WriteOperationType.fromValue(conf.getString(FlinkOptions.OPERATION)); + return operationType == WriteOperationType.UPSERT; + } + /** * Returns whether the table operation is 'bulk_insert'. */ @@ -142,10 +150,20 @@ public static boolean isPartitionedTable(Configuration conf) { return FilePathUtils.extractPartitionKeys(conf).length > 0; } + /** + * Returns whether the table index is bucket index. + */ public static boolean isBucketIndexType(Configuration conf) { return conf.getString(FlinkOptions.INDEX_TYPE).equalsIgnoreCase(HoodieIndex.IndexType.BUCKET.name()); } + /** + * Returns whether the table is MERGE_ON_READ and is updated with the 'upsert' operation on a bucket index. + */ + public static boolean isMorWithBucketIndexUpsert(Configuration conf) { + return isMorTable(conf) && isUpsertOperation(conf) && isBucketIndexType(conf); + } + public static HoodieIndex.BucketIndexEngineType getBucketEngineType(Configuration conf) { String bucketEngineType = conf.get(FlinkOptions.BUCKET_INDEX_ENGINE_TYPE); return HoodieIndex.BucketIndexEngineType.valueOf(bucketEngineType); } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java index f98f1bde8c8a..26ca245f8bee 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java @@ -437,6 +437,18 @@ private void handleBootstrapEvent(WriteMetadataEvent event) { .filter(evt -> evt.getWriteStatuses().size() > 0) .findFirst().map(WriteMetadataEvent::getInstantTime) .orElse(WriteMetadataEvent.BOOTSTRAP_INSTANT); + + // if the current instant is still pending and the bootstrap event carries no instant, + // reuse the current instant and reject the bootstrap + if (this.metaClient.reloadActiveTimeline().filterInflightsAndRequested().containsInstant(this.instant) + && instant.equals(WriteMetadataEvent.BOOTSTRAP_INSTANT) + && this.tableState.operationType == WriteOperationType.INSERT) { + LOG.warn("Reuse current pending Instant {} with {} operationType, " + + "ignoring empty bootstrap event.", this.instant, WriteOperationType.INSERT.value()); + reset(); + return; + } + initInstant(instant); } } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java index e91535a24736..357bc07160d3 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java @@ -100,7 +100,7 @@ public void open() throws Exception { collector = new StreamRecordCollector<>(output); - // register the the metrics. + // register the metrics.
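The FlinkOptions changes above rename read.utc-timezone (UTC_TIMEZONE becomes READ_UTC_TIMEZONE), add the write-side counterpart write.utc-timezone, flip the defaults of read.streaming.skip_compaction and read.streaming.skip_clustering to true, and introduce the experimental read.streaming.skip_insertoverwrite. Below is a minimal sketch of how a streaming-read job could set these options; the option constants come from this patch, while the class name, table path, and job wiring are illustrative assumptions, not part of the patch.

    import org.apache.flink.configuration.Configuration;
    import org.apache.hudi.configuration.FlinkOptions;

    // Sketch only: option constants are from this patch; everything else is illustrative.
    public class StreamingReadOptionsSketch {
      public static Configuration streamingReadConf(String tablePath) {
        Configuration conf = new Configuration();
        conf.setString(FlinkOptions.PATH, tablePath);                        // pre-existing option, for context
        conf.setBoolean(FlinkOptions.READ_AS_STREAMING, true);               // pre-existing option, for context
        conf.setBoolean(FlinkOptions.READ_UTC_TIMEZONE, true);               // renamed from UTC_TIMEZONE
        conf.setBoolean(FlinkOptions.WRITE_UTC_TIMEZONE, true);              // new write-side counterpart
        conf.setBoolean(FlinkOptions.READ_STREAMING_SKIP_COMPACT, true);     // default changed to true
        conf.setBoolean(FlinkOptions.READ_STREAMING_SKIP_CLUSTERING, true);  // default changed to true
        // Experimental: hide insert-overwrite results (often used for data repair) from downstream streaming reads.
        conf.setBoolean(FlinkOptions.READ_STREAMING_SKIP_INSERT_OVERWRITE, true);
        return conf;
      }
    }

Note that flipping the skip_compaction and skip_clustering defaults is a behavior change for existing streaming readers that relied on the previous false defaults.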
getMetricGroup().gauge("memoryUsedSizeInBytes", (Gauge) sorter::getUsedMemoryInBytes); getMetricGroup().gauge("numSpillFiles", (Gauge) sorter::getNumSpillFiles); getMetricGroup().gauge("spillInBytes", (Gauge) sorter::getSpillInBytes); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunction.java index bfc7d7d62ad4..0a13bea513d4 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunction.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunction.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.model.HoodieOperation; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.keygen.KeyGenerator; import org.apache.hudi.keygen.factory.HoodieAvroKeyGeneratorFactory; import org.apache.hudi.sink.utils.PayloadCreation; @@ -84,7 +85,7 @@ public RowDataToHoodieFunction(RowType rowType, Configuration config) { public void open(Configuration parameters) throws Exception { super.open(parameters); this.avroSchema = StreamerUtil.getSourceSchema(this.config); - this.converter = RowDataToAvroConverters.createConverter(this.rowType); + this.converter = RowDataToAvroConverters.createConverter(this.rowType, this.config.getBoolean(FlinkOptions.WRITE_UTC_TIMEZONE)); this.keyGenerator = HoodieAvroKeyGeneratorFactory .createKeyGenerator(flinkConf2TypedProperties(this.config)); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/IncrementalInputSplits.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/IncrementalInputSplits.java index c1cd5874d960..ddd7fbbb0a88 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/IncrementalInputSplits.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/IncrementalInputSplits.java @@ -35,7 +35,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.OptionsResolver; -import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; +import org.apache.hudi.metadata.HoodieTableMetadataUtil; import org.apache.hudi.sink.partitioner.profile.WriteProfiles; import org.apache.hudi.source.prune.PartitionPruners; import org.apache.hudi.table.format.cdc.CdcInputSplit; @@ -91,6 +91,8 @@ public class IncrementalInputSplits implements Serializable { private final boolean skipCompaction; // skip clustering private final boolean skipClustering; + // skip insert overwrite + private final boolean skipInsertOverwrite; private IncrementalInputSplits( Configuration conf, @@ -99,7 +101,8 @@ private IncrementalInputSplits( long maxCompactionMemoryInBytes, @Nullable PartitionPruners.PartitionPruner partitionPruner, boolean skipCompaction, - boolean skipClustering) { + boolean skipClustering, + boolean skipInsertOverwrite) { this.conf = conf; this.path = path; this.rowType = rowType; @@ -107,6 +110,7 @@ private IncrementalInputSplits( this.partitionPruner = partitionPruner; this.skipCompaction = skipCompaction; this.skipClustering = skipClustering; + this.skipInsertOverwrite = skipInsertOverwrite; } /** @@ -135,6 +139,7 @@ public Result inputSplits( .rangeType(InstantRange.RangeType.CLOSED_CLOSED) 
.skipCompaction(skipCompaction) .skipClustering(skipClustering) + .skipInsertOverwrite(skipInsertOverwrite) .build(); IncrementalQueryAnalyzer.QueryContext analyzingResult = analyzer.analyze(); @@ -241,6 +246,7 @@ public Result inputSplits( .rangeType(issuedOffset != null ? InstantRange.RangeType.OPEN_CLOSED : InstantRange.RangeType.CLOSED_CLOSED) .skipCompaction(skipCompaction) .skipClustering(skipClustering) + .skipInsertOverwrite(skipInsertOverwrite) .limit(OptionsResolver.getReadCommitsLimit(conf)) .build(); @@ -412,7 +418,7 @@ private FileIndex getFileIndex() { * @return the set of read partitions */ private Set getReadPartitions(List metadataList) { - Set partitions = HoodieInputFormatUtils.getWritePartitionPaths(metadataList); + Set partitions = HoodieTableMetadataUtil.getWritePartitionPaths(metadataList); // apply partition push down if (this.partitionPruner != null) { Set selectedPartitions = this.partitionPruner.filter(partitions); @@ -498,6 +504,8 @@ public static class Builder { private boolean skipCompaction = false; // skip clustering private boolean skipClustering = false; + // skip insert overwrite + private boolean skipInsertOverwrite = false; public Builder() { } @@ -537,10 +545,15 @@ public Builder skipClustering(boolean skipClustering) { return this; } + public Builder skipInsertOverwrite(boolean skipInsertOverwrite) { + this.skipInsertOverwrite = skipInsertOverwrite; + return this; + } + public IncrementalInputSplits build() { return new IncrementalInputSplits( Objects.requireNonNull(this.conf), Objects.requireNonNull(this.path), Objects.requireNonNull(this.rowType), - this.maxCompactionMemoryInBytes, this.partitionPruner, this.skipCompaction, this.skipClustering); + this.maxCompactionMemoryInBytes, this.partitionPruner, this.skipCompaction, this.skipClustering, this.skipInsertOverwrite); } } } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadMonitoringFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadMonitoringFunction.java index fa911cadb0e1..0e3b1f0ce58e 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadMonitoringFunction.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadMonitoringFunction.java @@ -124,6 +124,7 @@ public StreamReadMonitoringFunction( .partitionPruner(partitionPruner) .skipCompaction(conf.getBoolean(FlinkOptions.READ_STREAMING_SKIP_COMPACT)) .skipClustering(conf.getBoolean(FlinkOptions.READ_STREAMING_SKIP_CLUSTERING)) + .skipInsertOverwrite(conf.getBoolean(FlinkOptions.READ_STREAMING_SKIP_INSERT_OVERWRITE)) .build(); } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/filedistribution/partitioner/StreamReadAppendPartitioner.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/filedistribution/partitioner/StreamReadAppendPartitioner.java new file mode 100644 index 000000000000..67bd9f9e324f --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/filedistribution/partitioner/StreamReadAppendPartitioner.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.source.filedistribution.partitioner; + +import org.apache.flink.api.common.functions.Partitioner; + +public class StreamReadAppendPartitioner implements Partitioner { + + private final int parallNum; + + public StreamReadAppendPartitioner(int parallNum) { + this.parallNum = parallNum; + } + + @Override + public int partition(Integer splitNum, int maxParallelism) { + return splitNum % parallNum; + } +} diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/filedistribution/partitioner/StreamReadBucketIndexPartitioner.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/filedistribution/partitioner/StreamReadBucketIndexPartitioner.java new file mode 100644 index 000000000000..4b5531b67ba9 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/filedistribution/partitioner/StreamReadBucketIndexPartitioner.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.source.filedistribution.partitioner; + +import org.apache.hudi.index.bucket.BucketIdentifier; + +import org.apache.flink.api.common.functions.Partitioner; + +public class StreamReadBucketIndexPartitioner implements Partitioner { + + private final int parallNum; + + public StreamReadBucketIndexPartitioner(int parallNum) { + this.parallNum = parallNum; + } + + @Override + public int partition(String fileName, int maxParallelism) { + return BucketIdentifier.bucketIdFromFileId(fileName) % parallNum; + } +} diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/filedistribution/selector/StreamReadAppendKeySelector.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/filedistribution/selector/StreamReadAppendKeySelector.java new file mode 100644 index 000000000000..de4a5f85f9c2 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/filedistribution/selector/StreamReadAppendKeySelector.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.source.filedistribution.selector; + +import org.apache.hudi.table.format.mor.MergeOnReadInputSplit; + +import org.apache.flink.api.java.functions.KeySelector; + +public class StreamReadAppendKeySelector implements KeySelector { + + @Override + public Integer getKey(MergeOnReadInputSplit mergeOnReadInputSplit) throws Exception { + return mergeOnReadInputSplit.getSplitNumber(); + } +} diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/filedistribution/selector/StreamReadBucketIndexKeySelector.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/filedistribution/selector/StreamReadBucketIndexKeySelector.java new file mode 100644 index 000000000000..d1db65596598 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/filedistribution/selector/StreamReadBucketIndexKeySelector.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.source.filedistribution.selector; + +import org.apache.hudi.table.format.mor.MergeOnReadInputSplit; + +import org.apache.flink.api.java.functions.KeySelector; + +public class StreamReadBucketIndexKeySelector implements KeySelector { + + @Override + public String getKey(MergeOnReadInputSplit mergeOnReadInputSplit) throws Exception { + return mergeOnReadInputSplit.getFileId(); + } +} diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java index e8050d157618..25ba73f97d3d 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java @@ -455,7 +455,7 @@ public static org.apache.flink.configuration.Configuration toFlinkConfig(FlinkSt conf.setString(FlinkOptions.SOURCE_AVRO_SCHEMA_PATH, config.sourceAvroSchemaPath); } conf.setString(FlinkOptions.SOURCE_AVRO_SCHEMA, config.sourceAvroSchema); - conf.setBoolean(FlinkOptions.UTC_TIMEZONE, config.utcTimezone); + conf.setBoolean(FlinkOptions.READ_UTC_TIMEZONE, config.utcTimezone); conf.setBoolean(FlinkOptions.URL_ENCODE_PARTITIONING, config.writePartitionUrlEncode); conf.setBoolean(FlinkOptions.HIVE_STYLE_PARTITIONING, config.hiveStylePartitioning); conf.setDouble(FlinkOptions.WRITE_TASK_MAX_SIZE, config.writeTaskMaxSize); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java index 68642b39da89..65f0199ae809 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java @@ -28,7 +28,6 @@ import org.apache.hudi.configuration.OptionsResolver; import org.apache.hudi.exception.HoodieValidationException; import org.apache.hudi.index.HoodieIndex; -import org.apache.hudi.keygen.ComplexAvroKeyGenerator; import org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator; import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator; import org.apache.hudi.util.AvroSchemaConverter; @@ -318,11 +317,7 @@ private static void setupHoodieKeyOptions(Configuration conf, CatalogTable table } } boolean complexHoodieKey = pks.length > 1 || partitions.length > 1; - if (complexHoodieKey && FlinkOptions.isDefaultValueDefined(conf, FlinkOptions.KEYGEN_CLASS_NAME)) { - conf.setString(FlinkOptions.KEYGEN_CLASS_NAME, ComplexAvroKeyGenerator.class.getName()); - LOG.info("Table option [{}] is reset to {} because record key or partition path has two or more fields", - FlinkOptions.KEYGEN_CLASS_NAME.key(), ComplexAvroKeyGenerator.class.getName()); - } + StreamerUtil.checkKeygenGenerator(complexHoodieKey, conf); } /** diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java index 47692c141cf7..b4cfdbb0c9fa 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java @@ -46,6 +46,10 @@ import org.apache.hudi.source.IncrementalInputSplits; import org.apache.hudi.source.StreamReadMonitoringFunction; 
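The four classes above come in Partitioner/KeySelector pairs: the key selector extracts either the split number (append mode) or the file id (bucket-index upsert) from a MergeOnReadInputSplit, and the partitioner maps that key onto a read task with a plain modulo, so splits spread evenly instead of relying on keyBy hashing, which can pile the splits of hot buckets onto a few readers; HoodieTableSource below wires the pairs up via partitionCustom. A minimal, self-contained sketch of the routing math only (parseBucketId is a hypothetical stand-in for BucketIdentifier.bucketIdFromFileId, assuming the bucket number is the zero-padded leading token of the file id):

    // Illustrative only: mirrors how the new partitioners spread splits across read tasks.
    public final class SplitRoutingSketch {

      // Append mode: consecutive split numbers are round-robined across the read tasks.
      static int routeAppendSplit(int splitNumber, int readTasks) {
        return splitNumber % readTasks;
      }

      // Bucket-index upsert: all splits of one bucket land on the same read task.
      static int routeBucketSplit(String fileId, int readTasks) {
        return parseBucketId(fileId) % readTasks;
      }

      // Hypothetical parser; the real code delegates to BucketIdentifier.bucketIdFromFileId.
      static int parseBucketId(String fileId) {
        return Integer.parseInt(fileId.substring(0, 8));
      }

      public static void main(String[] args) {
        System.out.println(routeAppendSplit(7, 4));                // 3
        System.out.println(routeBucketSplit("00000003-a1b2", 4));  // 3
      }
    }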
import org.apache.hudi.source.StreamReadOperator; +import org.apache.hudi.source.filedistribution.partitioner.StreamReadAppendPartitioner; +import org.apache.hudi.source.filedistribution.partitioner.StreamReadBucketIndexPartitioner; +import org.apache.hudi.source.filedistribution.selector.StreamReadAppendKeySelector; +import org.apache.hudi.source.filedistribution.selector.StreamReadBucketIndexKeySelector; import org.apache.hudi.source.prune.DataPruner; import org.apache.hudi.source.prune.PartitionPruners; import org.apache.hudi.source.prune.PrimaryKeyPruners; @@ -204,24 +208,18 @@ public DataStream produceDataStream(StreamExecutionEnvironment execEnv) conf, FilePathUtils.toFlinkPath(path), tableRowType, maxCompactionMemoryInBytes, partitionPruner); InputFormat inputFormat = getInputFormat(true); OneInputStreamOperatorFactory factory = StreamReadOperator.factory((MergeOnReadInputFormat) inputFormat); - DataStream monitorOperatorStream = execEnv.addSource(monitoringFunction, getSourceOperatorName("split_monitor")) + SingleOutputStreamOperator monitorOperatorStream = execEnv.addSource(monitoringFunction, getSourceOperatorName("split_monitor")) .uid(Pipelines.opUID("split_monitor", conf)) .setParallelism(1) .setMaxParallelism(1); - SingleOutputStreamOperator source; - if (OptionsResolver.isAppendMode(HoodieTableSource.this.conf)) { - source = monitorOperatorStream - .transform("split_reader", typeInfo, factory) - .uid(Pipelines.opUID("split_reader", conf)) - .setParallelism(conf.getInteger(FlinkOptions.READ_TASKS)); - } else { - source = monitorOperatorStream - .keyBy(MergeOnReadInputSplit::getFileId) - .transform("split_reader", typeInfo, factory) - .uid(Pipelines.opUID("split_reader", conf)) - .setParallelism(conf.getInteger(FlinkOptions.READ_TASKS)); - } - return new DataStreamSource<>(source); + + DataStream sourceWithKey = addFileDistributionStrategy(monitorOperatorStream); + + SingleOutputStreamOperator streamReadSource = sourceWithKey + .transform("split_reader", typeInfo, factory) + .uid(Pipelines.opUID("split_reader", conf)) + .setParallelism(conf.getInteger(FlinkOptions.READ_TASKS)); + return new DataStreamSource<>(streamReadSource); } else { InputFormatSourceFunction func = new InputFormatSourceFunction<>(getInputFormat(), typeInfo); DataStreamSource source = execEnv.addSource(func, asSummaryString(), typeInfo); @@ -231,6 +229,20 @@ public DataStream produceDataStream(StreamExecutionEnvironment execEnv) }; } + /** + * Specify the file distribution strategy based on different upstream writing mechanisms, + * to prevent hot spot issues during stream reading. + */ + private DataStream addFileDistributionStrategy(SingleOutputStreamOperator source) { + if (OptionsResolver.isMorWithBucketIndexUpsert(conf)) { + return source.partitionCustom(new StreamReadBucketIndexPartitioner(conf.getInteger(FlinkOptions.READ_TASKS)), new StreamReadBucketIndexKeySelector()); + } else if (OptionsResolver.isAppendMode(conf)) { + return source.partitionCustom(new StreamReadAppendPartitioner(conf.getInteger(FlinkOptions.READ_TASKS)), new StreamReadAppendKeySelector()); + } else { + return source.keyBy(MergeOnReadInputSplit::getFileId); + } + } + @Override public ChangelogMode getChangelogMode() { // when read as streaming and changelog mode is enabled, emit as FULL mode; @@ -548,7 +560,7 @@ private MergeOnReadInputFormat mergeOnReadInputFormat( this.predicates, this.limit == NO_LIMIT_CONSTANT ? 
Long.MAX_VALUE : this.limit, // ParquetInputFormat always uses the limit value getParquetConf(this.conf, this.hadoopConf), - this.conf.getBoolean(FlinkOptions.UTC_TIMEZONE), + this.conf.getBoolean(FlinkOptions.READ_UTC_TIMEZONE), this.internalSchemaManager ); } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java index d25db7d82fac..f9088b4096c8 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java @@ -343,6 +343,10 @@ public void createTable(ObjectPath tablePath, CatalogBaseTable catalogTable, boo final String partitions = String.join(",", resolvedTable.getPartitionKeys()); conf.setString(FlinkOptions.PARTITION_PATH_FIELD, partitions); options.put(TableOptionProperties.PARTITION_COLUMNS, partitions); + + final String[] pks = conf.getString(FlinkOptions.RECORD_KEY_FIELD).split(","); + boolean complexHoodieKey = pks.length > 1 || resolvedTable.getPartitionKeys().size() > 1; + StreamerUtil.checkKeygenGenerator(complexHoodieKey, conf); } else { conf.setString(FlinkOptions.KEYGEN_CLASS_NAME.key(), NonpartitionedAvroKeyGenerator.class.getName()); } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieHiveCatalog.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieHiveCatalog.java index 0d3a478f59d0..ce0230e69394 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieHiveCatalog.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieHiveCatalog.java @@ -502,6 +502,9 @@ private void initTableIfNotExists(ObjectPath tablePath, CatalogTable catalogTabl if (catalogTable.isPartitioned() && !flinkConf.contains(FlinkOptions.PARTITION_PATH_FIELD)) { final String partitions = String.join(",", catalogTable.getPartitionKeys()); flinkConf.setString(FlinkOptions.PARTITION_PATH_FIELD, partitions); + final String[] pks = flinkConf.getString(FlinkOptions.RECORD_KEY_FIELD).split(","); + boolean complexHoodieKey = pks.length > 1 || catalogTable.getPartitionKeys().size() > 1; + StreamerUtil.checkKeygenGenerator(complexHoodieKey, flinkConf); } if (!catalogTable.isPartitioned()) { @@ -549,6 +552,7 @@ private Table instantiateHiveTable(ObjectPath tablePath, CatalogBaseTable table, hiveTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); Map properties = new HashMap<>(table.getOptions()); + hiveConf.getAllProperties().forEach((k, v) -> properties.put("hadoop." 
+ k, String.valueOf(v))); if (external) { hiveTable.setTableType(TableType.EXTERNAL_TABLE.toString()); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java index f13098fc7c7c..29bb0a06d8ce 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java @@ -332,7 +332,7 @@ private ClosableIterator getBaseFileIterator(String path, int[] require return RecordIterators.getParquetRecordIterator( internalSchemaManager, - this.conf.getBoolean(FlinkOptions.UTC_TIMEZONE), + this.conf.getBoolean(FlinkOptions.READ_UTC_TIMEZONE), true, HadoopConfigurations.getParquetConf(this.conf, hadoopConf), fieldNames.toArray(new String[0]), @@ -735,7 +735,7 @@ public MergeIterator( this.emitDelete = emitDelete; this.operationPos = operationPos; this.avroProjection = avroProjection; - this.rowDataToAvroConverter = RowDataToAvroConverters.createConverter(tableRowType); + this.rowDataToAvroConverter = RowDataToAvroConverters.createConverter(tableRowType, flinkConf.getBoolean(FlinkOptions.WRITE_UTC_TIMEZONE)); this.avroToRowDataConverter = AvroToRowDataConverters.createRowConverter(requiredRowType); this.projection = projection; this.instantRange = split.getInstantRange().orElse(null); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/RowDataToAvroConverters.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/RowDataToAvroConverters.java index ff2903c0a733..23dbe71721a5 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/RowDataToAvroConverters.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/RowDataToAvroConverters.java @@ -77,6 +77,10 @@ public interface RowDataToAvroConverter extends Serializable { * Flink Table & SQL internal data structures to corresponding Avro data structures. 
*/ public static RowDataToAvroConverter createConverter(LogicalType type) { + return createConverter(type, true); + } + + public static RowDataToAvroConverter createConverter(LogicalType type, boolean utcTimezone) { final RowDataToAvroConverter converter; switch (type.getTypeRoot()) { case NULL: @@ -156,8 +160,34 @@ public Object convert(Schema schema, Object object) { }; break; case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + int precision = DataTypeUtils.precision(type); + if (precision <= 3) { + converter = + new RowDataToAvroConverter() { + private static final long serialVersionUID = 1L; + + @Override + public Object convert(Schema schema, Object object) { + return ((TimestampData) object).toInstant().toEpochMilli(); + } + }; + } else if (precision <= 6) { + converter = + new RowDataToAvroConverter() { + private static final long serialVersionUID = 1L; + + @Override + public Object convert(Schema schema, Object object) { + Instant instant = ((TimestampData) object).toInstant(); + return Math.addExact(Math.multiplyExact(instant.getEpochSecond(), 1000_000), instant.getNano() / 1000); + } + }; + } else { + throw new UnsupportedOperationException("Unsupported timestamp precision: " + precision); + } + break; case TIMESTAMP_WITHOUT_TIME_ZONE: - final int precision = DataTypeUtils.precision(type); + precision = DataTypeUtils.precision(type); if (precision <= 3) { converter = new RowDataToAvroConverter() { @@ -165,7 +195,7 @@ public Object convert(Schema schema, Object object) { @Override public Object convert(Schema schema, Object object) { - return ((TimestampData) object).toInstant().toEpochMilli(); + return utcTimezone ? ((TimestampData) object).toInstant().toEpochMilli() : ((TimestampData) object).toTimestamp().getTime(); } }; } else if (precision <= 6) { @@ -175,7 +205,7 @@ public Object convert(Schema schema, Object object) { @Override public Object convert(Schema schema, Object object) { - Instant instant = ((TimestampData) object).toInstant(); + Instant instant = utcTimezone ? 
((TimestampData) object).toInstant() : ((TimestampData) object).toTimestamp().toInstant(); return Math.addExact(Math.multiplyExact(instant.getEpochSecond(), 1000_000), instant.getNano() / 1000); } }; @@ -196,14 +226,14 @@ public Object convert(Schema schema, Object object) { }; break; case ARRAY: - converter = createArrayConverter((ArrayType) type); + converter = createArrayConverter((ArrayType) type, utcTimezone); break; case ROW: - converter = createRowConverter((RowType) type); + converter = createRowConverter((RowType) type, utcTimezone); break; case MAP: case MULTISET: - converter = createMapConverter(type); + converter = createMapConverter(type, utcTimezone); break; case RAW: default: @@ -241,10 +271,10 @@ public Object convert(Schema schema, Object object) { }; } - private static RowDataToAvroConverter createRowConverter(RowType rowType) { + private static RowDataToAvroConverter createRowConverter(RowType rowType, boolean utcTimezone) { final RowDataToAvroConverter[] fieldConverters = rowType.getChildren().stream() - .map(RowDataToAvroConverters::createConverter) + .map(type -> createConverter(type, utcTimezone)) .toArray(RowDataToAvroConverter[]::new); final LogicalType[] fieldTypes = rowType.getFields().stream() @@ -276,10 +306,10 @@ public Object convert(Schema schema, Object object) { }; } - private static RowDataToAvroConverter createArrayConverter(ArrayType arrayType) { + private static RowDataToAvroConverter createArrayConverter(ArrayType arrayType, boolean utcTimezone) { LogicalType elementType = arrayType.getElementType(); final ArrayData.ElementGetter elementGetter = ArrayData.createElementGetter(elementType); - final RowDataToAvroConverter elementConverter = createConverter(arrayType.getElementType()); + final RowDataToAvroConverter elementConverter = createConverter(arrayType.getElementType(), utcTimezone); return new RowDataToAvroConverter() { private static final long serialVersionUID = 1L; @@ -299,10 +329,10 @@ public Object convert(Schema schema, Object object) { }; } - private static RowDataToAvroConverter createMapConverter(LogicalType type) { + private static RowDataToAvroConverter createMapConverter(LogicalType type, boolean utcTimezone) { LogicalType valueType = AvroSchemaConverter.extractValueTypeToAvroMap(type); final ArrayData.ElementGetter valueGetter = ArrayData.createElementGetter(valueType); - final RowDataToAvroConverter valueConverter = createConverter(valueType); + final RowDataToAvroConverter valueConverter = createConverter(valueType, utcTimezone); return new RowDataToAvroConverter() { private static final long serialVersionUID = 1L; diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java index 064e59cc7512..c4587cc2c0b8 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java @@ -48,6 +48,7 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieValidationException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.keygen.ComplexAvroKeyGenerator; import org.apache.hudi.keygen.SimpleAvroKeyGenerator; import org.apache.hudi.schema.FilebasedSchemaProvider; import org.apache.hudi.sink.transform.ChainedTransformer; @@ -69,7 +70,6 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.StringReader; -import 
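To make the new utcTimezone flag in RowDataToAvroConverters concrete: with the flag on (the previous and default behaviour) a TIMESTAMP wall-clock value is turned into epoch millis/micros by treating it as UTC (TimestampData#toInstant); with it off the wall-clock is interpreted in the JVM's default zone (TimestampData#toTimestamp). A rough java.time analogy of the two computations, not the Flink types themselves:

    import java.time.LocalDateTime;
    import java.time.ZoneId;
    import java.time.ZoneOffset;

    public final class TimestampEpochSketch {
      public static void main(String[] args) {
        // A TIMESTAMP(3) wall-clock value such as the literal '1970-01-01 00:00:01'.
        LocalDateTime ts = LocalDateTime.of(1970, 1, 1, 0, 0, 1);

        // utcTimezone = true: the wall-clock is taken to be UTC.
        long utcMillis = ts.toInstant(ZoneOffset.UTC).toEpochMilli();                    // 1000

        // utcTimezone = false: the wall-clock is taken to be in the local/default zone.
        long localMillis = ts.atZone(ZoneId.systemDefault()).toInstant().toEpochMilli(); // zone dependent

        System.out.println(utcMillis + " vs " + localMillis);
      }
    }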
java.text.ParseException; import java.util.ArrayList; import java.util.Collections; import java.util.Date; @@ -265,7 +265,7 @@ public static HoodieTableMetaClient initTableIfNotExists( .initTable(hadoopConf, basePath); LOG.info("Table initialized under base path {}", basePath); } else { - LOG.info("Table [{}/{}] already exists, no need to initialize the table", + LOG.info("Table [path={}, name={}] already exists, no need to initialize the table", basePath, conf.getString(FlinkOptions.TABLE_NAME)); } @@ -380,34 +380,30 @@ public static Option getTableConfig(String basePath, org.apac * Returns the median instant time between the given two instant time. */ public static Option medianInstantTime(String highVal, String lowVal) { - try { - long high = HoodieActiveTimeline.parseDateFromInstantTime(highVal).getTime(); - long low = HoodieActiveTimeline.parseDateFromInstantTime(lowVal).getTime(); - ValidationUtils.checkArgument(high > low, - "Instant [" + highVal + "] should have newer timestamp than instant [" + lowVal + "]"); - long median = low + (high - low) / 2; - final String instantTime = HoodieActiveTimeline.formatDate(new Date(median)); - if (HoodieTimeline.compareTimestamps(lowVal, HoodieTimeline.GREATER_THAN_OR_EQUALS, instantTime) - || HoodieTimeline.compareTimestamps(highVal, HoodieTimeline.LESSER_THAN_OR_EQUALS, instantTime)) { - return Option.empty(); - } - return Option.of(instantTime); - } catch (ParseException e) { - throw new HoodieException("Get median instant time with interval [" + lowVal + ", " + highVal + "] error", e); + long high = HoodieActiveTimeline.parseDateFromInstantTimeSafely(highVal) + .orElseThrow(() -> new HoodieException("Get instant time diff with interval [" + highVal + "] error")).getTime(); + long low = HoodieActiveTimeline.parseDateFromInstantTimeSafely(lowVal) + .orElseThrow(() -> new HoodieException("Get instant time diff with interval [" + lowVal + "] error")).getTime(); + ValidationUtils.checkArgument(high > low, + "Instant [" + highVal + "] should have newer timestamp than instant [" + lowVal + "]"); + long median = low + (high - low) / 2; + final String instantTime = HoodieActiveTimeline.formatDate(new Date(median)); + if (HoodieTimeline.compareTimestamps(lowVal, HoodieTimeline.GREATER_THAN_OR_EQUALS, instantTime) + || HoodieTimeline.compareTimestamps(highVal, HoodieTimeline.LESSER_THAN_OR_EQUALS, instantTime)) { + return Option.empty(); } + return Option.of(instantTime); } /** * Returns the time interval in seconds between the given instant time. 
*/ public static long instantTimeDiffSeconds(String newInstantTime, String oldInstantTime) { - try { - long newTimestamp = HoodieActiveTimeline.parseDateFromInstantTime(newInstantTime).getTime(); - long oldTimestamp = HoodieActiveTimeline.parseDateFromInstantTime(oldInstantTime).getTime(); - return (newTimestamp - oldTimestamp) / 1000; - } catch (ParseException e) { - throw new HoodieException("Get instant time diff with interval [" + oldInstantTime + ", " + newInstantTime + "] error", e); - } + long newTimestamp = HoodieActiveTimeline.parseDateFromInstantTimeSafely(newInstantTime) + .orElseThrow(() -> new HoodieException("Get instant time diff with interval [" + oldInstantTime + ", " + newInstantTime + "] error")).getTime(); + long oldTimestamp = HoodieActiveTimeline.parseDateFromInstantTimeSafely(oldInstantTime) + .orElseThrow(() -> new HoodieException("Get instant time diff with interval [" + oldInstantTime + ", " + newInstantTime + "] error")).getTime(); + return (newTimestamp - oldTimestamp) / 1000; } public static Option createTransformer(List classNames) throws IOException { @@ -518,7 +514,7 @@ public static boolean fileExists(FileSystem fs, Path path) { public static boolean isWriteCommit(HoodieTableType tableType, HoodieInstant instant, HoodieTimeline timeline) { return tableType == HoodieTableType.MERGE_ON_READ ? !instant.getAction().equals(HoodieTimeline.COMMIT_ACTION) // not a compaction - : !ClusteringUtils.isClusteringInstant(instant, timeline); // not a clustering + : !ClusteringUtils.isCompletedClusteringInstant(instant, timeline); // not a clustering } /** @@ -539,4 +535,15 @@ public static void checkPreCombineKey(Configuration conf, List fields) { } } } + + /** + * Validate keygen generator. + */ + public static void checkKeygenGenerator(boolean isComplexHoodieKey, Configuration conf) { + if (isComplexHoodieKey && FlinkOptions.isDefaultValueDefined(conf, FlinkOptions.KEYGEN_CLASS_NAME)) { + conf.setString(FlinkOptions.KEYGEN_CLASS_NAME, ComplexAvroKeyGenerator.class.getName()); + LOG.info("Table option [{}] is reset to {} because record key or partition path has two or more fields", + FlinkOptions.KEYGEN_CLASS_NAME.key(), ComplexAvroKeyGenerator.class.getName()); + } + } } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java index fea986885f8c..47c613ec7847 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java @@ -24,7 +24,7 @@ import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.HadoopConfigurations; import org.apache.hudi.configuration.OptionsInference; -import org.apache.hudi.exception.SchemaCompatibilityException; +import org.apache.hudi.exception.MissingSchemaFieldException; import org.apache.hudi.sink.transform.ChainedTransformer; import org.apache.hudi.sink.transform.Transformer; import org.apache.hudi.sink.utils.Pipelines; @@ -557,13 +557,13 @@ public void testColumnDroppingIsNotAllowed() throws Exception { } catch (JobExecutionException e) { Throwable actualException = e; while (actualException != null) { - if (actualException.getClass() == SchemaCompatibilityException.class) { + if (actualException.getClass() == MissingSchemaFieldException.class) { // test is passed return; } actualException = actualException.getCause(); } } - 
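The new StreamerUtil.checkKeygenGenerator helper centralizes logic that HoodieTableFactory previously inlined and that both catalogs now reuse: when the record key or the partition path has two or more fields and the keygen class is still at its default, it switches the table to ComplexAvroKeyGenerator. A minimal sketch of the intended call pattern, mirroring the catalog call sites above (the option values are made up for illustration):

    import org.apache.flink.configuration.Configuration;
    import org.apache.hudi.configuration.FlinkOptions;
    import org.apache.hudi.util.StreamerUtil;

    public final class KeygenCheckSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setString(FlinkOptions.RECORD_KEY_FIELD, "uuid,name");   // composite record key
        conf.setString(FlinkOptions.PARTITION_PATH_FIELD, "par1");

        boolean complexHoodieKey =
            conf.getString(FlinkOptions.RECORD_KEY_FIELD).split(",").length > 1
                || conf.getString(FlinkOptions.PARTITION_PATH_FIELD).split(",").length > 1;

        // Resets the keygen class to ComplexAvroKeyGenerator only if it is still at its default.
        StreamerUtil.checkKeygenGenerator(complexHoodieKey, conf);
        System.out.println(conf.getString(FlinkOptions.KEYGEN_CLASS_NAME));
      }
    }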
throw new AssertionError(String.format("Excepted exception %s is not found", SchemaCompatibilityException.class)); + throw new AssertionError(String.format("Excepted exception %s is not found", MissingSchemaFieldException.class)); } } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java index 0f8651b4b05e..c3e77a379390 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java @@ -148,6 +148,8 @@ public void testSubtaskFails() throws Exception { .end(); } + // Only when Job level fails with INSERT operationType can we roll back the unfinished instant. + // Task level failed retry, we should reuse the unfinished Instant with INSERT operationType @Test public void testPartialFailover() throws Exception { conf.setLong(FlinkOptions.WRITE_COMMIT_ACK_TIMEOUT, 1L); @@ -163,7 +165,7 @@ public void testPartialFailover() throws Exception { .assertNextEvent() // if the write task can not fetch any pending instant when starts up(the coordinator restarts), // it will send an event to the coordinator - .coordinatorFails() + .restartCoordinator() .subTaskFails(0, 2) // the subtask can not fetch the instant to write until a new instant is initialized .checkpointThrows(4, "Timeout(1000ms) while waiting for instant initialize") @@ -172,6 +174,13 @@ public void testPartialFailover() throws Exception { // the last checkpoint instant was rolled back by subTaskFails(0, 2) // with EAGER cleaning strategy .assertNoEvent() + .checkpoint(4) + .assertNextEvent() + .subTaskFails(0, 4) + // the last checkpoint instant can not be rolled back by subTaskFails(0, 4) with INSERT write operationType + // because last data has been snapshot by checkpoint complete but instant has not been committed + // so we need re-commit it + .assertEmptyEvent() .end(); } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/BulkInsertFunctionWrapper.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/BulkInsertFunctionWrapper.java index 92f8f6decda0..ca6a317cfad6 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/BulkInsertFunctionWrapper.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/BulkInsertFunctionWrapper.java @@ -72,7 +72,7 @@ public class BulkInsertFunctionWrapper implements TestFunctionWrapper { private final MockStreamingRuntimeContext runtimeContext; private final MockOperatorEventGateway gateway; private final MockOperatorCoordinatorContext coordinatorContext; - private final StreamWriteOperatorCoordinator coordinator; + private StreamWriteOperatorCoordinator coordinator; private final boolean needSortInput; private BulkInsertWriteFunction writeFunction; @@ -160,6 +160,13 @@ public void coordinatorFails() throws Exception { this.coordinator.setExecutor(new MockCoordinatorExecutor(coordinatorContext)); } + public void restartCoordinator() throws Exception { + this.coordinator.close(); + this.coordinator = new StreamWriteOperatorCoordinator(conf, this.coordinatorContext); + this.coordinator.start(); + this.coordinator.setExecutor(new MockCoordinatorExecutor(coordinatorContext)); + } + public void checkpointFails(long checkpointId) { coordinator.notifyCheckpointAborted(checkpointId); } diff --git 
a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/InsertFunctionWrapper.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/InsertFunctionWrapper.java index cb144e92ba06..15634cc6e72b 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/InsertFunctionWrapper.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/InsertFunctionWrapper.java @@ -59,7 +59,7 @@ public class InsertFunctionWrapper implements TestFunctionWrapper { private final MockStreamingRuntimeContext runtimeContext; private final MockOperatorEventGateway gateway; private final MockOperatorCoordinatorContext coordinatorContext; - private final StreamWriteOperatorCoordinator coordinator; + private StreamWriteOperatorCoordinator coordinator; private final MockStateInitializationContext stateInitializationContext; private final boolean asyncClustering; @@ -152,6 +152,13 @@ public void coordinatorFails() throws Exception { this.coordinator.setExecutor(new MockCoordinatorExecutor(coordinatorContext)); } + public void restartCoordinator() throws Exception { + this.coordinator.close(); + this.coordinator = new StreamWriteOperatorCoordinator(conf, this.coordinatorContext); + this.coordinator.start(); + this.coordinator.setExecutor(new MockCoordinatorExecutor(coordinatorContext)); + } + public void checkpointFails(long checkpointId) { coordinator.notifyCheckpointAborted(checkpointId); } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/StreamWriteFunctionWrapper.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/StreamWriteFunctionWrapper.java index cf801bb0d7d0..c65e42f1521a 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/StreamWriteFunctionWrapper.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/StreamWriteFunctionWrapper.java @@ -71,7 +71,7 @@ public class StreamWriteFunctionWrapper implements TestFunctionWrapper { private final MockStreamingRuntimeContext runtimeContext; private final MockOperatorEventGateway gateway; private final MockOperatorCoordinatorContext coordinatorContext; - private final StreamWriteOperatorCoordinator coordinator; + private StreamWriteOperatorCoordinator coordinator; private final MockStateInitializationContext stateInitializationContext; /** @@ -227,6 +227,13 @@ public void coordinatorFails() throws Exception { this.coordinator.setExecutor(new MockCoordinatorExecutor(coordinatorContext)); } + public void restartCoordinator() throws Exception { + this.coordinator.close(); + this.coordinator = new StreamWriteOperatorCoordinator(conf, this.coordinatorContext); + this.coordinator.start(); + this.coordinator.setExecutor(new MockCoordinatorExecutor(coordinatorContext)); + } + public void checkpointFails(long checkpointId) { coordinator.notifyCheckpointAborted(checkpointId); } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestFunctionWrapper.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestFunctionWrapper.java index 25593d8d2fd2..faee168bf251 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestFunctionWrapper.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestFunctionWrapper.java @@ -82,6 +82,14 @@ default void coordinatorFails() throws Exception { throw new UnsupportedOperationException(); } + /** + * 
Triggers Job level fail, so the coordinator need re-create a new instance. + * @throws Exception + */ + default void restartCoordinator() throws Exception { + throw new UnsupportedOperationException(); + } + /** * Returns the operator coordinator. */ diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java index f40bc9c365aa..22ff06c606b6 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java @@ -517,8 +517,8 @@ public TestHarness checkLastPendingInstantCompleted() { * Used to simulate the use case that the coordinator has not finished a new instant initialization, * while the write task fails intermittently. */ - public TestHarness coordinatorFails() throws Exception { - this.pipeline.coordinatorFails(); + public TestHarness restartCoordinator() throws Exception { + this.pipeline.restartCoordinator(); return this; } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestIncrementalInputSplits.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestIncrementalInputSplits.java index 64211608e058..c15e4c628b64 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestIncrementalInputSplits.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestIncrementalInputSplits.java @@ -67,6 +67,7 @@ import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN; import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.serializeCommitMetadata; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertIterableEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -142,18 +143,20 @@ void testFilterInstantsWithRange() throws IOException { } @Test - void testFilterInstantsByCondition() throws IOException { - Configuration conf = TestConfigurations.getDefaultConf(basePath); + void testFilterInstantsByConditionForMOR() throws IOException { metaClient = HoodieTestUtils.init(basePath, HoodieTableType.MERGE_ON_READ); + HoodieActiveTimeline timelineMOR = metaClient.getActiveTimeline(); - HoodieActiveTimeline timeline = metaClient.getActiveTimeline(); + // commit1: delta commit HoodieInstant commit1 = new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "1"); + timelineMOR.createCompleteInstant(commit1); + // commit2: delta commit HoodieInstant commit2 = new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "2"); + // commit3: clustering + timelineMOR.createCompleteInstant(commit2); HoodieInstant commit3 = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, "3"); - timeline.createCompleteInstant(commit1); - timeline.createCompleteInstant(commit2); - timeline.createNewInstant(commit3); - commit3 = timeline.transitionReplaceRequestedToInflight(commit3, Option.empty()); + timelineMOR.createNewInstant(commit3); + commit3 = timelineMOR.transitionReplaceRequestedToInflight(commit3, Option.empty()); HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata( new ArrayList<>(), new HashMap<>(), @@ -161,15 +164,139 @@ void testFilterInstantsByCondition() 
throws IOException { WriteOperationType.CLUSTER, "", HoodieTimeline.REPLACE_COMMIT_ACTION); - timeline.transitionReplaceInflightToComplete(true, + timelineMOR.transitionReplaceInflightToComplete(true, HoodieTimeline.getReplaceCommitInflightInstant(commit3.getTimestamp()), serializeCommitMetadata(commitMetadata)); - timeline = timeline.reload(); + // commit4: insert overwrite + HoodieInstant commit4 = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, "4"); + timelineMOR.createNewInstant(commit4); + commit4 = timelineMOR.transitionReplaceRequestedToInflight(commit4, Option.empty()); + commitMetadata = CommitUtils.buildMetadata( + new ArrayList<>(), + new HashMap<>(), + Option.empty(), + WriteOperationType.INSERT_OVERWRITE, + "", + HoodieTimeline.REPLACE_COMMIT_ACTION); + timelineMOR.transitionReplaceInflightToComplete(true, + HoodieTimeline.getReplaceCommitInflightInstant(commit4.getTimestamp()), + serializeCommitMetadata(commitMetadata)); + // commit5: insert overwrite table + HoodieInstant commit5 = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, "5"); + timelineMOR.createNewInstant(commit5); + commit5 = timelineMOR.transitionReplaceRequestedToInflight(commit5, Option.empty()); + commitMetadata = CommitUtils.buildMetadata( + new ArrayList<>(), + new HashMap<>(), + Option.empty(), + WriteOperationType.INSERT_OVERWRITE_TABLE, + "", + HoodieTimeline.REPLACE_COMMIT_ACTION); + timelineMOR.transitionReplaceInflightToComplete(true, + HoodieTimeline.getReplaceCommitInflightInstant(commit5.getTimestamp()), + serializeCommitMetadata(commitMetadata)); + // commit6: compaction + HoodieInstant commit6 = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "6"); + timelineMOR.createNewInstant(commit6); + commit6 = timelineMOR.transitionCompactionRequestedToInflight(commit6); + commit6 = timelineMOR.transitionCompactionInflightToComplete(false, commit6, Option.empty()); + timelineMOR.createCompleteInstant(commit6); + timelineMOR = timelineMOR.reload(); + + // will not filter commits by default + HoodieTimeline resTimeline = IncrementalQueryAnalyzer.filterInstantsAsPerUserConfigs(metaClient, timelineMOR, false, false, false); + assertEquals(6, resTimeline.getInstants().size()); + + // filter cluster commits + resTimeline = IncrementalQueryAnalyzer.filterInstantsAsPerUserConfigs(metaClient, timelineMOR, false, true, false); + assertEquals(5, resTimeline.getInstants().size()); + assertFalse(resTimeline.containsInstant(commit3)); + + // filter compaction commits for mor table + resTimeline = IncrementalQueryAnalyzer.filterInstantsAsPerUserConfigs(metaClient, timelineMOR, true, false, false); + assertFalse(resTimeline.containsInstant(commit6)); + + // filter insert overwriter commits + resTimeline = IncrementalQueryAnalyzer.filterInstantsAsPerUserConfigs(metaClient, timelineMOR, false, false, true); + assertEquals(4, resTimeline.getInstants().size()); + assertFalse(resTimeline.containsInstant(commit4)); + assertFalse(resTimeline.containsInstant(commit5)); + } + + @Test + void testFilterInstantsByConditionForCOW() throws IOException { + metaClient = HoodieTestUtils.init(basePath, HoodieTableType.COPY_ON_WRITE); + HoodieActiveTimeline timelineCOW = metaClient.getActiveTimeline(); + + // commit1: commit + HoodieInstant commit1 = new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "1"); + timelineCOW.createCompleteInstant(commit1); + // commit2: commit + HoodieInstant commit2 = new 
HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "2"); + // commit3: clustering + timelineCOW.createCompleteInstant(commit2); + HoodieInstant commit3 = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, "3"); + timelineCOW.createNewInstant(commit3); + commit3 = timelineCOW.transitionReplaceRequestedToInflight(commit3, Option.empty()); + HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata( + new ArrayList<>(), + new HashMap<>(), + Option.empty(), + WriteOperationType.CLUSTER, + "", + HoodieTimeline.REPLACE_COMMIT_ACTION); + timelineCOW.transitionReplaceInflightToComplete(true, + HoodieTimeline.getReplaceCommitInflightInstant(commit3.getTimestamp()), + serializeCommitMetadata(commitMetadata)); + // commit4: insert overwrite + HoodieInstant commit4 = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, "4"); + timelineCOW.createNewInstant(commit4); + commit4 = timelineCOW.transitionReplaceRequestedToInflight(commit4, Option.empty()); + commitMetadata = CommitUtils.buildMetadata( + new ArrayList<>(), + new HashMap<>(), + Option.empty(), + WriteOperationType.INSERT_OVERWRITE, + "", + HoodieTimeline.REPLACE_COMMIT_ACTION); + timelineCOW.transitionReplaceInflightToComplete(true, + HoodieTimeline.getReplaceCommitInflightInstant(commit4.getTimestamp()), + serializeCommitMetadata(commitMetadata)); + // commit5: insert overwrite table + HoodieInstant commit5 = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, "5"); + timelineCOW.createNewInstant(commit5); + commit5 = timelineCOW.transitionReplaceRequestedToInflight(commit5, Option.empty()); + commitMetadata = CommitUtils.buildMetadata( + new ArrayList<>(), + new HashMap<>(), + Option.empty(), + WriteOperationType.INSERT_OVERWRITE_TABLE, + "", + HoodieTimeline.REPLACE_COMMIT_ACTION); + timelineCOW.transitionReplaceInflightToComplete(true, + HoodieTimeline.getReplaceCommitInflightInstant(commit5.getTimestamp()), + serializeCommitMetadata(commitMetadata)); + + timelineCOW = timelineCOW.reload(); + + // will not filter commits by default + HoodieTimeline resTimeline = IncrementalQueryAnalyzer.filterInstantsAsPerUserConfigs(metaClient, timelineCOW, false, false, false); + assertEquals(5, resTimeline.getInstants().size()); + + // filter cluster commits + resTimeline = IncrementalQueryAnalyzer.filterInstantsAsPerUserConfigs(metaClient, timelineCOW, false, true, false); + assertEquals(4, resTimeline.getInstants().size()); + assertFalse(resTimeline.containsInstant(commit3)); + + // cow table skip-compact does not take effect (because if it take effect will affect normal commits) + resTimeline = IncrementalQueryAnalyzer.filterInstantsAsPerUserConfigs(metaClient, timelineCOW, true, false, false); + assertEquals(5, resTimeline.getInstants().size()); - conf.set(FlinkOptions.READ_END_COMMIT, "3"); - HoodieTimeline resTimeline = IncrementalQueryAnalyzer.filterInstantsAsPerUserConfigs(metaClient, timeline, false, false); - // will not filter cluster commit by default + // filter insert overwriter commits + resTimeline = IncrementalQueryAnalyzer.filterInstantsAsPerUserConfigs(metaClient, timelineCOW, false, false, true); assertEquals(3, resTimeline.getInstants().size()); + assertFalse(resTimeline.containsInstant(commit4)); + assertFalse(resTimeline.containsInstant(commit5)); } @Test diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java 
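The tests above exercise IncrementalQueryAnalyzer.filterInstantsAsPerUserConfigs with the three user-facing skip switches (compaction, clustering, and the new insert-overwrite one), and the ITTestHoodieDataSource changes that follow now set read.streaming.skip_compaction explicitly, which suggests its effective default for streaming reads has changed. A hedged end-to-end sketch of a streaming read that opts out of all three commit types; the key name 'read.streaming.skip_insertoverwrite' is an assumption based on the FlinkOptions.READ_STREAMING_SKIP_INSERT_OVERWRITE naming and should be checked against FlinkOptions:

    import org.apache.flink.table.api.EnvironmentSettings;
    import org.apache.flink.table.api.TableEnvironment;

    public final class StreamingSkipOptionsSketch {
      public static void main(String[] args) {
        TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());

        // Streaming read of a MOR table that skips compaction, clustering and
        // insert-overwrite instants on the incremental timeline.
        tableEnv.executeSql(
            "create table t1 (\n"
                + "  uuid varchar(20) primary key not enforced,\n"
                + "  name varchar(10),\n"
                + "  ts timestamp(3),\n"
                + "  `partition` varchar(10)\n"
                + ") partitioned by (`partition`) with (\n"
                + "  'connector' = 'hudi',\n"
                + "  'path' = '/tmp/hudi/t1',\n"
                + "  'table.type' = 'MERGE_ON_READ',\n"
                + "  'read.streaming.enabled' = 'true',\n"
                + "  'read.streaming.skip_compaction' = 'true',\n"
                + "  'read.streaming.skip_clustering' = 'true',\n"
                + "  'read.streaming.skip_insertoverwrite' = 'true'\n"
                + ")");
      }
    }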
b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java index 22a2511ba073..ac38d92a5770 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java @@ -60,7 +60,10 @@ import org.junit.jupiter.params.provider.ValueSource; import java.io.File; +import java.time.Instant; +import java.time.LocalDateTime; import java.time.ZoneId; +import java.time.format.DateTimeFormatter; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; @@ -131,6 +134,7 @@ void testStreamWriteAndReadFromSpecifiedCommit(HoodieTableType tableType) throws hoodieTableDDL = sql("t1") .option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.READ_AS_STREAMING, true) + .option(FlinkOptions.READ_STREAMING_SKIP_COMPACT, false) .option(FlinkOptions.TABLE_TYPE, tableType) .option(FlinkOptions.READ_START_COMMIT, firstCommit) .end(); @@ -163,6 +167,7 @@ void testStreamReadFromSpecifiedCommitWithChangelog(HoodieCDCSupplementalLogging String hoodieTableDDL = sql("t1") .option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.READ_AS_STREAMING, true) + .option(FlinkOptions.READ_STREAMING_SKIP_COMPACT, false) .option(FlinkOptions.CDC_ENABLED, true) .option(FlinkOptions.SUPPLEMENTAL_LOGGING_MODE, mode.name()) .end(); @@ -196,6 +201,7 @@ void testStreamWriteAndRead(HoodieTableType tableType) throws Exception { String hoodieTableDDL = sql("t1") .option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.READ_AS_STREAMING, true) + .option(FlinkOptions.READ_STREAMING_SKIP_COMPACT, false) .option(FlinkOptions.TABLE_TYPE, tableType) .end(); streamTableEnv.executeSql(hoodieTableDDL); @@ -239,6 +245,7 @@ void testStreamReadAppendData(HoodieTableType tableType) throws Exception { String createHoodieTable2 = sql("t2") .option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.READ_AS_STREAMING, true) + .option(FlinkOptions.READ_STREAMING_SKIP_COMPACT, false) .option(FlinkOptions.TABLE_TYPE, tableType) .option(FlinkOptions.READ_START_COMMIT, specifiedCommit) .end(); @@ -332,7 +339,6 @@ void testStreamWriteReadSkippingCompaction() throws Exception { .option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.TABLE_TYPE, FlinkOptions.TABLE_TYPE_MERGE_ON_READ) .option(FlinkOptions.READ_AS_STREAMING, true) - .option(FlinkOptions.READ_STREAMING_SKIP_COMPACT, true) .option(FlinkOptions.COMPACTION_DELTA_COMMITS, 1) .option(FlinkOptions.COMPACTION_TASKS, 1) .end(); @@ -359,7 +365,6 @@ void testAppendWriteReadSkippingClustering() throws Exception { .option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.OPERATION, "insert") .option(FlinkOptions.READ_AS_STREAMING, true) - .option(FlinkOptions.READ_STREAMING_SKIP_CLUSTERING, true) .option(FlinkOptions.CLUSTERING_SCHEDULE_ENABLED,true) .option(FlinkOptions.CLUSTERING_ASYNC_ENABLED, true) .option(FlinkOptions.CLUSTERING_DELTA_COMMITS,1) @@ -490,6 +495,7 @@ void testStreamReadFilterByPartition(HoodieTableType tableType, boolean hiveStyl .option(FlinkOptions.TABLE_TYPE, tableType) .option(FlinkOptions.READ_AS_STREAMING, true) .option(FlinkOptions.READ_STREAMING_CHECK_INTERVAL, 2) + .option(FlinkOptions.READ_STREAMING_SKIP_COMPACT, false) .option(FlinkOptions.HIVE_STYLE_PARTITIONING, hiveStylePartitioning) .end(); streamTableEnv.executeSql(hoodieTableDDL); @@ -675,7 +681,8 @@ void 
testWriteAndReadParMiddle(ExecMode execMode) throws Exception { + "with (\n" + " 'connector' = 'hudi',\n" + " 'path' = '" + tempFile.getAbsolutePath() + "',\n" - + " 'read.streaming.enabled' = '" + streaming + "'\n" + + " 'read.streaming.enabled' = '" + streaming + "',\n" + + " 'read.streaming.skip_compaction' = 'false'\n" + ")"; streamTableEnv.executeSql(hoodieTableDDL); String insertInto = "insert into t1 values\n" @@ -721,6 +728,7 @@ void testWriteAndReadWithTimestampMicros(ExecMode execMode) throws Exception { .noPartition() .option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.READ_AS_STREAMING, streaming) + .option(FlinkOptions.READ_STREAMING_SKIP_COMPACT, false) .end(); streamTableEnv.executeSql(hoodieTableDDL); String insertInto = "insert into t1 values\n" @@ -824,6 +832,7 @@ void testStreamWriteAndReadWithMiniBatches(HoodieTableType tableType) throws Exc String hoodieTableDDL = sql("t1") .option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.READ_AS_STREAMING, true) + .option(FlinkOptions.READ_STREAMING_SKIP_COMPACT, false) .option(FlinkOptions.TABLE_TYPE, tableType) .option(FlinkOptions.READ_START_COMMIT, "earliest") .option(FlinkOptions.WRITE_BATCH_SIZE, 0.00001) @@ -1076,6 +1085,7 @@ void testWriteAndReadDebeziumJson(ExecMode execMode) throws Exception { .pkField("id") .option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.READ_AS_STREAMING, execMode == ExecMode.STREAM) + .option(FlinkOptions.READ_STREAMING_SKIP_COMPACT, false) .option(FlinkOptions.PRE_COMBINE, true) .noPartition() .end(); @@ -1825,6 +1835,56 @@ void testWriteReadWithLocalTimestamp(HoodieTableType tableType) { assertRowsEquals(result, expected); } + @ParameterizedTest + @EnumSource(value = HoodieTableType.class) + void testWriteReadWithTimestampWithoutTZ(HoodieTableType tableType) { + TableEnvironment tableEnv = batchTableEnv; + tableEnv.getConfig().setLocalTimeZone(ZoneId.of("America/Los_Angeles")); + String createTable = sql("t1") + .field("f0 int") + .field("f1 varchar(10)") + .field("f2 TIMESTAMP(3)") + .field("f3 TIMESTAMP(6)") + .option(FlinkOptions.PATH, tempFile.getAbsolutePath()) + .option(FlinkOptions.PRECOMBINE_FIELD, "f1") + .option(FlinkOptions.TABLE_TYPE, tableType) + .option(FlinkOptions.WRITE_UTC_TIMEZONE, false) + //FlinkOptions.READ_UTC_TIMEZONE doesn't affect in MergeOnReadInputFormat since the option isn't supported in AvroToRowDataConverters + //.option(FlinkOptions.READ_UTC_TIMEZONE, false) + .pkField("f0") + .noPartition() + .end(); + tableEnv.executeSql(createTable); + + long epochMillis = 0L; + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + String insertInto = "insert into t1 values\n" + + "(1" + + ", 'abc'" + + ", TIMESTAMP '" + formatter.format(LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis + 1000), ZoneId.systemDefault())) + "'" + + ", TIMESTAMP '" + formatter.format(LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis + 2000), ZoneId.systemDefault())) + "'),\n" + + "(2" + + ", 'def'" + + ", TIMESTAMP '" + formatter.format(LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis + 3000), ZoneId.systemDefault())) + "'" + + ", TIMESTAMP '" + formatter.format(LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis + 4000), ZoneId.systemDefault())) + "')"; + execInsertSql(tableEnv, insertInto); + + List result = CollectionUtil.iterableToList( + () -> tableEnv.sqlQuery("select * from t1").execute().collect()); + formatter = 
DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss"); + final String expected = "[" + + "+I[1" + + ", abc" + + ", " + formatter.format(LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis + 1000), ZoneId.of("UTC"))) + + ", " + formatter.format(LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis + 2000), ZoneId.of("UTC"))) + "], " + + "+I[2" + + ", def" + + ", " + formatter.format(LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis + 3000), ZoneId.of("UTC"))) + + ", " + formatter.format(LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis + 4000), ZoneId.of("UTC"))) + "]]"; + + assertRowsEquals(result, expected); + } + @ParameterizedTest @MethodSource("tableTypeQueryTypeNumInsertAndCompactionDeltaCommitsParams") void testReadMetaFields(HoodieTableType tableType, String queryType, int numInsertBatches, int compactionDeltaCommits) throws Exception { @@ -1969,6 +2029,7 @@ void testDynamicPartitionPrune(HoodieTableType tableType, boolean hiveStyleParti .option(FlinkOptions.TABLE_TYPE, tableType) .option(FlinkOptions.READ_AS_STREAMING, true) .option(FlinkOptions.READ_STREAMING_CHECK_INTERVAL, 2) + .option(FlinkOptions.READ_STREAMING_SKIP_COMPACT, false) .option(FlinkOptions.HIVE_STYLE_PARTITIONING, hiveStylePartitioning) .end(); streamTableEnv.executeSql(hoodieTableDDL); diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestSchemaEvolution.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestSchemaEvolution.java index 0417285815a9..46f51df741f1 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestSchemaEvolution.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestSchemaEvolution.java @@ -90,7 +90,8 @@ public void testCopyOnWriteInputFormat() throws Exception { public void testMergeOnReadInputFormatBaseFileOnlyIterator() throws Exception { TableOptions tableOptions = defaultTableOptions(tempFile.getAbsolutePath()) .withOption(FlinkOptions.READ_AS_STREAMING.key(), true) - .withOption(FlinkOptions.READ_START_COMMIT.key(), FlinkOptions.START_COMMIT_EARLIEST); + .withOption(FlinkOptions.READ_START_COMMIT.key(), FlinkOptions.START_COMMIT_EARLIEST) + .withOption(FlinkOptions.READ_STREAMING_SKIP_COMPACT.key(), false); testSchemaEvolution(tableOptions); } @@ -98,7 +99,8 @@ public void testMergeOnReadInputFormatBaseFileOnlyIterator() throws Exception { public void testMergeOnReadInputFormatBaseFileOnlyFilteringIterator() throws Exception { TableOptions tableOptions = defaultTableOptions(tempFile.getAbsolutePath()) .withOption(FlinkOptions.READ_AS_STREAMING.key(), true) - .withOption(FlinkOptions.READ_START_COMMIT.key(), 1); + .withOption(FlinkOptions.READ_START_COMMIT.key(), 1) + .withOption(FlinkOptions.READ_STREAMING_SKIP_COMPACT.key(), false); testSchemaEvolution(tableOptions); } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalog.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalog.java index 0207022903b4..d883b72b075d 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalog.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalog.java @@ -28,6 +28,7 @@ import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.HadoopConfigurations; import org.apache.hudi.exception.HoodieValidationException; +import 
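A worked reading of testWriteReadWithTimestampWithoutTZ above, assuming the JVM runs in a zone behind UTC such as America/Los_Angeles: the inserted literals are the wall-clock renderings of epoch 1000/2000/3000/4000 ms in the default zone (e.g. '1969-12-31 16:00:01' for 1000 ms at UTC-8); with WRITE_UTC_TIMEZONE set to false the writer interprets those wall-clocks in the default zone, so exactly those epoch values are stored; the reader keeps UTC semantics on this path (the commented-out READ_UTC_TIMEZONE option does not apply here), so the values come back rendered at UTC, which is why the expected strings are formatted with ZoneId.of("UTC"). A small check of the first value (illustrative, zone hard-coded):

    import java.time.Instant;
    import java.time.LocalDateTime;
    import java.time.ZoneId;

    public final class TimestampRoundTripSketch {
      public static void main(String[] args) {
        ZoneId la = ZoneId.of("America/Los_Angeles");
        // What the test inserts for epoch 1000 ms when the default zone is LA ...
        System.out.println(LocalDateTime.ofInstant(Instant.ofEpochMilli(1000), la));                // 1969-12-31T16:00:01
        // ... and what it expects to read back, rendered at UTC.
        System.out.println(LocalDateTime.ofInstant(Instant.ofEpochMilli(1000), ZoneId.of("UTC")));  // 1970-01-01T00:00:01
      }
    }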
org.apache.hudi.keygen.ComplexAvroKeyGenerator; import org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator; import org.apache.hudi.keygen.SimpleAvroKeyGenerator; import org.apache.hudi.sink.partitioner.profile.WriteProfiles; @@ -35,6 +36,7 @@ import org.apache.hudi.utils.TestConfigurations; import org.apache.hudi.utils.TestData; +import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; import org.apache.flink.configuration.Configuration; import org.apache.flink.core.fs.Path; import org.apache.flink.table.api.DataTypes; @@ -108,6 +110,13 @@ public class TestHoodieCatalog { Collections.emptyList(), CONSTRAINTS); + private static final UniqueConstraint MULTI_KEY_CONSTRAINTS = UniqueConstraint.primaryKey("uuid", Arrays.asList("uuid", "name")); + private static final ResolvedSchema CREATE_MULTI_KEY_TABLE_SCHEMA = + new ResolvedSchema( + CREATE_COLUMNS, + Collections.emptyList(), + MULTI_KEY_CONSTRAINTS); + private static final List EXPECTED_TABLE_COLUMNS = CREATE_COLUMNS.stream() .map( @@ -258,6 +267,40 @@ public void testCreateTable() throws Exception { String keyGeneratorClassName = metaClient.getTableConfig().getKeyGeneratorClassName(); assertEquals(keyGeneratorClassName, SimpleAvroKeyGenerator.class.getName()); + // validate single key and multiple partition for partitioned table + ObjectPath singleKeyMultiplePartitionPath = new ObjectPath(TEST_DEFAULT_DATABASE, "tb_skmp" + System.currentTimeMillis()); + final ResolvedCatalogTable singleKeyMultiplePartitionTable = new ResolvedCatalogTable( + CatalogTable.of( + Schema.newBuilder().fromResolvedSchema(CREATE_TABLE_SCHEMA).build(), + "test", + Lists.newArrayList("par1", "par2"), + EXPECTED_OPTIONS), + CREATE_TABLE_SCHEMA + ); + + catalog.createTable(singleKeyMultiplePartitionPath, singleKeyMultiplePartitionTable, false); + metaClient = + StreamerUtil.createMetaClient(catalog.inferTablePath(catalogPathStr, singleKeyMultiplePartitionPath), new org.apache.hadoop.conf.Configuration()); + keyGeneratorClassName = metaClient.getTableConfig().getKeyGeneratorClassName(); + assertThat(keyGeneratorClassName, is(ComplexAvroKeyGenerator.class.getName())); + + // validate multiple key and single partition for partitioned table + ObjectPath multipleKeySinglePartitionPath = new ObjectPath(TEST_DEFAULT_DATABASE, "tb_mksp" + System.currentTimeMillis()); + final ResolvedCatalogTable multipleKeySinglePartitionTable = new ResolvedCatalogTable( + CatalogTable.of( + Schema.newBuilder().fromResolvedSchema(CREATE_MULTI_KEY_TABLE_SCHEMA).build(), + "test", + Lists.newArrayList("par1"), + EXPECTED_OPTIONS), + CREATE_TABLE_SCHEMA + ); + + catalog.createTable(multipleKeySinglePartitionPath, multipleKeySinglePartitionTable, false); + metaClient = + StreamerUtil.createMetaClient(catalog.inferTablePath(catalogPathStr, singleKeyMultiplePartitionPath), new org.apache.hadoop.conf.Configuration()); + keyGeneratorClassName = metaClient.getTableConfig().getKeyGeneratorClassName(); + assertThat(keyGeneratorClassName, is(ComplexAvroKeyGenerator.class.getName())); + // validate key generator for non partitioned table ObjectPath nonPartitionPath = new ObjectPath(TEST_DEFAULT_DATABASE, "tb"); final ResolvedCatalogTable nonPartitionCatalogTable = new ResolvedCatalogTable( diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieHiveCatalog.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieHiveCatalog.java index 3ee85a46fc46..d88bb0326ef4 100644 --- 
a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieHiveCatalog.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieHiveCatalog.java @@ -29,11 +29,13 @@ import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.exception.HoodieCatalogException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.keygen.ComplexAvroKeyGenerator; import org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator; import org.apache.hudi.keygen.SimpleAvroKeyGenerator; import org.apache.hudi.sink.partitioner.profile.WriteProfiles; import org.apache.hudi.util.StreamerUtil; +import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; import org.apache.flink.table.api.DataTypes; import org.apache.flink.table.api.Schema; import org.apache.flink.table.api.TableSchema; @@ -71,6 +73,7 @@ import static org.apache.flink.table.factories.FactoryUtil.CONNECTOR; import static org.apache.hudi.configuration.FlinkOptions.PRECOMBINE_FIELD; +import static org.apache.hudi.keygen.constant.KeyGeneratorOptions.RECORDKEY_FIELD_NAME; import static org.apache.hudi.table.catalog.HoodieCatalogTestUtils.createHiveConf; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.CoreMatchers.is; @@ -97,6 +100,26 @@ public class TestHoodieHiveCatalog { .primaryKey("uuid") .build(); List partitions = Collections.singletonList("par1"); + + TableSchema multiKeySinglePartitionTableSchema = + TableSchema.builder() + .field("uuid", DataTypes.INT().notNull()) + .field("name", DataTypes.STRING().notNull()) + .field("age", DataTypes.INT()) + .field("par1", DataTypes.STRING()) + .primaryKey("uuid", "name") + .build(); + + TableSchema singleKeyMultiPartitionTableSchema = + TableSchema.builder() + .field("uuid", DataTypes.INT().notNull()) + .field("name", DataTypes.STRING()) + .field("par1", DataTypes.STRING()) + .field("par2", DataTypes.STRING()) + .primaryKey("uuid") + .build(); + List multiPartitions = Lists.newArrayList("par1", "par2"); + private static HoodieHiveCatalog hoodieCatalog; private final ObjectPath tablePath = new ObjectPath("default", "test"); @@ -201,6 +224,28 @@ public void testCreateAndGetHoodieTable(HoodieTableType tableType) throws Except String keyGeneratorClassName = metaClient.getTableConfig().getKeyGeneratorClassName(); assertEquals(keyGeneratorClassName, SimpleAvroKeyGenerator.class.getName()); + // validate single key and multiple partition for partitioned table + ObjectPath singleKeyMultiPartitionPath = new ObjectPath("default", "tb_skmp_" + System.currentTimeMillis()); + CatalogTable singleKeyMultiPartitionTable = + new CatalogTableImpl(singleKeyMultiPartitionTableSchema, multiPartitions, options, "hudi table"); + hoodieCatalog.createTable(singleKeyMultiPartitionPath, singleKeyMultiPartitionTable, false); + + HoodieTableMetaClient singleKeyMultiPartitionTableMetaClient = + StreamerUtil.createMetaClient(hoodieCatalog.inferTablePath(singleKeyMultiPartitionPath, singleKeyMultiPartitionTable), createHiveConf()); + assertThat(singleKeyMultiPartitionTableMetaClient.getTableConfig().getKeyGeneratorClassName(), is(ComplexAvroKeyGenerator.class.getName())); + + // validate multiple key and single partition for partitioned table + ObjectPath multiKeySinglePartitionPath = new ObjectPath("default", "tb_mksp_" + System.currentTimeMillis()); + + options.remove(RECORDKEY_FIELD_NAME.key()); + CatalogTable multiKeySinglePartitionTable = + new CatalogTableImpl(multiKeySinglePartitionTableSchema, 
partitions, options, "hudi table"); + hoodieCatalog.createTable(multiKeySinglePartitionPath, multiKeySinglePartitionTable, false); + + HoodieTableMetaClient multiKeySinglePartitionTableMetaClient = + StreamerUtil.createMetaClient(hoodieCatalog.inferTablePath(multiKeySinglePartitionPath, multiKeySinglePartitionTable), createHiveConf()); + assertThat(multiKeySinglePartitionTableMetaClient.getTableConfig().getKeyGeneratorClassName(), is(ComplexAvroKeyGenerator.class.getName())); + // validate key generator for non partitioned table ObjectPath nonPartitionPath = new ObjectPath("default", "tb_" + tableType); CatalogTable nonPartitionTable = @@ -370,6 +415,19 @@ public void testDropPartition() throws Exception { assertThrows(NoSuchObjectException.class, () -> getHivePartition(partitionSpec)); } + @Test + public void testMappingHiveConfPropsToHiveTableParams() throws TableAlreadyExistException, DatabaseNotExistException, TableNotExistException { + HoodieHiveCatalog catalog = HoodieCatalogTestUtils.createHiveCatalog("myCatalog", true); + catalog.open(); + Map originOptions = new HashMap<>(); + originOptions.put(FactoryUtil.CONNECTOR.key(), "hudi"); + CatalogTable table = new CatalogTableImpl(schema, originOptions, "hudi table"); + catalog.createTable(tablePath, table, false); + + Table hiveTable = hoodieCatalog.getHiveTable(tablePath); + assertEquals("false", hiveTable.getParameters().get("hadoop.hive.metastore.schema.verification")); + } + private Partition getHivePartition(CatalogPartitionSpec partitionSpec) throws Exception { return hoodieCatalog.getClient().getPartition( tablePath.getDatabaseName(), diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestRowDataToAvroConverters.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestRowDataToAvroConverters.java new file mode 100644 index 000000000000..0ab0626d0345 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestRowDataToAvroConverters.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.utils; + +import org.apache.avro.generic.GenericRecord; +import org.apache.flink.formats.common.TimestampFormat; +import org.apache.flink.formats.json.JsonToRowDataConverters; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hudi.util.AvroSchemaConverter; +import org.apache.hudi.util.RowDataToAvroConverters; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; + +import static org.apache.flink.table.api.DataTypes.ROW; +import static org.apache.flink.table.api.DataTypes.FIELD; +import static org.apache.flink.table.api.DataTypes.TIMESTAMP; + +class TestRowDataToAvroConverters { + + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + @Test + void testRowDataToAvroStringToRowDataWithLocalTimezone() throws JsonProcessingException { + String timestampFromLocal = "2021-03-30 07:44:29"; + + DataType rowDataType = ROW(FIELD("timestamp_from_local", TIMESTAMP())); + JsonToRowDataConverters.JsonToRowDataConverter jsonToRowDataConverter = + new JsonToRowDataConverters(true, true, TimestampFormat.SQL) + .createConverter(rowDataType.getLogicalType()); + Object rowData = jsonToRowDataConverter.convert(new ObjectMapper().readTree("{\"timestamp_from_local\":\"" + timestampFromLocal + "\"}")); + + RowType rowType = (RowType) DataTypes.ROW(DataTypes.FIELD("f_timestamp", DataTypes.TIMESTAMP(3))).getLogicalType(); + RowDataToAvroConverters.RowDataToAvroConverter converter = + RowDataToAvroConverters.createConverter(rowType, false); + GenericRecord avroRecord = + (GenericRecord) converter.convert(AvroSchemaConverter.convertToSchema(rowType), rowData); + Assertions.assertEquals(timestampFromLocal, formatter.format(LocalDateTime.ofInstant(Instant.ofEpochMilli((Long) avroRecord.get(0)), ZoneId.systemDefault()))); + } + + @Test + void testRowDataToAvroStringToRowDataWithUtcTimezone() throws JsonProcessingException { + String timestampFromUtc0 = "2021-03-30 07:44:29"; + + DataType rowDataType = ROW(FIELD("timestamp_from_utc_0", TIMESTAMP())); + JsonToRowDataConverters.JsonToRowDataConverter jsonToRowDataConverter = + new JsonToRowDataConverters(true, true, TimestampFormat.SQL) + .createConverter(rowDataType.getLogicalType()); + Object rowData = jsonToRowDataConverter.convert(new ObjectMapper().readTree("{\"timestamp_from_utc_0\":\"" + timestampFromUtc0 + "\"}")); + + RowType rowType = (RowType) DataTypes.ROW(DataTypes.FIELD("f_timestamp", DataTypes.TIMESTAMP(3))).getLogicalType(); + RowDataToAvroConverters.RowDataToAvroConverter converter = + RowDataToAvroConverters.createConverter(rowType); + GenericRecord avroRecord = + (GenericRecord) converter.convert(AvroSchemaConverter.convertToSchema(rowType), rowData); + Assertions.assertEquals(timestampFromUtc0, formatter.format(LocalDateTime.ofInstant(Instant.ofEpochMilli((Long) avroRecord.get(0)), ZoneId.of("UTC")))); + Assertions.assertEquals("2021-03-30 08:44:29", formatter.format(LocalDateTime.ofInstant(Instant.ofEpochMilli((Long) avroRecord.get(0)), ZoneId.of("UTC+1")))); + Assertions.assertEquals("2021-03-30 15:44:29", 
formatter.format(LocalDateTime.ofInstant(Instant.ofEpochMilli((Long) avroRecord.get(0)), ZoneId.of("Asia/Shanghai")))); + } +} \ No newline at end of file diff --git a/hudi-gcp/pom.xml b/hudi-gcp/pom.xml index 2c308fbf4244..5ffe9e86c731 100644 --- a/hudi-gcp/pom.xml +++ b/hudi-gcp/pom.xml @@ -36,7 +36,7 @@ See https://github.com/GoogleCloudPlatform/cloud-opensource-java/wiki/The-Google com.google.cloud libraries-bom - 26.15.0 + ${gcp-libraries-bom.version} pom import diff --git a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java index 5a23a4079ae2..32430b533291 100644 --- a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java +++ b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java @@ -44,6 +44,7 @@ import com.google.cloud.bigquery.TableId; import com.google.cloud.bigquery.TableInfo; import com.google.cloud.bigquery.ViewDefinition; +import com.google.cloud.bigquery.StandardTableDefinition; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -198,16 +199,22 @@ public void updateTableSchema(String tableName, Schema schema, List part LOG.info("No table update is needed."); return; // No need to update schema. } - ExternalTableDefinition.Builder builder = definition.toBuilder(); - builder.setSchema(finalSchema); - builder.setAutodetect(false); - if (definition.getHivePartitioningOptions() != null) { - builder.setHivePartitioningOptions(definition.getHivePartitioningOptions().toBuilder().setRequirePartitionFilter(requirePartitionFilter).build()); + if (!StringUtils.isNullOrEmpty(bigLakeConnectionId)) { + Table updatedTable = + existingTable.toBuilder().setDefinition(StandardTableDefinition.of(finalSchema)).build(); + updatedTable.update(); + } else { + ExternalTableDefinition.Builder builder = definition.toBuilder(); + builder.setSchema(finalSchema); + builder.setAutodetect(false); + if (definition.getHivePartitioningOptions() != null) { + builder.setHivePartitioningOptions(definition.getHivePartitioningOptions().toBuilder().setRequirePartitionFilter(requirePartitionFilter).build()); + } + Table updatedTable = existingTable.toBuilder() + .setDefinition(builder.build()) + .build(); + bigquery.update(updatedTable); } - Table updatedTable = existingTable.toBuilder() - .setDefinition(builder.build()) - .build(); - bigquery.update(updatedTable); } public void createVersionsTable(String tableName, String sourceUri, String sourceUriPrefix, List partitionFields) { diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopFSUtils.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopFSUtils.java index be38dfe8d6d5..d59bffc92172 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopFSUtils.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopFSUtils.java @@ -22,9 +22,12 @@ import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.storage.StorageConfiguration; +import org.apache.hudi.storage.StoragePath; +import org.apache.hudi.storage.StoragePathInfo; import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; @@ -107,4 +110,48 @@ public static Path addSchemeIfLocalPath(String path) { 
LOG.info("Resolving file " + path + "to be a remote file."); return providedPath; } + + /** + * @param path {@link StoragePath} instance. + * @return the Hadoop {@link Path} instance after conversion. + */ + public static Path convertToHadoopPath(StoragePath path) { + return new Path(path.toUri()); + } + + /** + * @param path Hadoop {@link Path} instance. + * @return the {@link StoragePath} instance after conversion. + */ + public static StoragePath convertToStoragePath(Path path) { + return new StoragePath(path.toUri()); + } + + /** + * @param fileStatus Hadoop {@link FileStatus} instance. + * @return the {@link StoragePathInfo} instance after conversion. + */ + public static StoragePathInfo convertToStoragePathInfo(FileStatus fileStatus) { + return new StoragePathInfo( + convertToStoragePath(fileStatus.getPath()), + fileStatus.getLen(), + fileStatus.isDirectory(), + fileStatus.getReplication(), + fileStatus.getBlockSize(), + fileStatus.getModificationTime()); + } + + /** + * @param pathInfo {@link StoragePathInfo} instance. + * @return the {@link FileStatus} instance after conversion. + */ + public static FileStatus convertToHadoopFileStatus(StoragePathInfo pathInfo) { + return new FileStatus( + pathInfo.getLength(), + pathInfo.isDirectory(), + pathInfo.getBlockReplication(), + pathInfo.getBlockSize(), + pathInfo.getModificationTime(), + convertToHadoopPath(pathInfo.getPath())); + } } diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java index 87d4d9667e63..54c1712be354 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java @@ -19,12 +19,12 @@ package org.apache.hudi.storage.hadoop; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.StoragePath; import org.apache.hudi.storage.StoragePathFilter; import org.apache.hudi.storage.StoragePathInfo; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; @@ -39,6 +39,10 @@ import java.util.List; import java.util.stream.Collectors; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToHadoopPath; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToStoragePath; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToStoragePathInfo; + /** * Implementation of {@link HoodieStorage} using Hadoop's {@link FileSystem} */ @@ -92,7 +96,7 @@ public boolean createDirectory(StoragePath path) throws IOException { @Override public List listDirectEntries(StoragePath path) throws IOException { return Arrays.stream(fs.listStatus(convertToHadoopPath(path))) - .map(this::convertToStoragePathInfo) + .map(HadoopFSUtils::convertToStoragePathInfo) .collect(Collectors.toList()); } @@ -109,9 +113,9 @@ public List listFiles(StoragePath path) throws IOException { @Override public List listDirectEntries(List pathList) throws IOException { return Arrays.stream(fs.listStatus(pathList.stream() - .map(this::convertToHadoopPath) + .map(HadoopFSUtils::convertToHadoopPath) .toArray(Path[]::new))) - .map(this::convertToStoragePathInfo) + .map(HadoopFSUtils::convertToStoragePathInfo) .collect(Collectors.toList()); } @@ -122,7 +126,7 @@ public List listDirectEntries(StoragePath path, return 
Arrays.stream(fs.listStatus( convertToHadoopPath(path), e -> filter.accept(convertToStoragePath(e)))) - .map(this::convertToStoragePathInfo) + .map(HadoopFSUtils::convertToStoragePathInfo) .collect(Collectors.toList()); } @@ -130,7 +134,7 @@ public List listDirectEntries(StoragePath path, public List globEntries(StoragePath pathPattern) throws IOException { return Arrays.stream(fs.globStatus(convertToHadoopPath(pathPattern))) - .map(this::convertToStoragePathInfo) + .map(HadoopFSUtils::convertToStoragePathInfo) .collect(Collectors.toList()); } @@ -139,7 +143,7 @@ public List globEntries(StoragePath pathPattern, StoragePathFil throws IOException { return Arrays.stream(fs.globStatus(convertToHadoopPath(pathPattern), path -> filter.accept(convertToStoragePath(path)))) - .map(this::convertToStoragePathInfo) + .map(HadoopFSUtils::convertToStoragePathInfo) .collect(Collectors.toList()); } @@ -184,22 +188,6 @@ public boolean createNewFile(StoragePath path) throws IOException { return fs.createNewFile(convertToHadoopPath(path)); } - private Path convertToHadoopPath(StoragePath loc) { - return new Path(loc.toUri()); - } - - private StoragePath convertToStoragePath(Path path) { - return new StoragePath(path.toUri()); - } - - private StoragePathInfo convertToStoragePathInfo(FileStatus fileStatus) { - return new StoragePathInfo( - convertToStoragePath(fileStatus.getPath()), - fileStatus.getLen(), - fileStatus.isDirectory(), - fileStatus.getModificationTime()); - } - @Override public void close() throws IOException { fs.close(); diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/hadoop/fs/TestHadoopFSUtils.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/hadoop/fs/TestHadoopFSUtils.java new file mode 100644 index 000000000000..7768ff4feae7 --- /dev/null +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/hadoop/fs/TestHadoopFSUtils.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.hadoop.fs; + +import org.apache.hudi.storage.StoragePath; +import org.apache.hudi.storage.StoragePathInfo; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.ValueSource; + +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToHadoopFileStatus; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToHadoopPath; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToStoragePath; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToStoragePathInfo; +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * Tests {@link HadoopFSUtils} + */ +public class TestHadoopFSUtils { + @ParameterizedTest + @ValueSource(strings = { + "/a/b/c", + "s3://bucket/partition=1%2F2%2F3", + "hdfs://x/y/z.file#bar" + }) + public void testPathConversion(String pathString) { + // Hadoop Path -> StoragePath -> Hadoop Path + Path path = new Path(pathString); + StoragePath storagePath = convertToStoragePath(path); + Path convertedPath = convertToHadoopPath(storagePath); + assertEquals(path.toUri(), storagePath.toUri()); + assertEquals(path, convertedPath); + + // StoragePath -> Hadoop Path -> StoragePath + storagePath = new StoragePath(pathString); + path = convertToHadoopPath(storagePath); + StoragePath convertedStoragePath = convertToStoragePath(path); + assertEquals(storagePath.toUri(), path.toUri()); + assertEquals(storagePath, convertedStoragePath); + } + + @ParameterizedTest + @CsvSource({ + "/a/b/c,1000,false,1,1000000,1238493920", + "/x/y/z,0,true,2,0,2002403203" + }) + public void testFileStatusConversion(String path, + long length, + boolean isDirectory, + short blockReplication, + long blockSize, + long modificationTime) { + // FileStatus -> StoragePathInfo -> FileStatus + FileStatus fileStatus = new FileStatus( + length, isDirectory, blockReplication, blockSize, modificationTime, new Path(path)); + StoragePathInfo pathInfo = convertToStoragePathInfo(fileStatus); + assertStoragePathInfo( + pathInfo, path, length, isDirectory, blockReplication, blockSize, modificationTime); + FileStatus convertedFileStatus = convertToHadoopFileStatus(pathInfo); + assertFileStatus( + convertedFileStatus, path, length, isDirectory, blockReplication, blockSize, modificationTime); + + // StoragePathInfo -> FileStatus -> StoragePathInfo + pathInfo = new StoragePathInfo( + new StoragePath(path), length, isDirectory, blockReplication, blockSize, modificationTime); + fileStatus = convertToHadoopFileStatus(pathInfo); + assertFileStatus( + fileStatus, path, length, isDirectory, blockReplication, blockSize, modificationTime); + StoragePathInfo convertedPathInfo = convertToStoragePathInfo(fileStatus); + assertStoragePathInfo( + convertedPathInfo, path, length, isDirectory, blockReplication, blockSize, modificationTime); + } + + private void assertFileStatus(FileStatus fileStatus, + String path, + long length, + boolean isDirectory, + short blockReplication, + long blockSize, + long modificationTime) { + assertEquals(new Path(path), fileStatus.getPath()); + assertEquals(length, fileStatus.getLen()); + assertEquals(isDirectory, fileStatus.isDirectory()); + assertEquals(!isDirectory, fileStatus.isFile()); + assertEquals(blockReplication, fileStatus.getReplication()); + assertEquals(blockSize, fileStatus.getBlockSize()); + assertEquals(modificationTime, fileStatus.getModificationTime()); 
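For context on the conversion helpers introduced in HadoopFSUtils above, here is a minimal, self-contained usage sketch; it is not part of the patch, and the sample path and file attributes are made up for illustration only.

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hudi.hadoop.fs.HadoopFSUtils;
    import org.apache.hudi.storage.StoragePath;
    import org.apache.hudi.storage.StoragePathInfo;

    public class PathConversionExample {
      public static void main(String[] args) {
        // Hadoop Path <-> Hudi StoragePath: both wrap the same URI, so the round trip is lossless.
        Path hadoopPath = new Path("/tmp/hudi/table/par1/f1.parquet");
        StoragePath storagePath = HadoopFSUtils.convertToStoragePath(hadoopPath);
        Path roundTripped = HadoopFSUtils.convertToHadoopPath(storagePath);

        // FileStatus <-> StoragePathInfo: length, directory flag, block replication,
        // block size, and modification time are carried over by the converters.
        FileStatus status = new FileStatus(1024L, false, (short) 3, 128 * 1024 * 1024L, 0L, hadoopPath);
        StoragePathInfo pathInfo = HadoopFSUtils.convertToStoragePathInfo(status);
        FileStatus converted = HadoopFSUtils.convertToHadoopFileStatus(pathInfo);

        System.out.println(roundTripped + ", length=" + pathInfo.getLength()
            + ", replication=" + converted.getReplication());
      }
    }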
+ } + + private void assertStoragePathInfo(StoragePathInfo pathInfo, + String path, + long length, + boolean isDirectory, + short blockReplication, + long blockSize, + long modificationTime) { + assertEquals(new StoragePath(path), pathInfo.getPath()); + assertEquals(length, pathInfo.getLength()); + assertEquals(isDirectory, pathInfo.isDirectory()); + assertEquals(!isDirectory, pathInfo.isFile()); + assertEquals(blockReplication, pathInfo.getBlockReplication()); + assertEquals(blockSize, pathInfo.getBlockSize()); + assertEquals(modificationTime, pathInfo.getModificationTime()); + } +} diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadTableInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadTableInputFormat.java index 0cfe0d0a1940..76209422fe9c 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadTableInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadTableInputFormat.java @@ -53,6 +53,7 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.SplitLocationInfo; import org.apache.hadoop.mapreduce.Job; +import org.apache.hudi.metadata.HoodieTableMetadataUtil; import java.io.IOException; import java.util.ArrayList; @@ -192,7 +193,7 @@ protected List listStatusForIncrementalMode(JobConf job, // build fileGroup from fsView Path basePath = new Path(tableMetaClient.getBasePath()); // filter affectedPartition by inputPaths - List affectedPartition = HoodieInputFormatUtils.getWritePartitionPaths(metadataList).stream() + List affectedPartition = HoodieTableMetadataUtil.getWritePartitionPaths(metadataList).stream() .filter(k -> k.isEmpty() ? inputPaths.contains(basePath) : inputPaths.contains(new Path(basePath, k))).collect(Collectors.toList()); if (affectedPartition.isEmpty()) { return result; diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java index a0d1b086e035..22116283d121 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java @@ -99,7 +99,7 @@ public GenericRecord serialize(Object o, Schema schema) { List allStructFieldRefs = soi.getAllStructFieldRefs(); List structFieldsDataAsList = soi.getStructFieldsDataAsList(o); - for (int i = 0; i < size; i++) { + for (int i = 0; i < size; i++) { Schema.Field field = schema.getFields().get(i); if (i >= columnTypes.size()) { break; @@ -134,7 +134,7 @@ private void setUpRecordFieldFromWritable(TypeInfo typeInfo, Object structFieldD * Determine if an Avro schema is of type Union[T, NULL]. Avro supports nullable * types via a union of type T and null. This is a very common use case. * As such, we want to silently convert it to just T and allow the value to be null. - * + *

    * When a Hive union type is used with AVRO, the schema type becomes * Union[NULL, T1, T2, ...]. The NULL in the union should be silently removed * @@ -266,7 +266,7 @@ private Object serializeStruct(StructTypeInfo typeInfo, StructObjectInspector ss GenericData.Record record = new GenericData.Record(schema); ArrayList allStructFieldTypeInfos = typeInfo.getAllStructFieldTypeInfos(); - for (int i = 0; i < size; i++) { + for (int i = 0; i < size; i++) { Schema.Field field = schema.getFields().get(i); setUpRecordFieldFromWritable(allStructFieldTypeInfos.get(i), structFieldsDataAsList.get(i), allStructFieldRefs.get(i).getFieldObjectInspector(), record, field); @@ -278,26 +278,30 @@ private Object serializePrimitive(PrimitiveObjectInspector fieldOI, Object struc switch (fieldOI.getPrimitiveCategory()) { case BINARY: if (schema.getType() == Schema.Type.BYTES) { - return AvroSerdeUtils.getBufferFromBytes((byte[])fieldOI.getPrimitiveJavaObject(structFieldData)); + return AvroSerdeUtils.getBufferFromBytes((byte[]) fieldOI.getPrimitiveJavaObject(structFieldData)); } else if (schema.getType() == Schema.Type.FIXED) { - GenericData.Fixed fixed = new GenericData.Fixed(schema, (byte[])fieldOI.getPrimitiveJavaObject(structFieldData)); + GenericData.Fixed fixed = new GenericData.Fixed(schema, (byte[]) fieldOI.getPrimitiveJavaObject(structFieldData)); return fixed; } else { throw new HoodieException("Unexpected Avro schema for Binary TypeInfo: " + schema.getType()); } case DECIMAL: - HiveDecimal dec = (HiveDecimal)fieldOI.getPrimitiveJavaObject(structFieldData); - LogicalTypes.Decimal decimal = (LogicalTypes.Decimal)schema.getLogicalType(); + HiveDecimal dec = (HiveDecimal) fieldOI.getPrimitiveJavaObject(structFieldData); + LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) schema.getLogicalType(); BigDecimal bd = new BigDecimal(dec.toString()).setScale(decimal.getScale()); - return HoodieAvroUtils.DECIMAL_CONVERSION.toFixed(bd, schema, decimal); + if (schema.getType() == Schema.Type.BYTES) { + return HoodieAvroUtils.DECIMAL_CONVERSION.toBytes(bd, schema, decimal); + } else { + return HoodieAvroUtils.DECIMAL_CONVERSION.toFixed(bd, schema, decimal); + } case CHAR: - HiveChar ch = (HiveChar)fieldOI.getPrimitiveJavaObject(structFieldData); + HiveChar ch = (HiveChar) fieldOI.getPrimitiveJavaObject(structFieldData); return new Utf8(ch.getStrippedValue()); case VARCHAR: - HiveVarchar vc = (HiveVarchar)fieldOI.getPrimitiveJavaObject(structFieldData); + HiveVarchar vc = (HiveVarchar) fieldOI.getPrimitiveJavaObject(structFieldData); return new Utf8(vc.getValue()); case STRING: - String string = (String)fieldOI.getPrimitiveJavaObject(structFieldData); + String string = (String) fieldOI.getPrimitiveJavaObject(structFieldData); return new Utf8(string); case DATE: return HoodieHiveUtils.getDays(structFieldData); @@ -364,7 +368,7 @@ private Object serializeMap(MapTypeInfo typeInfo, MapObjectInspector fieldOI, Ob ObjectInspector mapValueObjectInspector = fieldOI.getMapValueObjectInspector(); TypeInfo mapKeyTypeInfo = typeInfo.getMapKeyTypeInfo(); TypeInfo mapValueTypeInfo = typeInfo.getMapValueTypeInfo(); - Map map = fieldOI.getMap(structFieldData); + Map map = fieldOI.getMap(structFieldData); Schema valueType = schema.getValueType(); Map deserialized = new LinkedHashMap(fieldOI.getMapSize(structFieldData)); diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java index 
8922b837871f..4ab72701a11a 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java @@ -514,19 +514,6 @@ public static FileStatus[] listAffectedFilesForCommits(Configuration hadoopConf, return fullPathToFileStatus.values().toArray(new FileStatus[0]); } - /** - * Returns all the incremental write partition paths as a set with the given commits metadata. - * - * @param metadataList The commits metadata - * @return the partition path set - */ - public static Set getWritePartitionPaths(List metadataList) { - return metadataList.stream() - .map(HoodieCommitMetadata::getWritePartitionPaths) - .flatMap(Collection::stream) - .collect(Collectors.toSet()); - } - public static HoodieRealtimeFileSplit createRealtimeFileSplit(HoodieRealtimePath path, long start, long length, String[] hosts) { try { return new HoodieRealtimeFileSplit(new FileSplit(path, start, length, hosts), path); diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java index 275ab36b82a0..6d98f0c8f52e 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java @@ -27,7 +27,6 @@ import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.io.storage.HoodieFileReaderFactory; -import org.apache.avro.AvroRuntimeException; import org.apache.avro.JsonProperties; import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; @@ -168,6 +167,9 @@ public static Writable avroToArrayWritable(Object value, Schema schema, boolean case STRING: return new Text(value.toString()); case BYTES: + if (schema.getLogicalType() != null && schema.getLogicalType().getName().equals("decimal")) { + return toHiveDecimalWritable(((ByteBuffer) value).array(), schema); + } return new BytesWritable(((ByteBuffer) value).array()); case INT: if (schema.getLogicalType() != null && schema.getLogicalType().getName().equals("date")) { @@ -198,12 +200,13 @@ public static Writable avroToArrayWritable(Object value, Schema schema, boolean Writable[] recordValues = new Writable[schema.getFields().size()]; int recordValueIndex = 0; for (Schema.Field field : schema.getFields()) { - // TODO Revisit Avro exception handling in future Object fieldValue = null; - try { + if (record.getSchema().getField(field.name()) != null) { fieldValue = record.get(field.name()); - } catch (AvroRuntimeException e) { - LOG.debug("Field:" + field.name() + "not found in Schema:" + schema); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Field:" + field.name() + "not found in Schema:" + schema); + } } recordValues[recordValueIndex++] = avroToArrayWritable(fieldValue, field.schema(), supportTimestamp); } @@ -248,11 +251,7 @@ public static Writable avroToArrayWritable(Object value, Schema schema, boolean } case FIXED: if (schema.getLogicalType() != null && schema.getLogicalType().getName().equals("decimal")) { - LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) LogicalTypes.fromSchema(schema); - HiveDecimalWritable writable = new HiveDecimalWritable(((GenericFixed) value).bytes(), - decimal.getScale()); - return HiveDecimalUtils.enforcePrecisionScale(writable, - new DecimalTypeInfo(decimal.getPrecision(), decimal.getScale())); + return 
toHiveDecimalWritable(((GenericFixed) value).bytes(), schema); } return new BytesWritable(((GenericFixed) value).bytes()); default: @@ -319,4 +318,11 @@ private static Schema appendNullSchemaFields(Schema schema, List newFiel } return appendFieldsToSchema(schema, newFields); } + + private static HiveDecimalWritable toHiveDecimalWritable(byte[] bytes, Schema schema) { + LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) LogicalTypes.fromSchema(schema); + HiveDecimalWritable writable = new HiveDecimalWritable(bytes, decimal.getScale()); + return HiveDecimalUtils.enforcePrecisionScale(writable, + new DecimalTypeInfo(decimal.getPrecision(), decimal.getScale())); + } } diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/streaming/StructuredStreamingSinkUtil.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/streaming/StructuredStreamingSinkUtil.java new file mode 100644 index 000000000000..f6fec62cb3b2 --- /dev/null +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/streaming/StructuredStreamingSinkUtil.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.integ.testsuite.streaming; + +import org.apache.hudi.exception.HoodieException; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.HashMap; +import java.util.Map; + +/** + * Spark-submit utility to test Spark structured streaming writes to Hudi. + * + * Sample command. + * ./bin/spark-submit --master local[2] --driver-memory 1g --executor-memory 1g \ + * --class org.apache.hudi.integ.testsuite.streaming.StructuredStreamingSinkUtil PATH TO hudi-integ-test-bundle-0.13.0-SNAPSHOT.jar \ + * --spark-master local[2] \ + * --source-path /tmp/parquet_ny/ \ + * --target-path /tmp/hudi_streaming_kafka10/MERGE_ON_READ3/ \ + * --checkpoint-path /tmp/hudi_streaming_kafka10/checkpoint_mor3/ \ + * --table-type COPY_ON_WRITE \ + * --partition-field date_col \ + * --record-key-field tpep_pickup_datetime \ + * --pre-combine-field tpep_dropoff_datetime \ + * --table-name test_tbl + * + * Ensure "source-path" has parquet data. 
+ */ +public class StructuredStreamingSinkUtil implements Serializable { + + private static final Logger LOG = LoggerFactory.getLogger(StructuredStreamingSinkUtil.class); + + private transient JavaSparkContext jsc; + private SparkSession sparkSession; + private Config cfg; + + public StructuredStreamingSinkUtil(JavaSparkContext jsc, Config cfg) { + this.jsc = jsc; + this.sparkSession = SparkSession.builder().config(jsc.getConf()).getOrCreate(); + this.cfg = cfg; + } + + public static class Config implements Serializable { + @Parameter(names = {"--source-path", "-sp"}, description = "Source path to consume data from", required = true) + public String sourcePath = null; + + @Parameter(names = {"--target-path", "-tp"}, description = "Target path of the table of interest.", required = true) + public String targetPath = null; + + @Parameter(names = {"--table-type", "-ty"}, description = "Table type of the table of interest.", required = true) + public String tableType = "COPY_ON_WRITE"; + + @Parameter(names = {"--checkpoint-path", "-cp"}, description = "Checkpoint path of the table of interest", required = true) + public String checkpointPath = null; + + @Parameter(names = {"--partition-field", "-pp"}, description = "Partitioning field", required = true) + public String partitionField = null; + + @Parameter(names = {"--record-key-field", "-rk"}, description = "Record key field", required = true) + public String recordKeyField = null; + + @Parameter(names = {"--pre-combine-field", "-pc"}, description = "Precombine field", required = true) + public String preCombineField = null; + + @Parameter(names = {"--table-name", "-tn"}, description = "Table name", required = true) + public String tableName = null; + + @Parameter(names = {"--disable-metadata", "-dmdt"}, description = "Disable metadata while querying", required = false) + public Boolean disableMetadata = false; + + @Parameter(names = {"--spark-master", "-ms"}, description = "Spark master", required = false) + public String sparkMaster = null; + + @Parameter(names = {"--spark-memory", "-sm"}, description = "Spark memory to use", required = false) + public String sparkMemory = "1g"; + + @Parameter(names = {"--help", "-h"}, help = true) + public Boolean help = false; + + } + + public static void main(String[] args) { + final Config cfg = new Config(); + JCommander cmd = new JCommander(cfg, null, args); + + if (cfg.help || args.length == 0) { + cmd.usage(); + System.exit(1); + } + + SparkConf sparkConf = buildSparkConf("Spark-structured-streaming-test", cfg.sparkMaster); + sparkConf.set("spark.executor.memory", cfg.sparkMemory); + JavaSparkContext jsc = new JavaSparkContext(sparkConf); + + try { + StructuredStreamingSinkUtil streamingSinkUtil = new StructuredStreamingSinkUtil(jsc, cfg); + streamingSinkUtil.run(); + } catch (Throwable throwable) { + LOG.error("Failed to run the Spark structured streaming sink test for " + cfg, throwable); + } finally { + jsc.stop(); + } + } + + public void run() { + try { + LOG.info(cfg.toString()); + StructuredStreamingSinkTestWriter.triggerStreaming(sparkSession, cfg.tableType, cfg.sourcePath, cfg.targetPath, cfg.checkpointPath, + cfg.tableName, cfg.partitionField, cfg.recordKeyField, cfg.preCombineField); + StructuredStreamingSinkTestWriter.waitUntilCondition(1000 * 60 * 10, 1000 * 30); + } catch (Exception e) { + throw new HoodieException("Unable to test spark structured writes to hudi " + cfg.targetPath, e); + } finally { + LOG.warn("Completing Spark Structured Streaming test"); + } + } + + public static SparkConf 
buildSparkConf(String appName, String defaultMaster) { + return buildSparkConf(appName, defaultMaster, new HashMap<>()); + } + + private static SparkConf buildSparkConf(String appName, String defaultMaster, Map additionalConfigs) { + final SparkConf sparkConf = new SparkConf().setAppName(appName); + String master = sparkConf.get("spark.master", defaultMaster); + sparkConf.setMaster(master); + if (master.startsWith("yarn")) { + sparkConf.set("spark.eventLog.overwrite", "true"); + sparkConf.set("spark.eventLog.enabled", "true"); + } + sparkConf.set("spark.ui.port", "8090"); + sparkConf.setIfMissing("spark.driver.maxResultSize", "2g"); + sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + sparkConf.set("spark.kryo.registrator", "org.apache.spark.HoodieSparkKryoRegistrar"); + sparkConf.set("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension"); + sparkConf.set("spark.hadoop.mapred.output.compress", "true"); + sparkConf.set("spark.hadoop.mapred.output.compression.codec", "true"); + sparkConf.set("spark.hadoop.mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec"); + sparkConf.set("spark.hadoop.mapred.output.compression.type", "BLOCK"); + + additionalConfigs.forEach(sparkConf::set); + return sparkConf; + } +} diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/streaming/StructuredStreamingSinkTestWriter.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/streaming/StructuredStreamingSinkTestWriter.scala new file mode 100644 index 000000000000..8eb3b469e938 --- /dev/null +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/streaming/StructuredStreamingSinkTestWriter.scala @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.integ.testsuite.streaming + +import org.apache.hudi.DataSourceWriteOptions._ +import org.apache.hudi.config.HoodieWriteConfig.FAIL_ON_TIMELINE_ARCHIVING_ENABLE +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent} +import org.apache.spark.sql.streaming.{OutputMode, StreamingQueryListener, Trigger} +import org.apache.log4j.LogManager + +object StructuredStreamingSinkTestWriter { + + private val log = LogManager.getLogger(getClass) + var validationComplete: Boolean = false; + + def waitUntilCondition(): Unit = { + waitUntilCondition(1000 * 60 * 5, 500) + } + + def waitUntilCondition(maxWaitTimeMs: Long, intervalTimeMs: Long): Unit = { + var waitSoFar: Long = 0; + while (waitSoFar < maxWaitTimeMs && !validationComplete) { + log.info("Waiting for " + intervalTimeMs + ". 
Total wait time " + waitSoFar) + Thread.sleep(intervalTimeMs) + waitSoFar += intervalTimeMs + } + } + + def triggerStreaming(spark: SparkSession, tableType: String, inputPath: String, hudiPath: String, hudiCheckpointPath: String, + tableName: String, partitionPathField: String, recordKeyField: String, + preCombineField: String): Unit = { + + def validate(): Unit = { + log.info("Validation starting") + val inputDf = spark.read.format("parquet").load(inputPath) + val hudiDf = spark.read.format("hudi").load(hudiPath) + inputDf.registerTempTable("inputTbl") + hudiDf.registerTempTable("hudiTbl") + assert(spark.sql("select count(distinct " + partitionPathField + ", " + recordKeyField + ") from inputTbl").count == + spark.sql("select count(distinct " + partitionPathField + ", " + recordKeyField + ") from hudiTbl").count) + validationComplete = true + log.info("Validation complete") + } + + def shutdownListener(spark: SparkSession) = new StreamingQueryListener() { + override def onQueryStarted(queryStarted: QueryStartedEvent): Unit = { + log.info("Query started: " + queryStarted.id) + } + + override def onQueryTerminated(queryTerminated: QueryTerminatedEvent): Unit = { + log.info("Query terminated! " + queryTerminated.id + ". Validating input and hudi") + validate() + log.info("Data Validation complete") + } + + override def onQueryProgress(queryProgressEvent: QueryProgressEvent): Unit = { + if (queryProgressEvent.progress.numInputRows == 0) { + log.info("Stopping spark structured streaming as we have reached the end") + spark.streams.active.foreach(_.stop()) + } + } + } + + spark.streams.addListener(shutdownListener(spark)) + log.info("Starting to consume from source and writing to hudi ") + + val inputDfSchema = spark.read.format("parquet").load(inputPath).schema + val parquetdf = spark.readStream.option("spark.sql.streaming.schemaInference", "true").option("maxFilesPerTrigger", "1") + .schema(inputDfSchema).parquet(inputPath) + + val writer = parquetdf.writeStream.format("org.apache.hudi"). + option(TABLE_TYPE.key, tableType). + option(PRECOMBINE_FIELD.key, preCombineField). + option(RECORDKEY_FIELD.key, recordKeyField). + option(PARTITIONPATH_FIELD.key, partitionPathField). + option(FAIL_ON_TIMELINE_ARCHIVING_ENABLE.key, false). + option(STREAMING_IGNORE_FAILED_BATCH.key, false). + option(STREAMING_RETRY_CNT.key, 0). + option("hoodie.table.name", tableName). + option("hoodie.compact.inline.max.delta.commits", "2"). + option("checkpointLocation", hudiCheckpointPath). 
+ outputMode(OutputMode.Append()); + + writer.trigger(Trigger.ProcessingTime(30000)).start(hudiPath); + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/StoragePathInfo.java b/hudi-io/src/main/java/org/apache/hudi/storage/StoragePathInfo.java index b4ec8194b4de..e4711bf72dd0 100644 --- a/hudi-io/src/main/java/org/apache/hudi/storage/StoragePathInfo.java +++ b/hudi-io/src/main/java/org/apache/hudi/storage/StoragePathInfo.java @@ -35,15 +35,21 @@ public class StoragePathInfo implements Serializable { private final StoragePath path; private final long length; private final boolean isDirectory; + private final short blockReplication; + private final long blockSize; private final long modificationTime; public StoragePathInfo(StoragePath path, long length, boolean isDirectory, + short blockReplication, + long blockSize, long modificationTime) { this.path = path; this.length = length; this.isDirectory = isDirectory; + this.blockReplication = blockReplication; + this.blockSize = blockSize; this.modificationTime = modificationTime; } @@ -79,6 +85,22 @@ public boolean isDirectory() { return isDirectory; } + /** + * @return the block replication if applied. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public short getBlockReplication() { + return blockReplication; + } + + /** + * @return the block size in bytes if applied. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public long getBlockSize() { + return blockSize; + } + /** * @return the modification of a file. */ @@ -114,6 +136,8 @@ public String toString() { + "path=" + path + ", length=" + length + ", isDirectory=" + isDirectory + + ", blockReplication=" + blockReplication + + ", blockSize=" + blockSize + ", modificationTime=" + modificationTime + '}'; } diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java b/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java index 74d126d7f07a..00e8594a0c83 100644 --- a/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java +++ b/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java @@ -69,6 +69,8 @@ public enum StorageSchemes { OBS("obs", null, null), // Kingsoft Standard Storage ks3 KS3("ks3", null, null), + // Netease Object Storage nos + NOS("nos", null, null), // JuiceFileSystem JFS("jfs", null, null), // Baidu Object Storage diff --git a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java index a6a0efee6dc0..460c831e1c08 100644 --- a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java +++ b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java @@ -164,37 +164,37 @@ public void testListing() throws IOException { validatePathInfoList( Arrays.stream(new StoragePathInfo[] { - new StoragePathInfo(new StoragePath(getTempDir(), "x/1.file"), 0, false, 0), - new StoragePathInfo(new StoragePath(getTempDir(), "x/2.file"), 0, false, 0), - new StoragePathInfo(new StoragePath(getTempDir(), "x/y"), 0, true, 0), - new StoragePathInfo(new StoragePath(getTempDir(), "x/z"), 0, true, 0), + getStoragePathInfo("x/1.file", false), + getStoragePathInfo("x/2.file", false), + getStoragePathInfo("x/y", true), + getStoragePathInfo("x/z", true) }).collect(Collectors.toList()), storage.listDirectEntries(new StoragePath(getTempDir(), "x"))); validatePathInfoList( Arrays.stream(new StoragePathInfo[] { - new StoragePathInfo(new StoragePath(getTempDir(), "x/1.file"), 0, false, 0), 
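As an aside on the widened StoragePathInfo constructor exercised in the tests below, here is a small illustrative sketch; it is not part of the patch and all values are hypothetical.

    import org.apache.hudi.storage.StoragePath;
    import org.apache.hudi.storage.StoragePathInfo;

    public class StoragePathInfoExample {
      public static void main(String[] args) {
        // The constructor now takes block replication and block size in addition
        // to path, length, directory flag, and modification time.
        StoragePathInfo pathInfo = new StoragePathInfo(
            new StoragePath("/tmp/hudi/table/par1/f1.parquet"),
            2048L,                 // length in bytes
            false,                 // not a directory
            (short) 2,             // block replication
            64 * 1024 * 1024L,     // block size in bytes
            System.currentTimeMillis());

        System.out.println(pathInfo.getBlockReplication() + ", " + pathInfo.getBlockSize());
      }
    }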
- new StoragePathInfo(new StoragePath(getTempDir(), "x/2.file"), 0, false, 0), - new StoragePathInfo(new StoragePath(getTempDir(), "x/y/1.file"), 0, false, 0), - new StoragePathInfo(new StoragePath(getTempDir(), "x/y/2.file"), 0, false, 0), - new StoragePathInfo(new StoragePath(getTempDir(), "x/z/1.file"), 0, false, 0), - new StoragePathInfo(new StoragePath(getTempDir(), "x/z/2.file"), 0, false, 0) + getStoragePathInfo("x/1.file", false), + getStoragePathInfo("x/2.file", false), + getStoragePathInfo("x/y/1.file", false), + getStoragePathInfo("x/y/2.file", false), + getStoragePathInfo("x/z/1.file", false), + getStoragePathInfo("x/z/2.file", false) }).collect(Collectors.toList()), storage.listFiles(new StoragePath(getTempDir(), "x"))); validatePathInfoList( Arrays.stream(new StoragePathInfo[] { - new StoragePathInfo(new StoragePath(getTempDir(), "x/2.file"), 0, false, 0) + getStoragePathInfo("x/2.file", false) }).collect(Collectors.toList()), storage.listDirectEntries( new StoragePath(getTempDir(), "x"), e -> e.getName().contains("2"))); validatePathInfoList( Arrays.stream(new StoragePathInfo[] { - new StoragePathInfo(new StoragePath(getTempDir(), "w/1.file"), 0, false, 0), - new StoragePathInfo(new StoragePath(getTempDir(), "w/2.file"), 0, false, 0), - new StoragePathInfo(new StoragePath(getTempDir(), "x/z/1.file"), 0, false, 0), - new StoragePathInfo(new StoragePath(getTempDir(), "x/z/2.file"), 0, false, 0) + getStoragePathInfo("w/1.file", false), + getStoragePathInfo("w/2.file", false), + getStoragePathInfo("x/z/1.file", false), + getStoragePathInfo("x/z/2.file", false) }).collect(Collectors.toList()), storage.listDirectEntries(Arrays.stream(new StoragePath[] { new StoragePath(getTempDir(), "w"), @@ -206,21 +206,21 @@ public void testListing() throws IOException { validatePathInfoList( Arrays.stream(new StoragePathInfo[] { - new StoragePathInfo(new StoragePath(getTempDir(), "x/y/1.file"), 0, false, 0), - new StoragePathInfo(new StoragePath(getTempDir(), "x/z/1.file"), 0, false, 0) + getStoragePathInfo("x/y/1.file", false), + getStoragePathInfo("x/z/1.file", false) }).collect(Collectors.toList()), storage.globEntries(new StoragePath(getTempDir(), "x/*/1.file"))); validatePathInfoList( Arrays.stream(new StoragePathInfo[] { - new StoragePathInfo(new StoragePath(getTempDir(), "x/1.file"), 0, false, 0), - new StoragePathInfo(new StoragePath(getTempDir(), "x/2.file"), 0, false, 0), + getStoragePathInfo("x/1.file", false), + getStoragePathInfo("x/2.file", false) }).collect(Collectors.toList()), storage.globEntries(new StoragePath(getTempDir(), "x/*.file"))); validatePathInfoList( Arrays.stream(new StoragePathInfo[] { - new StoragePathInfo(new StoragePath(getTempDir(), "x/y/1.file"), 0, false, 0), + getStoragePathInfo("x/y/1.file", false) }).collect(Collectors.toList()), storage.globEntries( new StoragePath(getTempDir(), "x/*/*.file"), @@ -319,6 +319,11 @@ private HoodieStorage getHoodieStorage() { return getHoodieStorage(getFileSystem(conf), conf); } + private StoragePathInfo getStoragePathInfo(String subPath, boolean isDirectory) { + return new StoragePathInfo(new StoragePath(getTempDir(), subPath), + 0, isDirectory, (short) 1, 1000000L, 10L); + } + private void validatePathInfo(HoodieStorage storage, StoragePath path, byte[] data, diff --git a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestStoragePathInfo.java b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestStoragePathInfo.java index 1d92fa075d0f..72640c5e3df5 100644 --- 
a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestStoragePathInfo.java +++ b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestStoragePathInfo.java @@ -41,6 +41,8 @@ public class TestStoragePathInfo { private static final Logger LOG = LoggerFactory.getLogger(TestStoragePathInfo.class); private static final long LENGTH = 100; + private static final short BLOCK_REPLICATION = 1; + private static final long BLOCK_SIZE = 1000000L; private static final long MODIFICATION_TIME = System.currentTimeMillis(); private static final String PATH1 = "/abc/xyz1"; private static final String PATH2 = "/abc/xyz2"; @@ -49,15 +51,15 @@ public class TestStoragePathInfo { @Test public void testConstructor() { - StoragePathInfo pathInfo = new StoragePathInfo(STORAGE_PATH1, LENGTH, false, MODIFICATION_TIME); + StoragePathInfo pathInfo = new StoragePathInfo(STORAGE_PATH1, LENGTH, false, BLOCK_REPLICATION, BLOCK_SIZE, MODIFICATION_TIME); validateAccessors(pathInfo, PATH1, LENGTH, false, MODIFICATION_TIME); - pathInfo = new StoragePathInfo(STORAGE_PATH2, -1, true, MODIFICATION_TIME + 2L); + pathInfo = new StoragePathInfo(STORAGE_PATH2, -1, true, BLOCK_REPLICATION, BLOCK_SIZE, MODIFICATION_TIME + 2L); validateAccessors(pathInfo, PATH2, -1, true, MODIFICATION_TIME + 2L); } @Test public void testSerializability() throws IOException, ClassNotFoundException { - StoragePathInfo pathInfo = new StoragePathInfo(STORAGE_PATH1, LENGTH, false, MODIFICATION_TIME); + StoragePathInfo pathInfo = new StoragePathInfo(STORAGE_PATH1, LENGTH, false, BLOCK_REPLICATION, BLOCK_SIZE, MODIFICATION_TIME); try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos)) { oos.writeObject(pathInfo); @@ -72,18 +74,18 @@ public void testSerializability() throws IOException, ClassNotFoundException { @Test public void testEquals() { StoragePathInfo pathInfo1 = new StoragePathInfo( - new StoragePath(PATH1), LENGTH, false, MODIFICATION_TIME); + new StoragePath(PATH1), LENGTH, false, BLOCK_REPLICATION, BLOCK_SIZE, MODIFICATION_TIME); StoragePathInfo pathInfo2 = new StoragePathInfo( - new StoragePath(PATH1), LENGTH + 2, false, MODIFICATION_TIME + 2L); + new StoragePath(PATH1), LENGTH + 2, false, BLOCK_REPLICATION, BLOCK_SIZE, MODIFICATION_TIME + 2L); assertEquals(pathInfo1, pathInfo2); } @Test public void testNotEquals() { StoragePathInfo pathInfo1 = new StoragePathInfo( - STORAGE_PATH1, LENGTH, false, MODIFICATION_TIME); + STORAGE_PATH1, LENGTH, false, BLOCK_REPLICATION, BLOCK_SIZE, MODIFICATION_TIME); StoragePathInfo pathInfo2 = new StoragePathInfo( - STORAGE_PATH2, LENGTH, false, MODIFICATION_TIME + 2L); + STORAGE_PATH2, LENGTH, false, BLOCK_REPLICATION, BLOCK_SIZE, MODIFICATION_TIME + 2L); assertFalse(pathInfo1.equals(pathInfo2)); assertFalse(pathInfo2.equals(pathInfo1)); } diff --git a/hudi-kafka-connect/pom.xml b/hudi-kafka-connect/pom.xml index 1abe6ee961e5..d9fce1c9c57b 100644 --- a/hudi-kafka-connect/pom.xml +++ b/hudi-kafka-connect/pom.xml @@ -73,9 +73,6 @@ src/main/resources - - src/test/resources - diff --git a/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/pom.xml b/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/pom.xml index 4ffbea73b44a..d7d9ca77cd72 100644 --- a/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/pom.xml +++ b/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/pom.xml @@ -30,9 +30,9 @@ ${project.parent.basedir} - 3.4.6 + 3.5.6 4.0.3 - 8.0.22 + 8.0.28 diff --git 
a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/HoodieSparkFunctionalIndexClient.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/HoodieSparkFunctionalIndexClient.java index 542b76e8dd16..e66ad5ac417b 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/HoodieSparkFunctionalIndexClient.java +++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/HoodieSparkFunctionalIndexClient.java @@ -27,7 +27,6 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ValidationUtils; -import org.apache.hudi.config.HoodieLockConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieFunctionalIndexException; @@ -49,6 +48,9 @@ import static org.apache.hudi.HoodieConversionUtils.mapAsScalaImmutableMap; import static org.apache.hudi.HoodieConversionUtils.toScalaOption; +import static org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE_METADATA_INDEX_BLOOM_FILTER; +import static org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS; +import static org.apache.hudi.common.config.HoodieMetadataConfig.RECORD_INDEX_ENABLE_PROP; import static org.apache.hudi.common.util.ValidationUtils.checkArgument; public class HoodieSparkFunctionalIndexClient extends BaseHoodieFunctionalIndexClient { @@ -82,7 +84,9 @@ public void create(HoodieTableMetaClient metaClient, String indexName, String in throw new HoodieFunctionalIndexException("Index already exists: " + indexName); } - if (!metaClient.getTableConfig().getIndexDefinitionPath().isPresent() || !metaClient.getFunctionalIndexMetadata().isPresent()) { + if (!metaClient.getTableConfig().getIndexDefinitionPath().isPresent() + || !metaClient.getFunctionalIndexMetadata().isPresent() + || !metaClient.getFunctionalIndexMetadata().get().getIndexDefinitions().containsKey(indexName)) { LOG.info("Index definition is not present. Registering the index first"); register(metaClient, indexName, indexType, columns, options); } @@ -94,7 +98,7 @@ public void create(HoodieTableMetaClient metaClient, String indexName, String in try (SparkRDDWriteClient writeClient = HoodieCLIUtils.createHoodieWriteClient( sparkSession, metaClient.getBasePathV2().toString(), mapAsScalaImmutableMap(buildWriteConfig(metaClient, functionalIndexDefinition)), toScalaOption(Option.empty()))) { // generate index plan - Option indexInstantTime = doSchedule(writeClient, metaClient); + Option indexInstantTime = doSchedule(writeClient, metaClient, indexName); if (indexInstantTime.isPresent()) { // build index writeClient.index(indexInstantTime.get()); @@ -104,13 +108,13 @@ public void create(HoodieTableMetaClient metaClient, String indexName, String in } } - private static Option doSchedule(SparkRDDWriteClient client, HoodieTableMetaClient metaClient) { + private static Option doSchedule(SparkRDDWriteClient client, HoodieTableMetaClient metaClient, String indexName) { List partitionTypes = Collections.singletonList(MetadataPartitionType.FUNCTIONAL_INDEX); checkArgument(partitionTypes.size() == 1, "Currently, only one index type can be scheduled at a time."); if (metaClient.getTableConfig().getMetadataPartitions().isEmpty()) { throw new HoodieException("Metadata table is not yet initialized. 
Initialize FILES partition before any other partition " + Arrays.toString(partitionTypes.toArray())); } - return client.scheduleIndexing(partitionTypes); + return client.scheduleIndexing(partitionTypes, Collections.singletonList(indexName)); } private static boolean indexExists(HoodieTableMetaClient metaClient, String indexName) { @@ -120,11 +124,25 @@ private static boolean indexExists(HoodieTableMetaClient metaClient, String inde private static Map buildWriteConfig(HoodieTableMetaClient metaClient, HoodieFunctionalIndexDefinition indexDefinition) { Map writeConfig = new HashMap<>(); if (metaClient.getTableConfig().isMetadataTableAvailable()) { - if (!writeConfig.containsKey(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key())) { - writeConfig.put(HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(), WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.name()); - writeConfig.putAll(JavaConverters.mapAsJavaMapConverter(HoodieCLIUtils.getLockOptions(metaClient.getBasePathV2().toString())).asJava()); - } + writeConfig.put(HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(), WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.name()); + writeConfig.putAll(JavaConverters.mapAsJavaMapConverter(HoodieCLIUtils.getLockOptions(metaClient.getBasePathV2().toString())).asJava()); + + // [HUDI-7472] Ensure write-config contains the existing MDT partition to prevent those from getting deleted + metaClient.getTableConfig().getMetadataPartitions().forEach(partitionPath -> { + if (partitionPath.equals(MetadataPartitionType.RECORD_INDEX.getPartitionPath())) { + writeConfig.put(RECORD_INDEX_ENABLE_PROP.key(), "true"); + } + + if (partitionPath.equals(MetadataPartitionType.BLOOM_FILTERS.getPartitionPath())) { + writeConfig.put(ENABLE_METADATA_INDEX_BLOOM_FILTER.key(), "true"); + } + + if (partitionPath.equals(MetadataPartitionType.COLUMN_STATS.getPartitionPath())) { + writeConfig.put(ENABLE_METADATA_INDEX_COLUMN_STATS.key(), "true"); + } + }); } + HoodieFunctionalIndexConfig.fromIndexDefinition(indexDefinition).getProps().forEach((key, value) -> writeConfig.put(key.toString(), value.toString())); return writeConfig; } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala index 47a7c61a60fa..f93209c98367 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala @@ -565,8 +565,6 @@ object DataSourceWriteOptions { val SET_NULL_FOR_MISSING_COLUMNS: ConfigProperty[String] = HoodieCommonConfig.SET_NULL_FOR_MISSING_COLUMNS - val MAKE_NEW_COLUMNS_NULLABLE: ConfigProperty[java.lang.Boolean] = HoodieCommonConfig.MAKE_NEW_COLUMNS_NULLABLE - val SPARK_SQL_INSERT_INTO_OPERATION: ConfigProperty[String] = ConfigProperty .key("hoodie.spark.sql.insert.into.operation") .defaultValue(WriteOperationType.INSERT.value()) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala index c346f7665df5..be3d2f4ed4bf 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala @@ -74,7 +74,12 @@ class DefaultSource extends RelationProvider override def createRelation(sqlContext: 
SQLContext, parameters: Map[String, String]): BaseRelation = { try { - createRelation(sqlContext, parameters, null) + val relation = createRelation(sqlContext, parameters, null) + if (relation.schema.isEmpty) { + new EmptyRelation(sqlContext, new StructType()) + } else { + relation + } } catch { case _: HoodieSchemaNotFoundException => new EmptyRelation(sqlContext, new StructType()) case e => throw e diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala index 3444feaecff6..affed871cad9 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala @@ -63,7 +63,7 @@ import scala.util.{Failure, Success, Try} * who's directory level is 3).We can still read it as a partitioned table. We will mapping the * partition path (e.g. 2021/03/10) to the only partition column (e.g. "dt"). * - * 3、Else the the partition columns size is not equal to the partition directory level and the + * 3、Else the partition columns size is not equal to the partition directory level and the * size is great than "1" (e.g. partition column is "dt,hh", the partition path is "2021/03/10/12") * , we read it as a Non-Partitioned table because we cannot know how to mapping the partition * path with the partition columns in this case. diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSchemaUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSchemaUtils.scala index ed073ce4b174..9aeff64f2370 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSchemaUtils.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSchemaUtils.scala @@ -21,10 +21,10 @@ package org.apache.hudi import org.apache.avro.Schema import org.apache.hudi.HoodieSparkSqlWriter.{CANONICALIZE_SCHEMA, SQL_MERGE_INTO_WRITES} -import org.apache.hudi.avro.AvroSchemaUtils.{isCompatibleProjectionOf, isSchemaCompatible, isValidEvolutionOf} +import org.apache.hudi.avro.AvroSchemaUtils.{checkSchemaCompatible, checkValidEvolution, isCompatibleProjectionOf, isSchemaCompatible} import org.apache.hudi.avro.HoodieAvroUtils import org.apache.hudi.avro.HoodieAvroUtils.removeMetadataFields -import org.apache.hudi.common.config.HoodieConfig +import org.apache.hudi.common.config.{HoodieConfig, TypedProperties} import org.apache.hudi.common.model.HoodieRecord import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.config.HoodieWriteConfig @@ -76,125 +76,131 @@ object HoodieSchemaUtils { latestTableSchemaOpt: Option[Schema], internalSchemaOpt: Option[InternalSchema], opts: Map[String, String]): Schema = { - val setNullForMissingColumns = opts.getOrDefault(DataSourceWriteOptions.SET_NULL_FOR_MISSING_COLUMNS.key(), - DataSourceWriteOptions.SET_NULL_FOR_MISSING_COLUMNS.defaultValue).toBoolean - val shouldReconcileSchema = opts(DataSourceWriteOptions.RECONCILE_SCHEMA.key()).toBoolean - val shouldValidateSchemasCompatibility = opts.getOrDefault(HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.key, - HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.defaultValue).toBoolean - latestTableSchemaOpt match { - // In case table schema is empty we're just going to use the source schema as a - // writer's schema. 
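Reviewer note on the DefaultSource.createRelation change above (annotation, not part of the patch): when the resolved relation carries an empty schema, the read path now falls back to an EmptyRelation instead of handing Spark a relation it cannot plan against. A minimal sketch of the caller-visible effect, assuming a hypothetical basePath whose table schema resolves to empty:

// Sketch only; basePath is an assumption for illustration.
val df = spark.read.format("hudi").load(basePath)
// With the fallback above this is expected to behave as an empty DataFrame
// (empty schema, zero rows) rather than a relation that fails during planning.
println(s"columns=${df.schema.size}, rows=${df.count()}")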
+ // If table schema is empty, then we use the source schema as a writer's schema. case None => AvroInternalSchemaConverter.fixNullOrdering(sourceSchema) // Otherwise, we need to make sure we reconcile incoming and latest table schemas case Some(latestTableSchemaWithMetaFields) => - // NOTE: Meta-fields will be unconditionally injected by Hudi writing handles, for the sake of - // deducing proper writer schema we're stripping them to make sure we can perform proper - // analysis - //add call to fix null ordering to ensure backwards compatibility + // NOTE: Meta-fields will be unconditionally injected by Hudi writing handles, for the sake of deducing proper writer schema + // we're stripping them to make sure we can perform proper analysis + // add call to fix null ordering to ensure backwards compatibility val latestTableSchema = AvroInternalSchemaConverter.fixNullOrdering(removeMetadataFields(latestTableSchemaWithMetaFields)) + // Before validating whether schemas are compatible, we need to "canonicalize" source's schema // relative to the table's one, by doing a (minor) reconciliation of the nullability constraints: // for ex, if in incoming schema column A is designated as non-null, but it's designated as nullable // in the table's one we want to proceed aligning nullability constraints w/ the table's schema // Also, we promote types to the latest table schema if possible. - val shouldCanonicalizeSchema = opts.getOrDefault(CANONICALIZE_SCHEMA.key, - CANONICALIZE_SCHEMA.defaultValue.toString).toBoolean - val mergeIntoWrites = opts.getOrDefault(SQL_MERGE_INTO_WRITES.key(), - SQL_MERGE_INTO_WRITES.defaultValue.toString).toBoolean - + val shouldCanonicalizeSchema = opts.getOrDefault(CANONICALIZE_SCHEMA.key, CANONICALIZE_SCHEMA.defaultValue.toString).toBoolean val canonicalizedSourceSchema = if (shouldCanonicalizeSchema) { canonicalizeSchema(sourceSchema, latestTableSchema, opts) } else { AvroInternalSchemaConverter.fixNullOrdering(sourceSchema) } - val allowAutoEvolutionColumnDrop = opts.getOrDefault(HoodieWriteConfig.SCHEMA_ALLOW_AUTO_EVOLUTION_COLUMN_DROP.key, - HoodieWriteConfig.SCHEMA_ALLOW_AUTO_EVOLUTION_COLUMN_DROP.defaultValue).toBoolean - + val shouldReconcileSchema = opts.getOrDefault(DataSourceWriteOptions.RECONCILE_SCHEMA.key(), + DataSourceWriteOptions.RECONCILE_SCHEMA.defaultValue().toString).toBoolean if (shouldReconcileSchema) { - internalSchemaOpt match { - case Some(internalSchema) => - // Apply schema evolution, by auto-merging write schema and read schema - val mergedInternalSchema = AvroSchemaEvolutionUtils.reconcileSchema(canonicalizedSourceSchema, internalSchema) - val evolvedSchema = AvroInternalSchemaConverter.convert(mergedInternalSchema, latestTableSchema.getFullName) - val shouldRemoveMetaDataFromInternalSchema = sourceSchema.getFields().filter(f => f.name().equalsIgnoreCase(HoodieRecord.RECORD_KEY_METADATA_FIELD)).isEmpty - if (shouldRemoveMetaDataFromInternalSchema) HoodieAvroUtils.removeMetadataFields(evolvedSchema) else evolvedSchema - case None => - // In case schema reconciliation is enabled we will employ (legacy) reconciliation - // strategy to produce target writer's schema (see definition below) - val (reconciledSchema, isCompatible) = - reconcileSchemasLegacy(latestTableSchema, canonicalizedSourceSchema) - - // NOTE: In some cases we need to relax constraint of incoming dataset's schema to be compatible - // w/ the table's one and allow schemas to diverge. 
This is required in cases where - // partial updates will be performed (for ex, `MERGE INTO` Spark SQL statement) and as such - // only incoming dataset's projection has to match the table's schema, and not the whole one - if (!shouldValidateSchemasCompatibility || isCompatible) { - reconciledSchema - } else { - log.error( - s"""Failed to reconcile incoming batch schema with the table's one. - |Incoming schema ${sourceSchema.toString(true)} - |Incoming schema (canonicalized) ${canonicalizedSourceSchema.toString(true)} - |Table's schema ${latestTableSchema.toString(true)} - |""".stripMargin) - throw new SchemaCompatibilityException("Failed to reconcile incoming schema with the table's one") - } - } + deduceWriterSchemaWithReconcile(sourceSchema, canonicalizedSourceSchema, latestTableSchema, internalSchemaOpt, opts) + } else { + deduceWriterSchemaWithoutReconcile(sourceSchema, canonicalizedSourceSchema, latestTableSchema, opts) + } + } + } + + /** + * Deducing with disabled reconciliation. + * We have to validate that the source's schema is compatible w/ the table's latest schema, + * such that we're able to read existing table's records using [[sourceSchema]]. + */ + private def deduceWriterSchemaWithoutReconcile(sourceSchema: Schema, + canonicalizedSourceSchema: Schema, + latestTableSchema: Schema, + opts: Map[String, String]): Schema = { + // NOTE: In some cases we need to relax constraint of incoming dataset's schema to be compatible + // w/ the table's one and allow schemas to diverge. This is required in cases where + // partial updates will be performed (for ex, `MERGE INTO` Spark SQL statement) and as such + // only incoming dataset's projection has to match the table's schema, and not the whole one + val mergeIntoWrites = opts.getOrDefault(SQL_MERGE_INTO_WRITES.key(), SQL_MERGE_INTO_WRITES.defaultValue.toString).toBoolean + val shouldValidateSchemasCompatibility = opts.getOrDefault(HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.key, + HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.defaultValue).toBoolean + val allowAutoEvolutionColumnDrop = opts.getOrDefault(HoodieWriteConfig.SCHEMA_ALLOW_AUTO_EVOLUTION_COLUMN_DROP.key, + HoodieWriteConfig.SCHEMA_ALLOW_AUTO_EVOLUTION_COLUMN_DROP.defaultValue).toBoolean + val setNullForMissingColumns = opts.getOrDefault(DataSourceWriteOptions.SET_NULL_FOR_MISSING_COLUMNS.key(), + DataSourceWriteOptions.SET_NULL_FOR_MISSING_COLUMNS.defaultValue).toBoolean + + if (!mergeIntoWrites && !shouldValidateSchemasCompatibility && !allowAutoEvolutionColumnDrop) { + // Default behaviour + val reconciledSchema = if (setNullForMissingColumns) { + AvroSchemaEvolutionUtils.reconcileSchema(canonicalizedSourceSchema, latestTableSchema) + } else { + canonicalizedSourceSchema + } + checkValidEvolution(reconciledSchema, latestTableSchema) + reconciledSchema + } else { + // If it's merge into writes, we don't check for projection nor schema compatibility. Writers down the line will take care of it. + // Or it's not merge into writes, and we don't validate schema, but we allow to drop columns automatically. + // Or it's not merge into writes, we validate schema, and schema is compatible. + if (shouldValidateSchemasCompatibility) { + checkSchemaCompatible(latestTableSchema, canonicalizedSourceSchema, true, + allowAutoEvolutionColumnDrop, java.util.Collections.emptySet()) + } + canonicalizedSourceSchema + } + } + + /** + * Deducing with enabled reconciliation. + * Marked as Deprecated. 
+ */ + private def deduceWriterSchemaWithReconcile(sourceSchema: Schema, + canonicalizedSourceSchema: Schema, + latestTableSchema: Schema, + internalSchemaOpt: Option[InternalSchema], + opts: Map[String, String]): Schema = { + internalSchemaOpt match { + case Some(internalSchema) => + // Apply schema evolution, by auto-merging write schema and read schema + val mergedInternalSchema = AvroSchemaEvolutionUtils.reconcileSchema(canonicalizedSourceSchema, internalSchema) + val evolvedSchema = AvroInternalSchemaConverter.convert(mergedInternalSchema, latestTableSchema.getFullName) + val shouldRemoveMetaDataFromInternalSchema = sourceSchema.getFields().filter(f => f.name().equalsIgnoreCase(HoodieRecord.RECORD_KEY_METADATA_FIELD)).isEmpty + if (shouldRemoveMetaDataFromInternalSchema) HoodieAvroUtils.removeMetadataFields(evolvedSchema) else evolvedSchema + case None => + // In case schema reconciliation is enabled we will employ (legacy) reconciliation + // strategy to produce target writer's schema (see definition below) + val (reconciledSchema, isCompatible) = + reconcileSchemasLegacy(latestTableSchema, canonicalizedSourceSchema) + + // NOTE: In some cases we need to relax constraint of incoming dataset's schema to be compatible + // w/ the table's one and allow schemas to diverge. This is required in cases where + // partial updates will be performed (for ex, `MERGE INTO` Spark SQL statement) and as such + // only incoming dataset's projection has to match the table's schema, and not the whole one + val shouldValidateSchemasCompatibility = opts.getOrDefault(HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.key, HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.defaultValue).toBoolean + if (!shouldValidateSchemasCompatibility || isCompatible) { + reconciledSchema } else { - // In case reconciliation is disabled, we have to validate that the source's schema - // is compatible w/ the table's latest schema, such that we're able to read existing table's - // records using [[sourceSchema]]. - // - // NOTE: In some cases we need to relax constraint of incoming dataset's schema to be compatible - // w/ the table's one and allow schemas to diverge. This is required in cases where - // partial updates will be performed (for ex, `MERGE INTO` Spark SQL statement) and as such - // only incoming dataset's projection has to match the table's schema, and not the whole one - - if (mergeIntoWrites) { - // if its merge into writes, do not check for projection nor schema compatibility. Writers down the line will - // take care of it. - canonicalizedSourceSchema - } else { - if (!shouldValidateSchemasCompatibility) { - // if no validation is enabled, check for col drop - if (allowAutoEvolutionColumnDrop) { - canonicalizedSourceSchema - } else { - val reconciledSchema = if (setNullForMissingColumns) { - AvroSchemaEvolutionUtils.reconcileSchema(canonicalizedSourceSchema, latestTableSchema) - } else { - canonicalizedSourceSchema - } - if (isValidEvolutionOf(reconciledSchema, latestTableSchema)) { - reconciledSchema - } else { - log.error( - s"""Incoming batch schema is not compatible with the table's one. 
- |Incoming schema ${sourceSchema.toString(true)} - |Incoming schema (canonicalized) ${reconciledSchema.toString(true)} - |Table's schema ${latestTableSchema.toString(true)} - |""".stripMargin) - throw new SchemaCompatibilityException("Incoming batch schema is not compatible with the table's one") - } - } - } else if (isSchemaCompatible(latestTableSchema, canonicalizedSourceSchema, allowAutoEvolutionColumnDrop)) { - canonicalizedSourceSchema - } else { - log.error( - s"""Incoming batch schema is not compatible with the table's one. - |Incoming schema ${sourceSchema.toString(true)} - |Incoming schema (canonicalized) ${canonicalizedSourceSchema.toString(true)} - |Table's schema ${latestTableSchema.toString(true)} - |""".stripMargin) - throw new SchemaCompatibilityException("Incoming batch schema is not compatible with the table's one") - } - } + log.error( + s"""Failed to reconcile incoming batch schema with the table's one. + |Incoming schema ${sourceSchema.toString(true)} + |Incoming schema (canonicalized) ${canonicalizedSourceSchema.toString(true)} + |Table's schema ${latestTableSchema.toString(true)} + |""".stripMargin) + throw new SchemaCompatibilityException("Failed to reconcile incoming schema with the table's one") } } } + def deduceWriterSchema(sourceSchema: Schema, + latestTableSchemaOpt: org.apache.hudi.common.util.Option[Schema], + internalSchemaOpt: org.apache.hudi.common.util.Option[InternalSchema], + props: TypedProperties): Schema = { + deduceWriterSchema(sourceSchema, + HoodieConversionUtils.toScalaOption(latestTableSchemaOpt), + HoodieConversionUtils.toScalaOption(internalSchemaOpt), + HoodieConversionUtils.fromProperties(props)) + } + /** * Canonicalizes [[sourceSchema]] by reconciling it w/ [[latestTableSchema]] in following * @@ -206,7 +212,7 @@ object HoodieSchemaUtils { * TODO support casing reconciliation */ private def canonicalizeSchema(sourceSchema: Schema, latestTableSchema: Schema, opts : Map[String, String]): Schema = { - reconcileSchemaRequirements(sourceSchema, latestTableSchema, opts) + reconcileSchemaRequirements(sourceSchema, latestTableSchema) } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala index 00ec59c5b8fd..98c7c5d29f56 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala @@ -51,7 +51,7 @@ import org.apache.hudi.common.util.{CommitUtils, StringUtils, Option => HOption} import org.apache.hudi.config.HoodieBootstrapConfig.{BASE_PATH, INDEX_CLASS_NAME} import org.apache.hudi.config.HoodieWriteConfig.SPARK_SQL_MERGE_INTO_PREPPED_KEY import org.apache.hudi.config.{HoodieCompactionConfig, HoodieInternalConfig, HoodieWriteConfig} -import org.apache.hudi.exception.{HoodieException, HoodieWriteConflictException} +import org.apache.hudi.exception.{HoodieException, HoodieRecordCreationException, HoodieWriteConflictException} import org.apache.hudi.hive.{HiveSyncConfigHolder, HiveSyncTool} import org.apache.hudi.internal.schema.InternalSchema import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter @@ -78,6 +78,7 @@ import java.util.function.BiConsumer import scala.collection.JavaConversions._ import scala.collection.JavaConverters.setAsJavaSetConverter import scala.collection.mutable +import scala.util.{Failure, Success, Try} 
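Reviewer note on the HoodieSchemaUtils changes above (sketch, not part of the patch): writer-schema deduction is now split into a reconcile path and a non-reconcile path, and a new TypedProperties overload of deduceWriterSchema is exposed; the TestHoodieSchemaUtils suite added later in this diff drives that overload. A rough Scala sketch of a caller, with invented field names; the expected outcomes mirror what the new tests assert:

import org.apache.avro.SchemaBuilder
import org.apache.hudi.HoodieSchemaUtils
import org.apache.hudi.common.config.{HoodieCommonConfig, TypedProperties}
import org.apache.hudi.common.util.Option

// The table already has (id, name); the incoming batch is missing "name".
val tableSchema = SchemaBuilder.record("rec").fields()
  .requiredInt("id").requiredString("name").endRecord()
val incomingSchema = SchemaBuilder.record("rec").fields()
  .requiredInt("id").endRecord()

val props = new TypedProperties()
// true  -> the missing column is reconciled back in from the table schema;
// false -> the evolution check should fail (MissingSchemaFieldException in the new tests).
props.setProperty(HoodieCommonConfig.SET_NULL_FOR_MISSING_COLUMNS.key(), "true")

val writerSchema = HoodieSchemaUtils.deduceWriterSchema(
  incomingSchema, Option.ofNullable(tableSchema), Option.empty(), props)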
object HoodieSparkSqlWriter { @@ -132,21 +133,6 @@ object HoodieSparkSqlWriter { new HoodieSparkSqlWriterInternal().bootstrap(sqlContext, mode, optParams, df, hoodieTableConfigOpt, streamingWritesParamsOpt, hoodieWriteClient) } - /** - * Deduces writer's schema based on - *

<ul> - *   <li>Source's schema</li> - *   <li>Target table's schema (including Hudi's [[InternalSchema]] representation)</li> - * </ul>
    - */ - def deduceWriterSchema(sourceSchema: Schema, - latestTableSchemaOpt: Option[Schema], - internalSchemaOpt: Option[InternalSchema], - props: TypedProperties): Schema = { - HoodieSchemaUtils.deduceWriterSchema(sourceSchema, latestTableSchemaOpt, - internalSchemaOpt, HoodieConversionUtils.fromProperties(props)) - } - def cleanup(): Unit = { Metrics.shutdownAllMetrics() } @@ -493,10 +479,13 @@ class HoodieSparkSqlWriterInternal { } instantTime = client.createNewInstantTime() // Convert to RDD[HoodieRecord] - val hoodieRecords = - HoodieCreateRecordUtils.createHoodieRecordRdd(HoodieCreateRecordUtils.createHoodieRecordRddArgs(df, - writeConfig, parameters, avroRecordName, avroRecordNamespace, writerSchema, - processedDataSchema, operation, instantTime, preppedSparkSqlWrites, preppedSparkSqlMergeInto, preppedWriteOperation)) + val hoodieRecords = Try(HoodieCreateRecordUtils.createHoodieRecordRdd( + HoodieCreateRecordUtils.createHoodieRecordRddArgs(df, writeConfig, parameters, avroRecordName, + avroRecordNamespace, writerSchema, processedDataSchema, operation, instantTime, preppedSparkSqlWrites, + preppedSparkSqlMergeInto, preppedWriteOperation))) match { + case Success(recs) => recs + case Failure(e) => throw new HoodieRecordCreationException("Failed to create Hoodie Spark Record", e) + } val dedupedHoodieRecords = if (hoodieConfig.getBoolean(INSERT_DROP_DUPS) && operation != WriteOperationType.INSERT_OVERWRITE_TABLE && operation != WriteOperationType.INSERT_OVERWRITE) { diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala index 133f641d280b..63495b0eede6 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala @@ -83,7 +83,6 @@ object HoodieWriterUtils { hoodieConfig.setDefaultValue(ASYNC_CLUSTERING_ENABLE) hoodieConfig.setDefaultValue(ENABLE_ROW_WRITER) hoodieConfig.setDefaultValue(RECONCILE_SCHEMA) - hoodieConfig.setDefaultValue(MAKE_NEW_COLUMNS_NULLABLE) hoodieConfig.setDefaultValue(DROP_PARTITION_COLUMNS) hoodieConfig.setDefaultValue(KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED) Map() ++ hoodieConfig.getProps.asScala ++ globalProps ++ DataSourceOptionsHelper.translateConfigurations(parameters) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala index 7b25fd9a8c73..243cf3db550e 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala @@ -26,9 +26,10 @@ import org.apache.hudi.common.table.timeline.TimelineUtils.HollowCommitHandling. 
import org.apache.hudi.common.table.timeline.TimelineUtils.{HollowCommitHandling, concatTimeline, getCommitMetadata, handleHollowCommitIfNeeded} import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline} import org.apache.hudi.common.table.view.HoodieTableFileSystemView +import org.apache.hudi.metadata.HoodieTableMetadataUtil.getWritePartitionPaths import org.apache.hudi.common.util.StringUtils import org.apache.hudi.exception.HoodieException -import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.{getWritePartitionPaths, listAffectedFilesForCommits} +import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.listAffectedFilesForCommits import org.apache.spark.rdd.RDD import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.InternalRow diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala index 764ce69795d9..894405aec45d 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala @@ -162,7 +162,10 @@ class RecordLevelIndexSupport(spark: SparkSession, case inQuery: In => var validINQuery = true inQuery.value match { - case _: AttributeReference => + case attribute: AttributeReference => + if (!attributeMatchesRecordKey(attribute.name)) { + validINQuery = false + } case _ => validINQuery = false } var literals: List[String] = List.empty diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala index c1414fe77fed..bfd5613feba9 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala @@ -159,11 +159,6 @@ class HoodieCatalogTable(val spark: SparkSession, var table: CatalogTable) exten StructType(tableSchema.filterNot(f => partitionFields.contains(f.name))) } - /** - * The schema of data fields not including hoodie meta fields - */ - lazy val dataSchemaWithoutMetaFields: StructType = removeMetaFields(dataSchema) - /** * The schema of partition fields */ @@ -173,7 +168,7 @@ class HoodieCatalogTable(val spark: SparkSession, var table: CatalogTable) exten * All the partition paths, excludes lazily deleted partitions. 
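Reviewer note on the RecordLevelIndexSupport change above (illustration only, not part of the patch; table and column names are hypothetical): an IN predicate is now considered for record-level index pruning only when the referenced attribute is the record-key column, so IN filters on ordinary columns no longer slip through as index candidates.

// Assuming "uuid" is the record key of hudi_tbl and "city" is a regular column.
spark.sql("select * from hudi_tbl where uuid in ('k1', 'k2')")   // still eligible for RLI-based pruning
spark.sql("select * from hudi_tbl where city in ('sf', 'nyc')")  // no longer treated as an RLI candidate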
*/ def getPartitionPaths: Seq[String] = { - val droppedPartitions = TimelineUtils.getDroppedPartitions(metaClient.getActiveTimeline) + val droppedPartitions = TimelineUtils.getDroppedPartitions(metaClient, org.apache.hudi.common.util.Option.empty(), org.apache.hudi.common.util.Option.empty()) getAllPartitionPaths(spark, table) .filter(!droppedPartitions.contains(_)) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala index cc2d8903a162..46a004808088 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala @@ -155,7 +155,8 @@ object HoodieOptionConfig { def mapSqlOptionsToTableConfigs(options: Map[String, String]): Map[String, String] = { options.map { case (k, v) => if (sqlOptionKeyToTableConfigKey.contains(k)) { - sqlOptionKeyToTableConfigKey(k) -> sqlOptionValueToHoodieConfigValue.getOrElse(v, v) + // support table type incase-sensitive + sqlOptionKeyToTableConfigKey(k) -> sqlOptionValueToHoodieConfigValue.getOrElse(v.toLowerCase, v) } else { k -> v } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala index 250067d4b847..a4003bbd4807 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala @@ -22,7 +22,7 @@ import org.apache.hudi.{DataSourceWriteOptions, HoodieFileIndex} import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.HoodieConversionUtils.toProperties import org.apache.hudi.common.config.{DFSPropertiesConfiguration, TypedProperties} -import org.apache.hudi.common.model.{OverwriteWithLatestAvroPayload, WriteOperationType} +import org.apache.hudi.common.model.{DefaultHoodieRecordPayload, WriteOperationType} import org.apache.hudi.common.table.HoodieTableConfig import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME import org.apache.hudi.config.{HoodieIndexConfig, HoodieInternalConfig, HoodieWriteConfig} @@ -44,8 +44,8 @@ import org.apache.spark.sql.hudi.command.{SqlKeyGenerator, ValidateDuplicateKeyP import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.PARTITION_OVERWRITE_MODE import org.apache.spark.sql.types.StructType - import java.util.Locale + import scala.collection.JavaConverters._ trait ProvidesHoodieConfig extends Logging { @@ -102,7 +102,7 @@ trait ProvidesHoodieConfig extends Logging { // Validate duplicate key for inserts to COW table when using strict insert mode. 
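// Reviewer note (not part of the patch): the two hunks just below swap the payload used for
// SQL writes outside the strict/fail-on-duplicate path from OverwriteWithLatestAvroPayload to
// DefaultHoodieRecordPayload, so precombine/ordering semantics are honoured when merging
// records instead of always taking the latest write. A hypothetical way to observe the
// resulting option on the deduced write parameters:
//   writeParams(DataSourceWriteOptions.PAYLOAD_CLASS_NAME.key) ==
//     classOf[DefaultHoodieRecordPayload].getCanonicalName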
classOf[ValidateDuplicateKeyPayload].getCanonicalName } else { - classOf[OverwriteWithLatestAvroPayload].getCanonicalName + classOf[DefaultHoodieRecordPayload].getCanonicalName } } @@ -276,7 +276,7 @@ trait ProvidesHoodieConfig extends Logging { if (insertDupPolicy == FAIL_INSERT_DUP_POLICY) { classOf[ValidateDuplicateKeyPayload].getCanonicalName } else { - classOf[OverwriteWithLatestAvroPayload].getCanonicalName + classOf[DefaultHoodieRecordPayload].getCanonicalName } } @@ -480,6 +480,8 @@ trait ProvidesHoodieConfig extends Logging { hiveSyncConfig.setValue(HoodieSyncConfig.META_SYNC_PARTITION_FIELDS, props.getString(HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key)) } hiveSyncConfig.setDefaultValue(HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS, classOf[MultiPartKeysValueExtractor].getName) + // This is hardcoded to true to ensure consistency as Spark syncs TIMESTAMP types as TIMESTAMP by default + // via Spark's externalCatalog API, which is used by AlterHoodieTableCommand. hiveSyncConfig.setDefaultValue(HiveSyncConfigHolder.HIVE_SUPPORT_TIMESTAMP_TYPE, "true") if (hiveSyncConfig.useBucketSync()) hiveSyncConfig.setValue(HiveSyncConfigHolder.HIVE_SYNC_BUCKET_SYNC_SPEC, diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala index 3db9742aaf0c..a857c3a5ded0 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala @@ -145,12 +145,17 @@ object CreateHoodieTableCommand { val partitionColumnNames = hoodieCatalogTable.partitionSchema.map(_.name) // Remove some properties should not be used;append pk, preCombineKey, type to the properties of table - val newTblProperties = + var newTblProperties = hoodieCatalogTable.catalogProperties.--(needFilterProps) ++ HoodieOptionConfig.extractSqlOptions(properties) + + // Add provider -> hudi as a table property + newTblProperties = newTblProperties + ("provider" -> "hudi") + val newTable = table.copy( identifier = newTableIdentifier, storage = newStorage, schema = hoodieCatalogTable.tableSchema, + provider = Some("hudi"), partitionColumnNames = partitionColumnNames, createVersion = SPARK_VERSION, properties = newTblProperties diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala index 04f1fbd5ba04..740ac6758685 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala @@ -49,7 +49,7 @@ class SqlKeyGenerator(props: TypedProperties) extends BuiltinKeyGenerator(props) } } - private lazy val autoRecordKeyGen = KeyGenUtils.enableAutoGenerateRecordKeys(props) + private lazy val autoRecordKeyGen = KeyGenUtils.isAutoGeneratedRecordKeysEnabled(props) private lazy val complexKeyGen = if (autoRecordKeyGen) { new AutoRecordGenWrapperKeyGenerator(props, new ComplexKeyGenerator(props)) } else { diff --git a/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/TestHoodieSchemaUtils.java 
b/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/TestHoodieSchemaUtils.java new file mode 100644 index 000000000000..b10d0cfa9929 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/TestHoodieSchemaUtils.java @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi; + +import org.apache.hudi.common.config.HoodieCommonConfig; +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieNullSchemaTypeException; +import org.apache.hudi.exception.MissingSchemaFieldException; +import org.apache.hudi.exception.SchemaBackwardsCompatibilityException; + +import org.apache.avro.Schema; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestHoodieSchemaUtils { + + @Test + void testSchemaWithNullField() { + Schema withNullfield = createRecord("nullRecord", createPrimitiveField("nullField", Schema.Type.NULL)); + assertThrows(HoodieNullSchemaTypeException.class, + () -> deduceWriterSchema(withNullfield, null)); + } + + @Test + void testSimplePromotionWithComplexFields() { + Schema start = createRecord("simple", createPrimitiveField("f", Schema.Type.INT)); + Schema end = createRecord("simple", createPrimitiveField("f", Schema.Type.LONG)); + assertEquals(end, deduceWriterSchema(end, start)); + + start = createRecord("nested", createNestedField("f", Schema.Type.INT)); + end = createRecord("nested", createNestedField("f", Schema.Type.LONG)); + assertEquals(end, deduceWriterSchema(end, start)); + + start = createRecord("arrayRec", createArrayField("f", Schema.Type.INT)); + end = createRecord("arrayRec", createArrayField("f", Schema.Type.LONG)); + assertEquals(end, deduceWriterSchema(end, start)); + + start = createRecord("mapRec", createMapField("f", Schema.Type.INT)); + end = createRecord("mapRec", createMapField("f", Schema.Type.LONG)); + assertEquals(end, deduceWriterSchema(end, start)); + } + + @Test + void testAllowedTypePromotions() { + Schema.Type[] promotionTypes = new Schema.Type[]{Schema.Type.INT, Schema.Type.LONG, Schema.Type.FLOAT, Schema.Type.DOUBLE, Schema.Type.STRING, Schema.Type.BYTES}; + Map> allowedPromotions = new HashMap<>(); + //allowedPromotions.key can be promoted to any type in 
the range allowedPromotions.value + allowedPromotions.put(Schema.Type.INT, Pair.of(0, 4)); + allowedPromotions.put(Schema.Type.LONG, Pair.of(1, 4)); + allowedPromotions.put(Schema.Type.FLOAT, Pair.of(2, 4)); + allowedPromotions.put(Schema.Type.DOUBLE, Pair.of(3, 4)); + allowedPromotions.put(Schema.Type.STRING, Pair.of(4, 4)); + allowedPromotions.put(Schema.Type.BYTES, Pair.of(5, 5)); + + Map schemaMap = new HashMap<>(); + for (Schema.Type type : promotionTypes) { + schemaMap.put(type, createRecord("rec", + createPrimitiveField("simpleField", type), + createArrayField("arrayField", type), + createMapField("mapField", type), + createNestedField("nestedField", type))); + } + + for (int i = 0; i < promotionTypes.length; i++) { + Schema startSchema = schemaMap.get(promotionTypes[i]); + Pair minMax = allowedPromotions.get(promotionTypes[i]); + for (int j = minMax.getLeft(); j <= minMax.getRight(); j++) { + Schema endSchema = schemaMap.get(promotionTypes[j]); + assertEquals(endSchema, deduceWriterSchema(endSchema, startSchema)); + } + } + } + + @Test + void testReversePromotions() { + Schema.Type[] promotionTypes = new Schema.Type[]{Schema.Type.INT, Schema.Type.LONG, Schema.Type.FLOAT, Schema.Type.DOUBLE, Schema.Type.STRING, Schema.Type.BYTES}; + Map> reversePromotions = new HashMap<>(); + //Incoming data types in the range reversePromotions.value will be promoted to reversePromotions.key + //if reversePromotions.key is the current table schema + reversePromotions.put(Schema.Type.INT, Pair.of(0, 0)); + reversePromotions.put(Schema.Type.LONG, Pair.of(0, 1)); + reversePromotions.put(Schema.Type.FLOAT, Pair.of(0, 2)); + reversePromotions.put(Schema.Type.DOUBLE, Pair.of(0, 3)); + reversePromotions.put(Schema.Type.STRING, Pair.of(0, 5)); + reversePromotions.put(Schema.Type.BYTES, Pair.of(4, 5)); + + Map schemaMap = new HashMap<>(); + for (Schema.Type type : promotionTypes) { + schemaMap.put(type, createRecord("rec", + createPrimitiveField("simpleField", type), + createArrayField("arrayField", type), + createMapField("mapField", type), + createNestedField("nestedField", type))); + } + + for (int i = 0; i < promotionTypes.length; i++) { + Schema startSchema = schemaMap.get(promotionTypes[i]); + Pair minMax = reversePromotions.get(promotionTypes[i]); + for (int j = minMax.getLeft(); j <= minMax.getRight(); j++) { + Schema endSchema = schemaMap.get(promotionTypes[j]); + assertEquals(startSchema, deduceWriterSchema(endSchema, startSchema)); + } + } + } + + @Test + void testIllegalPromotionsBetweenPrimitives() { + Schema.Type[] promotionTypes = new Schema.Type[]{Schema.Type.INT, Schema.Type.LONG, Schema.Type.FLOAT, Schema.Type.DOUBLE, Schema.Type.BYTES}; + Map schemaMap = new HashMap<>(); + for (Schema.Type type : promotionTypes) { + schemaMap.put(type, createRecord("rec", + createPrimitiveField("simpleField", type), + createArrayField("arrayField", type), + createMapField("mapField", type), + createNestedField("nestedField", type))); + } + + String[] fieldNames = new String[]{"rec.simpleField", "rec.arrayField.element", "rec.mapField.value", "rec.nestedField.nested"}; + //int, long, float, double can't be promoted to bytes + for (int i = 0; i < 4; i++) { + Schema startSchema = schemaMap.get(promotionTypes[i]); + Schema endSchema = schemaMap.get(Schema.Type.BYTES); + Throwable t = assertThrows(SchemaBackwardsCompatibilityException.class, + () -> deduceWriterSchema(endSchema, startSchema)); + String baseString = String.format("TYPE_MISMATCH: reader type 'BYTES' not compatible with writer type '%s' for field 
'%%s'", + promotionTypes[i].getName().toUpperCase()); + for (String fieldName : fieldNames) { + assertTrue(t.getMessage().contains(String.format(baseString, fieldName))); + } + } + } + + @Test + void testIllegalPromotionsBetweenComplexFields() { + String[] typeNames = new String[]{"INT", "ARRAY", "MAP", "RECORD"}; + Schema[] fieldTypes = new Schema[]{createRecord("rec", createPrimitiveField("testField", Schema.Type.INT)), + createRecord("rec", createArrayField("testField", Schema.Type.INT)), + createRecord("rec", createMapField("testField", Schema.Type.INT)), + createRecord("rec", createNestedField("testField", Schema.Type.INT))}; + + for (int i = 0; i < fieldTypes.length; i++) { + for (int j = 0; j < fieldTypes.length; j++) { + if (i != j) { + Schema startSchema = fieldTypes[i]; + Schema endSchema = fieldTypes[j]; + Throwable t = assertThrows(SchemaBackwardsCompatibilityException.class, + () -> deduceWriterSchema(startSchema, endSchema)); + String errorMessage = String.format("Schema validation backwards compatibility check failed with the following issues: " + + "{TYPE_MISMATCH: reader type '%s' not compatible with writer type '%s' for field 'rec.testField'}", typeNames[i], typeNames[j]); + assertTrue(t.getMessage().startsWith(errorMessage)); + } + } + } + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testMissingColumn(boolean allowDroppedColumns) { + //simple case + Schema start = createRecord("missingSimpleField", + createPrimitiveField("field1", Schema.Type.INT), + createPrimitiveField("field2", Schema.Type.INT), + createPrimitiveField("field3", Schema.Type.INT)); + Schema end = createRecord("missingSimpleField", + createPrimitiveField("field1", Schema.Type.INT), + createPrimitiveField("field3", Schema.Type.INT)); + try { + assertEquals(start, deduceWriterSchema(end, start, allowDroppedColumns)); + assertTrue(allowDroppedColumns); + } catch (MissingSchemaFieldException e) { + assertFalse(allowDroppedColumns); + assertTrue(e.getMessage().contains("missingSimpleField.field2")); + } + + //complex case + start = createRecord("missingComplexField", + createPrimitiveField("field1", Schema.Type.INT), + createPrimitiveField("field2", Schema.Type.INT), + createArrayField("field3", createRecord("nestedRecord", + createPrimitiveField("nestedField1", Schema.Type.INT), + createPrimitiveField("nestedField2", Schema.Type.INT), + createPrimitiveField("nestedField3", Schema.Type.INT))), + createPrimitiveField("field4", Schema.Type.INT)); + end = createRecord("missingComplexField", + createPrimitiveField("field1", Schema.Type.INT), + createPrimitiveField("field2", Schema.Type.INT), + createPrimitiveField("field4", Schema.Type.INT)); + try { + assertEquals(start, deduceWriterSchema(end, start, allowDroppedColumns)); + assertTrue(allowDroppedColumns); + } catch (MissingSchemaFieldException e) { + assertFalse(allowDroppedColumns); + assertTrue(e.getMessage().contains("missingComplexField.field3")); + } + + //partial missing field + end = createRecord("missingComplexField", + createPrimitiveField("field1", Schema.Type.INT), + createArrayField("field3", createRecord("nestedRecord", + createPrimitiveField("nestedField2", Schema.Type.INT), + createPrimitiveField("nestedField3", Schema.Type.INT))), + createPrimitiveField("field4", Schema.Type.INT)); + try { + assertEquals(start, deduceWriterSchema(end, start, allowDroppedColumns)); + assertTrue(allowDroppedColumns); + } catch (MissingSchemaFieldException e) { + assertFalse(allowDroppedColumns); + 
assertTrue(e.getMessage().contains("missingComplexField.field3.element.nestedRecord.nestedField1")); + assertTrue(e.getMessage().contains("missingComplexField.field2")); + } + } + + private static Schema deduceWriterSchema(Schema incomingSchema, Schema latestTableSchema) { + return deduceWriterSchema(incomingSchema, latestTableSchema, false); + } + + private static final TypedProperties TYPED_PROPERTIES = new TypedProperties(); + + private static Schema deduceWriterSchema(Schema incomingSchema, Schema latestTableSchema, Boolean addNull) { + TYPED_PROPERTIES.setProperty(HoodieCommonConfig.SET_NULL_FOR_MISSING_COLUMNS.key(), addNull.toString()); + return HoodieSchemaUtils.deduceWriterSchema(incomingSchema, Option.ofNullable(latestTableSchema), + Option.empty(), TYPED_PROPERTIES); + } + + private static Schema.Field createNestedField(String name, Schema.Type type) { + return createNestedField(name, Schema.create(type)); + } + + private static Schema.Field createNestedField(String name, Schema schema) { + return new Schema.Field(name, createRecord(name, new Schema.Field("nested", schema, null, null)), null, null); + } + + private static Schema.Field createArrayField(String name, Schema.Type type) { + return createArrayField(name, Schema.create(type)); + } + + private static Schema.Field createArrayField(String name, Schema schema) { + return new Schema.Field(name, Schema.createArray(schema), null, null); + } + + private static Schema.Field createMapField(String name, Schema.Type type) { + return createMapField(name, Schema.create(type)); + } + + private static Schema.Field createMapField(String name, Schema schema) { + return new Schema.Field(name, Schema.createMap(schema), null, null); + } + + private static Schema.Field createPrimitiveField(String name, Schema.Type type) { + return new Schema.Field(name, Schema.create(type), null, null); + } + + private static Schema createRecord(String name, Schema.Field... 
fields) { + return Schema.createRecord(name, null, null, false, Arrays.asList(fields)); + } + +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/execution/datasources/TestHoodieInMemoryFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/execution/datasources/TestHoodieInMemoryFileIndex.scala index 8e7f6bf14b7e..c9052a952e68 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/execution/datasources/TestHoodieInMemoryFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/execution/datasources/TestHoodieInMemoryFileIndex.scala @@ -18,6 +18,7 @@ package org.apache.spark.execution.datasources import org.apache.hadoop.fs.Path +import org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest import org.apache.spark.sql.SparkSession import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Test @@ -31,9 +32,7 @@ class TestHoodieInMemoryFileIndex { @Test def testCreateInMemoryIndex(@TempDir tempDir: File): Unit = { val spark = SparkSession.builder - .appName("Hoodie Datasource test") - .master("local[2]") - .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .config(getSparkConfForTest("Hoodie Datasource test")) .getOrCreate val folders: Seq[Path] = Seq( diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedures.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedures.scala index 9bb6fb6db8db..e12aad789d78 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedures.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedures.scala @@ -91,6 +91,7 @@ object HoodieProcedures { ,(ShowTablePropertiesProcedure.NAME, ShowTablePropertiesProcedure.builder) ,(HelpProcedure.NAME, HelpProcedure.builder) ,(ArchiveCommitsProcedure.NAME, ArchiveCommitsProcedure.builder) + ,(RunTTLProcedure.NAME, RunTTLProcedure.builder) ) } } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunTTLProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunTTLProcedure.scala new file mode 100644 index 000000000000..2d3e704ad129 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunTTLProcedure.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
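Reviewer note (annotation, not part of the patch): the run_ttl procedure registered above and implemented just below is reachable through Spark SQL's CALL syntax. A hedged usage sketch with a hypothetical table name and retention period; per the output type declared below, the result carries a single deleted_partitions column:

// Trigger partition TTL on table "h0" with a 30-day retention;
// one row per replaced/deleted partition is expected in the result.
spark.sql("call run_ttl(table => 'h0', retain_days => 30)").show(false)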
+ */ + +package org.apache.spark.sql.hudi.command.procedures + +import org.apache.hudi.HoodieCLIUtils +import org.apache.hudi.client.SparkRDDWriteClient +import org.apache.hudi.config.HoodieTTLConfig +import org.apache.spark.internal.Logging +import org.apache.spark.sql.Row +import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} + +import java.util.function.Supplier +import scala.collection.JavaConverters._ + +class RunTTLProcedure extends BaseProcedure with ProcedureBuilder with Logging { + + private val PARAMETERS = Array[ProcedureParameter]( + ProcedureParameter.required(0, "table", DataTypes.StringType), + ProcedureParameter.optional(1, "ttl_policy", DataTypes.StringType), + ProcedureParameter.optional(2, "retain_days", DataTypes.IntegerType), + ProcedureParameter.optional(3, "options", DataTypes.StringType) + ) + + private val OUTPUT_TYPE = new StructType(Array[StructField]( + StructField("deleted_partitions", DataTypes.StringType, nullable = true, Metadata.empty) + )) + + override def build: Procedure = new RunTTLProcedure + + /** + * Returns the input parameters of this procedure. + */ + override def parameters: Array[ProcedureParameter] = PARAMETERS + + /** + * Returns the type of rows produced by this procedure. + */ + override def outputType: StructType = OUTPUT_TYPE + + override def call(args: ProcedureArgs): Seq[Row] = { + super.checkArgs(PARAMETERS, args) + + val tableName = getArgValueOrDefault(args, PARAMETERS(0)) + var confs: Map[String, String] = Map.empty + if (getArgValueOrDefault(args, PARAMETERS(1)).isDefined) { + confs += HoodieTTLConfig.PARTITION_TTL_STRATEGY_TYPE.key() -> getArgValueOrDefault(args, PARAMETERS(1)).get.toString + } + if (getArgValueOrDefault(args, PARAMETERS(2)).isDefined) { + confs += HoodieTTLConfig.DAYS_RETAIN.key() -> getArgValueOrDefault(args, PARAMETERS(2)).get.toString + } + if (getArgValueOrDefault(args, PARAMETERS(3)).isDefined) { + confs ++= HoodieCLIUtils.extractOptions(getArgValueOrDefault(args, PARAMETERS(3)).get.asInstanceOf[String]) + } + + val basePath = getBasePath(tableName, Option.empty) + + var client: SparkRDDWriteClient[_] = null + try { + client = HoodieCLIUtils.createHoodieWriteClient(sparkSession, basePath, confs, + tableName.asInstanceOf[Option[String]]) + val ttlInstantTime = client.createNewInstantTime() + val hoodieTTLMeta = client.managePartitionTTL(ttlInstantTime) + if (hoodieTTLMeta == null) { + Seq.empty + } else { + hoodieTTLMeta.getPartitionToReplaceFileIds.keySet().asScala.map { p => + Row(p) + }.toSeq + } + } finally { + if (client != null) { + client.close() + } + } + } +} + +object RunTTLProcedure { + val NAME = "run_ttl" + + def builder: Supplier[ProcedureBuilder] = new Supplier[ProcedureBuilder] { + override def get() = new RunTTLProcedure + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHiveTableSchemaEvolution.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHiveTableSchemaEvolution.java index f79d54ed0d2f..806f77544231 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHiveTableSchemaEvolution.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHiveTableSchemaEvolution.java @@ -98,7 +98,8 @@ public void testHiveReadSchemaEvolutionTable(String tableType) throws Exception spark.sql("set hoodie.schema.on.read.enable=true"); spark.sql(String.format("create table %s (col0 int, col1 float, col2 string) using hudi " - + 
"tblproperties (type='%s', primaryKey='col0', preCombineField='col1') location '%s'", + + "tblproperties (type='%s', primaryKey='col0', preCombineField='col1', " + + "hoodie.compaction.payload.class='org.apache.hudi.common.model.OverwriteWithLatestAvroPayload') location '%s'", tableName, tableType, path)); spark.sql(String.format("insert into %s values(1, 1.1, 'text')", tableName)); spark.sql(String.format("update %s set col2 = 'text2' where col0 = 1", tableName)); diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java index fb4559263125..0e4dc22b8ce7 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java @@ -131,6 +131,7 @@ private HoodieRowParquetWriteSupport getWriteSupport(HoodieWriteConfig.Builder w writeConfig.getBloomFilterFPP(), writeConfig.getDynamicBloomFilterMaxNumEntries(), writeConfig.getBloomFilterType()); - return new HoodieRowParquetWriteSupport(hadoopConf, SparkDatasetTestUtils.STRUCT_TYPE, Option.of(filter), writeConfig.getStorageConfig()); + return HoodieRowParquetWriteSupport.getHoodieRowParquetWriteSupport(hadoopConf, + SparkDatasetTestUtils.STRUCT_TYPE, Option.of(filter), writeConfig); } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java index 296cf3d6e0db..2fa09861d25c 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java @@ -78,7 +78,7 @@ public void testNullPartitionPathFields() { @Test public void testNullRecordKeyFields() { GenericRecord record = getRecord(); - Assertions.assertThrows(StringIndexOutOfBoundsException.class, () -> { + Assertions.assertThrows(HoodieKeyException.class, () -> { ComplexKeyGenerator keyGenerator = new ComplexKeyGenerator(getPropertiesWithoutRecordKeyProp()); keyGenerator.getRecordKey(record); }); diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteRecordGenerator.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteRecordGenerator.java index df69279cc89f..4c9fc1c9ddaa 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteRecordGenerator.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteRecordGenerator.java @@ -62,7 +62,7 @@ private TypedProperties getProps() { @Test public void testNullRecordKeyFields() { GenericRecord record = getRecord(); - Assertions.assertThrows(StringIndexOutOfBoundsException.class, () -> { + Assertions.assertThrows(HoodieKeyException.class, () -> { BaseKeyGenerator keyGenerator = new GlobalDeleteKeyGenerator(getPropertiesWithoutRecordKeyProp()); keyGenerator.getRecordKey(record); }); diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java index 
fb740d00e2a5..187f96197b1d 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java @@ -69,7 +69,7 @@ private TypedProperties getWrongRecordKeyFieldProps() { @Test public void testNullRecordKeyFields() { GenericRecord record = getRecord(); - Assertions.assertThrows(StringIndexOutOfBoundsException.class, () -> { + Assertions.assertThrows(HoodieKeyException.class, () -> { BaseKeyGenerator keyGenerator = new NonpartitionedKeyGenerator(getPropertiesWithoutRecordKeyProp()); keyGenerator.getRecordKey(record); }); diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/HoodieSparkWriterTestBase.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/HoodieSparkWriterTestBase.scala new file mode 100644 index 000000000000..c0c1c2c12bd4 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/HoodieSparkWriterTestBase.scala @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi + +import org.apache.commons.io.FileUtils +import org.apache.hudi.common.model.{HoodieRecord, HoodieTableType} +import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.testutils.HoodieClientTestUtils +import org.apache.spark.SparkContext +import org.apache.spark.sql.hudi.HoodieSparkSessionExtension +import org.apache.spark.sql.{Dataset, Row, SQLContext, SparkSession} +import org.junit.jupiter.api.{AfterEach, BeforeEach} + +import scala.collection.JavaConverters + +class HoodieSparkWriterTestBase { + var spark: SparkSession = _ + var sqlContext: SQLContext = _ + var sc: SparkContext = _ + var tempPath: java.nio.file.Path = _ + var tempBootStrapPath: java.nio.file.Path = _ + var hoodieFooTableName = "hoodie_foo_tbl" + var tempBasePath: String = _ + var commonTableModifier: Map[String, String] = Map() + + case class StringLongTest(uuid: String, ts: Long) + + /** + * Setup method running before each test. + */ + @BeforeEach + def setUp(): Unit = { + initSparkContext() + tempPath = java.nio.file.Files.createTempDirectory("hoodie_test_path") + tempBootStrapPath = java.nio.file.Files.createTempDirectory("hoodie_test_bootstrap") + tempBasePath = tempPath.toAbsolutePath.toString + commonTableModifier = getCommonParams(tempPath, hoodieFooTableName, HoodieTableType.COPY_ON_WRITE.name()) + } + + /** + * Tear down method running after each test. + */ + @AfterEach + def tearDown(): Unit = { + cleanupSparkContexts() + FileUtils.deleteDirectory(tempPath.toFile) + FileUtils.deleteDirectory(tempBootStrapPath.toFile) + } + + /** + * Utility method for initializing the spark context. 
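Reviewer note on the new HoodieSparkWriterTestBase above (hypothetical sketch, not part of the patch): suites are meant to extend the base class and reuse the SparkSession, temp paths and common write params it wires up. The class name, column values and assertion below are invented for illustration:

import org.apache.hudi.{DataSourceWriteOptions, HoodieSparkWriterTestBase}
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.functions.lit
import org.junit.jupiter.api.Test

class TestExampleSparkWriter extends HoodieSparkWriterTestBase {
  @Test
  def testSimpleWrite(): Unit = {
    // spark, tempBasePath and commonTableModifier are provided by the base class
    val df = spark.createDataFrame(Seq(("row1", 1L), ("row2", 2L)))
      .toDF("_row_key", "ts")
      .withColumn("partition", lit("p1"))
    df.write.format("hudi")
      .options(commonTableModifier)
      .option(DataSourceWriteOptions.PRECOMBINE_FIELD.key, "ts")
      .mode(SaveMode.Overwrite)
      .save(tempBasePath)
    assert(spark.read.format("hudi").load(tempBasePath).count() == 2)
  }
}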
+ */ + def initSparkContext(): Unit = { + val sparkConf = HoodieClientTestUtils.getSparkConfForTest(getClass.getSimpleName) + + spark = SparkSession.builder() + .withExtensions(new HoodieSparkSessionExtension) + .config(sparkConf) + .getOrCreate() + + sc = spark.sparkContext + sc.setLogLevel("ERROR") + sqlContext = spark.sqlContext + } + + /** + * Utility method for cleaning up spark resources. + */ + def cleanupSparkContexts(): Unit = { + if (sqlContext != null) { + sqlContext.clearCache(); + sqlContext = null; + } + if (sc != null) { + sc.stop() + sc = null + } + if (spark != null) { + spark.close() + } + } + + /** + * Utility method for creating common params for writer. + * + * @param path Path for hoodie table + * @param hoodieFooTableName Name of hoodie table + * @param tableType Type of table + * @return Map of common params + */ + def getCommonParams(path: java.nio.file.Path, hoodieFooTableName: String, tableType: String): Map[String, String] = { + Map("path" -> path.toAbsolutePath.toString, + HoodieWriteConfig.TBL_NAME.key -> hoodieFooTableName, + "hoodie.insert.shuffle.parallelism" -> "1", + "hoodie.upsert.shuffle.parallelism" -> "1", + DataSourceWriteOptions.TABLE_TYPE.key -> tableType, + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", + DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") + } + + /** + * Utility method for dropping all hoodie meta related columns. + */ + def dropMetaFields(df: Dataset[Row]): Dataset[Row] = { + df.drop(HoodieRecord.HOODIE_META_COLUMNS.get(0)).drop(HoodieRecord.HOODIE_META_COLUMNS.get(1)) + .drop(HoodieRecord.HOODIE_META_COLUMNS.get(2)).drop(HoodieRecord.HOODIE_META_COLUMNS.get(3)) + .drop(HoodieRecord.HOODIE_META_COLUMNS.get(4)) + } + + /** + * Utility method for converting list of Row to list of Seq. 
+ * + * @param inputList list of Row + * @return list of Seq + */ + def convertRowListToSeq(inputList: java.util.List[Row]): Seq[Row] = + JavaConverters.asScalaIteratorConverter(inputList.iterator).asScala.toSeq + +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala index a2598c766b19..784ddd6c883b 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala @@ -262,7 +262,7 @@ class TestDataSourceDefaults extends ScalaAssertionSupport { } // Record's key field not specified - assertThrows(classOf[StringIndexOutOfBoundsException]) { + assertThrows(classOf[HoodieKeyException]) { val props = new TypedProperties() props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "partitionField") val keyGen = new ComplexKeyGenerator(props) @@ -494,7 +494,7 @@ class TestDataSourceDefaults extends ScalaAssertionSupport { val props = new TypedProperties() props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "partitionField") - assertThrows(classOf[StringIndexOutOfBoundsException]) { + assertThrows(classOf[HoodieKeyException]) { new GlobalDeleteKeyGenerator(props).getRecordKey(baseRecord) } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala index 1c6766063d24..0767d0559159 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala @@ -19,11 +19,8 @@ package org.apache.hudi import org.apache.avro.Schema import org.apache.commons.io.FileUtils -import org.apache.hudi.DataSourceWriteOptions._ -import org.apache.hudi.HoodieSparkUtils.gteqSpark3_0 import org.apache.hudi.client.SparkRDDWriteClient -import org.apache.hudi.common.model._ -import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator +import org.apache.hudi.common.model.{HoodieFileFormat, HoodieRecord, HoodieRecordPayload, HoodieTableType, WriteOperationType} import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.common.testutils.HoodieTestDataGenerator import org.apache.hudi.config.{HoodieBootstrapConfig, HoodieIndexConfig, HoodieWriteConfig} @@ -31,142 +28,35 @@ import org.apache.hudi.exception.{HoodieException, SchemaCompatibilityException} import org.apache.hudi.execution.bulkinsert.BulkInsertSortMode import org.apache.hudi.functional.TestBootstrap import org.apache.hudi.keygen.{ComplexKeyGenerator, NonpartitionedKeyGenerator, SimpleKeyGenerator} -import org.apache.hudi.testutils.DataSourceTestUtils -import org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest -import org.apache.spark.SparkContext +import org.apache.hudi.testutils.{DataSourceTestUtils, HoodieClientTestUtils} import org.apache.spark.api.java.JavaSparkContext -import org.apache.spark.sql._ +import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession} import org.apache.spark.sql.functions.{expr, lit} -import org.apache.spark.sql.hudi.HoodieSparkSessionExtension import org.apache.spark.sql.hudi.command.SqlKeyGenerator import org.junit.jupiter.api.Assertions.{assertEquals, 
assertFalse, assertNotNull, assertNull, assertTrue, fail} -import org.junit.jupiter.api.{AfterEach, BeforeEach, Disabled, Test} +import org.junit.jupiter.api.Test import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.Arguments.arguments -import org.junit.jupiter.params.provider._ +import org.junit.jupiter.params.provider.{Arguments, CsvSource, EnumSource, MethodSource, ValueSource} import org.mockito.ArgumentMatchers.any import org.mockito.Mockito.{spy, times, verify} import org.scalatest.Assertions.assertThrows import org.scalatest.Matchers.{be, convertToAnyShouldWrapper, intercept} import java.io.IOException -import java.time.format.DateTimeFormatterBuilder -import java.time.temporal.ChronoField -import java.time.{Instant, ZoneId} -import java.util.{Collections, Date, TimeZone, UUID} +import java.time.Instant +import java.util.{Collections, Date, UUID} import scala.collection.JavaConversions._ -import scala.collection.JavaConverters /** * Test suite for SparkSqlWriter class. + * All tests that use {@link HoodieTimelineTimeZone.UTC} must be placed in the separate test class {@link TestHoodieSparkSqlWriterUtc}. + * Otherwise the UTC tests can loop indefinitely whenever an earlier test ran with a time zone ahead of UTC+0, + * because the static field {@link org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator.lastInstantTime} retains the previously generated instant time. */ -class TestHoodieSparkSqlWriter { - var spark: SparkSession = _ - var sqlContext: SQLContext = _ - var sc: SparkContext = _ - var tempPath: java.nio.file.Path = _ - var tempBootStrapPath: java.nio.file.Path = _ - var hoodieFooTableName = "hoodie_foo_tbl" - var tempBasePath: String = _ - var commonTableModifier: Map[String, String] = Map() - case class StringLongTest(uuid: String, ts: Long) +class TestHoodieSparkSqlWriter extends HoodieSparkWriterTestBase { /** - * Setup method running before each test. - */ - @BeforeEach - def setUp(): Unit = { - initSparkContext() - tempPath = java.nio.file.Files.createTempDirectory("hoodie_test_path") - tempBootStrapPath = java.nio.file.Files.createTempDirectory("hoodie_test_bootstrap") - tempBasePath = tempPath.toAbsolutePath.toString - commonTableModifier = getCommonParams(tempPath, hoodieFooTableName, HoodieTableType.COPY_ON_WRITE.name()) - } - - /** - * Tear down method running after each test. - */ - @AfterEach - def tearDown(): Unit = { - cleanupSparkContexts() - FileUtils.deleteDirectory(tempPath.toFile) - FileUtils.deleteDirectory(tempBootStrapPath.toFile) - } - - /** - * Utility method for initializing the spark context. - * - * TODO rebase this onto existing base class to avoid duplication - */ - def initSparkContext(): Unit = { - val sparkConf = getSparkConfForTest(getClass.getSimpleName) - - spark = SparkSession.builder() - .withExtensions(new HoodieSparkSessionExtension) - .config(sparkConf) - .getOrCreate() - - sc = spark.sparkContext - sc.setLogLevel("ERROR") - sqlContext = spark.sqlContext - } - - /** - * Utility method for cleaning up spark resources. - */ - def cleanupSparkContexts(): Unit = { - if (sqlContext != null) { - sqlContext.clearCache(); - sqlContext = null; - } - if (sc != null) { - sc.stop() - sc = null - } - if (spark != null) { - spark.close() - } - } - - /** - * Utility method for dropping all hoodie meta related columns.
- */ - def dropMetaFields(df: Dataset[Row]): Dataset[Row] = { - df.drop(HoodieRecord.HOODIE_META_COLUMNS.get(0)).drop(HoodieRecord.HOODIE_META_COLUMNS.get(1)) - .drop(HoodieRecord.HOODIE_META_COLUMNS.get(2)).drop(HoodieRecord.HOODIE_META_COLUMNS.get(3)) - .drop(HoodieRecord.HOODIE_META_COLUMNS.get(4)) - } - - /** - * Utility method for creating common params for writer. - * - * @param path Path for hoodie table - * @param hoodieFooTableName Name of hoodie table - * @param tableType Type of table - * @return Map of common params - */ - def getCommonParams(path: java.nio.file.Path, hoodieFooTableName: String, tableType: String): Map[String, String] = { - Map("path" -> path.toAbsolutePath.toString, - HoodieWriteConfig.TBL_NAME.key -> hoodieFooTableName, - "hoodie.insert.shuffle.parallelism" -> "1", - "hoodie.upsert.shuffle.parallelism" -> "1", - DataSourceWriteOptions.TABLE_TYPE.key -> tableType, - DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", - DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", - DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") - } - - /** - * Utility method for converting list of Row to list of Seq. - * - * @param inputList list of Row - * @return list of Seq - */ - def convertRowListToSeq(inputList: java.util.List[Row]): Seq[Row] = - JavaConverters.asScalaIteratorConverter(inputList.iterator).asScala.toSeq - - /** - * Utility method for performing bulk insert tests. + * Local utility method for performing bulk insert tests. * * @param sortMode Bulk insert sort mode * @param populateMetaFields Flag for populating meta fields @@ -226,12 +116,13 @@ class TestHoodieSparkSqlWriter { val originals = HoodieWriterUtils.parametersWithWriteDefaults(Map.empty) val rhsKey = "hoodie.right.hand.side.key" val rhsVal = "hoodie.right.hand.side.val" - val modifier = Map(OPERATION.key -> INSERT_OPERATION_OPT_VAL, TABLE_TYPE.key -> MOR_TABLE_TYPE_OPT_VAL, rhsKey -> rhsVal) + val modifier = Map(DataSourceWriteOptions.OPERATION.key -> DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL, + DataSourceWriteOptions.TABLE_TYPE.key -> DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL, rhsKey -> rhsVal) val modified = HoodieWriterUtils.parametersWithWriteDefaults(modifier) val matcher = (k: String, v: String) => modified(k) should be(v) originals foreach { - case ("hoodie.datasource.write.operation", _) => matcher("hoodie.datasource.write.operation", INSERT_OPERATION_OPT_VAL) - case ("hoodie.datasource.write.table.type", _) => matcher("hoodie.datasource.write.table.type", MOR_TABLE_TYPE_OPT_VAL) + case ("hoodie.datasource.write.operation", _) => matcher("hoodie.datasource.write.operation", DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + case ("hoodie.datasource.write.table.type", _) => matcher("hoodie.datasource.write.table.type", DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL) case (`rhsKey`, _) => matcher(rhsKey, rhsVal) case (k, v) => matcher(k, v) } @@ -243,10 +134,15 @@ class TestHoodieSparkSqlWriter { @Test def testThrowExceptionInvalidSerializer(): Unit = { spark.stop() - val session = SparkSession.builder().appName("hoodie_test").master("local").getOrCreate() + val session = SparkSession.builder() + // Here we intentionally remove the "spark.serializer" config to test failure + .config(HoodieClientTestUtils.getSparkConfForTest("hoodie_test").remove("spark.serializer")) + .getOrCreate() try { val sqlContext = session.sqlContext - val options = Map("path" -> "hoodie/test/path", HoodieWriteConfig.TBL_NAME.key -> 
"hoodie_test_tbl") + val options = Map( + "path" -> (tempPath.toUri.toString + "/testThrowExceptionInvalidSerializer/basePath"), + HoodieWriteConfig.TBL_NAME.key -> "hoodie_test_tbl") val e = intercept[HoodieException](HoodieSparkSqlWriter.write(sqlContext, SaveMode.ErrorIfExists, options, session.emptyDataFrame)) assert(e.getMessage.contains("spark.serializer")) @@ -285,7 +181,7 @@ class TestHoodieSparkSqlWriter { assert(tableAlreadyExistException.getMessage.contains(s"${HoodieWriteConfig.TBL_NAME.key}:\thoodie_bar_tbl\thoodie_foo_tbl")) //on same path try append with delete operation and different("hoodie_bar_tbl") table name which should throw an exception - val deleteTableModifier = barTableModifier ++ Map(OPERATION.key -> "delete") + val deleteTableModifier = barTableModifier ++ Map(DataSourceWriteOptions.OPERATION.key -> "delete") val deleteCmdException = intercept[HoodieException](HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, deleteTableModifier, dataFrame2)) assert(tableAlreadyExistException.getMessage.contains("Config conflict")) assert(tableAlreadyExistException.getMessage.contains(s"${HoodieWriteConfig.TBL_NAME.key}:\thoodie_bar_tbl\thoodie_foo_tbl")) @@ -449,7 +345,7 @@ def testBulkInsertForDropPartitionColumn(): Unit = { val fooTableModifier = commonTableModifier.updated("hoodie.bulkinsert.shuffle.parallelism", "4") .updated(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL) .updated(DataSourceWriteOptions.ENABLE_ROW_WRITER.key, "true") - .updated(INSERT_DROP_DUPS.key, "true") + .updated(DataSourceWriteOptions.INSERT_DROP_DUPS.key, "true") // generate the inserts val schema = DataSourceTestUtils.getStructTypeExampleSchema @@ -682,10 +578,11 @@ def testBulkInsertForDropPartitionColumn(): Unit = { .setBaseFileFormat(fooTableParams.getOrElse(HoodieWriteConfig.BASE_FILE_FORMAT.key, HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().name)) .setArchiveLogFolder(HoodieTableConfig.ARCHIVELOG_FOLDER.defaultValue()) - .setPayloadClassName(PAYLOAD_CLASS_NAME.key) - .setPreCombineField(fooTableParams.getOrElse(PRECOMBINE_FIELD.key, PRECOMBINE_FIELD.defaultValue())) + .setPayloadClassName(DataSourceWriteOptions.PAYLOAD_CLASS_NAME.key) + .setPreCombineField(fooTableParams.getOrElse(DataSourceWriteOptions.PRECOMBINE_FIELD.key, DataSourceWriteOptions.PRECOMBINE_FIELD.defaultValue())) .setPartitionFields(fooTableParams(DataSourceWriteOptions.PARTITIONPATH_FIELD.key)) - .setKeyGeneratorClassProp(fooTableParams.getOrElse(KEYGENERATOR_CLASS_NAME.key, KEYGENERATOR_CLASS_NAME.defaultValue())) + .setKeyGeneratorClassProp(fooTableParams.getOrElse(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key, + DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.defaultValue())) if(addBootstrapPath) { tableMetaClientBuilder .setBootstrapBasePath(fooTableParams(HoodieBootstrapConfig.BASE_PATH.key)) @@ -1336,53 +1233,6 @@ def testBulkInsertForDropPartitionColumn(): Unit = { assert(exc.getMessage.contains("Consistent hashing bucket index does not work with COW table. 
Use simple bucket index or an MOR table.")) } - /* - * Test case for instant is generated with commit timezone when TIMELINE_TIMEZONE set to UTC - * related to HUDI-5978 - * Issue [HUDI-7275] is tracking this test being disabled - */ - @Disabled - def testInsertDatasetWithTimelineTimezoneUTC(): Unit = { - val defaultTimezone = TimeZone.getDefault - try { - val fooTableModifier = commonTableModifier.updated(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) - .updated(DataSourceWriteOptions.INSERT_DROP_DUPS.key, "false") - .updated(HoodieTableConfig.TIMELINE_TIMEZONE.key, "UTC") // utc timezone - - // generate the inserts - val schema = DataSourceTestUtils.getStructTypeExampleSchema - val structType = AvroConversionUtils.convertAvroSchemaToStructType(schema) - val records = DataSourceTestUtils.generateRandomRows(100) - val recordsSeq = convertRowListToSeq(records) - val df = spark.createDataFrame(sc.parallelize(recordsSeq), structType) - - // get UTC instant before write - val beforeWriteInstant = Instant.now() - - // set local timezone to America/Los_Angeles(UTC-7) - TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) - - // write to Hudi - val (success, writeInstantTimeOpt, _, _, _, hoodieTableConfig) = HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, fooTableModifier, df) - assertTrue(success) - val hoodieTableTimelineTimezone = HoodieTimelineTimeZone.valueOf(hoodieTableConfig.getString(HoodieTableConfig.TIMELINE_TIMEZONE)) - assertEquals(hoodieTableTimelineTimezone, HoodieTimelineTimeZone.UTC) - - val utcFormatter = new DateTimeFormatterBuilder() - .appendPattern(HoodieInstantTimeGenerator.SECS_INSTANT_TIMESTAMP_FORMAT) - .appendValue(ChronoField.MILLI_OF_SECOND, 3) - .toFormatter - .withZone(ZoneId.of("UTC")) - // instant parsed by UTC timezone - val writeInstant = Instant.from(utcFormatter.parse(writeInstantTimeOpt.get())) - - assertTrue(beforeWriteInstant.toEpochMilli < writeInstant.toEpochMilli, - s"writeInstant(${writeInstant.toEpochMilli}) must always be greater than beforeWriteInstant(${beforeWriteInstant.toEpochMilli}) if writeInstant was generated with UTC timezone") - } finally { - TimeZone.setDefault(defaultTimezone) - } - } - private def fetchActualSchema(): Schema = { val tableMetaClient = HoodieTableMetaClient.builder() .setConf(spark.sparkContext.hadoopConfiguration) @@ -1406,19 +1256,19 @@ object TestHoodieSparkSqlWriter { // NOTE: Hudi doesn't support Orc in Spark < 3.0 // Please check HUDI-4496 for more details - val targetScenarios = if (gteqSpark3_0) { + val targetScenarios = if (HoodieSparkUtils.gteqSpark3_0) { parquetScenarios ++ orcScenarios } else { parquetScenarios } - java.util.Arrays.stream(targetScenarios.map(as => arguments(as.map(_.asInstanceOf[AnyRef]):_*))) + java.util.Arrays.stream(targetScenarios.map(as => Arguments.arguments(as.map(_.asInstanceOf[AnyRef]):_*))) } def deletePartitionsWildcardTestParams(): java.util.stream.Stream[Arguments] = { java.util.stream.Stream.of( - arguments("*5/03/1*", Seq("2016/03/15")), - arguments("2016/03/*", Seq("2015/03/16", "2015/03/17"))) + Arguments.arguments("*5/03/1*", Seq("2016/03/15")), + Arguments.arguments("2016/03/*", Seq("2015/03/16", "2015/03/17"))) } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriterUtc.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriterUtc.scala new file mode 100644 index 000000000000..ca4d23f719d7 --- /dev/null +++ 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriterUtc.scala @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi + +import org.apache.hudi.common.model.HoodieTimelineTimeZone +import org.apache.hudi.common.table.HoodieTableConfig +import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator +import org.apache.hudi.testutils.DataSourceTestUtils +import org.apache.spark.sql.SaveMode +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.Test + +import java.time.{Instant, ZoneId} +import java.time.format.DateTimeFormatterBuilder +import java.time.temporal.ChronoField +import java.util.TimeZone + +/** + * Test suite for SparkSqlWriter covering all cases that use {@link HoodieTimelineTimeZone.UTC}. + * Using {@link HoodieTimelineTimeZone.LOCAL} here could lead to infinite loops, because it could leave a + * value in the static field {@link HoodieInstantTimeGenerator.lastInstantTime} + * that is greater than the instant times generated with {@link HoodieTimelineTimeZone.UTC}.
+ */ +class TestHoodieSparkSqlWriterUtc extends HoodieSparkWriterTestBase { + /* + * Verifies that the commit instant is generated in the commit timezone when TIMELINE_TIMEZONE is set to UTC + * (related to HUDI-5978). + */ + @Test + def testInsertDatasetWithTimelineTimezoneUTC(): Unit = { + val defaultTimezone = TimeZone.getDefault + try { + val fooTableModifier = commonTableModifier.updated(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .updated(DataSourceWriteOptions.INSERT_DROP_DUPS.key, "false") + .updated(HoodieTableConfig.TIMELINE_TIMEZONE.key, "UTC") // utc timezone + + // generate the inserts + val schema = DataSourceTestUtils.getStructTypeExampleSchema + val structType = AvroConversionUtils.convertAvroSchemaToStructType(schema) + val records = DataSourceTestUtils.generateRandomRows(100) + val recordsSeq = convertRowListToSeq(records) + val df = spark.createDataFrame(sc.parallelize(recordsSeq), structType) + + // get UTC instant before write + val beforeWriteInstant = Instant.now() + + // set the JVM default timezone to Asia/Novosibirsk (UTC+7), i.e. ahead of UTC + TimeZone.setDefault(TimeZone.getTimeZone("Asia/Novosibirsk")) + + // write to Hudi + val (success, writeInstantTimeOpt, _, _, _, hoodieTableConfig) = HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, fooTableModifier, df) + assertTrue(success) + val hoodieTableTimelineTimezone = HoodieTimelineTimeZone.valueOf(hoodieTableConfig.getString(HoodieTableConfig.TIMELINE_TIMEZONE)) + assertEquals(hoodieTableTimelineTimezone, HoodieTimelineTimeZone.UTC) + + val utcFormatter = new DateTimeFormatterBuilder() + .appendPattern(HoodieInstantTimeGenerator.SECS_INSTANT_TIMESTAMP_FORMAT) + .appendValue(ChronoField.MILLI_OF_SECOND, 3) + .toFormatter + .withZone(ZoneId.of("UTC")) + // instant parsed by UTC timezone + val writeInstant = Instant.from(utcFormatter.parse(writeInstantTimeOpt.get())) + + assertTrue(beforeWriteInstant.toEpochMilli < writeInstant.toEpochMilli, + s"writeInstant(${writeInstant.toEpochMilli}) must always be greater than beforeWriteInstant(${beforeWriteInstant.toEpochMilli}) if writeInstant was generated with UTC timezone") + } finally { + TimeZone.setDefault(defaultTimezone) + } + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala index 15b6b2b35da7..85c3c619111b 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala @@ -20,6 +20,7 @@ package org.apache.hudi import org.apache.avro.generic.GenericRecord import org.apache.hudi.testutils.DataSourceTestUtils +import org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest import org.apache.spark.sql.types.{ArrayType, StructField, StructType} import org.apache.spark.sql.{DataFrame, Row, SparkSession} import org.junit.jupiter.api.Assertions._ @@ -88,11 +89,7 @@ class TestHoodieSparkUtils { @Test def testCreateRddSchemaEvol(): Unit = { val spark = SparkSession.builder - .appName("Hoodie Datasource test") - .master("local[2]") - .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") - .config("spark.kryo.registrator", "org.apache.spark.HoodieSparkKryoRegistrar") - .config("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension") + .config(getSparkConfForTest("Hoodie Datasource test")) + .getOrCreate val schema = 
DataSourceTestUtils.getStructTypeExampleSchema @@ -126,11 +123,7 @@ class TestHoodieSparkUtils { @Test def testCreateRddWithNestedSchemas(): Unit = { val spark = SparkSession.builder - .appName("Hoodie Datasource test") - .master("local[2]") - .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") - .config("spark.kryo.registrator", "org.apache.spark.HoodieSparkKryoRegistrar") - .config("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension") + .config(getSparkConfForTest("Hoodie Datasource test")) .getOrCreate val innerStruct1 = new StructType().add("innerKey","string",false).add("innerValue", "long", true) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestTableSchemaResolverWithSparkSQL.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestTableSchemaResolverWithSparkSQL.scala index d9d5b59c8d76..70886d964445 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestTableSchemaResolverWithSparkSQL.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestTableSchemaResolverWithSparkSQL.scala @@ -18,120 +18,24 @@ package org.apache.hudi import org.apache.avro.Schema -import org.apache.commons.io.FileUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hudi.avro.HoodieAvroUtils import org.apache.hudi.avro.model.HoodieMetadataRecord -import org.apache.hudi.common.model._ import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.testutils.DataSourceTestUtils -import org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest -import org.apache.spark.SparkContext -import org.apache.spark.sql._ -import org.apache.spark.sql.hudi.HoodieSparkSessionExtension +import org.apache.spark.sql.SaveMode import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} -import org.junit.jupiter.api.{AfterEach, BeforeEach, Tag, Test} +import org.junit.jupiter.api.{Tag, Test} import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.CsvSource -import scala.collection.JavaConverters - /** * Test suite for TableSchemaResolver with SparkSqlWriter. */ @Tag("functional") -class TestTableSchemaResolverWithSparkSQL { - var spark: SparkSession = _ - var sqlContext: SQLContext = _ - var sc: SparkContext = _ - var tempPath: java.nio.file.Path = _ - var tempBootStrapPath: java.nio.file.Path = _ - var hoodieFooTableName = "hoodie_foo_tbl" - var tempBasePath: String = _ - var commonTableModifier: Map[String, String] = Map() - - case class StringLongTest(uuid: String, ts: Long) - - /** - * Setup method running before each test. - */ - @BeforeEach - def setUp(): Unit = { - initSparkContext() - tempPath = java.nio.file.Files.createTempDirectory("hoodie_test_path") - tempBootStrapPath = java.nio.file.Files.createTempDirectory("hoodie_test_bootstrap") - tempBasePath = tempPath.toAbsolutePath.toString - commonTableModifier = getCommonParams(tempPath, hoodieFooTableName, HoodieTableType.COPY_ON_WRITE.name()) - } - - /** - * Tear down method running after each test. - */ - @AfterEach - def tearDown(): Unit = { - cleanupSparkContexts() - FileUtils.deleteDirectory(tempPath.toFile) - FileUtils.deleteDirectory(tempBootStrapPath.toFile) - } - - /** - * Utility method for initializing the spark context. 
- */ - def initSparkContext(): Unit = { - spark = SparkSession.builder() - .config(getSparkConfForTest(hoodieFooTableName)) - .getOrCreate() - sc = spark.sparkContext - sc.setLogLevel("ERROR") - sqlContext = spark.sqlContext - } - - /** - * Utility method for cleaning up spark resources. - */ - def cleanupSparkContexts(): Unit = { - if (sqlContext != null) { - sqlContext.clearCache(); - sqlContext = null; - } - if (sc != null) { - sc.stop() - sc = null - } - if (spark != null) { - spark.close() - } - } - - /** - * Utility method for creating common params for writer. - * - * @param path Path for hoodie table - * @param hoodieFooTableName Name of hoodie table - * @param tableType Type of table - * @return Map of common params - */ - def getCommonParams(path: java.nio.file.Path, hoodieFooTableName: String, tableType: String): Map[String, String] = { - Map("path" -> path.toAbsolutePath.toString, - HoodieWriteConfig.TBL_NAME.key -> hoodieFooTableName, - "hoodie.insert.shuffle.parallelism" -> "1", - "hoodie.upsert.shuffle.parallelism" -> "1", - DataSourceWriteOptions.TABLE_TYPE.key -> tableType, - DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", - DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", - DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") - } - - /** - * Utility method for converting list of Row to list of Seq. - * - * @param inputList list of Row - * @return list of Seq - */ - def convertRowListToSeq(inputList: java.util.List[Row]): Seq[Row] = - JavaConverters.asScalaIteratorConverter(inputList.iterator).asScala.toSeq +class TestTableSchemaResolverWithSparkSQL extends HoodieSparkWriterTestBase { @Test def testTableSchemaResolverInMetadataTable(): Unit = { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/common/table/read/TestHoodieFileGroupReaderOnSpark.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/common/table/read/TestHoodieFileGroupReaderOnSpark.scala index 4da9f87e6f35..377e2dd9d7cf 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/common/table/read/TestHoodieFileGroupReaderOnSpark.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/common/table/read/TestHoodieFileGroupReaderOnSpark.scala @@ -23,9 +23,9 @@ import org.apache.avro.Schema import org.apache.hadoop.conf.Configuration import org.apache.hudi.common.config.HoodieReaderConfig.FILE_GROUP_READER_ENABLED import org.apache.hudi.common.engine.HoodieReaderContext +import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.{HoodieRecord, WriteOperationType} import org.apache.hudi.{AvroConversionUtils, SparkFileFormatInternalRowReaderContext} -import org.apache.hudi.common.fs.FSUtils import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.datasources.PartitionedFile import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat @@ -50,7 +50,7 @@ class TestHoodieFileGroupReaderOnSpark extends TestHoodieFileGroupReaderBase[Int def setup() { val sparkConf = new SparkConf sparkConf.set("spark.app.name", getClass.getName) - sparkConf.set("spark.master", "local[*]") + sparkConf.set("spark.master", "local[8]") sparkConf.set("spark.default.parallelism", "4") sparkConf.set("spark.sql.shuffle.partitions", "4") sparkConf.set("spark.driver.maxResultSize", "2g") diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestBasicSchemaEvolution.scala 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestBasicSchemaEvolution.scala index dfb69da29c00..ed7437fd5101 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestBasicSchemaEvolution.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestBasicSchemaEvolution.scala @@ -19,7 +19,7 @@ package org.apache.hudi.functional import org.apache.hadoop.fs.FileSystem import org.apache.hudi.HoodieConversionUtils.toJavaOption -import org.apache.hudi.common.model.{HoodieRecord, HoodieTableType, WriteOperationType} +import org.apache.hudi.common.model.{HoodieRecord, HoodieTableType, OverwriteWithLatestAvroPayload} import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.common.util import org.apache.hudi.config.HoodieWriteConfig @@ -31,7 +31,7 @@ import org.apache.hudi.{AvroConversionUtils, DataSourceWriteOptions, ScalaAssert import org.apache.spark.sql.hudi.HoodieSparkSessionExtension import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructField, StructType} import org.apache.spark.sql.{HoodieUnsafeUtils, Row, SaveMode, SparkSession, SparkSessionExtensions, functions} -import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.Assertions.{assertEquals} import org.junit.jupiter.api.{AfterEach, BeforeEach} import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.CsvSource @@ -49,6 +49,8 @@ class TestBasicSchemaEvolution extends HoodieSparkClientTestBase with ScalaAsser "hoodie.bulkinsert.shuffle.parallelism" -> "2", "hoodie.delete.shuffle.parallelism" -> "1", HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT.key() -> "true", + HoodieWriteConfig.WRITE_PAYLOAD_CLASS_NAME.key() -> classOf[OverwriteWithLatestAvroPayload].getName, + HoodieWriteConfig.WRITE_PAYLOAD_TYPE.key() -> "OVERWRITE_LATEST_AVRO", DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "timestamp", diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala index cb0209de979c..2014db073d91 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala @@ -22,30 +22,34 @@ import org.apache.hadoop.fs.{FileSystem, Path, PathFilter} import org.apache.hudi.DataSourceWriteOptions.{INLINE_CLUSTERING_ENABLE, KEYGENERATOR_CLASS_NAME} import org.apache.hudi.HoodieConversionUtils.toJavaOption import org.apache.hudi.QuickstartUtils.{convertToStringList, getQuickstartWriteConfigs} +import org.apache.hudi.avro.AvroSchemaCompatibility.SchemaIncompatibilityType +import org.apache.hudi.client.SparkRDDWriteClient import org.apache.hudi.client.common.HoodieSparkEngineContext import org.apache.hudi.common.config.TimestampKeyGeneratorConfig.{TIMESTAMP_INPUT_DATE_FORMAT, TIMESTAMP_OUTPUT_DATE_FORMAT, TIMESTAMP_TIMEZONE_FORMAT, TIMESTAMP_TYPE_FIELD} import org.apache.hudi.common.config.{HoodieCommonConfig, HoodieMetadataConfig} +import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType import org.apache.hudi.common.model.{HoodieRecord, 
WriteOperationType} import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline, TimelineUtils} import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.common.testutils.HoodieTestDataGenerator import org.apache.hudi.common.testutils.RawTripTestPayload.{deleteRecordsToStrings, recordsToStrings} -import org.apache.hudi.common.util +import org.apache.hudi.common.util.{ClusteringUtils, Option} +import org.apache.hudi.common.{HoodiePendingRollbackInfo, util} import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.config.metrics.HoodieMetricsConfig import org.apache.hudi.exception.ExceptionUtil.getRootCause -import org.apache.hudi.exception.HoodieException +import org.apache.hudi.exception.{HoodieException, SchemaBackwardsCompatibilityException} import org.apache.hudi.functional.CommonOptionUtils._ import org.apache.hudi.functional.TestCOWDataSource.convertColumnsToNullable import org.apache.hudi.hive.HiveSyncConfigHolder import org.apache.hudi.keygen._ import org.apache.hudi.keygen.constant.KeyGeneratorOptions import org.apache.hudi.metrics.{Metrics, MetricsReporterType} +import org.apache.hudi.table.HoodieSparkTable import org.apache.hudi.testutils.HoodieSparkClientTestBase import org.apache.hudi.util.JFunction import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers, QuickstartUtils, ScalaAssertionSupport} -import org.apache.hudi.common.fs.FSUtils import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.hudi.HoodieSparkSessionExtension @@ -97,10 +101,9 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup System.gc() } - @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) - def testShortNameStorage(recordType: HoodieRecordType) { - val (writeOpts, readOpts) = getWriterReaderOpts(recordType) + @Test + def testShortNameStorage(): Unit = { + val (writeOpts, readOpts) = getWriterReaderOpts() // Insert Operation val records = recordsToStrings(dataGen.generateInserts("000", 100)).toList @@ -489,6 +492,27 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup assertEquals(snapshotDF2.count(), (validRecordsFromBatch1 + validRecordsFromBatch2)) } + @Test + def bulkInsertCompositeKeys(): Unit = { + val (writeOpts, readOpts) = getWriterReaderOpts(HoodieRecordType.AVRO) + + // Insert Operation + val records = recordsToStrings(dataGen.generateInserts("000", 100)).toList + val inputDF = spark.read.json(spark.sparkContext.parallelize(records, 2)) + + val inputDf1 = inputDF.withColumn("new_col",lit("value1")) + val inputDf2 = inputDF.withColumn("new_col", lit(null).cast("String") ) + + inputDf1.union(inputDf2).write.format("hudi") + .options(writeOpts) + .option(DataSourceWriteOptions.RECORDKEY_FIELD.key, "_row_key,new_col") + .option(DataSourceWriteOptions.OPERATION.key(),"bulk_insert") + .mode(SaveMode.Overwrite) + .save(basePath) + + assertEquals(200, spark.read.format("org.apache.hudi").options(readOpts).load(basePath).count()) + } + /** * This tests the case that query by with a specified partition condition on hudi table which is * different between the value of the partition field and the actual partition path, @@ -565,10 +589,9 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup * archival should kick in and 2 commits should be archived. If schema is valid, no exception will be thrown. 
If not, * NPE will be thrown. */ - @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) - def testArchivalWithBulkInsert(recordType: HoodieRecordType): Unit = { - val (writeOpts, readOpts) = getWriterReaderOpts(recordType) + @Test + def testArchivalWithBulkInsert(): Unit = { + val (writeOpts, readOpts) = getWriterReaderOpts() var structType: StructType = null for (i <- 1 to 7) { @@ -697,10 +720,9 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup } } - @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) - def testOverWriteModeUseReplaceAction(recordType: HoodieRecordType): Unit = { - val (writeOpts, readOpts) = getWriterReaderOpts(recordType) + @Test + def testOverWriteModeUseReplaceAction(): Unit = { + val (writeOpts, readOpts) = getWriterReaderOpts() val records1 = recordsToStrings(dataGen.generateInserts("001", 5)).toList val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) inputDF1.write.format("org.apache.hudi") @@ -775,10 +797,9 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup assertEquals(expectedCount, hudiReadPathDF.count()) } - @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) - def testOverWriteTableModeUseReplaceAction(recordType: HoodieRecordType): Unit = { - val (writeOpts, readOpts) = getWriterReaderOpts(recordType) + @Test + def testOverWriteTableModeUseReplaceAction(): Unit = { + val (writeOpts, readOpts) = getWriterReaderOpts() val records1 = recordsToStrings(dataGen.generateInserts("001", 5)).toList val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) @@ -805,10 +826,9 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup assertEquals("replacecommit", commits(1)) } - @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) - def testOverWriteModeUseReplaceActionOnDisJointPartitions(recordType: HoodieRecordType): Unit = { - val (writeOpts, readOpts) = getWriterReaderOpts(recordType) + @Test + def testOverWriteModeUseReplaceActionOnDisJointPartitions(): Unit = { + val (writeOpts, readOpts) = getWriterReaderOpts() // step1: Write 5 records to hoodie table for partition1 DEFAULT_FIRST_PARTITION_PATH val records1 = recordsToStrings(dataGen.generateInsertsForPartition("001", 5, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)).toList @@ -865,10 +885,9 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup assertEquals("replacecommit", commits(2)) } - @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) - def testOverWriteTableModeUseReplaceActionOnDisJointPartitions(recordType: HoodieRecordType): Unit = { - val (writeOpts, readOpts) = getWriterReaderOpts(recordType) + @Test + def testOverWriteTableModeUseReplaceActionOnDisJointPartitions(): Unit = { + val (writeOpts, readOpts) = getWriterReaderOpts() // step1: Write 5 records to hoodie table for partition1 DEFAULT_FIRST_PARTITION_PATH val records1 = recordsToStrings(dataGen.generateInsertsForPartition("001", 5, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)).toList @@ -1004,10 +1023,9 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup }) } - @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) - def testWithAutoCommitOn(recordType: 
HoodieRecordType): Unit = { - val (writeOpts, readOpts) = getWriterReaderOpts(recordType) + @Test + def testWithAutoCommitOn(): Unit = { + val (writeOpts, readOpts) = getWriterReaderOpts() val records1 = recordsToStrings(dataGen.generateInserts("000", 100)).toList val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) @@ -1319,8 +1337,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup @ParameterizedTest @CsvSource(Array( - "true,false,AVRO", "true,true,AVRO", "false,true,AVRO", "false,false,AVRO", - "true,false,SPARK", "true,true,SPARK", "false,true,SPARK", "false,false,SPARK" + "true,false,AVRO", "true,true,AVRO", "false,true,AVRO", "false,false,AVRO" )) def testQueryCOWWithBasePathAndFileIndex(partitionEncode: Boolean, isMetadataEnabled: Boolean, recordType: HoodieRecordType): Unit = { testPartitionPruning( @@ -1517,11 +1534,10 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup } } - @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) - def testSaveAsTableInDifferentModes(recordType: HoodieRecordType): Unit = { + @Test + def testSaveAsTableInDifferentModes(): Unit = { val options = scala.collection.mutable.Map.empty ++ commonOpts ++ Map("path" -> basePath) - val (writeOpts, readOpts) = getWriterReaderOpts(recordType, options.toMap) + val (writeOpts, readOpts) = getWriterReaderOpts(HoodieRecordType.AVRO, options.toMap) // first use the Overwrite mode val records1 = recordsToStrings(dataGen.generateInserts("001", 5)).toList @@ -1584,10 +1600,9 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup assertEquals(spark.read.format("hudi").options(readOpts).load(basePath).count(), 9) } - @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) - def testMetricsReporterViaDataSource(recordType: HoodieRecordType): Unit = { - val (writeOpts, _) = getWriterReaderOpts(recordType, getQuickstartWriteConfigs.asScala.toMap) + @Test + def testMetricsReporterViaDataSource(): Unit = { + val (writeOpts, _) = getWriterReaderOpts(HoodieRecordType.AVRO, getQuickstartWriteConfigs.asScala.toMap) val dataGenerator = new QuickstartUtils.DataGenerator() val records = convertToStringList(dataGenerator.generateInserts(10)) @@ -1681,7 +1696,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup }) } - def getWriterReaderOpts(recordType: HoodieRecordType, + def getWriterReaderOpts(recordType: HoodieRecordType = HoodieRecordType.AVRO, opt: Map[String, String] = commonOpts, enableFileIndex: Boolean = DataSourceReadOptions.ENABLE_HOODIE_FILE_INDEX.defaultValue()): (Map[String, String], Map[String, String]) = { @@ -1773,8 +1788,8 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup (df2.write.format("hudi").options(hudiOptions).mode("append").save(basePath)) fail("Option succeeded, but was expected to fail.") } catch { - case ex: org.apache.hudi.exception.HoodieInsertException => { - assertTrue(ex.getMessage.equals("Failed insert schema compatibility check")) + case ex: SchemaBackwardsCompatibilityException => { + assertTrue(ex.getMessage.contains(SchemaIncompatibilityType.READER_FIELD_MISSING_DEFAULT_VALUE.name())) } case ex: Exception => { fail(ex) @@ -1783,7 +1798,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup // Try adding the string column again. 
This operation is expected to succeed since 'MAKE_NEW_COLUMNS_NULLABLE' // parameter has been set to 'true'. - hudiOptions = hudiOptions + (HoodieCommonConfig.MAKE_NEW_COLUMNS_NULLABLE.key() -> "true") + hudiOptions = hudiOptions + (HoodieCommonConfig.SET_NULL_FOR_MISSING_COLUMNS.key() -> "true") try { (df2.write.format("hudi").options(hudiOptions).mode("append").save(basePath)) } catch { @@ -1807,9 +1822,9 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup } @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) - def testInsertOverwriteCluster(recordType: HoodieRecordType): Unit = { - val (writeOpts, _) = getWriterReaderOpts(recordType) + @EnumSource(value = classOf[HoodieInstant.State], names = Array("REQUESTED", "INFLIGHT", "COMPLETED")) + def testInsertOverwriteCluster(firstClusteringState: HoodieInstant.State): Unit = { + val (writeOpts, _) = getWriterReaderOpts() // Insert Operation val records = recordsToStrings(dataGen.generateInserts("000", 100)).toList @@ -1819,6 +1834,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup INLINE_CLUSTERING_ENABLE.key() -> "true", "hoodie.clustering.inline.max.commits" -> "2", "hoodie.clustering.plan.strategy.sort.columns" -> "_row_key", + "hoodie.clustering.plan.strategy.max.num.groups" -> "1", "hoodie.insert.shuffle.parallelism" -> "4", "hoodie.upsert.shuffle.parallelism" -> "4", DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", @@ -1831,7 +1847,15 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup .mode(SaveMode.Overwrite) .save(basePath) - for (i <- 1 until 6) { + val metaClient = HoodieTableMetaClient.builder() + .setBasePath(basePath) + .setConf(hadoopConf) + .build() + + assertTrue(metaClient.getActiveTimeline.getLastClusteringInstant.isEmpty) + + var lastClustering: HoodieInstant = null + for (i <- 1 until 4) { val records = recordsToStrings(dataGen.generateInsertsForPartition("00" + i, 10, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)).toList val inputDF = spark.read.json(spark.sparkContext.parallelize(records, 2)) inputDF.write.format("hudi") @@ -1839,21 +1863,72 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OVERWRITE_OPERATION_OPT_VAL) .mode(SaveMode.Append) .save(basePath) + val lastInstant = metaClient.reloadActiveTimeline.getCommitsTimeline.lastInstant.get + if (i == 1 || i == 3) { + // Last instant is clustering + assertTrue(TimelineUtils.getCommitMetadata(lastInstant, metaClient.getActiveTimeline) + .getOperationType.equals(WriteOperationType.CLUSTER)) + assertTrue(ClusteringUtils.isClusteringInstant(metaClient.getActiveTimeline, lastInstant)) + lastClustering = lastInstant + assertEquals( + lastClustering, + metaClient.getActiveTimeline.getLastClusteringInstant.get) + } else { + assertTrue(TimelineUtils.getCommitMetadata(lastInstant, metaClient.getActiveTimeline) + .getOperationType.equals(WriteOperationType.INSERT_OVERWRITE)) + assertFalse(ClusteringUtils.isClusteringInstant(metaClient.getActiveTimeline, lastInstant)) + assertEquals( + lastClustering, + metaClient.getActiveTimeline.getLastClusteringInstant.get) + } + if (i == 1) { + val writeConfig = HoodieWriteConfig.newBuilder() + .forTable("hoodie_test") + .withPath(basePath) + .withProps(optsWithCluster) + .build() + if (firstClusteringState == HoodieInstant.State.INFLIGHT + || firstClusteringState == 
HoodieInstant.State.REQUESTED) { + // Move the clustering to inflight for testing + fs.delete(new Path(metaClient.getMetaPath, lastInstant.getFileName), false) + val inflightClustering = metaClient.reloadActiveTimeline.lastInstant.get + assertTrue(inflightClustering.isInflight) + assertEquals( + inflightClustering, + metaClient.getActiveTimeline.getLastClusteringInstant.get) + } + if (firstClusteringState == HoodieInstant.State.REQUESTED) { + val table = HoodieSparkTable.create(writeConfig, context) + table.rollbackInflightClustering( + metaClient.getActiveTimeline.getLastClusteringInstant.get, + new java.util.function.Function[String, Option[HoodiePendingRollbackInfo]] { + override def apply(commitToRollback: String): Option[HoodiePendingRollbackInfo] = { + new SparkRDDWriteClient(context, writeConfig).getTableServiceClient + .getPendingRollbackInfo(table.getMetaClient, commitToRollback, false) + } + }) + val requestedClustering = metaClient.reloadActiveTimeline.getCommitsTimeline.lastInstant.get + assertTrue(requestedClustering.isRequested) + assertEquals( + requestedClustering, + metaClient.getActiveTimeline.getLastClusteringInstant.get) + } + // This should not schedule any new clustering + new SparkRDDWriteClient(context, writeConfig) + .scheduleClustering(org.apache.hudi.common.util.Option.of(Map[String, String]())) + assertEquals(lastInstant.getTimestamp, + metaClient.reloadActiveTimeline.getCommitsTimeline.lastInstant.get.getTimestamp) + } } - - val metaClient = HoodieTableMetaClient.builder() - .setBasePath(basePath) - .setConf(hadoopConf) - .build() - val timeline = metaClient.getActiveTimeline - val instants = timeline.getAllCommitsTimeline.filterCompletedInstants.getInstants - assertEquals(9, instants.size) + val timeline = metaClient.reloadActiveTimeline + val instants = timeline.getCommitsTimeline.getInstants + assertEquals(6, instants.size) val replaceInstants = instants.filter(i => i.getAction.equals(HoodieTimeline.REPLACE_COMMIT_ACTION)).toList - assertEquals(8, replaceInstants.size) + assertEquals(5, replaceInstants.size) val clusterInstants = replaceInstants.filter(i => { TimelineUtils.getCommitMetadata(i, metaClient.getActiveTimeline).getOperationType.equals(WriteOperationType.CLUSTER) }) - assertEquals(3, clusterInstants.size) + assertEquals(2, clusterInstants.size) } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestGetPartitionValuesFromPath.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestGetPartitionValuesFromPath.scala index aadd9397f47d..9b6feacca0f1 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestGetPartitionValuesFromPath.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestGetPartitionValuesFromPath.scala @@ -18,7 +18,7 @@ package org.apache.hudi.functional -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestGetPartitionValuesFromPath extends HoodieSparkSqlTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala index 1cfd0e7fbbaf..896219ed6a83 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala @@ 
-27,7 +27,7 @@ import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType import org.apache.hudi.common.model._ import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.testutils.HoodieTestDataGenerator -import org.apache.hudi.common.testutils.RawTripTestPayload.{recordToString, recordsToStrings} +import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings import org.apache.hudi.common.util import org.apache.hudi.config.{HoodieCompactionConfig, HoodieIndexConfig, HoodieWriteConfig} import org.apache.hudi.functional.TestCOWDataSource.convertColumnsToNullable @@ -43,7 +43,7 @@ import org.apache.spark.sql.types.BooleanType import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.{CsvSource, EnumSource} +import org.junit.jupiter.params.provider.{CsvSource, EnumSource, ValueSource} import org.slf4j.LoggerFactory import java.util.function.Consumer @@ -950,10 +950,9 @@ class TestMORDataSource extends HoodieSparkClientTestBase with SparkDatasetMixin assertEquals(20, spark.read.format("hudi").options(readOpts).load(basePath).count()) } - @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) - def testTempFilesCleanForClustering(recordType: HoodieRecordType): Unit = { - val (writeOpts, readOpts) = getWriterReaderOpts(recordType) + @Test + def testTempFilesCleanForClustering(): Unit = { + val (writeOpts, readOpts) = getWriterReaderOpts() val records1 = recordsToStrings(dataGen.generateInserts("001", 1000)).asScala val inputDF1: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records1, 2)) @@ -1232,9 +1231,8 @@ class TestMORDataSource extends HoodieSparkClientTestBase with SparkDatasetMixin * The read-optimized query should read `fg1_dc1.parquet` only in this case. 
*/ @ParameterizedTest - @CsvSource(Array("true,AVRO", "true,SPARK", "false,AVRO", "false,SPARK")) - def testReadOptimizedQueryAfterInflightCompactionAndCompletedDeltaCommit(enableFileIndex: Boolean, - recordType: HoodieRecordType): Unit = { + @ValueSource(booleans = Array(true, false)) + def testReadOptimizedQueryAfterInflightCompactionAndCompletedDeltaCommit(enableFileIndex: Boolean): Unit = { val (tableName, tablePath) = ("hoodie_mor_ro_read_test_table", s"${basePath}_mor_test_table") val precombineField = "col3" val recordKeyField = "key" @@ -1252,7 +1250,7 @@ class TestMORDataSource extends HoodieSparkClientTestBase with SparkDatasetMixin "hoodie.upsert.shuffle.parallelism" -> "1") val pathForROQuery = getPathForROQuery(tablePath, !enableFileIndex, 0) - val (writeOpts, readOpts) = getWriterReaderOpts(recordType, options, enableFileIndex) + val (writeOpts, readOpts) = getWriterReaderOpts(HoodieRecordType.AVRO, options, enableFileIndex) // First batch with all inserts // Deltacommit1 (DC1, completed), writing file group 1 (fg1) @@ -1385,7 +1383,7 @@ class TestMORDataSource extends HoodieSparkClientTestBase with SparkDatasetMixin assertEquals(inputRows, readRows) } - def getWriterReaderOpts(recordType: HoodieRecordType, + def getWriterReaderOpts(recordType: HoodieRecordType = HoodieRecordType.AVRO, opt: Map[String, String] = commonOpts, enableFileIndex: Boolean = DataSourceReadOptions.ENABLE_HOODIE_FILE_INDEX.defaultValue()): (Map[String, String], Map[String, String]) = { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndex.scala index 56866e7bf40a..b5304cd2e23c 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndex.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndex.scala @@ -367,7 +367,7 @@ class TestRecordLevelIndex extends RecordLevelIndexTestBase { saveMode = SaveMode.Append) hudiOpts += (HoodieMetadataConfig.RECORD_INDEX_ENABLE_PROP.key -> "false") - metaClient.getTableConfig.setMetadataPartitionState(metaClient, MetadataPartitionType.RECORD_INDEX, false) + metaClient.getTableConfig.setMetadataPartitionState(metaClient, MetadataPartitionType.RECORD_INDEX.getPartitionPath, false) doWriteAndValidateDataAndRecordIndex(hudiOpts, operation = DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL, diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndexWithSQL.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndexWithSQL.scala index 8e235960fba3..97fdc1e10b21 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndexWithSQL.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndexWithSQL.scala @@ -26,7 +26,8 @@ import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Expression, GreaterThan, GreaterThanOrEqual, In, Literal, Or} import org.apache.spark.sql.types.StringType import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} -import org.junit.jupiter.api.Tag +import org.junit.jupiter.api.io.TempDir +import org.junit.jupiter.api.{Tag, Test} import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.ValueSource @@ -155,4 +156,36 @@ 
class TestRecordLevelIndexWithSQL extends RecordLevelIndexTestBase { val readDf = spark.read.format("hudi").options(hudiOpts).load(basePath) readDf.registerTempTable(sqlTempTable) } + + @Test + def testInFilterOnNonRecordKey(): Unit = { + var hudiOpts = commonOpts + hudiOpts = hudiOpts + ( + DataSourceWriteOptions.TABLE_TYPE.key -> HoodieTableType.COPY_ON_WRITE.name(), + DataSourceReadOptions.ENABLE_DATA_SKIPPING.key -> "true") + + val dummyTablePath = tempDir.resolve("dummy_table").toAbsolutePath.toString + spark.sql( + s""" + |create table dummy_table ( + | record_key_col string, + | not_record_key_col string, + | partition_key_col string + |) using hudi + | options ( + | primaryKey ='record_key_col', + | hoodie.metadata.enable = 'true', + | hoodie.metadata.record.index.enable = 'true', + | hoodie.datasource.write.recordkey.field = 'record_key_col', + | hoodie.enable.data.skipping = 'true' + | ) + | partitioned by(partition_key_col) + | location '$dummyTablePath' + """.stripMargin) + spark.sql(s"insert into dummy_table values('row1', 'row2', 'p1')") + spark.sql(s"insert into dummy_table values('row2', 'row1', 'p2')") + spark.sql(s"insert into dummy_table values('row3', 'row1', 'p2')") + + assertEquals(2, spark.read.format("hudi").options(hudiOpts).load(dummyTablePath).filter("not_record_key_col in ('row1', 'abc')").count()) + } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala index b554aa735ec8..80d151d5b5ed 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala @@ -28,19 +28,16 @@ import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.table.timeline.TimelineUtils import org.apache.hudi.common.testutils.HoodieTestDataGenerator import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings +import org.apache.hudi.hadoop.fs.HadoopFSUtils import org.apache.hudi.keygen.NonpartitionedKeyGenerator import org.apache.hudi.{DataSourceReadOptions, HoodieSparkUtils} -import org.apache.hudi.common.fs.FSUtils -import org.apache.hudi.hadoop.fs.HadoopFSUtils - import org.apache.spark.sql -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase import org.apache.spark.sql.{Dataset, Row} import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.scalatest.Inspectors.forAll import java.io.File - import scala.collection.JavaConversions._ @SparkSQLCoreFlow diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSqlStatement.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSqlStatement.scala index e120cc00fc57..607b99e87b85 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSqlStatement.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSqlStatement.scala @@ -18,7 +18,7 @@ package org.apache.hudi.functional import org.apache.hudi.common.util.FileIOUtils -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestSqlStatement extends HoodieSparkSqlTestBase { val STATE_INIT = 0 diff --git 
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/SpaceCurveOptimizeBenchmark.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/SpaceCurveOptimizeBenchmark.scala index 273303fdae63..b185a44dc6f1 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/SpaceCurveOptimizeBenchmark.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/SpaceCurveOptimizeBenchmark.scala @@ -23,7 +23,7 @@ import org.apache.hudi.ColumnStatsIndexHelper.buildColumnStatsTableFor import org.apache.hudi.config.HoodieClusteringConfig.LayoutOptimizationStrategy import org.apache.hudi.sort.SpaceCurveSortingHelper import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase import org.apache.spark.sql.types.{IntegerType, StructField} import org.junit.jupiter.api.{Disabled, Tag, Test} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestFunctionalIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestFunctionalIndex.scala index a555378713a6..727daef00583 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestFunctionalIndex.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestFunctionalIndex.scala @@ -27,12 +27,13 @@ import org.apache.hudi.common.util.Option import org.apache.hudi.hive.HiveSyncConfigHolder._ import org.apache.hudi.hive.{HiveSyncTool, HoodieHiveSyncClient} import org.apache.hudi.hive.testutils.HiveTestUtil +import org.apache.hudi.metadata.MetadataPartitionType import org.apache.hudi.sync.common.HoodieSyncConfig.{META_SYNC_BASE_PATH, META_SYNC_DATABASE_NAME, META_SYNC_NO_PARTITION_METADATA, META_SYNC_TABLE_NAME} import org.apache.spark.sql.catalyst.analysis.Analyzer import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.parser.ParserInterface -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase import org.apache.spark.sql.hudi.command.{CreateIndexCommand, ShowIndexesCommand} +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} class TestFunctionalIndex extends HoodieSparkSqlTestBase { @@ -186,7 +187,9 @@ class TestFunctionalIndex extends HoodieSparkSqlTestBase { | options ( | primaryKey ='id', | type = '$tableType', - | preCombineField = 'ts' + | preCombineField = 'ts', + | hoodie.metadata.record.index.enable = 'true', + | hoodie.datasource.write.recordkey.field = 'id' | ) | partitioned by(ts) | location '$basePath' @@ -195,6 +198,13 @@ class TestFunctionalIndex extends HoodieSparkSqlTestBase { spark.sql(s"insert into $tableName values(2, 'a2', 10, 1001)") spark.sql(s"insert into $tableName values(3, 'a3', 10, 1002)") + var metaClient = HoodieTableMetaClient.builder() + .setBasePath(basePath) + .setConf(spark.sessionState.newHadoopConf()) + .build() + + assert(metaClient.getTableConfig.isMetadataPartitionAvailable(MetadataPartitionType.RECORD_INDEX)) + val sqlParser: ParserInterface = spark.sessionState.sqlParser val analyzer: Analyzer = spark.sessionState.analyzer @@ -202,8 +212,9 @@ class TestFunctionalIndex extends HoodieSparkSqlTestBase { var resolvedLogicalPlan = analyzer.execute(logicalPlan) 
assertTableIdentifier(resolvedLogicalPlan.asInstanceOf[ShowIndexesCommand].table, databaseName, tableName) - val createIndexSql = s"create index idx_datestr on $tableName using column_stats(ts) options(func='from_unixtime', format='yyyy-MM-dd')" + var createIndexSql = s"create index idx_datestr on $tableName using column_stats(ts) options(func='from_unixtime', format='yyyy-MM-dd')" logicalPlan = sqlParser.parsePlan(createIndexSql) + resolvedLogicalPlan = analyzer.execute(logicalPlan) assertTableIdentifier(resolvedLogicalPlan.asInstanceOf[CreateIndexCommand].table, databaseName, tableName) assertResult("idx_datestr")(resolvedLogicalPlan.asInstanceOf[CreateIndexCommand].indexName) @@ -211,14 +222,32 @@ class TestFunctionalIndex extends HoodieSparkSqlTestBase { assertResult(false)(resolvedLogicalPlan.asInstanceOf[CreateIndexCommand].ignoreIfExists) spark.sql(createIndexSql) - val metaClient = HoodieTableMetaClient.builder() + metaClient = HoodieTableMetaClient.builder() .setBasePath(basePath) .setConf(spark.sessionState.newHadoopConf()) .build() assertTrue(metaClient.getFunctionalIndexMetadata.isPresent) - val functionalIndexMetadata = metaClient.getFunctionalIndexMetadata.get() + var functionalIndexMetadata = metaClient.getFunctionalIndexMetadata.get() assertEquals(1, functionalIndexMetadata.getIndexDefinitions.size()) assertEquals("func_index_idx_datestr", functionalIndexMetadata.getIndexDefinitions.get("func_index_idx_datestr").getIndexName) + + // Verify one can create more than one functional index + createIndexSql = s"create index name_lower on $tableName using column_stats(ts) options(func='identity')" + spark.sql(createIndexSql) + metaClient = HoodieTableMetaClient.builder() + .setBasePath(basePath) + .setConf(spark.sessionState.newHadoopConf()) + .build() + functionalIndexMetadata = metaClient.getFunctionalIndexMetadata.get() + assertEquals(2, functionalIndexMetadata.getIndexDefinitions.size()) + assertEquals("func_index_name_lower", functionalIndexMetadata.getIndexDefinitions.get("func_index_name_lower").getIndexName) + + // Ensure that both the indexes are tracked correctly in metadata partition config + val mdtPartitions = metaClient.getTableConfig.getMetadataPartitions + assert(mdtPartitions.contains("func_index_name_lower") && mdtPartitions.contains("func_index_idx_datestr")) + + // [HUDI-7472] After creating functional index, the existing MDT partitions should still be available + assert(metaClient.getTableConfig.isMetadataPartitionAvailable(MetadataPartitionType.RECORD_INDEX)) } } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestIndexSyntax.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestIndexSyntax.scala index 43b4063260fa..158d8ca4f018 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestIndexSyntax.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestIndexSyntax.scala @@ -23,8 +23,8 @@ import org.apache.hudi.HoodieSparkUtils import org.apache.spark.sql.catalyst.analysis.Analyzer import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.parser.ParserInterface -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase import org.apache.spark.sql.hudi.command.{CreateIndexCommand, DropIndexCommand, ShowIndexesCommand} +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestIndexSyntax extends HoodieSparkSqlTestBase { 
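The reworked TestFunctionalIndex assertions above reduce to the pattern sketched here (a minimal sketch, assuming the test fixture supplies `spark` and `basePath` and the table was created with the record index enabled, as in the test): functional index definitions are recorded in the table's functional index metadata, and each created index is additionally tracked as a metadata table partition without dropping partitions that already existed (HUDI-7472).

    import org.apache.hudi.common.table.HoodieTableMetaClient
    import org.apache.hudi.metadata.MetadataPartitionType

    // Re-read the table config after the CREATE INDEX statements have run.
    val metaClient = HoodieTableMetaClient.builder()
      .setBasePath(basePath)
      .setConf(spark.sessionState.newHadoopConf())
      .build()

    // Index definitions live in the functional index metadata ...
    assert(metaClient.getFunctionalIndexMetadata.isPresent)
    assert(metaClient.getFunctionalIndexMetadata.get()
      .getIndexDefinitions.containsKey("func_index_idx_datestr"))

    // ... and each index is also tracked as a metadata table partition,
    // alongside partitions that were already available (e.g. the record index).
    val mdtPartitions = metaClient.getTableConfig.getMetadataPartitions
    assert(mdtPartitions.contains("func_index_idx_datestr"))
    assert(metaClient.getTableConfig.isMetadataPartitionAvailable(MetadataPartitionType.RECORD_INDEX))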
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestSecondaryIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestSecondaryIndex.scala index 816fecc38f51..26f1a901d89c 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestSecondaryIndex.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestSecondaryIndex.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.hudi.command.index import org.apache.hudi.HoodieSparkUtils -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestSecondaryIndex extends HoodieSparkSqlTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/HoodieSparkSqlTestBase.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/HoodieSparkSqlTestBase.scala similarity index 98% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/HoodieSparkSqlTestBase.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/HoodieSparkSqlTestBase.scala index b9628d05af14..b101e838c841 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/HoodieSparkSqlTestBase.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/HoodieSparkSqlTestBase.scala @@ -15,12 +15,11 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.common import org.apache.hadoop.fs.Path import org.apache.hudi.HoodieSparkRecordMerger import org.apache.hudi.common.config.HoodieStorageConfig -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieAvroRecordMerger import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType import org.apache.hudi.common.table.HoodieTableMetaClient @@ -30,10 +29,9 @@ import org.apache.hudi.exception.ExceptionUtil.getRootCause import org.apache.hudi.hadoop.fs.HadoopFSUtils import org.apache.hudi.index.inmemory.HoodieInMemoryHashIndex import org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest - import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.checkMessageContains +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.checkMessageContains import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.util.Utils import org.joda.time.DateTimeZone diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieInternalRowUtils.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestHoodieInternalRowUtils.scala similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieInternalRowUtils.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestHoodieInternalRowUtils.scala index 35afff918b9f..2ce4393c6a8c 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieInternalRowUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestHoodieInternalRowUtils.scala @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.common import org.apache.avro.generic.GenericData import org.apache.avro.{LogicalTypes, Schema} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieOptionConfig.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestHoodieOptionConfig.scala similarity index 95% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieOptionConfig.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestHoodieOptionConfig.scala index 985300c44c25..42db138671e9 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieOptionConfig.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestHoodieOptionConfig.scala @@ -15,12 +15,13 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.common import org.apache.hudi.DataSourceWriteOptions import org.apache.hudi.common.model.{DefaultHoodieRecordPayload, HoodieRecordMerger, OverwriteWithLatestAvroPayload} import org.apache.hudi.common.table.HoodieTableConfig import org.apache.hudi.testutils.SparkClientFunctionalTestHarness +import org.apache.spark.sql.hudi.HoodieOptionConfig import org.apache.spark.sql.types._ import org.junit.jupiter.api.Assertions.assertTrue import org.junit.jupiter.api.Test @@ -35,14 +36,14 @@ class TestHoodieOptionConfig extends SparkClientFunctionalTestHarness { assertTrue(with1.size == 5) assertTrue(with1("primaryKey") == "id") assertTrue(with1("type") == "cow") - assertTrue(with1("payloadClass") == classOf[OverwriteWithLatestAvroPayload].getName) + assertTrue(with1("payloadClass") == classOf[DefaultHoodieRecordPayload].getName) assertTrue(with1("recordMergerStrategy") == HoodieRecordMerger.DEFAULT_MERGER_STRATEGY_UUID) assertTrue(with1("payloadType") == DataSourceWriteOptions.PAYLOAD_TYPE.defaultValue) val ops2 = Map("primaryKey" -> "id", "preCombineField" -> "timestamp", "type" -> "mor", - "payloadClass" -> classOf[DefaultHoodieRecordPayload].getName, + "payloadClass" -> classOf[OverwriteWithLatestAvroPayload].getName, "recordMergerStrategy" -> HoodieRecordMerger.DEFAULT_MERGER_STRATEGY_UUID, "payloadType" -> DataSourceWriteOptions.PAYLOAD_TYPE.defaultValue ) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestLazyPartitionPathFetching.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestLazyPartitionPathFetching.scala similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestLazyPartitionPathFetching.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestLazyPartitionPathFetching.scala index e2635c0cba87..aa6cd64fcb3e 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestLazyPartitionPathFetching.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestLazyPartitionPathFetching.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.common class TestLazyPartitionPathFetching extends HoodieSparkSqlTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestNestedSchemaPruningOptimization.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestNestedSchemaPruningOptimization.scala similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestNestedSchemaPruningOptimization.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestNestedSchemaPruningOptimization.scala index cd4f90da4e79..62b2352d9c7f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestNestedSchemaPruningOptimization.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestNestedSchemaPruningOptimization.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.common import org.apache.hudi.common.config.HoodieCommonConfig import org.apache.hudi.config.HoodieWriteConfig diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestPartitionPushDownWhenListingPaths.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestPartitionPushDownWhenListingPaths.scala similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestPartitionPushDownWhenListingPaths.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestPartitionPushDownWhenListingPaths.scala index 1b5e590913f3..7740da5e664c 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestPartitionPushDownWhenListingPaths.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestPartitionPushDownWhenListingPaths.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.common import org.apache.hudi.common.config.HoodieMetadataConfig diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSqlConf.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestSqlConf.scala similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSqlConf.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestSqlConf.scala index dbf6d173865e..26b21e95437b 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSqlConf.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/TestSqlConf.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.common import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path @@ -23,12 +23,11 @@ import org.apache.hudi.DataSourceReadOptions._ import org.apache.hudi.common.config.DFSPropertiesConfiguration import org.apache.hudi.common.model.HoodieTableType import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient} +import org.scalatest.BeforeAndAfter import java.io.File import java.nio.file.{Files, Paths} -import org.scalatest.BeforeAndAfter - class TestSqlConf extends HoodieSparkSqlTestBase with BeforeAndAfter { def setEnv(key: String, value: String): String = { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestAlterTable.scala similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestAlterTable.scala index b3cd9e497f55..268f5a87bc16 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestAlterTable.scala @@ -15,12 +15,14 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.ddl import org.apache.hudi.HoodieSparkUtils import org.apache.hudi.common.model.HoodieRecord import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.hudi.HoodieSqlCommonUtils +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase import org.junit.jupiter.api.Assertions.assertFalse import scala.collection.JavaConverters._ diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableAddPartition.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestAlterTableAddPartition.scala similarity index 97% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableAddPartition.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestAlterTableAddPartition.scala index 68c6c115448e..3ad6f113a827 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableAddPartition.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestAlterTableAddPartition.scala @@ -15,12 +15,14 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.ddl + +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestAlterTableAddPartition extends HoodieSparkSqlTestBase { test("Add partition for non-partitioned table") { - withTable(generateTableName){ tableName => + withTable(generateTableName) { tableName => // create table spark.sql( s""" diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestAlterTableDropPartition.scala similarity index 98% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestAlterTableDropPartition.scala index a032f0c3081c..fc9c2c55b7b8 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestAlterTableDropPartition.scala @@ -15,19 +15,20 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.ddl import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.avro.model.{HoodieCleanMetadata, HoodieCleanPartitionMetadata} import org.apache.hudi.common.model.{HoodieCleaningPolicy, HoodieCommitMetadata} import org.apache.hudi.common.table.HoodieTableMetaClient -import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieInstant} +import org.apache.hudi.common.table.timeline.HoodieInstant import org.apache.hudi.common.util.{PartitionPathEncodeUtils, StringUtils, Option => HOption} import org.apache.hudi.config.{HoodieCleanConfig, HoodieWriteConfig} import org.apache.hudi.keygen.{ComplexKeyGenerator, SimpleKeyGenerator} import org.apache.hudi.{HoodieCLIUtils, HoodieSparkUtils} import org.apache.spark.sql.SaveMode -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.getLastCleanMetadata +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.getLastCleanMetadata import org.junit.jupiter.api.Assertions import org.junit.jupiter.api.Assertions.assertTrue @@ -621,7 +622,7 @@ class TestAlterTableDropPartition extends HoodieSparkSqlTestBase { } test("Test drop partition with wildcards") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => val tableName = generateTableName spark.sql( @@ -653,6 +654,6 @@ class TestAlterTableDropPartition extends HoodieSparkSqlTestBase { Seq("2023-09-01") ) } - }) + } } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCreateTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestCreateTable.scala similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCreateTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestCreateTable.scala index 36e585eef98f..5e6beb2ef00b 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCreateTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestCreateTable.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.ddl import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.HoodieSparkUtils @@ -28,7 +28,9 @@ import org.apache.hudi.keygen.constant.KeyGeneratorType import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, HoodieCatalogTable} -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.getLastCommitMetadata +import org.apache.spark.sql.hudi.HoodieSqlCommonUtils +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.getLastCommitMetadata import org.apache.spark.sql.types._ import org.junit.jupiter.api.Assertions.{assertFalse, assertTrue} @@ -535,9 +537,10 @@ class TestCreateTable extends HoodieSparkSqlTestBase { )(table.schema.fields) // Should not include non.hoodie.property - assertResult(2)(table.properties.size) + assertResult(3)(table.properties.size) assertResult("cow")(table.properties("type")) assertResult("id,name")(table.properties("primaryKey")) + assertResult("hudi")(table.properties("provider")) } } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSpark3DDL.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestSpark3DDL.scala similarity index 92% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSpark3DDL.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestSpark3DDL.scala index 6a64c69021c8..5e43d714a5ec 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSpark3DDL.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestSpark3DDL.scala @@ -15,21 +15,22 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.ddl import org.apache.hadoop.fs.Path -import org.apache.hudi.DataSourceWriteOptions.{PARTITIONPATH_FIELD_OPT_KEY, PRECOMBINE_FIELD_OPT_KEY, RECORDKEY_FIELD_OPT_KEY, SPARK_SQL_INSERT_INTO_OPERATION, TABLE_NAME} -import org.apache.hudi.QuickstartUtils.{DataGenerator, convertToStringList, getQuickstartWriteConfigs} import org.apache.hudi.common.config.HoodieStorageConfig import org.apache.hudi.common.model.HoodieRecord import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.common.testutils.{HoodieTestDataGenerator, RawTripTestPayload} import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.index.inmemory.HoodieInMemoryHashIndex import org.apache.hudi.testutils.DataSourceTestUtils -import org.apache.hudi.{DataSourceWriteOptions, HoodieSparkRecordMerger, HoodieSparkUtils} +import org.apache.hudi.{DataSourceWriteOptions, HoodieSparkRecordMerger, HoodieSparkUtils, QuickstartUtils} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.functions.{arrays_zip, col, expr, lit} +import org.apache.spark.sql.hudi.HoodieSqlCommonUtils +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase import org.apache.spark.sql.types.StringType import org.apache.spark.sql.{Row, SaveMode, SparkSession} @@ -75,7 +76,7 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { val tableName = generateTableName val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" if (HoodieSparkUtils.gteqSpark3_1) { - spark.sql("set " + SPARK_SQL_INSERT_INTO_OPERATION.key + "=upsert") + spark.sql("set " + DataSourceWriteOptions.SPARK_SQL_INSERT_INTO_OPERATION.key + "=upsert") spark.sql("set hoodie.schema.on.read.enable=true") // NOTE: This is required since as this tests use type coercions which were only permitted in Spark 2.x // and are disallowed now by default in Spark 3.x @@ -136,14 +137,14 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { ) spark.sessionState.catalog.dropTable(TableIdentifier(tableName), true, true) spark.sessionState.catalog.refreshTable(TableIdentifier(tableName)) - spark.sessionState.conf.unsetConf(SPARK_SQL_INSERT_INTO_OPERATION.key) + spark.sessionState.conf.unsetConf(DataSourceWriteOptions.SPARK_SQL_INSERT_INTO_OPERATION.key) } } }) } test("Test alter column types 2") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => val tableName = generateTableName val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" @@ -176,7 +177,7 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { ) } } - }) + } } test("Test Enable and Disable Schema on read") { @@ -232,7 +233,7 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { } test("Test alter table properties and add rename drop column") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => val tableName = generateTableName val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" @@ -242,7 +243,7 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { if (HoodieSparkUtils.gteqSpark3_1) { spark.sql("set hoodie.schema.on.read.enable=true") - spark.sql("set " + SPARK_SQL_INSERT_INTO_OPERATION.key + "=upsert") + spark.sql("set " + DataSourceWriteOptions.SPARK_SQL_INSERT_INTO_OPERATION.key + "=upsert") // NOTE: This is required since as this tests use type 
coercions which were only permitted in Spark 2.x // and are disallowed now by default in Spark 3.x spark.sql("set spark.sql.storeAssignmentPolicy=legacy") @@ -335,8 +336,8 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { spark.sql(s"select id, col1_new, col2 from $tableName where id = 1 or id = 6 or id = 2 or id = 11 order by id").show(false) } } - spark.sessionState.conf.unsetConf(SPARK_SQL_INSERT_INTO_OPERATION.key) - }) + spark.sessionState.conf.unsetConf(DataSourceWriteOptions.SPARK_SQL_INSERT_INTO_OPERATION.key) + } } test("Test Chinese table ") { @@ -346,7 +347,7 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" if (HoodieSparkUtils.gteqSpark3_1) { spark.sql("set hoodie.schema.on.read.enable=true") - spark.sql("set " + SPARK_SQL_INSERT_INTO_OPERATION.key + "=upsert") + spark.sql("set " + DataSourceWriteOptions.SPARK_SQL_INSERT_INTO_OPERATION.key + "=upsert") spark.sql( s""" |create table $tableName ( @@ -387,13 +388,13 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { ) } } - spark.sessionState.conf.unsetConf(SPARK_SQL_INSERT_INTO_OPERATION.key) + spark.sessionState.conf.unsetConf(DataSourceWriteOptions.SPARK_SQL_INSERT_INTO_OPERATION.key) }) } test("Test alter column by add rename and drop") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => val tableName = generateTableName val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" @@ -453,7 +454,7 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { validateInternalSchema(tablePath, isDropColumn = false, currentMaxColumnId = maxColumnId) } } - }) + } } private def validateInternalSchema(basePath: String, isDropColumn: Boolean, currentMaxColumnId: Int): Unit = { @@ -543,8 +544,8 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { } test("Test alter column with complex schema") { - withRecordType()(withTempDir { tmp => - withSQLConf(s"$SPARK_SQL_INSERT_INTO_OPERATION" -> "upsert", + withTempDir { tmp => + withSQLConf(s"${DataSourceWriteOptions.SPARK_SQL_INSERT_INTO_OPERATION}" -> "upsert", "hoodie.schema.on.read.enable" -> "true", "spark.sql.parquet.enableNestedColumnVectorizedReader" -> "false") { val tableName = generateTableName @@ -628,7 +629,7 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { ) } } - }) + } } test("Test schema auto evolution complex") { @@ -711,36 +712,38 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { val tableName = generateTableName val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" if (HoodieSparkUtils.gteqSpark3_1) { - val dataGen = new DataGenerator - val inserts = convertToStringList(dataGen.generateInserts(10)) + val dataGen = new QuickstartUtils.DataGenerator + val inserts = QuickstartUtils.convertToStringList(dataGen.generateInserts(10)) val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2)) + .withColumn("ts", lit("20240404000000")) // to make test determinate for HOODIE_AVRO_DEFAULT payload df.write.format("hudi"). - options(getQuickstartWriteConfigs). + options(QuickstartUtils.getQuickstartWriteConfigs). option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY, tableType). - option(PRECOMBINE_FIELD_OPT_KEY, "ts"). - option(RECORDKEY_FIELD_OPT_KEY, "uuid"). - option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath"). + option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "ts"). + option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "uuid"). 
+ option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "partitionpath"). option("hoodie.schema.on.read.enable","true"). - option(TABLE_NAME.key(), tableName). + option(DataSourceWriteOptions.TABLE_NAME.key(), tableName). option("hoodie.table.name", tableName). mode("overwrite"). save(tablePath) - val updates = convertToStringList(dataGen.generateUpdates(10)) + val updates = QuickstartUtils.convertToStringList(dataGen.generateUpdates(10)) // type change: fare (double -> String) // add new column and drop a column val dfUpdate = spark.read.json(spark.sparkContext.parallelize(updates, 2)) .withColumn("fare", expr("cast(fare as string)")) .withColumn("addColumn", lit("new")) + .withColumn("ts", lit("20240404000005")) // to make test determinate for HOODIE_AVRO_DEFAULT payload dfUpdate.drop("begin_lat").write.format("hudi"). - options(getQuickstartWriteConfigs). + options(QuickstartUtils.getQuickstartWriteConfigs). option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY, tableType). - option(PRECOMBINE_FIELD_OPT_KEY, "ts"). - option(RECORDKEY_FIELD_OPT_KEY, "uuid"). - option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath"). + option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "ts"). + option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "uuid"). + option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "partitionpath"). option("hoodie.schema.on.read.enable","true"). option("hoodie.datasource.write.reconcile.schema","true"). - option(TABLE_NAME.key(), tableName). + option(DataSourceWriteOptions.TABLE_NAME.key(), tableName). option("hoodie.table.name", tableName). mode("append"). save(tablePath) @@ -758,35 +761,38 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { spark.sql(s"select * from hudi_trips_snapshot").show(false) // test insert_over_write + update again - val overwrite = convertToStringList(dataGen.generateInserts(10)) + val overwrite = QuickstartUtils.convertToStringList(dataGen.generateInserts(10)) val dfOverWrite = spark. read.json(spark.sparkContext.parallelize(overwrite, 2)). filter("partitionpath = 'americas/united_states/san_francisco'") + .withColumn("ts", lit("20240404000010")) // to make test determinate for HOODIE_AVRO_DEFAULT payload .withColumn("fare", expr("cast(fare as string)")) // fare now in table is string type, we forbid convert string to double. dfOverWrite.write.format("hudi"). - options(getQuickstartWriteConfigs). + options(QuickstartUtils.getQuickstartWriteConfigs). option("hoodie.datasource.write.operation","insert_overwrite"). - option(PRECOMBINE_FIELD_OPT_KEY, "ts"). - option(RECORDKEY_FIELD_OPT_KEY, "uuid"). - option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath"). + option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "ts"). + option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "uuid"). + option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "partitionpath"). option("hoodie.schema.on.read.enable","true"). option("hoodie.datasource.write.reconcile.schema","true"). - option(TABLE_NAME.key(), tableName). + option(DataSourceWriteOptions.TABLE_NAME.key(), tableName). option("hoodie.table.name", tableName). mode("append"). 
save(tablePath) spark.read.format("hudi").load(tablePath).show(false) - val updatesAgain = convertToStringList(dataGen.generateUpdates(10)) - val dfAgain = spark.read.json(spark.sparkContext.parallelize(updatesAgain, 2)).withColumn("fare", expr("cast(fare as string)")) + val updatesAgain = QuickstartUtils.convertToStringList(dataGen.generateUpdates(10)) + val dfAgain = spark.read.json(spark.sparkContext.parallelize(updatesAgain, 2)). + withColumn("fare", expr("cast(fare as string)")). + withColumn("ts", lit("20240404000015")) // to make test determinate for HOODIE_AVRO_DEFAULT payload dfAgain.write.format("hudi"). - options(getQuickstartWriteConfigs). - option(PRECOMBINE_FIELD_OPT_KEY, "ts"). - option(RECORDKEY_FIELD_OPT_KEY, "uuid"). - option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath"). + options(QuickstartUtils.getQuickstartWriteConfigs). + option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "ts"). + option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "uuid"). + option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "partitionpath"). option("hoodie.schema.on.read.enable","true"). option("hoodie.datasource.write.reconcile.schema","true"). - option(TABLE_NAME.key(), tableName). + option(DataSourceWriteOptions.TABLE_NAME.key(), tableName). option("hoodie.table.name", tableName). mode("append"). save(tablePath) @@ -880,6 +886,9 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { // Not checking answer as this is an unsafe casting operation, just need to make sure that error is not thrown spark.sql(s"select id, name, cast(price as string), ts from $tableName") + + // clear after using INMEMORY index + HoodieInMemoryHashIndex.clear() } } } @@ -945,6 +954,9 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { Seq(11, "a11", "-10.04", 1000), Seq(12, "a12", "-10.04", 1000) ) + + // clear after using INMEMORY index + HoodieInMemoryHashIndex.clear() } } } @@ -1010,6 +1022,9 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { Seq(11, "a11", "-10.04", 1000), Seq(12, "a12", "-10.04", 1000) ) + + // clear after using INMEMORY index + HoodieInMemoryHashIndex.clear() } } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCDCForSparkSQL.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestCDCForSparkSQL.scala similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCDCForSparkSQL.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestCDCForSparkSQL.scala index a799ce8f787d..59f9eed83b0a 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCDCForSparkSQL.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestCDCForSparkSQL.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.hudi.DataSourceReadOptions._ import org.apache.hudi.DataSourceWriteOptions.SPARK_SQL_INSERT_INTO_OPERATION @@ -23,6 +23,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.table.cdc.HoodieCDCSupplementalLoggingMode.{DATA_BEFORE, DATA_BEFORE_AFTER, OP_KEY_ONLY} import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase import org.junit.jupiter.api.Assertions.assertEquals class TestCDCForSparkSQL extends HoodieSparkSqlTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCompactionTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestCompactionTable.scala similarity index 97% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCompactionTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestCompactionTable.scala index 568e3569725c..31948c3298da 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCompactionTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestCompactionTable.scala @@ -15,12 +15,14 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml + +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestCompactionTable extends HoodieSparkSqlTestBase { test("Test compaction table") { - withRecordType()(withTempDir {tmp => + withRecordType()(withTempDir { tmp => val tableName = generateTableName spark.sql( s""" @@ -75,7 +77,7 @@ class TestCompactionTable extends HoodieSparkSqlTestBase { } test("Test compaction path") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => val tableName = generateTableName spark.sql( s""" @@ -132,6 +134,6 @@ class TestCompactionTable extends HoodieSparkSqlTestBase { checkException(s"run compaction on '${tmp.getCanonicalPath}' at 12345")( s"specific 12345 instants is not exist" ) - }) + } } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDataSkippingQuery.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDataSkippingQuery.scala similarity index 98% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDataSkippingQuery.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDataSkippingQuery.scala index 1ac7185f642d..23255b763ff3 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDataSkippingQuery.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDataSkippingQuery.scala @@ -17,7 +17,9 @@ * under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml + +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestDataSkippingQuery extends HoodieSparkSqlTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteFromTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDeleteFromTable.scala similarity index 96% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteFromTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDeleteFromTable.scala index e3ea01730222..b289ce74646c 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteFromTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDeleteFromTable.scala @@ -15,7 +15,9 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml + +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestDeleteFromTable extends HoodieSparkSqlTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDeleteTable.scala similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDeleteTable.scala index bc87405b9f91..b9cafb6ec079 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDeleteTable.scala @@ -15,12 +15,13 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.HoodieSparkUtils.isSpark2 import org.apache.hudi.config.HoodieWriteConfig import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestDeleteTable extends HoodieSparkSqlTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDropTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDropTable.scala similarity index 98% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDropTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDropTable.scala index 0781fc6af06f..743abc5b2fd0 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDropTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDropTable.scala @@ -15,15 +15,14 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml -import org.apache.hudi.common.fs.FSUtils +import org.apache.hadoop.fs.Path import org.apache.hudi.hadoop.fs.HadoopFSUtils - -import org.apache.hadoop.fs.{LocalFileSystem, Path} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.SessionCatalog +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestDropTable extends HoodieSparkSqlTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieTableValuedFunction.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestHoodieTableValuedFunction.scala similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieTableValuedFunction.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestHoodieTableValuedFunction.scala index bdf512d3451a..44e5f26aa580 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieTableValuedFunction.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestHoodieTableValuedFunction.scala @@ -15,13 +15,12 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.hudi.DataSourceWriteOptions.SPARK_SQL_INSERT_INTO_OPERATION import org.apache.hudi.HoodieSparkUtils import org.apache.spark.sql.functions.{col, from_json} - -import scala.collection.Seq +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestHoodieTableValuedFunction extends HoodieSparkSqlTestBase { @@ -450,6 +449,7 @@ class TestHoodieTableValuedFunction extends HoodieSparkSqlTestBase { |""".stripMargin ) + spark.sql("set hoodie.merge.allow.duplicate.on.inserts = false") spark.sql( s""" | insert into $tableName diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestInsertTable.scala similarity index 92% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestInsertTable.scala index 21369ea34e0c..47a718cff735 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestInsertTable.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.client.common.HoodieSparkEngineContext @@ -31,8 +31,10 @@ import org.apache.hudi.index.HoodieIndex.IndexType import org.apache.hudi.{DataSourceWriteOptions, HoodieCLIUtils, HoodieSparkUtils} import org.apache.spark.scheduler.{SparkListener, SparkListenerStageSubmitted} import org.apache.spark.sql.SaveMode -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.getLastCommitMetadata +import org.apache.spark.sql.hudi.HoodieSqlCommonUtils import org.apache.spark.sql.hudi.command.HoodieSparkValidateDuplicateKeyRecordMerger +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.getLastCommitMetadata import org.junit.jupiter.api.Assertions.assertEquals import java.io.File @@ -40,6 +42,116 @@ import java.util.concurrent.CountDownLatch class TestInsertTable extends HoodieSparkSqlTestBase { + test("Test table type name incase-sensitive test") { + withTempDir { tmp => + val targetTable = generateTableName + val tablePath = s"${tmp.getCanonicalPath}/$targetTable" + + spark.sql( + s""" + |create table ${targetTable} ( + | `id` string, + | `name` string, + | `dt` bigint, + | `day` STRING, + | `hour` INT + |) using hudi + |tblproperties ( + | 'primaryKey' = 'id', + | 'type' = 'MOR', + | 'preCombineField'='dt', + | 'hoodie.index.type' = 'BUCKET', + | 'hoodie.bucket.index.hash.field' = 'id', + | 'hoodie.bucket.index.num.buckets'=512 + | ) + partitioned by (`day`,`hour`) + location '${tablePath}' + """.stripMargin) + + spark.sql( + s""" + |insert into ${targetTable} + |select '1' as id, 'aa' as name, 123 as dt, '2024-02-19' as `day`, 10 as `hour` + |""".stripMargin) + + spark.sql( + s""" + |merge into ${targetTable} as target + |using ( + |select '2' as id, 'bb' as name, 456 as dt, '2024-02-19' as `day`, 10 as `hour` + |) as source + |on target.id = source.id + |when matched then update set * + |when not matched then insert * + |""".stripMargin + ) + + // check result after insert and merge data into target table + checkAnswer(s"select id, name, dt, day, hour from $targetTable limit 10")( + Seq("1", "aa", 123, "2024-02-19", 10), + Seq("2", "bb", 456, "2024-02-19", 10) + ) + } + } + + test("Test FirstValueAvroPayload test") { + withTempDir { tmp => + val targetTable = generateTableName + val tablePath = s"${tmp.getCanonicalPath}/$targetTable" + + spark.sql( + s""" + |create table ${targetTable} ( + | `id` string, + | `name` string, + | `dt` bigint, + | `day` STRING, + | `hour` INT + |) using hudi + |tblproperties ( + | 'primaryKey' = 'id', + | 'type' = 'mor', + | 'preCombineField'='dt', + | 'hoodie.index.type' = 'BUCKET', + | 'hoodie.bucket.index.hash.field' = 'id', + | 'hoodie.bucket.index.num.buckets'=12, + | 'hoodie.datasource.write.payload.class'='org.apache.hudi.common.model.FirstValueAvroPayload' + | ) + partitioned by (`day`,`hour`) + location '${tablePath}' + """.stripMargin) + + spark.sql("set hoodie.file.group.reader.enabled=false") + + spark.sql( + s""" + |insert into ${targetTable} + |select '1' as id, 'aa' as name, 123 as dt, '2024-02-19' as `day`, 10 as `hour` + |""".stripMargin) + + spark.sql( + s""" + |insert into ${targetTable} + |select '1' as id, 'bb' as name, 123 as dt, '2024-02-19' as `day`, 10 as `hour` + |""".stripMargin) + + checkAnswer(s"select id, name, dt, day, hour from $targetTable limit 10")( + Seq("1", "aa", 123, "2024-02-19", 10) + ) + + spark.sql( + s""" + 
|insert into ${targetTable} + |select '1' as id, 'cc' as name, 124 as dt, '2024-02-19' as `day`, 10 as `hour` + |""".stripMargin) + + checkAnswer(s"select id, name, dt, day, hour from $targetTable limit 10")( + Seq("1", "cc", 124, "2024-02-19", 10) + ) + + } + } + test("Test Insert Into with values") { withRecordType()(withTempDir { tmp => val tableName = generateTableName @@ -76,7 +188,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { } test("Test Insert Into with static partition") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => val tableName = generateTableName // Create a partitioned table spark.sql( @@ -125,11 +237,11 @@ class TestInsertTable extends HoodieSparkSqlTestBase { Seq(2, "a2", 20.0, 2000, "2021-01-06"), Seq(3, "a3", 30.0, 3000, "2021-01-07") ) - }) + } } test("Test Insert Into with dynamic partition") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => val tableName = generateTableName // Create a partitioned table spark.sql( @@ -179,7 +291,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { Seq(2, "a2", 20.0, 2000, "2021-01-06"), Seq(3, "a3", 30.0, 3000, "2021-01-07") ) - }) + } } test("Test Insert Into with multi partition") { @@ -340,6 +452,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { Seq(2, "a2", 12.0) ) + spark.sql("set hoodie.merge.allow.duplicate.on.inserts = false") assertThrows[HoodieDuplicateKeyException] { try { spark.sql(s"insert into $tableName select 1, 'a1', 10") @@ -359,7 +472,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { } test("Test Insert Overwrite") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => withTable(generateTableName) { tableName => // Create a partitioned table @@ -504,7 +617,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { ) } } - }) + } } test("Test insert overwrite for multi partitioned table") { @@ -606,19 +719,19 @@ class TestInsertTable extends HoodieSparkSqlTestBase { } test("Test Different Type of Partition Column") { - withRecordType()(withTempDir { tmp => - val typeAndValue = Seq( - ("string", "'1000'"), - ("int", 1000), - ("bigint", 10000), - ("timestamp", "TIMESTAMP'2021-05-20 00:00:00'"), - ("date", "DATE'2021-05-20'") - ) - typeAndValue.foreach { case (partitionType, partitionValue) => - val tableName = generateTableName - validateDifferentTypesOfPartitionColumn(tmp, partitionType, partitionValue, tableName) - } - }) + withTempDir { tmp => + val typeAndValue = Seq( + ("string", "'1000'"), + ("int", 1000), + ("bigint", 10000), + ("timestamp", "TIMESTAMP'2021-05-20 00:00:00'"), + ("date", "DATE'2021-05-20'") + ) + typeAndValue.foreach { case (partitionType, partitionValue) => + val tableName = generateTableName + validateDifferentTypesOfPartitionColumn(tmp, partitionType, partitionValue, tableName) + } + } } test("Test TimestampType Partition Column With Consistent Logical Timestamp Enabled") { @@ -636,7 +749,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { } test("Test insert for uppercase table name") { - withRecordType()(withTempDir{ tmp => + withTempDir { tmp => val tableName = s"H_$generateTableName" if (HoodieSparkUtils.gteqSpark3_5) { // [SPARK-44284] Spark 3.5+ requires conf below to be case sensitive @@ -663,84 +776,82 @@ class TestInsertTable extends HoodieSparkSqlTestBase { .setConf(spark.sessionState.newHadoopConf()) .build() assertResult(tableName)(metaClient.getTableConfig.getTableName) - }) + } } test("Test Insert Exception") { - withRecordType() { - val tableName = 
generateTableName + val tableName = generateTableName + spark.sql( + s""" + |create table $tableName ( + | id int, + | name string, + | price double, + | dt string + |) using hudi + | tblproperties (primaryKey = 'id') + | partitioned by (dt) + """.stripMargin) + val tooManyDataColumnsErrorMsg = if (HoodieSparkUtils.gteqSpark3_5) { + s""" + |[INSERT_COLUMN_ARITY_MISMATCH.TOO_MANY_DATA_COLUMNS] Cannot write to `spark_catalog`.`default`.`$tableName`, the reason is too many data columns: + |Table columns: `id`, `name`, `price`. + |Data columns: `1`, `a1`, `10`, `2021-06-20`. + |""".stripMargin + } else if (HoodieSparkUtils.gteqSpark3_4) { + """ + |too many data columns: + |Table columns: 'id', 'name', 'price'. + |Data columns: '1', 'a1', '10', '2021-06-20'. + |""".stripMargin + } else { + """ + |too many data columns: + |Table columns: 'id', 'name', 'price' + |Data columns: '1', 'a1', '10', '2021-06-20' + |""".stripMargin + } + checkExceptionContain(s"insert into $tableName partition(dt = '2021-06-20') select 1, 'a1', 10, '2021-06-20'")( + tooManyDataColumnsErrorMsg) + + val notEnoughDataColumnsErrorMsg = if (HoodieSparkUtils.gteqSpark3_5) { + s""" + |[INSERT_COLUMN_ARITY_MISMATCH.NOT_ENOUGH_DATA_COLUMNS] Cannot write to `spark_catalog`.`default`.`$tableName`, the reason is not enough data columns: + |Table columns: `id`, `name`, `price`, `dt`. + |Data columns: `1`, `a1`, `10`. + |""".stripMargin + } else if (HoodieSparkUtils.gteqSpark3_4) { + """ + |not enough data columns: + |Table columns: 'id', 'name', 'price', 'dt'. + |Data columns: '1', 'a1', '10'. + |""".stripMargin + } else { + """ + |not enough data columns: + |Table columns: 'id', 'name', 'price', 'dt' + |Data columns: '1', 'a1', '10' + |""".stripMargin + } + checkExceptionContain(s"insert into $tableName select 1, 'a1', 10")(notEnoughDataColumnsErrorMsg) + withSQLConf("hoodie.sql.bulk.insert.enable" -> "true", "hoodie.sql.insert.mode" -> "strict") { + val tableName2 = generateTableName spark.sql( s""" - |create table $tableName ( + |create table $tableName2 ( | id int, | name string, | price double, - | dt string + | ts long |) using hudi - | tblproperties (primaryKey = 'id') - | partitioned by (dt) - """.stripMargin) - val tooManyDataColumnsErrorMsg = if (HoodieSparkUtils.gteqSpark3_5) { - s""" - |[INSERT_COLUMN_ARITY_MISMATCH.TOO_MANY_DATA_COLUMNS] Cannot write to `spark_catalog`.`default`.`$tableName`, the reason is too many data columns: - |Table columns: `id`, `name`, `price`. - |Data columns: `1`, `a1`, `10`, `2021-06-20`. - |""".stripMargin - } else if (HoodieSparkUtils.gteqSpark3_4) { - """ - |too many data columns: - |Table columns: 'id', 'name', 'price'. - |Data columns: '1', 'a1', '10', '2021-06-20'. - |""".stripMargin - } else { - """ - |too many data columns: - |Table columns: 'id', 'name', 'price' - |Data columns: '1', 'a1', '10', '2021-06-20' - |""".stripMargin - } - checkExceptionContain(s"insert into $tableName partition(dt = '2021-06-20') select 1, 'a1', 10, '2021-06-20'")( - tooManyDataColumnsErrorMsg) - - val notEnoughDataColumnsErrorMsg = if (HoodieSparkUtils.gteqSpark3_5) { - s""" - |[INSERT_COLUMN_ARITY_MISMATCH.NOT_ENOUGH_DATA_COLUMNS] Cannot write to `spark_catalog`.`default`.`$tableName`, the reason is not enough data columns: - |Table columns: `id`, `name`, `price`, `dt`. - |Data columns: `1`, `a1`, `10`. - |""".stripMargin - } else if (HoodieSparkUtils.gteqSpark3_4) { - """ - |not enough data columns: - |Table columns: 'id', 'name', 'price', 'dt'. - |Data columns: '1', 'a1', '10'. 
- |""".stripMargin - } else { - """ - |not enough data columns: - |Table columns: 'id', 'name', 'price', 'dt' - |Data columns: '1', 'a1', '10' - |""".stripMargin - } - checkExceptionContain(s"insert into $tableName select 1, 'a1', 10")(notEnoughDataColumnsErrorMsg) - withSQLConf("hoodie.sql.bulk.insert.enable" -> "true", "hoodie.sql.insert.mode" -> "strict") { - val tableName2 = generateTableName - spark.sql( - s""" - |create table $tableName2 ( - | id int, - | name string, - | price double, - | ts long - |) using hudi - | tblproperties ( - | primaryKey = 'id', - | preCombineField = 'ts' - | ) - """.stripMargin) - checkException(s"insert into $tableName2 values(1, 'a1', 10, 1000)")( - "Table with primaryKey can not use bulk insert in strict mode." - ) - } + | tblproperties ( + | primaryKey = 'id', + | preCombineField = 'ts' + | ) + """.stripMargin) + checkException(s"insert into $tableName2 values(1, 'a1', 10, 1000)")( + "Table with primaryKey can not use bulk insert in strict mode." + ) } } @@ -773,8 +884,8 @@ class TestInsertTable extends HoodieSparkSqlTestBase { test("Test bulk insert with insert into for single partitioned table") { withSQLConf("hoodie.sql.insert.mode" -> "non-strict") { - withRecordType()(withTempDir { tmp => - Seq("cow", "mor").foreach {tableType => + withTempDir { tmp => + Seq("cow", "mor").foreach { tableType => withTable(generateTableName) { tableName => spark.sql( s""" @@ -817,7 +928,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { ) } } - }) + } } } @@ -906,7 +1017,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { test("Test bulk insert with CTAS") { withSQLConf("hoodie.sql.insert.mode" -> "non-strict", "hoodie.sql.bulk.insert.enable" -> "true") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => withTable(generateTableName) { inputTable => spark.sql( @@ -948,13 +1059,13 @@ class TestInsertTable extends HoodieSparkSqlTestBase { } } } - }) + } } } test("Test bulk insert with empty dataset") { withSQLConf(SPARK_SQL_INSERT_INTO_OPERATION.key -> WriteOperationType.BULK_INSERT.value()) { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => withTable(generateTableName) { inputTable => spark.sql( @@ -992,7 +1103,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { } } } - }) + } } } @@ -1004,7 +1115,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { Array() } withSQLConf(bulkInsertConf: _*) { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => withTable(generateTableName) { inputTable => spark.sql( @@ -1048,14 +1159,14 @@ class TestInsertTable extends HoodieSparkSqlTestBase { } } } - }) + } } } } test("Test bulk insert with insert overwrite table") { withSQLConf(SPARK_SQL_INSERT_INTO_OPERATION.key -> WriteOperationType.BULK_INSERT.value()) { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => withTable(generateTableName) { nonPartitionedTable => spark.sql( @@ -1082,13 +1193,13 @@ class TestInsertTable extends HoodieSparkSqlTestBase { } } } - }) + } } } test("Test bulk insert with insert overwrite partition") { withSQLConf(SPARK_SQL_INSERT_INTO_OPERATION.key -> WriteOperationType.BULK_INSERT.value()) { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => withTable(generateTableName) { partitionedTable => spark.sql( @@ -1129,12 +1240,12 @@ class TestInsertTable extends 
HoodieSparkSqlTestBase { } } } - }) + } } } test("Test combine before insert") { - withSQLConf("hoodie.sql.bulk.insert.enable" -> "false") { + withSQLConf("hoodie.sql.bulk.insert.enable" -> "false", "hoodie.merge.allow.duplicate.on.inserts" -> "false") { withRecordType()(withTempDir{tmp => val tableName = generateTableName spark.sql( @@ -1305,7 +1416,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { test("Test Insert Into With Catalog Identifier for spark >= 3.2.0") { Seq("hudi", "parquet").foreach { format => - withRecordType()(withTempDir { tmp => + withTempDir { tmp => val tableName = s"spark_catalog.default.$generateTableName" // Create a partitioned table if (HoodieSparkUtils.gteqSpark3_2) { @@ -1342,7 +1453,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { Seq(2, "a2", 10.0, 1000, "2021-01-05") ) } - }) + } } } @@ -1448,6 +1559,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { Seq(3, "a3", 30.0, 3000, "2021-01-07") ) + spark.sql("set hoodie.merge.allow.duplicate.on.inserts = false") spark.sql( s""" | insert into $tableName values @@ -1612,7 +1724,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { test("Test Insert Overwrite Into Bucket Index Table") { withSQLConf("hoodie.sql.bulk.insert.enable" -> "false") { Seq("mor", "cow").foreach { tableType => - withRecordType()(withTempDir { tmp => + withTempDir { tmp => val tableName = generateTableName // Create a partitioned table spark.sql( @@ -1657,14 +1769,14 @@ class TestInsertTable extends HoodieSparkSqlTestBase { checkAnswer(s"select id, name, price, ts, dt from $tableName order by dt")( Seq(13, "a2", 12.0, 1000, "2021-01-05") ) - }) + } } } } test("Test Insert Overwrite Into Consistent Bucket Index Table") { withSQLConf("hoodie.sql.bulk.insert.enable" -> "false") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => val tableName = generateTableName // Create a partitioned table spark.sql( @@ -1717,13 +1829,13 @@ class TestInsertTable extends HoodieSparkSqlTestBase { checkAnswer(s"select id, name, price, ts, dt from $tableName order by dt")( Seq(13, "a3", 12.0, 1000, "2021-01-05") ) - }) + } } } test("Test Hudi should not record empty preCombineKey in hoodie.properties") { withSQLConf("hoodie.datasource.write.operation" -> "insert") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => val tableName = generateTableName spark.sql( s""" @@ -1751,7 +1863,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { Seq(2, "name2", 12.0), Seq(3, "name3", 13.0) ) - }) + } } } @@ -1953,17 +2065,17 @@ class TestInsertTable extends HoodieSparkSqlTestBase { spark.sessionState.conf.unsetConf("hoodie.sql.insert.mode") spark.sessionState.conf.unsetConf("hoodie.datasource.insert.dup.policy") spark.sessionState.conf.unsetConf("hoodie.datasource.write.operation") - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => withTable(generateTableName) { tableName => ingestAndValidateData(tableType, tableName, tmp, WriteOperationType.UPSERT) } } - }) + } } test("Test sql write operation with INSERT_INTO override both strict mode and sql write operation") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => Seq(WriteOperationType.INSERT, WriteOperationType.BULK_INSERT, WriteOperationType.UPSERT).foreach { operation => withTable(generateTableName) { tableName => @@ -1972,11 +2084,11 @@ class TestInsertTable extends HoodieSparkSqlTestBase { } } } - }) + } } test("Test sql write operation with 
INSERT_INTO override only sql write operation") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => Seq(WriteOperationType.INSERT, WriteOperationType.BULK_INSERT, WriteOperationType.UPSERT).foreach { operation => withTable(generateTableName) { tableName => @@ -1985,7 +2097,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { } } } - }) + } } test("Test sql write operation with INSERT_INTO override only strict mode") { @@ -1994,14 +2106,14 @@ class TestInsertTable extends HoodieSparkSqlTestBase { spark.sessionState.conf.unsetConf(DataSourceWriteOptions.INSERT_DUP_POLICY.key()) spark.sessionState.conf.unsetConf("hoodie.datasource.write.operation") spark.sessionState.conf.unsetConf("hoodie.sql.bulk.insert.enable") - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => withTable(generateTableName) { tableName => ingestAndValidateData(tableType, tableName, tmp, WriteOperationType.UPSERT, List("set hoodie.sql.insert.mode = upsert")) } } - }) + } } def ingestAndValidateData(tableType: String, tableName: String, tmp: File, @@ -2075,13 +2187,13 @@ class TestInsertTable extends HoodieSparkSqlTestBase { spark.sessionState.conf.unsetConf("hoodie.sql.insert.mode") spark.sessionState.conf.unsetConf("hoodie.datasource.insert.dup.policy") spark.sessionState.conf.unsetConf("hoodie.datasource.write.operation") - withRecordType()(withTempDir { tmp => - Seq("cow","mor").foreach { tableType => + withTempDir { tmp => + Seq("cow", "mor").foreach { tableType => withTable(generateTableName) { tableName => ingestAndValidateDataNoPrecombine(tableType, tableName, tmp, WriteOperationType.INSERT) } } - }) + } } var listenerCallCount: Int = 0 @@ -2101,7 +2213,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { test("Test multiple partition fields pruning") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => val targetTable = generateTableName countDownLatch = new CountDownLatch(1) listenerCallCount = 0 @@ -2148,12 +2260,12 @@ class TestInsertTable extends HoodieSparkSqlTestBase { assertResult(1)(rddHead.partitions.size) countDownLatch.await assert(listenerCallCount >= 1) - }) + } } test("Test single partiton field pruning") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => countDownLatch = new CountDownLatch(1) listenerCallCount = 0 val targetTable = generateTableName @@ -2200,11 +2312,11 @@ class TestInsertTable extends HoodieSparkSqlTestBase { assertResult(1)(rddHead.partitions.size) countDownLatch.await assert(listenerCallCount >= 1) - }) + } } test("Test inaccurate index type") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => val targetTable = generateTableName assertThrows[IllegalArgumentException] { @@ -2232,7 +2344,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { |""".stripMargin) } } - }) + } } test("Test vectorized read nested columns for LegacyHoodieParquetFileFormat") { @@ -2338,7 +2450,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { } test("Test insert dup policy with INSERT_INTO explicit new configs INSERT operation ") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow", "mor").foreach { tableType => val operation = WriteOperationType.INSERT Seq(NONE_INSERT_DUP_POLICY, DROP_INSERT_DUP_POLICY).foreach { dupPolicy => @@ -2350,11 +2462,11 @@ class TestInsertTable extends HoodieSparkSqlTestBase { } } } - }) + } } test("Test insert dup policy with INSERT_INTO explicit new configs BULK_INSERT operation ") { - 
withRecordType()(withTempDir { tmp => + withTempDir { tmp => Seq("cow").foreach { tableType => val operation = WriteOperationType.BULK_INSERT val dupPolicy = NONE_INSERT_DUP_POLICY @@ -2365,7 +2477,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { dupPolicy) } } - }) + } } test("Test DROP insert dup policy with INSERT_INTO explicit new configs BULK INSERT operation") { @@ -2432,7 +2544,6 @@ class TestInsertTable extends HoodieSparkSqlTestBase { }) } - def ingestAndValidateDataDupPolicy(tableType: String, tableName: String, tmp: File, expectedOperationtype: WriteOperationType = WriteOperationType.INSERT, setOptions: List[String] = List.empty, diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoLogOnlyTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoLogOnlyTable.scala similarity index 97% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoLogOnlyTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoLogOnlyTable.scala index 48ee872d4d95..d25b9752e35b 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoLogOnlyTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoLogOnlyTable.scala @@ -15,9 +15,10 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.hudi.testutils.DataSourceTestUtils +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestMergeIntoLogOnlyTable extends HoodieSparkSqlTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoTable.scala similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoTable.scala index a2111265bd64..d712e2df2597 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoTable.scala @@ -15,14 +15,13 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.hudi.DataSourceWriteOptions.SPARK_SQL_OPTIMIZED_WRITES import org.apache.hudi.config.HoodieWriteConfig.MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT -import org.apache.hudi.{DataSourceReadOptions, HoodieDataSourceHelpers, HoodieSparkUtils, ScalaAssertionSupport} -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.hadoop.fs.HadoopFSUtils - +import org.apache.hudi.{DataSourceReadOptions, HoodieDataSourceHelpers, HoodieSparkUtils, ScalaAssertionSupport} +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase import org.apache.spark.sql.internal.SQLConf class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSupport { @@ -131,7 +130,7 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo test("Test MergeInto with more than once update actions for spark >= 3.1.x") { if (HoodieSparkUtils.gteqSpark3_1) { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => val targetTable = generateTableName spark.sql( s""" @@ -180,7 +179,7 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo checkAnswer(s"select id, name, data, country, ts from $targetTable")( Seq(1, "lb", 5, "shu", 1646643196L) ) - }) + } } } @@ -265,7 +264,7 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo test("Test MergeInto for MOR table ") { spark.sql(s"set ${MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key} = 0") - withRecordType()(withTempDir { tmp => + withTempDir { tmp => spark.sql("set hoodie.payload.combined.schema.validate = true") val tableName = generateTableName // Create a mor partitioned table. @@ -393,12 +392,12 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo checkAnswer(s"select id,name,price,dt from $tableName order by id")( Seq(1, "a1", 12, "2021-03-21") ) - }) + } } test("Test MergeInto with insert only") { spark.sql(s"set ${MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key} = 0") - withRecordType()(withTempDir { tmp => + withTempDir { tmp => spark.sql("set hoodie.payload.combined.schema.validate = true") // Create a partitioned mor table val tableName = generateTableName @@ -449,7 +448,7 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo Seq(1, "a1", 10, "2021-03-21"), Seq(2, "a2", 10, "2021-03-20") ) - }) + } } test("Test MergeInto For PreCombineField") { @@ -529,7 +528,7 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo test("Test MergeInto with preCombine field expression") { spark.sql(s"set ${MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key} = 0") - withRecordType()(withTempDir { tmp => + withTempDir { tmp => spark.sql("set hoodie.payload.combined.schema.validate = true") Seq("cow", "mor").foreach { tableType => val tableName1 = generateTableName @@ -607,11 +606,11 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo Seq(1, "a1", 24, "2021-03-21", 1002) ) } - }) + } } test("Test MergeInto with primaryKey expression") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => spark.sql("set hoodie.payload.combined.schema.validate = true") val tableName1 = generateTableName spark.sql( @@ -707,7 +706,7 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo checkAnswer(s"select id,name,price,v,dt from $tableName1 order by id")( Seq(1, "a1", 10, 1000, "2021-03-21") ) - }) + } } test("Test MergeInto with combination of delete update insert") { 
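For context on the test shape used throughout these files: the suites call withTempDir { tmp => ... } directly and validate results with checkAnswer. A minimal sketch of that shape, assuming the HoodieSparkSqlTestBase helpers already used in these files (generateTableName, checkAnswer, spark); the table schema and values below are illustrative only, not part of the patch:

    test("Illustrative sketch only") {
      withTempDir { tmp =>
        val tableName = generateTableName
        // Create a simple Hudi table under the temp directory (hypothetical schema)
        spark.sql(
          s"""
             |create table $tableName (id int, name string, price double, ts long)
             |using hudi
             |tblproperties (primaryKey = 'id', preCombineField = 'ts')
             |location '${tmp.getCanonicalPath}'
           """.stripMargin)
        // Write one row and verify it reads back as expected
        spark.sql(s"insert into $tableName values (1, 'a1', 10.0, 1000)")
        checkAnswer(s"select id, name, price, ts from $tableName")(
          Seq(1, "a1", 10.0, 1000)
        )
      }
    }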
@@ -882,7 +881,7 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo } test("Test Different Type of PreCombineField") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => spark.sql("set hoodie.payload.combined.schema.validate = true") val typeAndValue = Seq( ("string", "'1000'"), @@ -938,7 +937,7 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo Seq(1, "a1", 20.0) ) } - }) + } } test("Test MergeInto For MOR With Compaction On") { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable2.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoTable2.scala similarity index 98% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable2.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoTable2.scala index b8f315575ddf..e38447692de3 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable2.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoTable2.scala @@ -15,12 +15,13 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.hudi.HoodieSparkUtils import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.config.HoodieWriteConfig.MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT import org.apache.spark.sql.Row +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestMergeIntoTable2 extends HoodieSparkSqlTestBase { @@ -144,7 +145,7 @@ class TestMergeIntoTable2 extends HoodieSparkSqlTestBase { } test("Test Merge Into CTAS Table") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => spark.sql("set hoodie.payload.combined.schema.validate = true") val tableName = generateTableName spark.sql( @@ -176,7 +177,7 @@ class TestMergeIntoTable2 extends HoodieSparkSqlTestBase { checkAnswer(s"select id, name from $tableName")( Seq(1, "a1_1") ) - }) + } } test("Test Merge With Complex Data Type") { @@ -245,7 +246,7 @@ class TestMergeIntoTable2 extends HoodieSparkSqlTestBase { } test("Test column name matching for insert * and update set *") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => spark.sql("set hoodie.payload.combined.schema.validate = true") val tableName = generateTableName // Create table @@ -329,11 +330,11 @@ class TestMergeIntoTable2 extends HoodieSparkSqlTestBase { Seq(3, "a3", 102.0, 1000, "2021-05-05"), Seq(4, "a4", 100.0, 1000, "2021-05-06") ) - }) + } } test("Test MergeInto For Source Table With Column Aliases") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => spark.sql("set hoodie.payload.combined.schema.validate = true") val tableName = generateTableName // Create table @@ -373,7 +374,7 @@ class TestMergeIntoTable2 extends HoodieSparkSqlTestBase { Seq(1, "a1", 10.0, 1000) ) } - }) + } } /* TODO [HUDI-6472] @@ -559,7 +560,7 @@ class TestMergeIntoTable2 extends HoodieSparkSqlTestBase { */ test("Test only insert when source table contains history") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => spark.sql("set hoodie.payload.combined.schema.validate = true") val tableName = generateTableName // Create table @@ -601,7 +602,7 @@ class TestMergeIntoTable2 extends HoodieSparkSqlTestBase { Seq(1, "a1", 1.0, 10, "2022-08-18"), Seq(2, "a2", 10.0, 100, "2022-08-18") ) - }) 
+ } } test("Test only insert when source table contains history and target table has multiple keys") { @@ -653,7 +654,7 @@ class TestMergeIntoTable2 extends HoodieSparkSqlTestBase { test("Test Merge Into For Source Table With Different Column Order") { spark.sql(s"set ${MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key} = 0") - withRecordType()(withTempDir { tmp => + withTempDir { tmp => val tableName = generateTableName // Create a mor partitioned table. spark.sql( @@ -687,7 +688,7 @@ class TestMergeIntoTable2 extends HoodieSparkSqlTestBase { checkAnswer(s"select id,name,price,dt from $tableName")( Seq(1, "a1", 10, "2021-03-21") ) - }) + } } test("Test Merge into with String cast to Double") { @@ -923,6 +924,7 @@ class TestMergeIntoTable2 extends HoodieSparkSqlTestBase { | partitioned by(dt) | location '${tmp.getCanonicalPath}' """.stripMargin) + spark.sql("set hoodie.merge.allow.duplicate.on.inserts = false") spark.sql( s""" @@ -971,6 +973,7 @@ class TestMergeIntoTable2 extends HoodieSparkSqlTestBase { | partitioned by(dt) | location '${path1}' """.stripMargin) + spark.sql("set hoodie.merge.allow.duplicate.on.inserts = false") spark.sql(s"insert into $sourceTable values(1, 'a1', cast(3.01 as double), 11, '2022-09-26'),(2, 'a2', cast(3.02 as double), 12, '2022-09-27'),(3, 'a3', cast(3.03 as double), 13, '2022-09-28'),(4, 'a4', cast(3.04 as double), 14, '2022-09-29')") diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTableWithNonRecordKeyField.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoTableWithNonRecordKeyField.scala similarity index 98% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTableWithNonRecordKeyField.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoTableWithNonRecordKeyField.scala index 419bb43de43f..8e06995475b8 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTableWithNonRecordKeyField.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoTableWithNonRecordKeyField.scala @@ -15,10 +15,11 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.hudi.DataSourceWriteOptions.SPARK_SQL_OPTIMIZED_WRITES import org.apache.hudi.{HoodieSparkUtils, ScalaAssertionSupport} +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestMergeIntoTableWithNonRecordKeyField extends HoodieSparkSqlTestBase with ScalaAssertionSupport { @@ -247,7 +248,7 @@ class TestMergeIntoTableWithNonRecordKeyField extends HoodieSparkSqlTestBase wit test("Test pkless multiple source match") { for (withPrecombine <- Seq(true, false)) { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => spark.sql("set hoodie.payload.combined.schema.validate = true") val tableName = generateTableName @@ -292,13 +293,13 @@ class TestMergeIntoTableWithNonRecordKeyField extends HoodieSparkSqlTestBase wit Seq(1, "a1", 30.0, 100) ) } - }) + } } } test("Test MergeInto Basic pkless") { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => spark.sql("set hoodie.payload.combined.schema.validate = true") spark.sql(s"set ${SPARK_SQL_OPTIMIZED_WRITES.key()}=true") val tableName = generateTableName @@ -385,6 +386,6 @@ class TestMergeIntoTableWithNonRecordKeyField extends HoodieSparkSqlTestBase wit """.stripMargin) val cnt = spark.sql(s"select * from $tableName where id = 1").count() assertResult(0)(cnt) - }) + } } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestPartialUpdateForMergeInto.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestPartialUpdateForMergeInto.scala similarity index 87% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestPartialUpdateForMergeInto.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestPartialUpdateForMergeInto.scala index 0c1ca31479d8..e4151bd7e950 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestPartialUpdateForMergeInto.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestPartialUpdateForMergeInto.scala @@ -15,14 +15,14 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.avro.Schema -import org.apache.hudi.DataSourceWriteOptions.ENABLE_MERGE_INTO_PARTIAL_UPDATES +import org.apache.hudi.DataSourceWriteOptions import org.apache.hudi.HoodieSparkUtils import org.apache.hudi.avro.HoodieAvroUtils -import org.apache.hudi.common.config.HoodieReaderConfig.FILE_GROUP_READER_ENABLED -import org.apache.hudi.common.config.HoodieStorageConfig.LOGFILE_DATA_BLOCK_FORMAT +import org.apache.hudi.common.config.HoodieReaderConfig +import org.apache.hudi.common.config.HoodieStorageConfig import org.apache.hudi.common.config.{HoodieCommonConfig, HoodieMetadataConfig} import org.apache.hudi.common.engine.HoodieLocalEngineContext import org.apache.hudi.common.function.SerializableFunctionUnchecked @@ -31,10 +31,10 @@ import org.apache.hudi.common.table.log.HoodieLogFileReader import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType import org.apache.hudi.common.table.view.{FileSystemViewManager, FileSystemViewStorageConfig, SyncableFileSystemView} import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} -import org.apache.hudi.common.testutils.HoodieTestUtils.{getDefaultHadoopConf, getLogFileListFromFileSlice} -import org.apache.hudi.config.HoodieIndexConfig.INDEX_TYPE -import org.apache.hudi.config.HoodieWriteConfig.MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT +import org.apache.hudi.common.testutils.HoodieTestUtils +import org.apache.hudi.config.{HoodieIndexConfig, HoodieWriteConfig} import org.apache.hudi.metadata.HoodieTableMetadata +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertTrue} import java.util.function.Predicate @@ -71,9 +71,9 @@ class TestPartialUpdateForMergeInto extends HoodieSparkSqlTestBase { withTempDir { tmp => val tableName = generateTableName val basePath = tmp.getCanonicalPath + "/" + tableName - spark.sql(s"set ${MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key} = 0") - spark.sql(s"set ${ENABLE_MERGE_INTO_PARTIAL_UPDATES.key} = true") - spark.sql(s"set ${FILE_GROUP_READER_ENABLED.key} = true") + spark.sql(s"set ${HoodieWriteConfig.MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key} = 0") + spark.sql(s"set ${DataSourceWriteOptions.ENABLE_MERGE_INTO_PARTIAL_UPDATES.key} = true") + spark.sql(s"set ${HoodieReaderConfig.FILE_GROUP_READER_ENABLED.key} = true") // Create a table with five data fields spark.sql( @@ -119,11 +119,11 @@ class TestPartialUpdateForMergeInto extends HoodieSparkSqlTestBase { withTempDir { tmp => val tableName = generateTableName val basePath = tmp.getCanonicalPath + "/" + tableName - spark.sql(s"set ${MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key} = 0") - spark.sql(s"set ${ENABLE_MERGE_INTO_PARTIAL_UPDATES.key} = true") - spark.sql(s"set ${FILE_GROUP_READER_ENABLED.key} = true") + spark.sql(s"set ${HoodieWriteConfig.MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key} = 0") + spark.sql(s"set ${DataSourceWriteOptions.ENABLE_MERGE_INTO_PARTIAL_UPDATES.key} = true") + spark.sql(s"set ${HoodieReaderConfig.FILE_GROUP_READER_ENABLED.key} = true") // Write inserts to log block - spark.sql(s"set ${INDEX_TYPE.key} = INMEMORY") + spark.sql(s"set ${HoodieIndexConfig.INDEX_TYPE.key} = INMEMORY") // Create a table with five data fields spark.sql( @@ -176,10 +176,10 @@ class TestPartialUpdateForMergeInto extends HoodieSparkSqlTestBase { withTempDir { tmp => val tableName = generateTableName val basePath = tmp.getCanonicalPath + "/" + tableName - 
spark.sql(s"set ${MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key} = 0") - spark.sql(s"set ${ENABLE_MERGE_INTO_PARTIAL_UPDATES.key} = true") - spark.sql(s"set ${LOGFILE_DATA_BLOCK_FORMAT.key} = $logDataBlockFormat") - spark.sql(s"set ${FILE_GROUP_READER_ENABLED.key} = true") + spark.sql(s"set ${HoodieWriteConfig.MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key} = 0") + spark.sql(s"set ${DataSourceWriteOptions.ENABLE_MERGE_INTO_PARTIAL_UPDATES.key} = true") + spark.sql(s"set ${HoodieStorageConfig.LOGFILE_DATA_BLOCK_FORMAT.key} = $logDataBlockFormat") + spark.sql(s"set ${HoodieReaderConfig.FILE_GROUP_READER_ENABLED.key} = true") // Create a table with five data fields spark.sql( @@ -279,10 +279,10 @@ class TestPartialUpdateForMergeInto extends HoodieSparkSqlTestBase { withTempDir { tmp => val tableName = generateTableName val basePath = tmp.getCanonicalPath + "/" + tableName - spark.sql(s"set ${MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key} = 0") - spark.sql(s"set ${ENABLE_MERGE_INTO_PARTIAL_UPDATES.key} = true") - spark.sql(s"set ${LOGFILE_DATA_BLOCK_FORMAT.key} = $logDataBlockFormat") - spark.sql(s"set ${FILE_GROUP_READER_ENABLED.key} = true") + spark.sql(s"set ${HoodieWriteConfig.MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key} = 0") + spark.sql(s"set ${DataSourceWriteOptions.ENABLE_MERGE_INTO_PARTIAL_UPDATES.key} = true") + spark.sql(s"set ${HoodieStorageConfig.LOGFILE_DATA_BLOCK_FORMAT.key} = $logDataBlockFormat") + spark.sql(s"set ${HoodieReaderConfig.FILE_GROUP_READER_ENABLED.key} = true") // Create a table with five data fields spark.sql( @@ -379,13 +379,13 @@ class TestPartialUpdateForMergeInto extends HoodieSparkSqlTestBase { expectedNumLogFile: Int, changedFields: Seq[Seq[String]], isPartial: Boolean): Unit = { - val hadoopConf = getDefaultHadoopConf + val hadoopConf = HoodieTestUtils.getDefaultHadoopConf val metaClient: HoodieTableMetaClient = HoodieTableMetaClient.builder.setConf(hadoopConf).setBasePath(basePath).build val metadataConfig = HoodieMetadataConfig.newBuilder.build val engineContext = new HoodieLocalEngineContext(hadoopConf) val viewManager: FileSystemViewManager = FileSystemViewManager.createViewManager( - engineContext, metadataConfig, FileSystemViewStorageConfig.newBuilder.build, + engineContext, FileSystemViewStorageConfig.newBuilder.build, HoodieCommonConfig.newBuilder.build, new SerializableFunctionUnchecked[HoodieTableMetaClient, HoodieTableMetadata] { override def apply(v1: HoodieTableMetaClient): HoodieTableMetadata = { @@ -398,12 +398,12 @@ class TestPartialUpdateForMergeInto extends HoodieSparkSqlTestBase { val fileSlice: Optional[FileSlice] = fsView.getAllFileSlices("") .filter(new Predicate[FileSlice] { override def test(fileSlice: FileSlice): Boolean = { - getLogFileListFromFileSlice(fileSlice).size() == expectedNumLogFile + HoodieTestUtils.getLogFileListFromFileSlice(fileSlice).size() == expectedNumLogFile } }) .findFirst() assertTrue(fileSlice.isPresent) - val logFilePathList: List[String] = getLogFileListFromFileSlice(fileSlice.get) + val logFilePathList: List[String] = HoodieTestUtils.getLogFileListFromFileSlice(fileSlice.get) Collections.sort(logFilePathList) val avroSchema = new TableSchemaResolver(metaClient).getTableAvroSchema diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestQueryMergeOnReadOptimizedTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestQueryMergeOnReadOptimizedTable.scala similarity index 97% rename from 
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestQueryMergeOnReadOptimizedTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestQueryMergeOnReadOptimizedTable.scala index 16d56373442c..2f4d12bda44a 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestQueryMergeOnReadOptimizedTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestQueryMergeOnReadOptimizedTable.scala @@ -15,7 +15,9 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml + +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestQueryMergeOnReadOptimizedTable extends HoodieSparkSqlTestBase { test("Test Query Merge_On_Read Read_Optimized table") { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestRepairTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestRepairTable.scala similarity index 98% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestRepairTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestRepairTable.scala index 8078ed29bd7e..fccc7b61f1f5 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestRepairTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestRepairTable.scala @@ -16,14 +16,14 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.hudi.DataSourceWriteOptions.{PARTITIONPATH_FIELD, PRECOMBINE_FIELD, RECORDKEY_FIELD} import org.apache.hudi.HoodieSparkUtils import org.apache.hudi.common.table.HoodieTableConfig.HIVE_STYLE_PARTITIONING_ENABLE import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME - import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestRepairTable extends HoodieSparkSqlTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestShowPartitions.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestShowPartitions.scala similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestShowPartitions.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestShowPartitions.scala index 0704e895309d..1afb0f0974fc 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestShowPartitions.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestShowPartitions.scala @@ -15,10 +15,11 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.hudi.HoodieSparkUtils.isSpark2 import org.apache.hudi.common.util.PartitionPathEncodeUtils.DEFAULT_PARTITION_PATH +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestShowPartitions extends HoodieSparkSqlTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestTimeTravelTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestTimeTravelTable.scala similarity index 98% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestTimeTravelTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestTimeTravelTable.scala index 73bad3be282d..9924b7003536 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestTimeTravelTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestTimeTravelTable.scala @@ -15,10 +15,11 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.hudi.HoodieSparkUtils import org.apache.hudi.common.table.HoodieTableMetaClient +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestTimeTravelTable extends HoodieSparkSqlTestBase { test("Test Insert and Update Record with time travel") { @@ -69,7 +70,7 @@ class TestTimeTravelTable extends HoodieSparkSqlTestBase { test("Test Insert Into Records with time travel To new Table") { if (HoodieSparkUtils.gteqSpark3_2) { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => // Create Non-Partitioned table val tableName1 = generateTableName spark.sql( @@ -138,7 +139,7 @@ class TestTimeTravelTable extends HoodieSparkSqlTestBase { Seq(1, "a1", 10.0, 1000, "2022-02-14"), Seq(2, "a2", 10.0, 1000, "2022-02-15") ) - }) + } } } @@ -238,18 +239,18 @@ class TestTimeTravelTable extends HoodieSparkSqlTestBase { test("Test Unsupported syntax can be parsed") { if (HoodieSparkUtils.gteqSpark3_2) { checkAnswer("select 1 distribute by 1")(Seq(1)) - withRecordType()(withTempDir { dir => + withTempDir { dir => val path = dir.toURI.getPath spark.sql(s"insert overwrite local directory '$path' using parquet select 1") // Requires enable hive support, so didn't test it // spark.sql(s"insert overwrite local directory '$path' stored as orc select 1") - }) + } } } test("Test Select Record with time travel and Repartition") { if (HoodieSparkUtils.gteqSpark3_2) { - withRecordType()(withTempDir { tmp => + withTempDir { tmp => val tableName = generateTableName spark.sql( s""" @@ -289,7 +290,7 @@ class TestTimeTravelTable extends HoodieSparkSqlTestBase { s"select id, name, price, ts from $tableName TIMESTAMP AS OF '$instant1' distribute by cast(rand() * 2 as int)")( Seq(1, "a1", 10.0, 1000) ) - }) + } } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestTruncateTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestTruncateTable.scala similarity index 98% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestTruncateTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestTruncateTable.scala index 808bfebb802c..411562c35583 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestTruncateTable.scala +++ 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestTruncateTable.scala @@ -16,11 +16,12 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.config.HoodieWriteConfig import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestTruncateTable extends HoodieSparkSqlTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestUpdateTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestUpdateTable.scala similarity index 98% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestUpdateTable.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestUpdateTable.scala index 0c2c34ae6d9e..5d023b8d856c 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestUpdateTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestUpdateTable.scala @@ -15,12 +15,13 @@ * limitations under the License. */ -package org.apache.spark.sql.hudi +package org.apache.spark.sql.hudi.dml import org.apache.hudi.DataSourceWriteOptions.SPARK_SQL_OPTIMIZED_WRITES import org.apache.hudi.HoodieSparkUtils.isSpark2 import org.apache.hudi.common.model.HoodieTableType import org.apache.hudi.common.table.HoodieTableMetaClient +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase import org.junit.jupiter.api.Assertions.assertEquals class TestUpdateTable extends HoodieSparkSqlTestBase { @@ -233,8 +234,8 @@ class TestUpdateTable extends HoodieSparkSqlTestBase { } test("Test ignoring case for Update Table") { - withRecordType()(withTempDir { tmp => - Seq("cow", "mor").foreach {tableType => + withTempDir { tmp => + Seq("cow", "mor").foreach { tableType => val tableName = generateTableName // create table spark.sql( @@ -270,7 +271,7 @@ class TestUpdateTable extends HoodieSparkSqlTestBase { Seq(1, "a1", 40.0, 1000) ) } - }) + } } test("Test decimal type") { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/HoodieSparkProcedureTestBase.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/HoodieSparkProcedureTestBase.scala index cff411051178..ff4f7aa6ab06 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/HoodieSparkProcedureTestBase.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/HoodieSparkProcedureTestBase.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.hudi.procedure import org.apache.spark.sql.Dataset import org.apache.spark.sql.execution.columnar.InMemoryRelation -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class HoodieSparkProcedureTestBase extends HoodieSparkSqlTestBase { override def generateTableName: String = { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCallCommandParser.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCallCommandParser.scala index b5b13f468060..3d07286ca190 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCallCommandParser.scala +++ 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCallCommandParser.scala @@ -21,7 +21,7 @@ import org.apache.hudi.HoodieSparkUtils import org.apache.hudi.common.util.CollectionUtils.createImmutableList import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.plans.logical.{CallCommand, NamedArgument, PositionalArgument} -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase import org.apache.spark.sql.types.{DataType, DataTypes} import java.math.BigDecimal diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCopyToTempViewProcedure.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCopyToTempViewProcedure.scala index 5cb5b68fa045..6f54dfb5094c 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCopyToTempViewProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCopyToTempViewProcedure.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hudi.procedure import org.apache.hudi.HoodieSparkUtils -import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase class TestCopyToTempViewProcedure extends HoodieSparkSqlTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestTTLProcedure.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestTTLProcedure.scala new file mode 100644 index 000000000000..002375ac462e --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestTTLProcedure.scala @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.spark.sql.hudi.procedure + +import org.apache.hudi.SparkDatasetMixin +import org.apache.hudi.client.SparkRDDWriteClient +import org.apache.hudi.client.common.HoodieSparkEngineContext +import org.apache.hudi.common.model.{HoodieRecord, HoodieTableType} +import org.apache.hudi.common.table.HoodieTableConfig +import org.apache.hudi.common.table.timeline.HoodieTimeline +import org.apache.hudi.common.testutils.HoodieTestDataGenerator.{TRIP_EXAMPLE_SCHEMA, getCommitTimeAtUTC} +import org.apache.hudi.common.testutils.{HoodieTestDataGenerator, HoodieTestUtils} +import org.apache.hudi.config.HoodieWriteConfig +import org.apache.spark.api.java.JavaSparkContext + +import java.util.Properties +import scala.collection.JavaConverters._ + +class TestTTLProcedure extends HoodieSparkProcedureTestBase with SparkDatasetMixin { + + test("Test Call run_ttl Procedure by Table") { + withSQLConf("hoodie.partition.ttl.automatic" -> "false") { + withTempDir { tmp => { + val tableName = generateTableName + val basePath = s"${tmp.getCanonicalPath}/$tableName" + initTable(basePath) + + val writeConfig = getConfigBuilder(basePath, tableName, true).build() + val client = getHoodieWriteClient(writeConfig) + val dataGen = new HoodieTestDataGenerator(0xDEED) + val partitionPaths = dataGen.getPartitionPaths() + val partitionPath0 = partitionPaths(0) + val instant0 = getCommitTimeAtUTC(0) + + writeRecordsForPartition(client, dataGen, partitionPath0, instant0) + + val instant1 = getCommitTimeAtUTC(1000) + val partitionPath1 = partitionPaths(1) + writeRecordsForPartition(client, dataGen, partitionPath1, instant1) + + val currentInstant = client.createNewInstantTime() + val partitionPath2 = partitionPaths(2) + writeRecordsForPartition(client, dataGen, partitionPath2, currentInstant) + spark.sql( + s""" + | create table $tableName using hudi + | location '$basePath' + | tblproperties ( + | primaryKey = '_row_key', + | preCombineField = '_row_key', + | type = 'cow' + | ) + |""".stripMargin) + + checkAnswer(s"call run_ttl(table => '$tableName', retain_days => 1)")( + Seq(partitionPath0), + Seq(partitionPath1) + ) + } + } + } + } + + private def writeRecordsForPartition(client: SparkRDDWriteClient[Nothing], + dataGen: HoodieTestDataGenerator, + partition: String, instantTime: String): Unit = { + val records: java.util.List[HoodieRecord[Nothing]] = + dataGen.generateInsertsForPartition(instantTime, 10, partition) + .asInstanceOf[java.util.List[HoodieRecord[Nothing]]] + // Use this JavaRDD to call the insert method + client.startCommitWithTime(instantTime, HoodieTimeline.COMMIT_ACTION) + client.insert(spark.sparkContext.parallelize(records.asScala).toJavaRDD(), instantTime) + } + + private def getHoodieWriteClient(cfg: HoodieWriteConfig): SparkRDDWriteClient[Nothing] = { + val writeClient = new SparkRDDWriteClient[Nothing]( + new HoodieSparkEngineContext(new JavaSparkContext(spark.sparkContext)), cfg + ) + writeClient + } + + private def initTable(basePath: String): Unit = { + val props = new Properties() + props.put("hoodie.datasource.write.partitionpath.field", "partition_path") + props.put("hoodie.datasource.write.keygenerator.class", "org.apache.hudi.keygen.SimpleKeyGenerator") + props.put(HoodieTableConfig.PARTITION_FIELDS.key(), "partition_path") + props.put(HoodieTableConfig.RECORDKEY_FIELDS.key(), "_row_key") + HoodieTestUtils.init(basePath, HoodieTableType.COPY_ON_WRITE, props); + } + + protected def getConfigBuilder(basePath: String, tableName: String, autoCommit: Boolean): 
HoodieWriteConfig.Builder = + HoodieWriteConfig + .newBuilder + .withPath(basePath) + .withSchema(TRIP_EXAMPLE_SCHEMA) + .withAutoCommit(autoCommit) + .withPreCombineField("_row_key") + .forTable(tableName) + +} diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java index 74cb90de0209..8f31cae29bc9 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java @@ -90,7 +90,8 @@ public class HiveSyncConfigHolder { .defaultValue("false") .markAdvanced() .withDocumentation("‘INT64’ with original type TIMESTAMP_MICROS is converted to hive ‘timestamp’ type. " - + "Disabled by default for backward compatibility."); + + "Disabled by default for backward compatibility. \n" + + "NOTE: On Spark entrypoints, this is defaulted to TRUE"); public static final ConfigProperty HIVE_TABLE_PROPERTIES = ConfigProperty .key("hoodie.datasource.hive_sync.table_properties") .noDefaultValue() diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java index 4f0ff2b070b9..f99bd2d0e020 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java @@ -39,7 +39,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Properties; @@ -383,18 +382,7 @@ private List getTablePartitions(String tableName, List writte return syncClient.getAllPartitions(tableName); } - List partitionKeys = config.getSplitStrings(META_SYNC_PARTITION_FIELDS).stream() - .map(String::toLowerCase) - .collect(Collectors.toList()); - - List partitionFields = syncClient.getMetastoreFieldSchemas(tableName) - .stream() - .filter(f -> partitionKeys.contains(f.getName())) - .sorted(Comparator.comparing(f -> partitionKeys.indexOf(f.getName()))) - .collect(Collectors.toList()); - - return syncClient.getPartitionsByFilter(tableName, - syncClient.generatePushDownFilter(writtenPartitions, partitionFields)); + return syncClient.getPartitionsFromList(tableName, writtenPartitions); } /** diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java index 4865a304a301..5a88f03e1630 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java @@ -66,6 +66,7 @@ import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT; import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH; import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME; +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_PARTITION_FIELDS; import static org.apache.hudi.sync.common.util.TableUtils.tableId; /** @@ -217,8 +218,19 @@ public List getAllPartitions(String tableName) { } @Override - public List getPartitionsByFilter(String tableName, String filter) { + public List getPartitionsFromList(String tableName, List partitions) { + String filter = null; try { + List partitionKeys = 
config.getSplitStrings(META_SYNC_PARTITION_FIELDS).stream() + .map(String::toLowerCase) + .collect(Collectors.toList()); + + List partitionFields = this.getMetastoreFieldSchemas(tableName) + .stream() + .filter(f -> partitionKeys.contains(f.getName())) + .collect(Collectors.toList()); + filter = this.generatePushDownFilter(partitions, partitionFields); + return client.listPartitionsByFilter(databaseName, tableName, filter, (short)-1) .stream() .map(p -> new Partition(p.getValues(), p.getSd().getLocation())) diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestCluster.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestCluster.java index 362ad1453c43..e92c7aedb508 100644 --- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestCluster.java +++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestCluster.java @@ -27,6 +27,7 @@ import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.HoodieWriteStat; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.testutils.InProcessTimeGenerator; @@ -203,7 +204,7 @@ private List createTestData(Path partPath, boolean isParquetSch // Create 5 files String fileId = UUID.randomUUID().toString(); Path filePath = new Path(partPath.toString() + "/" + FSUtils - .makeBaseFileName(commitTime, "1-0-1", fileId)); + .makeBaseFileName(commitTime, "1-0-1", fileId, HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension())); generateParquetData(filePath, isParquetSchemaSimple); HoodieWriteStat writeStat = new HoodieWriteStat(); writeStat.setFileId(fileId); diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java index 328e6efda082..c7ce2dd22e1f 100644 --- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java +++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java @@ -36,6 +36,7 @@ import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.model.WriteOperationType; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.log.HoodieLogFormat; import org.apache.hudi.common.table.log.HoodieLogFormat.Writer; @@ -391,7 +392,8 @@ public static void createCOWTableWithSchema(String instantTime, String schemaFil FileCreateUtils.createPartitionMetaFile(basePath, partitionPath); List writeStats = new ArrayList<>(); String fileId = UUID.randomUUID().toString(); - Path filePath = new Path(partPath.toString() + "/" + FSUtils.makeBaseFileName(instantTime, "1-0-1", fileId)); + Path filePath = new Path(partPath.toString() + "/" + + FSUtils.makeBaseFileName(instantTime, "1-0-1", fileId, HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension())); Schema schema = SchemaTestUtil.getSchemaFromResource(HiveTestUtil.class, schemaFileName); generateParquetDataWithSchema(filePath, schema); HoodieWriteStat writeStat = new HoodieWriteStat(); @@ -530,7 +532,8 @@ private static List createTestData(Path partPath, boolean isPar for (int i = 0; i 
< 5; i++) { // Create 5 files String fileId = UUID.randomUUID().toString(); - Path filePath = new Path(partPath.toString() + "/" + FSUtils.makeBaseFileName(instantTime, "1-0-1", fileId)); + Path filePath = new Path(partPath.toString() + "/" + + FSUtils.makeBaseFileName(instantTime, "1-0-1", fileId, HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension())); generateParquetData(filePath, isParquetSchemaSimple); HoodieWriteStat writeStat = new HoodieWriteStat(); writeStat.setFileId(fileId); diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieMetaSyncOperations.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieMetaSyncOperations.java index b1acaf143961..ca0bec3604bd 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieMetaSyncOperations.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieMetaSyncOperations.java @@ -99,11 +99,9 @@ default List getAllPartitions(String tableName) { } /** - * Get the metadata of partitions that belong to the specified table - * @param tableName - * @return + * Get partitions given input list of partitions. */ - default List getPartitionsByFilter(String tableName, String filter) { + default List getPartitionsFromList(String tableName, List partitionList) { return Collections.emptyList(); } diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java index aec0e484e6cd..fc3e31164acc 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java @@ -92,10 +92,7 @@ public HoodieTableMetaClient getMetaClient() { * Going through archive timeline is a costly operation, and it should be avoided unless some start time is given. */ public Set getDroppedPartitionsSince(Option lastCommitTimeSynced, Option lastCommitCompletionTimeSynced) { - HoodieTimeline timeline = lastCommitTimeSynced.isPresent() - ? 
TimelineUtils.getCommitsTimelineAfter(metaClient, lastCommitTimeSynced.get(), lastCommitCompletionTimeSynced) - : metaClient.getActiveTimeline(); - return new HashSet<>(TimelineUtils.getDroppedPartitions(timeline)); + return new HashSet<>(TimelineUtils.getDroppedPartitions(metaClient, lastCommitTimeSynced, lastCommitCompletionTimeSynced)); } @Override diff --git a/hudi-timeline-service/pom.xml b/hudi-timeline-service/pom.xml index 9e6a504626a2..a2b3c67aa830 100644 --- a/hudi-timeline-service/pom.xml +++ b/hudi-timeline-service/pom.xml @@ -62,9 +62,6 @@ src/main/resources - - src/test/resources - diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/RequestHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/RequestHandler.java index c72491341fe4..f17c5624084e 100644 --- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/RequestHandler.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/RequestHandler.java @@ -37,6 +37,7 @@ import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.timeline.service.handlers.BaseFileHandler; import org.apache.hudi.timeline.service.handlers.FileSliceHandler; import org.apache.hudi.timeline.service.handlers.InstantStateHandler; @@ -44,6 +45,7 @@ import org.apache.hudi.timeline.service.handlers.TimelineHandler; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.module.afterburner.AfterburnerModule; import io.javalin.Javalin; @@ -72,6 +74,7 @@ public class RequestHandler { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper().registerModule(new AfterburnerModule()); private static final Logger LOG = LoggerFactory.getLogger(RequestHandler.class); + private static final TypeReference> LIST_TYPE_REFERENCE = new TypeReference>() {}; private final TimelineService.Config timelineServiceConfig; private final FileSystemViewManager viewManager; @@ -444,6 +447,19 @@ private void registerFileSlicesAPI() { writeValueAsString(ctx, success); }, false)); + app.post(RemoteHoodieTableFileSystemView.LOAD_PARTITIONS_URL, new ViewHandler(ctx -> { + metricsRegistry.add("LOAD_PARTITIONS", 1); + String basePath = ctx.queryParamAsClass(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).getOrThrow(e -> new HoodieException("Basepath is invalid")); + try { + List partitionPaths = OBJECT_MAPPER.readValue(ctx.queryParamAsClass(RemoteHoodieTableFileSystemView.PARTITIONS_PARAM, String.class) + .getOrThrow(e -> new HoodieException("Partitions param is invalid")), LIST_TYPE_REFERENCE); + boolean success = sliceHandler.loadPartitions(basePath, partitionPaths); + writeValueAsString(ctx, success); + } catch (IOException e) { + throw new HoodieIOException("Failed to parse request parameter", e); + } + }, false)); + app.post(RemoteHoodieTableFileSystemView.LOAD_ALL_PARTITIONS_URL, new ViewHandler(ctx -> { metricsRegistry.add("LOAD_ALL_PARTITIONS", 1); boolean success = sliceHandler diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java index f596ee79f9c5..c2e5dc265677 100644 --- 
a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java @@ -401,20 +401,20 @@ public static FileSystemViewManager buildFileSystemViewManager(Config config, Se case MEMORY: FileSystemViewStorageConfig.Builder inMemConfBuilder = FileSystemViewStorageConfig.newBuilder(); inMemConfBuilder.withStorageType(FileSystemViewStorageType.MEMORY); - return FileSystemViewManager.createViewManager(localEngineContext, metadataConfig, inMemConfBuilder.build(), commonConfig); + return FileSystemViewManager.createViewManager(localEngineContext, inMemConfBuilder.build(), commonConfig); case SPILLABLE_DISK: { FileSystemViewStorageConfig.Builder spillableConfBuilder = FileSystemViewStorageConfig.newBuilder(); spillableConfBuilder.withStorageType(FileSystemViewStorageType.SPILLABLE_DISK) .withBaseStoreDir(config.baseStorePathForFileGroups) .withMaxMemoryForView(config.maxViewMemPerTableInMB * 1024 * 1024L) .withMemFractionForPendingCompaction(config.memFractionForCompactionPerTable); - return FileSystemViewManager.createViewManager(localEngineContext, metadataConfig, spillableConfBuilder.build(), commonConfig); + return FileSystemViewManager.createViewManager(localEngineContext, spillableConfBuilder.build(), commonConfig); } case EMBEDDED_KV_STORE: { FileSystemViewStorageConfig.Builder rocksDBConfBuilder = FileSystemViewStorageConfig.newBuilder(); rocksDBConfBuilder.withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE) .withRocksDBPath(config.rocksDBPath); - return FileSystemViewManager.createViewManager(localEngineContext, metadataConfig, rocksDBConfBuilder.build(), commonConfig); + return FileSystemViewManager.createViewManager(localEngineContext, rocksDBConfBuilder.build(), commonConfig); } default: throw new IllegalArgumentException("Invalid view manager storage type :" + config.viewStorageType); diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java index 4a4226724f8b..391145c5cf8b 100644 --- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java @@ -163,4 +163,9 @@ public boolean loadAllPartitions(String basePath) { viewManager.getFileSystemView(basePath).loadAllPartitions(); return true; } + + public boolean loadPartitions(String basePath, List partitionPaths) { + viewManager.getFileSystemView(basePath).loadPartitions(partitionPaths); + return true; + } } diff --git a/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/service/functional/TestRemoteHoodieTableFileSystemView.java b/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/service/functional/TestRemoteHoodieTableFileSystemView.java index c9a103e5264f..834697852822 100644 --- a/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/service/functional/TestRemoteHoodieTableFileSystemView.java +++ b/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/service/functional/TestRemoteHoodieTableFileSystemView.java @@ -19,7 +19,6 @@ package org.apache.hudi.timeline.service.functional; import org.apache.hudi.common.config.HoodieCommonConfig; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.engine.HoodieLocalEngineContext; import 
org.apache.hudi.common.model.HoodieFileGroup; import org.apache.hudi.common.table.timeline.HoodieTimeline; @@ -67,14 +66,13 @@ public class TestRemoteHoodieTableFileSystemView extends TestHoodieTableFileSyst protected SyncableFileSystemView getFileSystemView(HoodieTimeline timeline) { FileSystemViewStorageConfig sConf = FileSystemViewStorageConfig.newBuilder().withStorageType(FileSystemViewStorageType.SPILLABLE_DISK).build(); - HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().build(); HoodieCommonConfig commonConfig = HoodieCommonConfig.newBuilder().build(); HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf()); try { server = new TimelineService(localEngineContext, new Configuration(), TimelineService.Config.builder().serverPort(0).build(), FileSystem.get(new Configuration()), - FileSystemViewManager.createViewManager(localEngineContext, metadataConfig, sConf, commonConfig)); + FileSystemViewManager.createViewManager(localEngineContext, sConf, commonConfig)); server.startService(); } catch (Exception ex) { throw new RuntimeException(ex); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieIndexer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieIndexer.java index 5c626a53ae7e..03b6d934b5f8 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieIndexer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieIndexer.java @@ -43,6 +43,7 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Locale; import java.util.Set; @@ -240,7 +241,8 @@ private Option doSchedule(SparkRDDWriteClient clien if (indexExists(partitionTypes)) { return Option.empty(); } - Option indexingInstant = client.scheduleIndexing(partitionTypes); + + Option indexingInstant = client.scheduleIndexing(partitionTypes, Collections.emptyList()); if (!indexingInstant.isPresent()) { LOG.error("Scheduling of index action did not return any instant."); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java index 4cebbf0b3cc1..06dbde8b108c 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java @@ -103,6 +103,7 @@ import static org.apache.hudi.common.model.HoodieRecord.PARTITION_PATH_METADATA_FIELD; import static org.apache.hudi.common.model.HoodieRecord.RECORD_KEY_METADATA_FIELD; import static org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType.INSTANT_TIME; +import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN; import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.hadoop.fs.CachingPath.getPathWithoutSchemeAndAuthority; @@ -180,6 +181,8 @@ public class HoodieMetadataTableValidator implements Serializable { private final String taskLabels; + private List throwables = new ArrayList<>(); + public HoodieMetadataTableValidator(JavaSparkContext jsc, Config cfg) { this.jsc = jsc; this.cfg = cfg; @@ -197,8 +200,30 @@ public HoodieMetadataTableValidator(JavaSparkContext jsc, Config cfg) { this.taskLabels = generateValidationTaskLabels(); } + 
/** + * Returns list of Throwable which were encountered during validation. This method is useful + * when ignoreFailed parameter is set to true. + */ + public List getThrowables() { + return throwables; + } + + /** + * Returns true if there is a validation failure encountered during validation. + * This method is useful when ignoreFailed parameter is set to true. + */ + public boolean hasValidationFailure() { + for (Throwable throwable : throwables) { + if (throwable instanceof HoodieValidationException) { + return true; + } + } + return false; + } + private String generateValidationTaskLabels() { List labelList = new ArrayList<>(); + labelList.add(cfg.basePath); if (cfg.validateLatestBaseFiles) { labelList.add("validate-latest-base-files"); } @@ -411,10 +436,10 @@ public boolean run() { try { LOG.info(cfg.toString()); if (cfg.continuous) { - LOG.info(" ****** do hoodie metadata table validation in CONTINUOUS mode ******"); + LOG.info(" ****** do hoodie metadata table validation in CONTINUOUS mode - {} ******", taskLabels); doHoodieMetadataTableValidationContinuous(); } else { - LOG.info(" ****** do hoodie metadata table validation once ******"); + LOG.info(" ****** do hoodie metadata table validation once - {} ******", taskLabels); result = doHoodieMetadataTableValidationOnce(); } } catch (Exception e) { @@ -432,10 +457,11 @@ private boolean doHoodieMetadataTableValidationOnce() { try { return doMetadataTableValidation(); } catch (Throwable e) { - LOG.error("Metadata table validation failed to HoodieValidationException", e); + LOG.error("Metadata table validation failed to HoodieValidationException {} {}", taskLabels, e); if (!cfg.ignoreFailed) { throw e; } + throwables.add(e); return false; } } @@ -491,7 +517,7 @@ public boolean doMetadataTableValidation() { List allPartitions = validatePartitions(engineContext, basePath); if (allPartitions.isEmpty()) { - LOG.warn("The result of getting all partitions is null or empty, skip current validation."); + LOG.warn("The result of getting all partitions is null or empty, skip current validation. 
{}", taskLabels); return true; } @@ -500,12 +526,12 @@ public boolean doMetadataTableValidation() { HoodieMetadataValidationContext fsBasedContext = new HoodieMetadataValidationContext(engineContext, props, metaClient, false)) { Set finalBaseFilesForCleaning = baseFilesForCleaning; - List> result = new ArrayList<>( + List> result = new ArrayList<>( engineContext.parallelize(allPartitions, allPartitions.size()).map(partitionPath -> { try { validateFilesInPartition(metadataTableBasedContext, fsBasedContext, partitionPath, finalBaseFilesForCleaning); LOG.info(String.format("Metadata table validation succeeded for partition %s (partition %s)", partitionPath, taskLabels)); - return Pair.of(true, ""); + return Pair.of(true, null); } catch (HoodieValidationException e) { LOG.error( String.format("Metadata table validation failed for partition %s due to HoodieValidationException (partition %s)", @@ -513,26 +539,29 @@ public boolean doMetadataTableValidation() { if (!cfg.ignoreFailed) { throw e; } - return Pair.of(false, e.getMessage() + " for partition: " + partitionPath); + return Pair.of(false, new HoodieValidationException(e.getMessage() + " for partition: " + partitionPath, e)); } }).collectAsList()); try { validateRecordIndex(engineContext, metaClient, metadataTableBasedContext.getTableMetadata()); - result.add(Pair.of(true, "")); + result.add(Pair.of(true, null)); } catch (HoodieValidationException e) { LOG.error( - "Metadata table validation failed due to HoodieValidationException in record index validation", e); + "Metadata table validation failed due to HoodieValidationException in record index validation for table: {} ", cfg.basePath, e); if (!cfg.ignoreFailed) { throw e; } - result.add(Pair.of(false, e.getMessage())); + result.add(Pair.of(false, e)); } - for (Pair res : result) { + for (Pair res : result) { finalResult &= res.getKey(); if (res.getKey().equals(false)) { LOG.error("Metadata Validation failed for table: " + cfg.basePath + " with error: " + res.getValue()); + if (res.getRight() != null) { + throwables.add(res.getRight()); + } } } @@ -563,19 +592,19 @@ private boolean checkMetadataTableIsAvailable() { int finishedInstants = mdtMetaClient.getCommitsTimeline().filterCompletedInstants().countInstants(); if (finishedInstants == 0) { if (metaClient.getCommitsTimeline().filterCompletedInstants().countInstants() == 0) { - LOG.info("There is no completed commit in both metadata table and corresponding data table."); + LOG.info("There is no completed commit in both metadata table and corresponding data table: {}", taskLabels); return false; } else { - throw new HoodieValidationException("There is no completed instant for metadata table."); + throw new HoodieValidationException("There is no completed instant for metadata table: " + cfg.basePath); } } return true; } catch (TableNotFoundException tbe) { // Suppress the TableNotFound exception if Metadata table is not available to read for now - LOG.warn("Metadata table is not found. Skip current validation."); + LOG.warn("Metadata table is not found for table: {}. 
Skip current validation.", cfg.basePath); return false; } catch (Exception ex) { - LOG.warn("Metadata table is not available to read for now, ", ex); + LOG.warn("Metadata table is not available to read for now for table: {}, ", cfg.basePath, ex); return false; } } @@ -622,9 +651,43 @@ private List validatePartitions(HoodieSparkEngineContext engineContext, if (allPartitionPathsFromFS.size() != allPartitionPathsMeta.size() || !allPartitionPathsFromFS.equals(allPartitionPathsMeta)) { - String message = "Compare Partitions Failed! " + "AllPartitionPathsFromFS : " + allPartitionPathsFromFS + " and allPartitionPathsMeta : " + allPartitionPathsMeta; - LOG.error(message); - throw new HoodieValidationException(message); + List additionalFromFS = new ArrayList<>(allPartitionPathsFromFS); + additionalFromFS.remove(allPartitionPathsMeta); + List additionalFromMDT = new ArrayList<>(allPartitionPathsMeta); + additionalFromMDT.remove(allPartitionPathsFromFS); + boolean misMatch = true; + List actualAdditionalPartitionsInMDT = new ArrayList<>(additionalFromMDT); + if (additionalFromFS.isEmpty() && !additionalFromMDT.isEmpty()) { + // there is a chance that when we polled MDT there could have been a new completed commit which was not complete when we polled FS based + // listing. let's rule that out. + additionalFromMDT.forEach(partitionFromDMT -> { + + HoodiePartitionMetadata hoodiePartitionMetadata = + new HoodiePartitionMetadata(metaClient.getFs(), FSUtils.getPartitionPath(basePath, partitionFromDMT)); + Option partitionCreationTimeOpt = hoodiePartitionMetadata.readPartitionCreatedCommitTime(); + // if creation time is greater than last completed instant in active timeline, we can ignore the additional partition from MDT. + if (partitionCreationTimeOpt.isPresent() && !completedTimeline.containsInstant(partitionCreationTimeOpt.get())) { + Option lastInstant = completedTimeline.lastInstant(); + if (lastInstant.isPresent() + && HoodieTimeline.compareTimestamps(partitionCreationTimeOpt.get(), GREATER_THAN, lastInstant.get().getTimestamp())) { + LOG.warn("Ignoring additional partition " + partitionFromDMT + ", as it was deduced to be part of a " + + "latest completed commit which was inflighht when FS based listing was polled."); + actualAdditionalPartitionsInMDT.remove(partitionFromDMT); + } + } + }); + // if there is no additional partitions from FS listing and only additional partitions from MDT based listing is due to a new commit, we are good + if (actualAdditionalPartitionsInMDT.isEmpty()) { + misMatch = false; + } + } + if (misMatch) { + String message = "Compare Partitions Failed! 
" + " Additional partitions from FS, but missing from MDT : \"" + additionalFromFS + + "\" and additional partitions from MDT, but missing from FS listing : \"" + actualAdditionalPartitionsInMDT + + "\".\n All partitions from FS listing " + allPartitionPathsFromFS; + LOG.error(message); + throw new HoodieValidationException(message); + } } return allPartitionPathsMeta; @@ -835,13 +898,13 @@ private void validateRecordIndexCount(HoodieSparkEngineContext sparkEngineContex if (countKeyFromTable != countKeyFromRecordIndex) { String message = String.format("Validation of record index count failed: " - + "%s entries from record index metadata, %s keys from the data table.", + + "%s entries from record index metadata, %s keys from the data table: " + cfg.basePath, countKeyFromRecordIndex, countKeyFromTable); LOG.error(message); throw new HoodieValidationException(message); } else { LOG.info(String.format( - "Validation of record index count succeeded: %s entries.", countKeyFromRecordIndex)); + "Validation of record index count succeeded: %s entries. Table: %s", countKeyFromRecordIndex, cfg.basePath)); } } @@ -950,13 +1013,13 @@ private void validateRecordIndexContent(HoodieSparkEngineContext sparkEngineCont if (diffCount > 0) { String message = String.format("Validation of record index content failed: " + "%s keys (total %s) from the data table have wrong location in record index " - + "metadata. Sample mismatches: %s", - diffCount, countKey, String.join(";", result.getRight())); + + "metadata. Table: %s Sample mismatches: %s", + diffCount, countKey, cfg.basePath, String.join(";", result.getRight())); LOG.error(message); throw new HoodieValidationException(message); } else { LOG.info(String.format( - "Validation of record index content succeeded: %s entries.", countKey)); + "Validation of record index content succeeded: %s entries. Table: %s", countKey, cfg.basePath)); } } @@ -995,13 +1058,13 @@ private void validate( List infoListFromMetadataTable, List infoListFromFS, String partitionPath, String label) { if (infoListFromMetadataTable.size() != infoListFromFS.size() || !infoListFromMetadataTable.equals(infoListFromFS)) { - String message = String.format("Validation of %s for partition %s failed." + String message = String.format("Validation of %s for partition %s failed for table: %s " + "\n%s from metadata: %s\n%s from file system and base files: %s", - label, partitionPath, label, infoListFromMetadataTable, label, infoListFromFS); + label, partitionPath, cfg.basePath, label, infoListFromMetadataTable, label, infoListFromFS); LOG.error(message); throw new HoodieValidationException(message); } else { - LOG.info(String.format("Validation of %s succeeded for partition %s", label, partitionPath)); + LOG.info(String.format("Validation of %s succeeded for partition %s for table: %s", label, partitionPath, cfg.basePath)); } } @@ -1035,13 +1098,13 @@ private void validateFileSlices( } if (mismatch) { - String message = String.format("Validation of %s for partition %s failed." 
+ String message = String.format("Validation of %s for partition %s failed for table: %s " + "\n%s from metadata: %s\n%s from file system and base files: %s", - label, partitionPath, label, fileSliceListFromMetadataTable, label, fileSliceListFromFS); + label, partitionPath, cfg.basePath, label, fileSliceListFromMetadataTable, label, fileSliceListFromFS); LOG.error(message); throw new HoodieValidationException(message); } else { - LOG.info(String.format("Validation of %s succeeded for partition %s", label, partitionPath)); + LOG.info(String.format("Validation of %s succeeded for partition %s for table: %s ", label, partitionPath, cfg.basePath)); } } @@ -1217,6 +1280,7 @@ protected Pair startService() { if (!cfg.ignoreFailed) { throw e; } + throwables.add(e); } catch (InterruptedException e) { // ignore InterruptedException here. } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieTTLJob.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieTTLJob.java new file mode 100644 index 000000000000..3f11621d9d10 --- /dev/null +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieTTLJob.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.utilities; + +import org.apache.hudi.client.SparkRDDWriteClient; +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieCleanConfig; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieException; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import org.apache.hadoop.fs.Path; +import org.apache.spark.api.java.JavaSparkContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +/** + * Utility class to run TTL management. 
+ */ +public class HoodieTTLJob { + + private static final Logger LOG = LoggerFactory.getLogger(HoodieTTLJob.class); + private final Config cfg; + private final TypedProperties props; + private final JavaSparkContext jsc; + private HoodieTableMetaClient metaClient; + + public HoodieTTLJob(JavaSparkContext jsc, Config cfg) { + this(jsc, cfg, UtilHelpers.buildProperties(jsc.hadoopConfiguration(), cfg.propsFilePath, cfg.configs), + UtilHelpers.createMetaClient(jsc, cfg.basePath, true)); + } + + public HoodieTTLJob(JavaSparkContext jsc, Config cfg, TypedProperties props, HoodieTableMetaClient metaClient) { + this.cfg = cfg; + this.jsc = jsc; + this.props = props; + this.metaClient = metaClient; + LOG.info("Creating TTL job with configs : " + props.toString()); + // Disable async cleaning, will trigger synchronous cleaning manually. + this.props.put(HoodieCleanConfig.ASYNC_CLEAN.key(), false); + if (this.metaClient.getTableConfig().isMetadataTableAvailable()) { + // add default lock config options if MDT is enabled. + UtilHelpers.addLockOptions(cfg.basePath, this.props); + } + } + + public void run() { + // need to do commit in SparkDeletePartitionCommitActionExecutor#execute + this.props.put(HoodieWriteConfig.AUTO_COMMIT_ENABLE.key(), "true"); + try (SparkRDDWriteClient client = + UtilHelpers.createHoodieClient(jsc, cfg.basePath, "", cfg.parallelism, Option.empty(), props)) { + client.managePartitionTTL(client.createNewInstantTime()); + } + } + + private HoodieWriteConfig getHoodieClientConfig() { + return HoodieWriteConfig.newBuilder().combineInput(true, true).withPath(cfg.basePath).withAutoCommit(true) + .withProps(props).build(); + } + + public static class Config implements Serializable { + @Parameter(names = {"--base-path", "-sp"}, description = "Base path for the table", required = true) + public String basePath = null; + @Parameter(names = {"--parallelism", "-pl"}, description = "Parallelism for hoodie insert/upsert/delete", required = false) + public int parallelism = 1500; + @Parameter(names = {"--spark-master", "-ms"}, description = "Spark master") + public String sparkMaster = null; + @Parameter(names = {"--spark-memory", "-sm"}, description = "spark memory to use", required = false) + public String sparkMemory = null; + + @Parameter(names = {"--help", "-h"}, help = true) + public Boolean help = false; + + + @Parameter(names = {"--props"}, description = "path to properties file on localfs or dfs, with configurations for " + + "hoodie client for clustering") + public String propsFilePath = null; + + @Parameter(names = {"--hoodie-conf"}, description = "Any configuration that can be set in the properties file " + + "(using the CLI parameter \"--props\") can also be passed command line using this parameter. 
This can be repeated", + splitter = IdentitySplitter.class) + public List configs = new ArrayList<>(); + } + + public static void main(String[] args) { + final HoodieTTLJob.Config cfg = new HoodieTTLJob.Config(); + JCommander cmd = new JCommander(cfg, null, args); + if (cfg.help || args.length == 0) { + cmd.usage(); + throw new HoodieException("Failed to run ttl for " + cfg.basePath); + } + + String dirName = new Path(cfg.basePath).getName(); + JavaSparkContext jssc = UtilHelpers.buildSparkContext("hoodie-ttl-job-" + dirName, cfg.sparkMaster); + + try { + new HoodieTTLJob(jssc, cfg).run(); + } catch (Throwable throwable) { + throw new HoodieException("Failed to run ttl for " + cfg.basePath, throwable); + } finally { + jssc.stop(); + } + + LOG.info("Hoodie TTL job ran successfully"); + } + +} diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java index e2c23b151532..fdcb806b434d 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java @@ -102,7 +102,7 @@ public String sendRequest(String driverHost, int port) { try (CloseableHttpClient client = HttpClientBuilder.create().build()) { System.out.println("Sleeping for " + cfg.delaySecs + " secs "); - Thread.sleep(cfg.delaySecs * 1000); + Thread.sleep(cfg.delaySecs * 1000L); System.out.println("Woke up after sleeping for " + cfg.delaySecs + " secs "); HttpGet request = new HttpGet(url); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java index 18e92a8463ce..9dcecfa0bcb3 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java @@ -66,6 +66,7 @@ import org.apache.hudi.utilities.sources.Source; import org.apache.hudi.utilities.sources.processor.ChainedJsonKafkaSourcePostProcessor; import org.apache.hudi.utilities.sources.processor.JsonKafkaSourcePostProcessor; +import org.apache.hudi.utilities.streamer.StreamContext; import org.apache.hudi.utilities.transform.ChainedTransformer; import org.apache.hudi.utilities.transform.ErrorTableAwareChainedTransformer; import org.apache.hudi.utilities.transform.Transformer; @@ -156,6 +157,24 @@ public static Source createSource(String sourceClass, TypedProperties cfg, JavaS } } + public static Source createSource(String sourceClass, TypedProperties cfg, JavaSparkContext jssc, + SparkSession sparkSession, HoodieIngestionMetrics metrics, StreamContext streamContext) + throws IOException { + try { + try { + return (Source) ReflectionUtils.loadClass(sourceClass, + new Class[] {TypedProperties.class, JavaSparkContext.class, + SparkSession.class, + HoodieIngestionMetrics.class, StreamContext.class}, + cfg, jssc, sparkSession, metrics, streamContext); + } catch (HoodieException e) { + return createSource(sourceClass, cfg, jssc, sparkSession, streamContext.getSchemaProvider(), metrics); + } + } catch (Throwable e) { + throw new IOException("Could not load source class " + sourceClass, e); + } + } + public static JsonKafkaSourcePostProcessor createJsonKafkaSourcePostProcessor(String postProcessorClassNames, TypedProperties props) throws IOException { if (StringUtils.isNullOrEmpty(postProcessorClassNames)) { return null; @@ -608,6 +627,7 @@ public static 
int retry(int maxRetryCount, CheckedSupplier<Integer> supplier, St
       } while (ret != 0 && maxRetryCount-- > 0);
     } catch (Throwable t) {
       LOG.error(errorMessage, t);
+      throw new RuntimeException("Failed in retry", t);
     }
     return ret;
   }
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/CloudSourceConfig.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/CloudSourceConfig.java
index 54be9cabef92..e3bdca1a3957 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/CloudSourceConfig.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/CloudSourceConfig.java
@@ -85,14 +85,14 @@ public class CloudSourceConfig extends HoodieConfig {
       .noDefaultValue()
       .withAlternatives(DELTA_STREAMER_CONFIG_PREFIX + "source.cloud.data.select.relpath.prefix")
       .markAdvanced()
-      .withDocumentation("Only selects objects in the bucket whose relative path matches this prefix");
+      .withDocumentation("Only selects objects in the bucket whose relative path starts with this prefix");
   public static final ConfigProperty<String> IGNORE_RELATIVE_PATH_PREFIX = ConfigProperty
       .key(STREAMER_CONFIG_PREFIX + "source.cloud.data.ignore.relpath.prefix")
       .noDefaultValue()
       .withAlternatives(DELTA_STREAMER_CONFIG_PREFIX + "source.cloud.data.ignore.relpath.prefix")
       .markAdvanced()
-      .withDocumentation("Ignore objects in the bucket whose relative path matches this prefix");
+      .withDocumentation("Ignore objects in the bucket whose relative path starts with this prefix");
   public static final ConfigProperty<String> IGNORE_RELATIVE_PATH_SUBSTR = ConfigProperty
       .key(STREAMER_CONFIG_PREFIX + "source.cloud.data.ignore.relpath.substring")
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/HoodieStreamerConfig.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/HoodieStreamerConfig.java
index b3b64cff905b..e50e7fa06124 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/HoodieStreamerConfig.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/HoodieStreamerConfig.java
@@ -132,4 +132,11 @@ public class HoodieStreamerConfig extends HoodieConfig {
       .sinceVersion("0.14.0")
       .withDocumentation("Number of records to sample from the first write. To improve the estimation's accuracy, "
           + "for smaller or more compressable record size, set the sample size bigger. 
For bigger or less compressable record size, set smaller."); + + public static final ConfigProperty ROW_THROW_EXPLICIT_EXCEPTIONS = ConfigProperty + .key(STREAMER_CONFIG_PREFIX + "row.throw.explicit.exceptions") + .defaultValue(false) + .markAdvanced() + .sinceVersion("0.15.0") + .withDocumentation("When enabled, the dataframe generated from reading source data is wrapped with an exception handler to explicitly surface exceptions."); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/S3EventsHoodieIncrSourceConfig.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/S3EventsHoodieIncrSourceConfig.java index 3db572b1f84f..23ecb96d7956 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/S3EventsHoodieIncrSourceConfig.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/S3EventsHoodieIncrSourceConfig.java @@ -47,6 +47,8 @@ public class S3EventsHoodieIncrSourceConfig extends HoodieConfig { .markAdvanced() .withDocumentation("Control whether we do existence check for files before consuming them"); + @Deprecated + // Use {@link CloudSourceConfig.SELECT_RELATIVE_PATH_PREFIX} public static final ConfigProperty S3_KEY_PREFIX = ConfigProperty .key(STREAMER_CONFIG_PREFIX + "source.s3incr.key.prefix") .noDefaultValue() @@ -61,6 +63,8 @@ public class S3EventsHoodieIncrSourceConfig extends HoodieConfig { .markAdvanced() .withDocumentation("The file system prefix."); + @Deprecated + // Use {@link CloudSourceConfig.IGNORE_RELATIVE_PATH_PREFIX} public static final ConfigProperty S3_IGNORE_KEY_PREFIX = ConfigProperty .key(STREAMER_CONFIG_PREFIX + "source.s3incr.ignore.key.prefix") .noDefaultValue() @@ -68,6 +72,8 @@ public class S3EventsHoodieIncrSourceConfig extends HoodieConfig { .markAdvanced() .withDocumentation("Control whether to ignore the s3 objects starting with this prefix"); + @Deprecated + // Use {@link CloudSourceConfig.IGNORE_RELATIVE_PATH_SUBSTR} public static final ConfigProperty S3_IGNORE_KEY_SUBSTRING = ConfigProperty .key(STREAMER_CONFIG_PREFIX + "source.s3incr.ignore.key.substring") .noDefaultValue() diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java index c794db32510e..4002d1579bb7 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java @@ -22,7 +22,9 @@ import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.util.Option; import org.apache.hudi.utilities.schema.SchemaProvider; +import org.apache.hudi.utilities.streamer.DefaultStreamContext; import org.apache.hudi.utilities.streamer.HoodieStreamer; import org.apache.hudi.utilities.streamer.StreamSync; @@ -49,6 +51,6 @@ public DeltaSync(HoodieStreamer.Config cfg, SparkSession sparkSession, SchemaPro public DeltaSync(HoodieDeltaStreamer.Config cfg, SparkSession sparkSession, SchemaProvider schemaProvider, TypedProperties props, HoodieSparkEngineContext hoodieSparkContext, FileSystem fs, Configuration conf, Function onInitializingHoodieWriteClient) throws IOException { - super(cfg, sparkSession, schemaProvider, props, hoodieSparkContext, fs, conf, onInitializingHoodieWriteClient); + super(cfg, sparkSession, props, hoodieSparkContext, fs, conf, 
onInitializingHoodieWriteClient, new DefaultStreamContext(schemaProvider, Option.empty())); } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/KafkaOffsetPostProcessor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/KafkaOffsetPostProcessor.java index 500bb0c7f99f..294838a435fa 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/KafkaOffsetPostProcessor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/KafkaOffsetPostProcessor.java @@ -18,18 +18,18 @@ package org.apache.hudi.utilities.schema; -import org.apache.avro.JsonProperties; import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.internal.schema.HoodieSchemaException; import org.apache.hudi.utilities.config.HoodieStreamerConfig; +import org.apache.avro.JsonProperties; import org.apache.avro.Schema; import org.apache.spark.api.java.JavaSparkContext; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.util.Arrays; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; import static org.apache.hudi.avro.AvroSchemaUtils.createNullableSchema; @@ -51,8 +51,6 @@ public static boolean shouldAddOffsets(TypedProperties props) { } } - private static final Logger LOG = LoggerFactory.getLogger(KafkaOffsetPostProcessor.class); - public static final String KAFKA_SOURCE_OFFSET_COLUMN = "_hoodie_kafka_source_offset"; public static final String KAFKA_SOURCE_PARTITION_COLUMN = "_hoodie_kafka_source_partition"; public static final String KAFKA_SOURCE_TIMESTAMP_COLUMN = "_hoodie_kafka_source_timestamp"; @@ -65,16 +63,29 @@ public KafkaOffsetPostProcessor(TypedProperties props, JavaSparkContext jssc) { @Override public Schema processSchema(Schema schema) { // this method adds kafka offset fields namely source offset, partition, timestamp and kafka message key to the schema of the batch. + List fieldList = schema.getFields(); + Set fieldNames = fieldList.stream().map(Schema.Field::name).collect(Collectors.toSet()); + // if the source schema already contains the kafka offset fields, then return the schema as is. 
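+    // This check keeps the post processor idempotent: when a schema provider already exposes
+    // KAFKA_SOURCE_OFFSET_COLUMN, KAFKA_SOURCE_PARTITION_COLUMN, KAFKA_SOURCE_TIMESTAMP_COLUMN and
+    // KAFKA_SOURCE_KEY_COLUMN (for example, a schema derived from a table written by a previous run),
+    // re-applying the processor leaves the schema unchanged instead of attempting to append the fields again.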
+ if (fieldNames.containsAll(Arrays.asList(KAFKA_SOURCE_OFFSET_COLUMN, KAFKA_SOURCE_PARTITION_COLUMN, KAFKA_SOURCE_TIMESTAMP_COLUMN, KAFKA_SOURCE_KEY_COLUMN))) { + return schema; + } try { - List fieldList = schema.getFields(); List newFieldList = fieldList.stream() .map(f -> new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultVal())).collect(Collectors.toList()); - newFieldList.add(new Schema.Field(KAFKA_SOURCE_OFFSET_COLUMN, Schema.create(Schema.Type.LONG), "offset column", 0)); - newFieldList.add(new Schema.Field(KAFKA_SOURCE_PARTITION_COLUMN, Schema.create(Schema.Type.INT), "partition column", 0)); - newFieldList.add(new Schema.Field(KAFKA_SOURCE_TIMESTAMP_COLUMN, Schema.create(Schema.Type.LONG), "timestamp column", 0)); - newFieldList.add(new Schema.Field(KAFKA_SOURCE_KEY_COLUMN, createNullableSchema(Schema.Type.STRING), "kafka key column", JsonProperties.NULL_VALUE)); - Schema newSchema = Schema.createRecord(schema.getName() + "_processed", schema.getDoc(), schema.getNamespace(), false, newFieldList); - return newSchema; + // handle case where source schema provider may have already set 1 or more of these fields + if (!fieldNames.contains(KAFKA_SOURCE_OFFSET_COLUMN)) { + newFieldList.add(new Schema.Field(KAFKA_SOURCE_OFFSET_COLUMN, Schema.create(Schema.Type.LONG), "offset column", 0)); + } + if (!fieldNames.contains(KAFKA_SOURCE_PARTITION_COLUMN)) { + newFieldList.add(new Schema.Field(KAFKA_SOURCE_PARTITION_COLUMN, Schema.create(Schema.Type.INT), "partition column", 0)); + } + if (!fieldNames.contains(KAFKA_SOURCE_TIMESTAMP_COLUMN)) { + newFieldList.add(new Schema.Field(KAFKA_SOURCE_TIMESTAMP_COLUMN, Schema.create(Schema.Type.LONG), "timestamp column", 0)); + } + if (!fieldNames.contains(KAFKA_SOURCE_KEY_COLUMN)) { + newFieldList.add(new Schema.Field(KAFKA_SOURCE_KEY_COLUMN, createNullableSchema(Schema.Type.STRING), "kafka key column", JsonProperties.NULL_VALUE)); + } + return Schema.createRecord(schema.getName() + "_processed", schema.getDoc(), schema.getNamespace(), false, newFieldList); } catch (Exception e) { throw new HoodieSchemaException("Kafka offset post processor failed with schema: " + schema, e); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java index 2bf92280faf5..36c83d630300 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java @@ -27,6 +27,8 @@ import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.AvroConvertor; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen; +import org.apache.hudi.utilities.streamer.DefaultStreamContext; +import org.apache.hudi.utilities.streamer.StreamContext; import org.apache.avro.generic.GenericRecord; import org.apache.kafka.clients.consumer.ConsumerRecord; @@ -69,10 +71,13 @@ public class AvroKafkaSource extends KafkaSource { public AvroKafkaSource(TypedProperties props, JavaSparkContext sparkContext, SparkSession sparkSession, SchemaProvider schemaProvider, HoodieIngestionMetrics metrics) { - super(props, sparkContext, sparkSession, - UtilHelpers.getSchemaProviderForKafkaSource(schemaProvider, props, sparkContext), - SourceType.AVRO, metrics); - this.originalSchemaProvider = schemaProvider; + this(props, sparkContext, sparkSession, metrics, new DefaultStreamContext(schemaProvider, Option.empty())); + } + + 
public AvroKafkaSource(TypedProperties properties, JavaSparkContext sparkContext, SparkSession sparkSession, HoodieIngestionMetrics metrics, StreamContext streamContext) { + super(properties, sparkContext, sparkSession, SourceType.AVRO, metrics, + new DefaultStreamContext(UtilHelpers.getSchemaProviderForKafkaSource(streamContext.getSchemaProvider(), properties, sparkContext), streamContext.getSourceProfileSupplier())); + this.originalSchemaProvider = streamContext.getSchemaProvider(); props.put(NATIVE_KAFKA_KEY_DESERIALIZER_PROP, StringDeserializer.class.getName()); deserializerClassName = getStringWithAltKeys(props, KAFKA_AVRO_VALUE_DESERIALIZER_CLASS, true); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/GcsEventsHoodieIncrSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/GcsEventsHoodieIncrSource.java index a06130d39728..079507429093 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/GcsEventsHoodieIncrSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/GcsEventsHoodieIncrSource.java @@ -27,6 +27,7 @@ import org.apache.hudi.utilities.sources.helpers.CloudDataFetcher; import org.apache.hudi.utilities.sources.helpers.CloudObjectIncrCheckpoint; import org.apache.hudi.utilities.sources.helpers.CloudObjectMetadata; +import org.apache.hudi.utilities.sources.helpers.CloudObjectsSelectorCommon; import org.apache.hudi.utilities.sources.helpers.IncrSourceHelper; import org.apache.hudi.utilities.sources.helpers.IncrSourceHelper.MissingCheckpointStrategy; import org.apache.hudi.utilities.sources.helpers.QueryInfo; @@ -48,11 +49,9 @@ import static org.apache.hudi.common.util.ConfigUtils.getIntWithAltKeys; import static org.apache.hudi.common.util.ConfigUtils.getStringWithAltKeys; import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty; -import static org.apache.hudi.utilities.config.CloudSourceConfig.DATAFILE_FORMAT; import static org.apache.hudi.utilities.config.CloudSourceConfig.ENABLE_EXISTS_CHECK; import static org.apache.hudi.utilities.config.HoodieIncrSourceConfig.HOODIE_SRC_BASE_PATH; import static org.apache.hudi.utilities.config.HoodieIncrSourceConfig.NUM_INSTANTS_PER_FETCH; -import static org.apache.hudi.utilities.config.HoodieIncrSourceConfig.SOURCE_FILE_FORMAT; import static org.apache.hudi.utilities.sources.helpers.IncrSourceHelper.generateQueryInfo; import static org.apache.hudi.utilities.sources.helpers.IncrSourceHelper.getHollowCommitHandleMode; import static org.apache.hudi.utilities.sources.helpers.IncrSourceHelper.getMissingCheckpointStrategy; @@ -116,18 +115,14 @@ public class GcsEventsHoodieIncrSource extends HoodieIncrSource { private final Option schemaProvider; private final Option snapshotLoadQuerySplitter; - - public static final String GCS_OBJECT_KEY = "name"; - public static final String GCS_OBJECT_SIZE = "size"; - private static final Logger LOG = LoggerFactory.getLogger(GcsEventsHoodieIncrSource.class); public GcsEventsHoodieIncrSource(TypedProperties props, JavaSparkContext jsc, SparkSession spark, SchemaProvider schemaProvider) { this(props, jsc, spark, schemaProvider, - new GcsObjectMetadataFetcher(props, getSourceFileFormat(props)), - new CloudDataFetcher(props, getStringWithAltKeys(props, DATAFILE_FORMAT, true)), + new GcsObjectMetadataFetcher(props), + new CloudDataFetcher(props), new QueryRunner(spark, props) ); } @@ -163,7 +158,8 @@ public Pair>, String> fetchNextBatch(Option lastChec sparkContext, srcPath, numInstantsPerFetch, 
Option.of(cloudObjectIncrCheckpoint.getCommit()), missingCheckpointStrategy, handlingMode, HoodieRecord.COMMIT_TIME_METADATA_FIELD, - GCS_OBJECT_KEY, GCS_OBJECT_SIZE, true, + CloudObjectsSelectorCommon.GCS_OBJECT_KEY, + CloudObjectsSelectorCommon.GCS_OBJECT_SIZE, true, Option.ofNullable(cloudObjectIncrCheckpoint.getKey())); LOG.info("Querying GCS with:" + cloudObjectIncrCheckpoint + " and queryInfo:" + queryInfo); @@ -196,9 +192,4 @@ private Pair>, String> extractData(QueryInfo queryInfo, Data Option> fileDataRows = gcsObjectDataFetcher.getCloudObjectDataDF(sparkSession, cloudObjectMetadata, props, schemaProvider); return Pair.of(fileDataRows, queryInfo.getEndInstant()); } - - private static String getSourceFileFormat(TypedProperties props) { - return getStringWithAltKeys(props, SOURCE_FILE_FORMAT, true); - } - } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java index eb67abfee3a6..c8c3b3421c6f 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java @@ -19,6 +19,7 @@ package org.apache.hudi.utilities.sources; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.utilities.UtilHelpers; import org.apache.hudi.utilities.config.JsonKafkaPostProcessorConfig; @@ -27,6 +28,8 @@ import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen; import org.apache.hudi.utilities.sources.processor.JsonKafkaSourcePostProcessor; +import org.apache.hudi.utilities.streamer.DefaultStreamContext; +import org.apache.hudi.utilities.streamer.StreamContext; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -44,10 +47,10 @@ import java.util.List; import static org.apache.hudi.common.util.ConfigUtils.getStringWithAltKeys; +import static org.apache.hudi.utilities.schema.KafkaOffsetPostProcessor.KAFKA_SOURCE_KEY_COLUMN; import static org.apache.hudi.utilities.schema.KafkaOffsetPostProcessor.KAFKA_SOURCE_OFFSET_COLUMN; import static org.apache.hudi.utilities.schema.KafkaOffsetPostProcessor.KAFKA_SOURCE_PARTITION_COLUMN; import static org.apache.hudi.utilities.schema.KafkaOffsetPostProcessor.KAFKA_SOURCE_TIMESTAMP_COLUMN; -import static org.apache.hudi.utilities.schema.KafkaOffsetPostProcessor.KAFKA_SOURCE_KEY_COLUMN; /** * Read json kafka data. 
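The next hunk gives JsonKafkaSource the same StreamContext-based constructor that the other Kafka sources in this patch receive. As a rough, hypothetical sketch of how that overload can be wired up (the helper class below and every argument passed to it are placeholders, not part of this patch):

    import org.apache.hudi.common.config.TypedProperties;
    import org.apache.hudi.common.util.Option;
    import org.apache.hudi.utilities.ingestion.HoodieIngestionMetrics;
    import org.apache.hudi.utilities.schema.SchemaProvider;
    import org.apache.hudi.utilities.sources.JsonKafkaSource;
    import org.apache.hudi.utilities.streamer.DefaultStreamContext;
    import org.apache.hudi.utilities.streamer.StreamContext;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.sql.SparkSession;

    // Hypothetical wiring helper; callers such as StreamSync are expected to build these arguments.
    class JsonKafkaSourceWiringSketch {
      static JsonKafkaSource newSource(TypedProperties props, JavaSparkContext jssc, SparkSession spark,
                                       SchemaProvider schemaProvider, HoodieIngestionMetrics metrics) {
        // No SourceProfileSupplier is supplied here, so the source keeps the existing sourceLimit-driven
        // offset generation.
        StreamContext streamContext = new DefaultStreamContext(schemaProvider, Option.empty());
        return new JsonKafkaSource(props, jssc, spark, metrics, streamContext);
      }
    }

Passing Option.empty() for the source profile keeps KafkaSource on the sourceLimit-based offset ranges shown further down in this diff.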
@@ -56,9 +59,12 @@ public class JsonKafkaSource extends KafkaSource { public JsonKafkaSource(TypedProperties properties, JavaSparkContext sparkContext, SparkSession sparkSession, SchemaProvider schemaProvider, HoodieIngestionMetrics metrics) { - super(properties, sparkContext, sparkSession, - UtilHelpers.getSchemaProviderForKafkaSource(schemaProvider, properties, sparkContext), - SourceType.JSON, metrics); + this(properties, sparkContext, sparkSession, metrics, new DefaultStreamContext(schemaProvider, Option.empty())); + } + + public JsonKafkaSource(TypedProperties properties, JavaSparkContext sparkContext, SparkSession sparkSession, HoodieIngestionMetrics metrics, StreamContext streamContext) { + super(properties, sparkContext, sparkSession, SourceType.JSON, metrics, + new DefaultStreamContext(UtilHelpers.getSchemaProviderForKafkaSource(streamContext.getSchemaProvider(), properties, sparkContext), streamContext.getSourceProfileSupplier())); properties.put("key.deserializer", StringDeserializer.class.getName()); properties.put("value.deserializer", StringDeserializer.class.getName()); this.offsetGen = new KafkaOffsetGen(props); @@ -87,7 +93,9 @@ protected JavaRDD maybeAppendKafkaOffsets(JavaRDD extends Source> { protected final boolean shouldAddOffsets; protected KafkaSource(TypedProperties props, JavaSparkContext sparkContext, SparkSession sparkSession, - SchemaProvider schemaProvider, SourceType sourceType, HoodieIngestionMetrics metrics) { - super(props, sparkContext, sparkSession, schemaProvider, sourceType); - this.schemaProvider = schemaProvider; + SourceType sourceType, HoodieIngestionMetrics metrics, StreamContext streamContext) { + super(props, sparkContext, sparkSession, sourceType, streamContext); + this.schemaProvider = streamContext.getSchemaProvider(); this.metrics = metrics; this.shouldAddOffsets = KafkaOffsetPostProcessor.Config.shouldAddOffsets(props); } @@ -60,21 +62,34 @@ protected KafkaSource(TypedProperties props, JavaSparkContext sparkContext, Spar @Override protected InputBatch> fetchNewData(Option lastCheckpointStr, long sourceLimit) { try { - OffsetRange[] offsetRanges = offsetGen.getNextOffsetRanges(lastCheckpointStr, sourceLimit, metrics); - long totalNewMsgs = KafkaOffsetGen.CheckpointUtils.totalNewMessages(offsetRanges); - LOG.info("About to read " + totalNewMsgs + " from Kafka for topic :" + offsetGen.getTopicName()); - if (totalNewMsgs <= 0) { - metrics.updateStreamerSourceNewMessageCount(METRIC_NAME_KAFKA_MESSAGE_IN_COUNT, 0); - return new InputBatch<>(Option.empty(), KafkaOffsetGen.CheckpointUtils.offsetsToStr(offsetRanges)); + OffsetRange[] offsetRanges; + if (sourceProfileSupplier.isPresent() && sourceProfileSupplier.get().getSourceProfile() != null) { + SourceProfile kafkaSourceProfile = sourceProfileSupplier.get().getSourceProfile(); + offsetRanges = offsetGen.getNextOffsetRanges(lastCheckpointStr, kafkaSourceProfile.getSourceSpecificContext(), kafkaSourceProfile.getSourcePartitions(), metrics); + LOG.info("About to read numEvents {} of size {} bytes in {} partitions from Kafka for topic {} with offsetRanges {}", + kafkaSourceProfile.getSourceSpecificContext(), kafkaSourceProfile.getMaxSourceBytes(), + kafkaSourceProfile.getSourcePartitions(), offsetGen.getTopicName(), offsetRanges); + } else { + offsetRanges = offsetGen.getNextOffsetRanges(lastCheckpointStr, sourceLimit, metrics); } - metrics.updateStreamerSourceNewMessageCount(METRIC_NAME_KAFKA_MESSAGE_IN_COUNT, totalNewMsgs); - JavaRDD newDataRDD = toRDD(offsetRanges); - return new 
InputBatch<>(Option.of(newDataRDD), KafkaOffsetGen.CheckpointUtils.offsetsToStr(offsetRanges)); + return toInputBatch(offsetRanges); } catch (org.apache.kafka.common.errors.TimeoutException e) { throw new HoodieSourceTimeoutException("Kafka Source timed out " + e.getMessage()); } } + private InputBatch> toInputBatch(OffsetRange[] offsetRanges) { + long totalNewMsgs = KafkaOffsetGen.CheckpointUtils.totalNewMessages(offsetRanges); + LOG.info("About to read " + totalNewMsgs + " from Kafka for topic :" + offsetGen.getTopicName()); + if (totalNewMsgs <= 0) { + metrics.updateStreamerSourceNewMessageCount(METRIC_NAME_KAFKA_MESSAGE_IN_COUNT, 0); + return new InputBatch<>(Option.empty(), KafkaOffsetGen.CheckpointUtils.offsetsToStr(offsetRanges)); + } + metrics.updateStreamerSourceNewMessageCount(METRIC_NAME_KAFKA_MESSAGE_IN_COUNT, totalNewMsgs); + JavaRDD newDataRDD = toRDD(offsetRanges); + return new InputBatch<>(Option.of(newDataRDD), KafkaOffsetGen.CheckpointUtils.offsetsToStr(offsetRanges)); + } + abstract JavaRDD toRDD(OffsetRange[] offsetRanges); @Override diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ProtoKafkaSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ProtoKafkaSource.java index 67927480454b..d7a15b3932cf 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ProtoKafkaSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ProtoKafkaSource.java @@ -19,12 +19,16 @@ package org.apache.hudi.utilities.sources; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ReflectionUtils; +import org.apache.hudi.utilities.UtilHelpers; import org.apache.hudi.utilities.config.ProtoClassBasedSchemaProviderConfig; import org.apache.hudi.utilities.exception.HoodieReadFromSourceException; import org.apache.hudi.utilities.ingestion.HoodieIngestionMetrics; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen; +import org.apache.hudi.utilities.streamer.DefaultStreamContext; +import org.apache.hudi.utilities.streamer.StreamContext; import com.google.protobuf.Message; import org.apache.kafka.common.serialization.ByteArrayDeserializer; @@ -51,9 +55,14 @@ public class ProtoKafkaSource extends KafkaSource { private final String className; - public ProtoKafkaSource(TypedProperties props, JavaSparkContext sparkContext, - SparkSession sparkSession, SchemaProvider schemaProvider, HoodieIngestionMetrics metrics) { - super(props, sparkContext, sparkSession, schemaProvider, SourceType.PROTO, metrics); + public ProtoKafkaSource(TypedProperties props, JavaSparkContext sparkContext, SparkSession sparkSession, + SchemaProvider schemaProvider, HoodieIngestionMetrics metrics) { + this(props, sparkContext, sparkSession, metrics, new DefaultStreamContext(schemaProvider, Option.empty())); + } + + public ProtoKafkaSource(TypedProperties properties, JavaSparkContext sparkContext, SparkSession sparkSession, HoodieIngestionMetrics metrics, StreamContext streamContext) { + super(properties, sparkContext, sparkSession, SourceType.PROTO, metrics, + new DefaultStreamContext(UtilHelpers.getSchemaProviderForKafkaSource(streamContext.getSchemaProvider(), properties, sparkContext), streamContext.getSourceProfileSupplier())); checkRequiredConfigProperties(props, Collections.singletonList( ProtoClassBasedSchemaProviderConfig.PROTO_SCHEMA_CLASS_NAME)); 
props.put(NATIVE_KAFKA_KEY_DESERIALIZER_PROP, StringDeserializer.class); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java index f2cc48f280c0..1c7e9d990988 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java @@ -18,10 +18,13 @@ package org.apache.hudi.utilities.sources; +import org.apache.hudi.HoodieSparkUtils; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.util.ConfigUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.utilities.UtilHelpers; +import org.apache.hudi.utilities.exception.HoodieReadFromSourceException; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.SanitizationUtils; @@ -30,6 +33,8 @@ import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; +import static org.apache.hudi.utilities.config.HoodieStreamerConfig.ROW_THROW_EXPLICIT_EXCEPTIONS; + public abstract class RowSource extends Source> { public RowSource(TypedProperties props, JavaSparkContext sparkContext, SparkSession sparkSession, @@ -46,7 +51,9 @@ protected final InputBatch> fetchNewData(Option lastCkptStr Dataset sanitizedRows = SanitizationUtils.sanitizeColumnNamesForAvro(dsr, props); SchemaProvider rowSchemaProvider = UtilHelpers.createRowBasedSchemaProvider(sanitizedRows.schema(), props, sparkContext); - return new InputBatch<>(Option.of(sanitizedRows), res.getValue(), rowSchemaProvider); + Dataset wrappedDf = HoodieSparkUtils.maybeWrapDataFrameWithException(sanitizedRows, HoodieReadFromSourceException.class.getName(), + "Failed to read from row source", ConfigUtils.getBooleanWithAltKeys(props, ROW_THROW_EXPLICIT_EXCEPTIONS)); + return new InputBatch<>(Option.of(wrappedDf), res.getValue(), rowSchemaProvider); }).orElseGet(() -> new InputBatch<>(res.getKey(), res.getValue())); } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/S3EventsHoodieIncrSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/S3EventsHoodieIncrSource.java index 4cbec4d22121..84b267709ad7 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/S3EventsHoodieIncrSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/S3EventsHoodieIncrSource.java @@ -23,14 +23,13 @@ import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.timeline.TimelineUtils.HollowCommitHandling; import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.Pair; -import org.apache.hudi.utilities.config.CloudSourceConfig; import org.apache.hudi.utilities.config.S3EventsHoodieIncrSourceConfig; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.CloudDataFetcher; import org.apache.hudi.utilities.sources.helpers.CloudObjectIncrCheckpoint; import org.apache.hudi.utilities.sources.helpers.CloudObjectMetadata; +import org.apache.hudi.utilities.sources.helpers.CloudObjectsSelectorCommon; import org.apache.hudi.utilities.sources.helpers.IncrSourceHelper; import org.apache.hudi.utilities.sources.helpers.QueryInfo; import org.apache.hudi.utilities.sources.helpers.QueryRunner; @@ -51,17 +50,11 @@ import static 
org.apache.hudi.common.util.ConfigUtils.getIntWithAltKeys; import static org.apache.hudi.common.util.ConfigUtils.getStringWithAltKeys; import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty; -import static org.apache.hudi.utilities.config.CloudSourceConfig.CLOUD_DATAFILE_EXTENSION; -import static org.apache.hudi.utilities.config.CloudSourceConfig.DATAFILE_FORMAT; import static org.apache.hudi.utilities.config.CloudSourceConfig.ENABLE_EXISTS_CHECK; import static org.apache.hudi.utilities.config.HoodieIncrSourceConfig.HOODIE_SRC_BASE_PATH; import static org.apache.hudi.utilities.config.HoodieIncrSourceConfig.NUM_INSTANTS_PER_FETCH; -import static org.apache.hudi.utilities.config.HoodieIncrSourceConfig.SOURCE_FILE_FORMAT; import static org.apache.hudi.utilities.config.S3EventsHoodieIncrSourceConfig.S3_FS_PREFIX; -import static org.apache.hudi.utilities.config.S3EventsHoodieIncrSourceConfig.S3_IGNORE_KEY_PREFIX; -import static org.apache.hudi.utilities.config.S3EventsHoodieIncrSourceConfig.S3_IGNORE_KEY_SUBSTRING; import static org.apache.hudi.utilities.config.S3EventsHoodieIncrSourceConfig.S3_INCR_ENABLE_EXISTS_CHECK; -import static org.apache.hudi.utilities.config.S3EventsHoodieIncrSourceConfig.S3_KEY_PREFIX; import static org.apache.hudi.utilities.sources.helpers.CloudObjectsSelectorCommon.getCloudObjectMetadataPerPartition; import static org.apache.hudi.utilities.sources.helpers.IncrSourceHelper.getHollowCommitHandleMode; import static org.apache.hudi.utilities.sources.helpers.IncrSourceHelper.getMissingCheckpointStrategy; @@ -72,11 +65,9 @@ public class S3EventsHoodieIncrSource extends HoodieIncrSource { private static final Logger LOG = LoggerFactory.getLogger(S3EventsHoodieIncrSource.class); - private static final String EMPTY_STRING = ""; private final String srcPath; private final int numInstantsPerFetch; private final boolean checkIfFileExists; - private final String fileFormat; private final IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy; private final QueryRunner queryRunner; private final CloudDataFetcher cloudDataFetcher; @@ -92,18 +83,9 @@ public static class Config { @Deprecated static final Boolean DEFAULT_ENABLE_EXISTS_CHECK = S3_INCR_ENABLE_EXISTS_CHECK.defaultValue(); - // control whether to filter the s3 objects starting with this prefix - @Deprecated - static final String S3_KEY_PREFIX = S3EventsHoodieIncrSourceConfig.S3_KEY_PREFIX.key(); @Deprecated static final String S3_FS_PREFIX = S3EventsHoodieIncrSourceConfig.S3_FS_PREFIX.key(); - // control whether to ignore the s3 objects starting with this prefix - @Deprecated - static final String S3_IGNORE_KEY_PREFIX = S3EventsHoodieIncrSourceConfig.S3_IGNORE_KEY_PREFIX.key(); - // control whether to ignore the s3 objects with this substring - @Deprecated - static final String S3_IGNORE_KEY_SUBSTRING = S3EventsHoodieIncrSourceConfig.S3_IGNORE_KEY_SUBSTRING.key(); /** * {@link #SPARK_DATASOURCE_OPTIONS} is json string, passed to the reader while loading dataset. 
* Example Hudi Streamer conf @@ -113,17 +95,13 @@ public static class Config { public static final String SPARK_DATASOURCE_OPTIONS = S3EventsHoodieIncrSourceConfig.SPARK_DATASOURCE_OPTIONS.key(); } - public static final String S3_OBJECT_KEY = "s3.object.key"; - public static final String S3_OBJECT_SIZE = "s3.object.size"; - public static final String S3_BUCKET_NAME = "s3.bucket.name"; - public S3EventsHoodieIncrSource( TypedProperties props, JavaSparkContext sparkContext, SparkSession sparkSession, SchemaProvider schemaProvider) { this(props, sparkContext, sparkSession, schemaProvider, new QueryRunner(sparkSession, props), - new CloudDataFetcher(props, getStringWithAltKeys(props, CloudSourceConfig.DATAFILE_FORMAT, true))); + new CloudDataFetcher(props)); } public S3EventsHoodieIncrSource( @@ -138,13 +116,6 @@ public S3EventsHoodieIncrSource( this.srcPath = getStringWithAltKeys(props, HOODIE_SRC_BASE_PATH); this.numInstantsPerFetch = getIntWithAltKeys(props, NUM_INSTANTS_PER_FETCH); this.checkIfFileExists = getBooleanWithAltKeys(props, ENABLE_EXISTS_CHECK); - - // This is to ensure backward compatibility where we were using the - // config SOURCE_FILE_FORMAT for file format in previous versions. - this.fileFormat = StringUtils.isNullOrEmpty(getStringWithAltKeys(props, DATAFILE_FORMAT, EMPTY_STRING)) - ? getStringWithAltKeys(props, SOURCE_FILE_FORMAT, true) - : getStringWithAltKeys(props, DATAFILE_FORMAT, EMPTY_STRING); - this.missingCheckpointStrategy = getMissingCheckpointStrategy(props); this.queryRunner = queryRunner; this.cloudDataFetcher = cloudDataFetcher; @@ -162,7 +133,8 @@ public Pair>, String> fetchNextBatch(Option lastChec Option.of(cloudObjectIncrCheckpoint.getCommit()), missingCheckpointStrategy, handlingMode, HoodieRecord.COMMIT_TIME_METADATA_FIELD, - S3_OBJECT_KEY, S3_OBJECT_SIZE, true, + CloudObjectsSelectorCommon.S3_OBJECT_KEY, + CloudObjectsSelectorCommon.S3_OBJECT_SIZE, true, Option.ofNullable(cloudObjectIncrCheckpoint.getKey())); LOG.info("Querying S3 with:" + cloudObjectIncrCheckpoint + ", queryInfo:" + queryInfo); @@ -172,7 +144,8 @@ public Pair>, String> fetchNextBatch(Option lastChec } Pair> queryInfoDatasetPair = queryRunner.run(queryInfo, snapshotLoadQuerySplitter); queryInfo = queryInfoDatasetPair.getLeft(); - Dataset filteredSourceData = applyFilter(queryInfoDatasetPair.getRight(), fileFormat); + Dataset filteredSourceData = queryInfoDatasetPair.getRight().filter( + CloudObjectsSelectorCommon.generateFilter(CloudObjectsSelectorCommon.Type.S3, props)); LOG.info("Adjusting end checkpoint:" + queryInfo.getEndInstant() + " based on sourceLimit :" + sourceLimit); Pair>> checkPointAndDataset = @@ -190,7 +163,9 @@ public Pair>, String> fetchNextBatch(Option lastChec // Create S3 paths SerializableConfiguration serializableHadoopConf = new SerializableConfiguration(sparkContext.hadoopConfiguration()); List cloudObjectMetadata = checkPointAndDataset.getRight().get() - .select(S3_BUCKET_NAME, S3_OBJECT_KEY, S3_OBJECT_SIZE) + .select(CloudObjectsSelectorCommon.S3_BUCKET_NAME, + CloudObjectsSelectorCommon.S3_OBJECT_KEY, + CloudObjectsSelectorCommon.S3_OBJECT_SIZE) .distinct() .mapPartitions(getCloudObjectMetadataPerPartition(s3Prefix, serializableHadoopConf, checkIfFileExists), Encoders.kryo(CloudObjectMetadata.class)) .collectAsList(); @@ -199,25 +174,4 @@ public Pair>, String> fetchNextBatch(Option lastChec Option> datasetOption = cloudDataFetcher.getCloudObjectDataDF(sparkSession, cloudObjectMetadata, props, schemaProvider); return Pair.of(datasetOption, 
checkPointAndDataset.getLeft().toString()); } - - Dataset applyFilter(Dataset source, String fileFormat) { - String filter = S3_OBJECT_SIZE + " > 0"; - if (!StringUtils.isNullOrEmpty(getStringWithAltKeys(props, S3_KEY_PREFIX, true))) { - filter = filter + " and " + S3_OBJECT_KEY + " like '" + getStringWithAltKeys(props, S3_KEY_PREFIX) + "%'"; - } - if (!StringUtils.isNullOrEmpty(getStringWithAltKeys(props, S3_IGNORE_KEY_PREFIX, true))) { - filter = filter + " and " + S3_OBJECT_KEY + " not like '" + getStringWithAltKeys(props, S3_IGNORE_KEY_PREFIX) + "%'"; - } - if (!StringUtils.isNullOrEmpty(getStringWithAltKeys(props, S3_IGNORE_KEY_SUBSTRING, true))) { - filter = filter + " and " + S3_OBJECT_KEY + " not like '%" + getStringWithAltKeys(props, S3_IGNORE_KEY_SUBSTRING) + "%'"; - } - // Match files with a given extension, or use the fileFormat as the fallback incase the config is not set. - if (!StringUtils.isNullOrEmpty(getStringWithAltKeys(props, CLOUD_DATAFILE_EXTENSION, true))) { - filter = filter + " and " + S3_OBJECT_KEY + " like '%" + getStringWithAltKeys(props, CLOUD_DATAFILE_EXTENSION) + "'"; - } else { - filter = filter + " and " + S3_OBJECT_KEY + " like '%" + fileFormat + "%'"; - } - - return source.filter(filter); - } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java index cbc0722056bf..dfb07c718a06 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java @@ -25,6 +25,9 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.utilities.callback.SourceCommitCallback; import org.apache.hudi.utilities.schema.SchemaProvider; +import org.apache.hudi.utilities.streamer.DefaultStreamContext; +import org.apache.hudi.utilities.streamer.SourceProfileSupplier; +import org.apache.hudi.utilities.streamer.StreamContext; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; @@ -44,6 +47,7 @@ public enum SourceType { protected transient TypedProperties props; protected transient JavaSparkContext sparkContext; protected transient SparkSession sparkSession; + protected transient Option sourceProfileSupplier; private transient SchemaProvider overriddenSchemaProvider; private final SourceType sourceType; @@ -55,11 +59,16 @@ protected Source(TypedProperties props, JavaSparkContext sparkContext, SparkSess protected Source(TypedProperties props, JavaSparkContext sparkContext, SparkSession sparkSession, SchemaProvider schemaProvider, SourceType sourceType) { + this(props, sparkContext, sparkSession, sourceType, new DefaultStreamContext(schemaProvider, Option.empty())); + } + + protected Source(TypedProperties props, JavaSparkContext sparkContext, SparkSession sparkSession, SourceType sourceType, StreamContext streamContext) { this.props = props; this.sparkContext = sparkContext; this.sparkSession = sparkSession; - this.overriddenSchemaProvider = schemaProvider; + this.overriddenSchemaProvider = streamContext.getSchemaProvider(); this.sourceType = sourceType; + this.sourceProfileSupplier = streamContext.getSourceProfileSupplier(); } @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudDataFetcher.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudDataFetcher.java index 9595ec1a9e6f..ed1a49e33e76 100644 --- 
a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudDataFetcher.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudDataFetcher.java @@ -20,17 +20,21 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.utilities.schema.SchemaProvider; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.Serializable; import java.util.List; +import static org.apache.hudi.common.util.ConfigUtils.getStringWithAltKeys; +import static org.apache.hudi.utilities.config.CloudSourceConfig.DATAFILE_FORMAT; +import static org.apache.hudi.utilities.config.HoodieIncrSourceConfig.SOURCE_FILE_FORMAT; import static org.apache.hudi.utilities.sources.helpers.CloudObjectsSelectorCommon.loadAsDataset; /** @@ -39,21 +43,28 @@ */ public class CloudDataFetcher implements Serializable { - private final String fileFormat; - private TypedProperties props; + private static final String EMPTY_STRING = ""; + + private final TypedProperties props; private static final Logger LOG = LoggerFactory.getLogger(CloudDataFetcher.class); private static final long serialVersionUID = 1L; - public CloudDataFetcher(TypedProperties props, String fileFormat) { - this.fileFormat = fileFormat; + public CloudDataFetcher(TypedProperties props) { this.props = props; } + public static String getFileFormat(TypedProperties props) { + // This is to ensure backward compatibility where we were using the + // config SOURCE_FILE_FORMAT for file format in previous versions. + return StringUtils.isNullOrEmpty(getStringWithAltKeys(props, DATAFILE_FORMAT, EMPTY_STRING)) + ? 
getStringWithAltKeys(props, SOURCE_FILE_FORMAT, true) + : getStringWithAltKeys(props, DATAFILE_FORMAT, EMPTY_STRING); + } + public Option> getCloudObjectDataDF(SparkSession spark, List cloudObjectMetadata, TypedProperties props, Option schemaProviderOption) { - return loadAsDataset(spark, cloudObjectMetadata, props, fileFormat, schemaProviderOption); + return loadAsDataset(spark, cloudObjectMetadata, props, getFileFormat(props), schemaProviderOption); } - } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon.java index 750d619258e0..8676bf41cb50 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon.java @@ -18,8 +18,8 @@ package org.apache.hudi.utilities.sources.helpers; -import org.apache.avro.Schema; import org.apache.hudi.AvroConversionUtils; +import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.util.Option; @@ -33,6 +33,7 @@ import org.apache.hudi.utilities.sources.InputBatch; import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -56,9 +57,16 @@ import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty; import static org.apache.hudi.common.util.ConfigUtils.containsConfigProperty; import static org.apache.hudi.common.util.ConfigUtils.getStringWithAltKeys; +import static org.apache.hudi.utilities.config.CloudSourceConfig.CLOUD_DATAFILE_EXTENSION; +import static org.apache.hudi.utilities.config.CloudSourceConfig.IGNORE_RELATIVE_PATH_PREFIX; +import static org.apache.hudi.utilities.config.CloudSourceConfig.IGNORE_RELATIVE_PATH_SUBSTR; import static org.apache.hudi.utilities.config.CloudSourceConfig.PATH_BASED_PARTITION_FIELDS; +import static org.apache.hudi.utilities.config.CloudSourceConfig.SELECT_RELATIVE_PATH_PREFIX; import static org.apache.hudi.utilities.config.CloudSourceConfig.SOURCE_MAX_BYTES_PER_PARTITION; import static org.apache.hudi.utilities.config.CloudSourceConfig.SPARK_DATASOURCE_READER_COMMA_SEPARATED_PATH_FORMAT; +import static org.apache.hudi.utilities.config.S3EventsHoodieIncrSourceConfig.S3_IGNORE_KEY_PREFIX; +import static org.apache.hudi.utilities.config.S3EventsHoodieIncrSourceConfig.S3_IGNORE_KEY_SUBSTRING; +import static org.apache.hudi.utilities.config.S3EventsHoodieIncrSourceConfig.S3_KEY_PREFIX; import static org.apache.spark.sql.functions.input_file_name; import static org.apache.spark.sql.functions.split; @@ -71,6 +79,13 @@ public class CloudObjectsSelectorCommon { private static final Logger LOG = LoggerFactory.getLogger(CloudObjectsSelectorCommon.class); + public static final String S3_OBJECT_KEY = "s3.object.key"; + public static final String S3_OBJECT_SIZE = "s3.object.size"; + public static final String S3_BUCKET_NAME = "s3.bucket.name"; + public static final String GCS_OBJECT_KEY = "name"; + public static final String GCS_OBJECT_SIZE = "size"; + private static final String SPACE_DELIMTER = " "; + /** * Return a function that extracts filepaths from a list of Rows. 
* Here Row is assumed to have the schema [bucket_name, filepath_relative_to_bucket, object_size] @@ -151,6 +166,45 @@ private static boolean checkIfFileExists(String storageUrlSchemePrefix, String b } } + public static String generateFilter(Type type, + TypedProperties props) { + String fileFormat = CloudDataFetcher.getFileFormat(props); + Option selectRelativePathPrefix = getPropVal(props, SELECT_RELATIVE_PATH_PREFIX); + Option ignoreRelativePathPrefix = getPropVal(props, IGNORE_RELATIVE_PATH_PREFIX); + Option ignoreRelativePathSubStr = getPropVal(props, IGNORE_RELATIVE_PATH_SUBSTR); + + String objectKey; + String objectSizeKey; + // This is for backwards compatibility of configs for s3. + if (type.equals(Type.S3)) { + objectKey = S3_OBJECT_KEY; + objectSizeKey = S3_OBJECT_SIZE; + selectRelativePathPrefix = selectRelativePathPrefix.or(() -> getPropVal(props, S3_KEY_PREFIX)); + ignoreRelativePathPrefix = ignoreRelativePathPrefix.or(() -> getPropVal(props, S3_IGNORE_KEY_PREFIX)); + ignoreRelativePathSubStr = ignoreRelativePathSubStr.or(() -> getPropVal(props, S3_IGNORE_KEY_SUBSTRING)); + } else { + objectKey = GCS_OBJECT_KEY; + objectSizeKey = GCS_OBJECT_SIZE; + } + + StringBuilder filter = new StringBuilder(String.format("%s > 0", objectSizeKey)); + if (selectRelativePathPrefix.isPresent()) { + filter.append(SPACE_DELIMTER).append(String.format("and %s like '%s%%'", objectKey, selectRelativePathPrefix.get())); + } + if (ignoreRelativePathPrefix.isPresent()) { + filter.append(SPACE_DELIMTER).append(String.format("and %s not like '%s%%'", objectKey, ignoreRelativePathPrefix.get())); + } + if (ignoreRelativePathSubStr.isPresent()) { + filter.append(SPACE_DELIMTER).append(String.format("and %s not like '%%%s%%'", objectKey, ignoreRelativePathSubStr.get())); + } + + // Match files with a given extension, or use the fileFormat as the default. 
+ getPropVal(props, CLOUD_DATAFILE_EXTENSION).or(() -> Option.of(fileFormat)) + .map(val -> filter.append(SPACE_DELIMTER).append(String.format("and %s like '%%%s'", objectKey, val))); + + return filter.toString(); + } + public static Option> loadAsDataset(SparkSession spark, List cloudObjectMetadata, TypedProperties props, String fileFormat, Option schemaProviderOption) { if (LOG.isDebugEnabled()) { @@ -204,7 +258,6 @@ public static Option> loadAsDataset(SparkSession spark, List> loadAsDataset(SparkSession spark, List coalesceOrRepartition(Dataset dataset, int numPartitions) { + int existingNumPartitions = dataset.rdd().getNumPartitions(); + LOG.info(String.format("existing number of partitions=%d, required number of partitions=%d", existingNumPartitions, numPartitions)); + if (existingNumPartitions < numPartitions) { + dataset = dataset.repartition(numPartitions); + } else { + dataset = dataset.coalesce(numPartitions); + } + return dataset; + } + public static Option> loadAsDataset(SparkSession spark, List cloudObjectMetadata, TypedProperties props, String fileFormat) { return loadAsDataset(spark, cloudObjectMetadata, props, fileFormat, Option.empty()); } + + private static Option getPropVal(TypedProperties props, ConfigProperty configProperty) { + String value = getStringWithAltKeys(props, configProperty, true); + if (!StringUtils.isNullOrEmpty(value)) { + return Option.of(value); + } + + return Option.empty(); + } + + public enum Type { + S3, + GCS + } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java index d5faec3595e1..442046cd948a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java @@ -41,10 +41,10 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -59,6 +59,7 @@ import static org.apache.hudi.common.util.ConfigUtils.getBooleanWithAltKeys; import static org.apache.hudi.common.util.ConfigUtils.getLongWithAltKeys; import static org.apache.hudi.common.util.ConfigUtils.getStringWithAltKeys; +import static org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils.checkTopicCheckpoint; /** * Source to read data from Kafka, incrementally. @@ -68,16 +69,14 @@ public class KafkaOffsetGen { private static final Logger LOG = LoggerFactory.getLogger(KafkaOffsetGen.class); private static final String METRIC_NAME_KAFKA_DELAY_COUNT = "kafkaDelayCount"; private static final Comparator SORT_BY_PARTITION = Comparator.comparing(OffsetRange::partition); - public static final String KAFKA_CHECKPOINT_TYPE_TIMESTAMP = "timestamp"; - /** - * kafka checkpoint Pattern. - * Format: topic_name,partition_num:offset,partition_num:offset,.... - */ - private final Pattern pattern = Pattern.compile(".*,.*:.*"); - public static class CheckpointUtils { + /** + * kafka checkpoint Pattern. + * Format: topic_name,partition_num:offset,partition_num:offset,.... + */ + private static final Pattern PATTERN = Pattern.compile(".*,.*:.*"); /** * Reconstruct checkpoint from timeline. 
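The per-source filter builders removed from S3EventsHoodieIncrSource.applyFilter and GcsObjectMetadataFetcher.createFilter are consolidated into CloudObjectsSelectorCommon.generateFilter above, which also keeps the legacy S3_KEY_PREFIX / S3_IGNORE_KEY_* configs working as fallbacks for S3. A minimal sketch of the consolidated behavior, using the ConfigProperty constants referenced in this diff with purely illustrative values:

```java
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.utilities.sources.helpers.CloudObjectsSelectorCommon;

import static org.apache.hudi.utilities.config.CloudSourceConfig.CLOUD_DATAFILE_EXTENSION;
import static org.apache.hudi.utilities.config.CloudSourceConfig.SELECT_RELATIVE_PATH_PREFIX;

public class GenerateFilterSketch {
  public static void main(String[] args) {
    TypedProperties props = new TypedProperties();
    // Illustrative values; the keys come from the ConfigProperty constants used by generateFilter.
    props.put(SELECT_RELATIVE_PATH_PREFIX.key(), "input/daily");
    props.put(CLOUD_DATAFILE_EXTENSION.key(), ".parquet");

    // For Type.S3 the column names are s3.object.key / s3.object.size; the legacy
    // S3_KEY_PREFIX / S3_IGNORE_KEY_* configs are still honored as fallbacks.
    String filter = CloudObjectsSelectorCommon.generateFilter(CloudObjectsSelectorCommon.Type.S3, props);
    System.out.println(filter);
    // Expected shape (spacing may differ):
    // s3.object.size > 0 and s3.object.key like 'input/daily%' and s3.object.key like '%.parquet'
  }
}
```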
@@ -115,6 +114,7 @@ public static String offsetsToStr(OffsetRange[] ranges) { * @param fromOffsetMap offsets where we left off last time * @param toOffsetMap offsets of where each partitions is currently at * @param numEvents maximum number of events to read. + * @param minPartitions minimum partitions used for */ public static OffsetRange[] computeOffsetRanges(Map fromOffsetMap, Map toOffsetMap, @@ -130,62 +130,58 @@ public static OffsetRange[] computeOffsetRanges(Map fromOf .toArray(new OffsetRange[toOffsetMap.size()]); LOG.debug("numEvents {}, minPartitions {}, ranges {}", numEvents, minPartitions, ranges); - boolean needSplitToMinPartitions = minPartitions > toOffsetMap.size(); - long totalEvents = totalNewMessages(ranges); - long allocedEvents = 0; - Set exhaustedPartitions = new HashSet<>(); - List finalRanges = new ArrayList<>(); // choose the actualNumEvents with min(totalEvents, numEvents) - long actualNumEvents = Math.min(totalEvents, numEvents); - - // keep going until we have events to allocate and partitions still not exhausted. - while (allocedEvents < numEvents && exhaustedPartitions.size() < toOffsetMap.size()) { - // Allocate the remaining events to non-exhausted partitions, in round robin fashion - Set allocatedPartitionsThisLoop = new HashSet<>(exhaustedPartitions); - for (int i = 0; i < ranges.length; i++) { - long remainingEvents = actualNumEvents - allocedEvents; - long remainingPartitions = toOffsetMap.size() - allocatedPartitionsThisLoop.size(); - // if need tp split into minPartitions, recalculate the remainingPartitions - if (needSplitToMinPartitions) { - remainingPartitions = minPartitions - finalRanges.size(); + long actualNumEvents = Math.min(totalNewMessages(ranges), numEvents); + minPartitions = Math.max(minPartitions, toOffsetMap.size()); + // Each OffsetRange computed will have maximum of eventsPerPartition, + // this ensures all ranges are evenly distributed and there's no skew in one particular range. + long eventsPerPartition = Math.max(1L, actualNumEvents / minPartitions); + long allocatedEvents = 0; + Map> finalRanges = new HashMap<>(); + Map partitionToAllocatedOffset = new HashMap<>(); + // keep going until we have events to allocate. + while (allocatedEvents < actualNumEvents) { + // Allocate the remaining events in round-robin fashion. + for (OffsetRange range : ranges) { + // if we have already allocated required no of events, exit + if (allocatedEvents == actualNumEvents) { + break; } - long eventsPerPartition = (long) Math.ceil((1.0 * remainingEvents) / remainingPartitions); - - OffsetRange range = ranges[i]; - if (exhaustedPartitions.contains(range.partition())) { - continue; + // Compute startOffset. + long startOffset = range.fromOffset(); + if (partitionToAllocatedOffset.containsKey(range.topicPartition())) { + startOffset = partitionToAllocatedOffset.get(range.topicPartition()); } - + // for last bucket, we may not have full eventsPerPartition msgs. + long eventsForThisPartition = Math.min(eventsPerPartition, (actualNumEvents - allocatedEvents)); + // Compute toOffset. 
long toOffset = -1L; - if (range.fromOffset() + eventsPerPartition > range.fromOffset()) { - toOffset = Math.min(range.untilOffset(), range.fromOffset() + eventsPerPartition); + if (startOffset + eventsForThisPartition > startOffset) { + toOffset = Math.min(range.untilOffset(), startOffset + eventsForThisPartition); } else { // handling Long overflow toOffset = range.untilOffset(); } - if (toOffset == range.untilOffset()) { - exhaustedPartitions.add(range.partition()); + allocatedEvents += toOffset - startOffset; + OffsetRange thisRange = OffsetRange.create(range.topicPartition(), startOffset, toOffset); + // Add the offsetRange(startOffset,toOffset) to finalRanges. + if (!finalRanges.containsKey(range.topicPartition())) { + finalRanges.put(range.topicPartition(), new ArrayList<>(Collections.singleton(thisRange))); + partitionToAllocatedOffset.put(range.topicPartition(), thisRange.untilOffset()); + } else if (toOffset > startOffset) { + finalRanges.get(range.topicPartition()).add(thisRange); + partitionToAllocatedOffset.put(range.topicPartition(), thisRange.untilOffset()); } - allocedEvents += toOffset - range.fromOffset(); - // We need recompute toOffset if allocedEvents larger than actualNumEvents. - if (allocedEvents > actualNumEvents) { - long offsetsToAdd = Math.min(eventsPerPartition, (actualNumEvents - allocedEvents)); - toOffset = Math.min(range.untilOffset(), toOffset + offsetsToAdd); - } - OffsetRange thisRange = OffsetRange.create(range.topicPartition(), range.fromOffset(), toOffset); - finalRanges.add(thisRange); - ranges[i] = OffsetRange.create(range.topicPartition(), range.fromOffset() + thisRange.count(), range.untilOffset()); - allocatedPartitionsThisLoop.add(range.partition()); } } - - if (!needSplitToMinPartitions) { - LOG.debug("final ranges merged by topic partition {}", Arrays.toString(mergeRangesByTopicPartition(finalRanges.toArray(new OffsetRange[0])))); - return mergeRangesByTopicPartition(finalRanges.toArray(new OffsetRange[0])); + OffsetRange[] sortedRangeArray = finalRanges.values().stream().flatMap(Collection::stream) + .sorted(SORT_BY_PARTITION).toArray(OffsetRange[]::new); + if (actualNumEvents == 0) { + // We return the same ranges back in case of 0 events for checkpoint computation. 
+ sortedRangeArray = ranges; } - finalRanges.sort(SORT_BY_PARTITION); - LOG.debug("final ranges {}", Arrays.toString(finalRanges.toArray(new OffsetRange[0]))); - return finalRanges.toArray(new OffsetRange[0]); + LOG.info("final ranges {}", Arrays.toString(sortedRangeArray)); + return sortedRangeArray; } /** @@ -209,6 +205,11 @@ public static OffsetRange[] mergeRangesByTopicPartition(OffsetRange[] oldRanges) public static long totalNewMessages(OffsetRange[] ranges) { return Arrays.stream(ranges).mapToLong(OffsetRange::count).sum(); } + + public static boolean checkTopicCheckpoint(Option lastCheckpointStr) { + Matcher matcher = PATTERN.matcher(lastCheckpointStr.get()); + return matcher.matches(); + } } private final Map kafkaParams; @@ -241,7 +242,24 @@ public KafkaOffsetGen(TypedProperties props) { } public OffsetRange[] getNextOffsetRanges(Option lastCheckpointStr, long sourceLimit, HoodieIngestionMetrics metrics) { + // Come up with final set of OffsetRanges to read (account for new partitions, limit number of events) + long maxEventsToReadFromKafka = getLongWithAltKeys(props, KafkaSourceConfig.MAX_EVENTS_FROM_KAFKA_SOURCE); + + long numEvents; + if (sourceLimit == Long.MAX_VALUE) { + numEvents = maxEventsToReadFromKafka; + LOG.info("SourceLimit not configured, set numEvents to default value : " + maxEventsToReadFromKafka); + } else { + numEvents = sourceLimit; + } + + long minPartitions = getLongWithAltKeys(props, KafkaSourceConfig.KAFKA_SOURCE_MIN_PARTITIONS); + LOG.info("getNextOffsetRanges set config " + KafkaSourceConfig.KAFKA_SOURCE_MIN_PARTITIONS.key() + " to " + minPartitions); + + return getNextOffsetRanges(lastCheckpointStr, numEvents, minPartitions, metrics); + } + public OffsetRange[] getNextOffsetRanges(Option lastCheckpointStr, long numEvents, long minPartitions, HoodieIngestionMetrics metrics) { // Obtain current metadata for the topic Map fromOffsets; Map toOffsets; @@ -279,29 +297,9 @@ public OffsetRange[] getNextOffsetRanges(Option lastCheckpointStr, long // Obtain the latest offsets. toOffsets = consumer.endOffsets(topicPartitions); } - - // Come up with final set of OffsetRanges to read (account for new partitions, limit number of events) - long maxEventsToReadFromKafka = getLongWithAltKeys(props, KafkaSourceConfig.MAX_EVENTS_FROM_KAFKA_SOURCE); - - long numEvents; - if (sourceLimit == Long.MAX_VALUE) { - numEvents = maxEventsToReadFromKafka; - LOG.info("SourceLimit not configured, set numEvents to default value : " + maxEventsToReadFromKafka); - } else { - numEvents = sourceLimit; - } - - // TODO(HUDI-4625) remove - if (numEvents < toOffsets.size()) { - throw new HoodieException("sourceLimit should not be less than the number of kafka partitions"); - } - - long minPartitions = getLongWithAltKeys(props, KafkaSourceConfig.KAFKA_SOURCE_MIN_PARTITIONS); - LOG.info("getNextOffsetRanges set config " + KafkaSourceConfig.KAFKA_SOURCE_MIN_PARTITIONS.key() + " to " + minPartitions); - return CheckpointUtils.computeOffsetRanges(fromOffsets, toOffsets, numEvents, minPartitions); } - + /** * Fetch partition infos for given topic. 
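The rewritten CheckpointUtils.computeOffsetRanges caps each range at roughly actualNumEvents / max(minPartitions, #topicPartitions) events and hands ranges out round-robin, so one Kafka partition can be split across several Spark partitions instead of producing a skewed range. A small sketch of the resulting allocation, assuming the Map<TopicPartition, Long> signature shown in the hunk and a hypothetical two-partition topic:

```java
import java.util.HashMap;
import java.util.Map;

import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
import org.apache.kafka.common.TopicPartition;
import org.apache.spark.streaming.kafka010.OffsetRange;

public class OffsetRangeSketch {
  public static void main(String[] args) {
    // Hypothetical topic "events" with two partitions holding 500 and 300 new messages.
    Map<TopicPartition, Long> from = new HashMap<>();
    Map<TopicPartition, Long> to = new HashMap<>();
    from.put(new TopicPartition("events", 0), 0L);
    to.put(new TopicPartition("events", 0), 500L);
    from.put(new TopicPartition("events", 1), 0L);
    to.put(new TopicPartition("events", 1), 300L);

    // Request at most 400 events spread over at least 4 Spark partitions:
    // eventsPerPartition = 400 / 4 = 100, so each topic partition contributes
    // 100-event ranges, allocated round-robin, until 400 events are covered
    // (here: two ranges per topic partition).
    OffsetRange[] ranges = CheckpointUtils.computeOffsetRanges(from, to, 400, 4);
    for (OffsetRange range : ranges) {
      System.out.println(range);
    }
  }
}
```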
* @@ -427,11 +425,6 @@ public boolean checkTopicExists(KafkaConsumer consumer) { return result.containsKey(topicName); } - private boolean checkTopicCheckpoint(Option lastCheckpointStr) { - Matcher matcher = pattern.matcher(lastCheckpointStr.get()); - return matcher.matches(); - } - public String getTopicName() { return topicName; } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/gcs/GcsObjectMetadataFetcher.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/gcs/GcsObjectMetadataFetcher.java index c92901d14cff..29a50e81fb06 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/gcs/GcsObjectMetadataFetcher.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/gcs/GcsObjectMetadataFetcher.java @@ -18,11 +18,10 @@ package org.apache.hudi.utilities.sources.helpers.gcs; -import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.util.Option; import org.apache.hudi.utilities.sources.helpers.CloudObjectMetadata; +import org.apache.hudi.utilities.sources.helpers.CloudObjectsSelectorCommon; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; @@ -34,12 +33,6 @@ import java.io.Serializable; import java.util.List; -import static org.apache.hudi.common.util.ConfigUtils.getStringWithAltKeys; -import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty; -import static org.apache.hudi.utilities.config.CloudSourceConfig.CLOUD_DATAFILE_EXTENSION; -import static org.apache.hudi.utilities.config.CloudSourceConfig.IGNORE_RELATIVE_PATH_PREFIX; -import static org.apache.hudi.utilities.config.CloudSourceConfig.IGNORE_RELATIVE_PATH_SUBSTR; -import static org.apache.hudi.utilities.config.CloudSourceConfig.SELECT_RELATIVE_PATH_PREFIX; import static org.apache.hudi.utilities.sources.helpers.CloudObjectsSelectorCommon.getCloudObjectMetadataPerPartition; /** @@ -51,10 +44,6 @@ */ public class GcsObjectMetadataFetcher implements Serializable { - /** - * The default file format to assume if {@link GcsIngestionConfig#GCS_INCR_DATAFILE_EXTENSION} is not given. - */ - private final String fileFormat; private final TypedProperties props; private static final String GCS_PREFIX = "gs://"; @@ -62,13 +51,8 @@ public class GcsObjectMetadataFetcher implements Serializable { private static final Logger LOG = LoggerFactory.getLogger(GcsObjectMetadataFetcher.class); - /** - * @param fileFormat The default file format to assume if {@link GcsIngestionConfig#GCS_INCR_DATAFILE_EXTENSION} - * is not given. - */ - public GcsObjectMetadataFetcher(TypedProperties props, String fileFormat) { + public GcsObjectMetadataFetcher(TypedProperties props) { this.props = props; - this.fileFormat = fileFormat; } /** @@ -92,35 +76,9 @@ public List getGcsObjectMetadata(JavaSparkContext jsc, Data * @return Dataset after apply the filtering. */ public Dataset applyFilter(Dataset cloudObjectMetadataDF) { - String filter = createFilter(); + String filter = CloudObjectsSelectorCommon.generateFilter(CloudObjectsSelectorCommon.Type.GCS, props); LOG.info("Adding filter string to Dataset: " + filter); return cloudObjectMetadataDF.filter(filter); } - - /** - * Add optional filters that narrow down the list of GCS objects to fetch. 
- */ - private String createFilter() { - StringBuilder filter = new StringBuilder("size > 0"); - - getPropVal(SELECT_RELATIVE_PATH_PREFIX).ifPresent(val -> filter.append(" and name like '" + val + "%'")); - getPropVal(IGNORE_RELATIVE_PATH_PREFIX).ifPresent(val -> filter.append(" and name not like '" + val + "%'")); - getPropVal(IGNORE_RELATIVE_PATH_SUBSTR).ifPresent(val -> filter.append(" and name not like '%" + val + "%'")); - - // Match files with a given extension, or use the fileFormat as the default. - getPropVal(CLOUD_DATAFILE_EXTENSION).or(() -> Option.of(fileFormat)) - .map(val -> filter.append(" and name like '%" + val + "'")); - - return filter.toString(); - } - - private Option getPropVal(ConfigProperty configProperty) { - String value = getStringWithAltKeys(props, configProperty, true); - if (!isNullOrEmpty(value)) { - return Option.of(value); - } - - return Option.empty(); - } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/DefaultStreamContext.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/DefaultStreamContext.java new file mode 100644 index 000000000000..f8dabeb89c96 --- /dev/null +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/DefaultStreamContext.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.utilities.streamer; + +import org.apache.hudi.common.util.Option; +import org.apache.hudi.utilities.schema.SchemaProvider; + +/** + * The default implementation for the StreamContext interface, + * composes SchemaProvider and SourceProfileSupplier currently, + * can be extended for other arguments in the future. + */ +public class DefaultStreamContext implements StreamContext { + + private final SchemaProvider schemaProvider; + private final Option sourceProfileSupplier; + + public DefaultStreamContext(SchemaProvider schemaProvider, Option sourceProfileSupplier) { + this.schemaProvider = schemaProvider; + this.sourceProfileSupplier = sourceProfileSupplier; + } + + @Override + public SchemaProvider getSchemaProvider() { + return schemaProvider; + } + + @Override + public Option getSourceProfileSupplier() { + return sourceProfileSupplier; + } +} diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/ErrorEvent.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/ErrorEvent.java index f268464d6f1a..a2f1cb277ec6 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/ErrorEvent.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/ErrorEvent.java @@ -19,6 +19,8 @@ package org.apache.hudi.utilities.streamer; +import java.util.Objects; + /** * Error event is an event triggered during write or processing failure of a record. 
*/ @@ -40,6 +42,23 @@ public ErrorReason getReason() { return reason; } + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ErrorEvent that = (ErrorEvent) o; + return reason == that.reason && Objects.equals(payload, that.payload); + } + + @Override + public int hashCode() { + return Objects.hash(reason, payload); + } + /** * The reason behind write or processing failure of a record */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieMultiTableStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieMultiTableStreamer.java index d7e3bca49897..a637f7fbbff7 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieMultiTableStreamer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieMultiTableStreamer.java @@ -66,7 +66,7 @@ /** * Wrapper over HoodieStreamer.java class. * Helps with ingesting incremental data into hoodie datasets for multiple tables. - * Currently supports only COPY_ON_WRITE storage type. + * Supports COPY_ON_WRITE and MERGE_ON_READ storage types. */ public class HoodieMultiTableStreamer { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamer.java index 8ecc937c5e7c..ef31cc34ab5f 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamer.java @@ -143,8 +143,12 @@ public HoodieStreamer(Config cfg, JavaSparkContext jssc, FileSystem fs, Configur this(cfg, jssc, fs, conf, Option.empty()); } + public HoodieStreamer(Config cfg, JavaSparkContext jssc, FileSystem fs, Configuration conf, Option propsOverride) throws IOException { + this(cfg, jssc, fs, conf, propsOverride, Option.empty()); + } + public HoodieStreamer(Config cfg, JavaSparkContext jssc, FileSystem fs, Configuration conf, - Option propsOverride) throws IOException { + Option propsOverride, Option sourceProfileSupplier) throws IOException { this.properties = combineProperties(cfg, propsOverride, jssc.hadoopConfiguration()); if (cfg.initialCheckpointProvider != null && cfg.checkpoint == null) { InitialCheckPointProvider checkPointProvider = @@ -158,7 +162,7 @@ public HoodieStreamer(Config cfg, JavaSparkContext jssc, FileSystem fs, Configur cfg.runBootstrap ? new BootstrapExecutor(cfg, jssc, fs, conf, this.properties) : null); HoodieSparkEngineContext sparkEngineContext = new HoodieSparkEngineContext(jssc); this.ingestionService = Option.ofNullable( - cfg.runBootstrap ? null : new StreamSyncService(cfg, sparkEngineContext, fs, conf, Option.ofNullable(this.properties))); + cfg.runBootstrap ? 
null : new StreamSyncService(cfg, sparkEngineContext, fs, conf, Option.ofNullable(this.properties), sourceProfileSupplier)); } private static TypedProperties combineProperties(Config cfg, Option propsOverride, Configuration hadoopConf) { @@ -656,7 +660,7 @@ public static class StreamSyncService extends HoodieIngestionService { private final Option configurationHotUpdateStrategyOpt; public StreamSyncService(Config cfg, HoodieSparkEngineContext hoodieSparkContext, FileSystem fs, Configuration conf, - Option properties) throws IOException { + Option properties, Option sourceProfileSupplier) throws IOException { super(HoodieIngestionConfig.newBuilder() .isContinuous(cfg.continuousMode) .withMinSyncInternalSeconds(cfg.minSyncIntervalSeconds).build()); @@ -708,13 +712,18 @@ public StreamSyncService(Config cfg, HoodieSparkEngineContext hoodieSparkContext UtilHelpers.createSchemaProvider(cfg.schemaProviderClassName, props, hoodieSparkContext.jsc()), props, hoodieSparkContext.jsc(), cfg.transformerClassNames); - streamSync = new StreamSync(cfg, sparkSession, schemaProvider, props, hoodieSparkContext, fs, conf, this::onInitializingWriteClient); + streamSync = new StreamSync(cfg, sparkSession, props, hoodieSparkContext, fs, conf, this::onInitializingWriteClient, new DefaultStreamContext(schemaProvider, sourceProfileSupplier)); } public StreamSyncService(HoodieStreamer.Config cfg, HoodieSparkEngineContext hoodieSparkContext, FileSystem fs, Configuration conf) throws IOException { - this(cfg, hoodieSparkContext, fs, conf, Option.empty()); + this(cfg, hoodieSparkContext, fs, conf, Option.empty(), Option.empty()); + } + + public StreamSyncService(HoodieStreamer.Config cfg, HoodieSparkEngineContext hoodieSparkContext, FileSystem fs, Configuration conf, Option properties) + throws IOException { + this(cfg, hoodieSparkContext, fs, conf, properties, Option.empty()); } private void initializeTableTypeAndBaseFileFormat() { @@ -728,7 +737,7 @@ private void reInitDeltaSync() throws IOException { if (streamSync != null) { streamSync.close(); } - streamSync = new StreamSync(cfg, sparkSession, schemaProvider, props, hoodieSparkContext, fs, hiveConf, this::onInitializingWriteClient); + streamSync = new StreamSync(cfg, sparkSession, props, hoodieSparkContext, fs, hiveConf, this::onInitializingWriteClient, new DefaultStreamContext(schemaProvider, Option.empty())); } @Override diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamerUtils.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamerUtils.java index 44c367ba3843..2ecf0b02fb6a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamerUtils.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamerUtils.java @@ -35,7 +35,10 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.collection.CloseableMappingIterator; -import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieKeyException; +import org.apache.hudi.exception.HoodieKeyGeneratorException; +import org.apache.hudi.exception.HoodieRecordCreationException; import org.apache.hudi.keygen.BuiltinKeyGenerator; import org.apache.hudi.keygen.KeyGenUtils; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; @@ -52,7 +55,6 @@ import org.apache.spark.sql.catalyst.InternalRow; import 
org.apache.spark.sql.types.StructType; -import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; @@ -105,10 +107,7 @@ public static Option> createHoodieRecords(HoodieStreamer.C : DataSourceUtils.createPayload(cfg.payloadClassName, gr); avroRecords.add(Either.left(new HoodieAvroRecord<>(hoodieKey, payload))); } catch (Exception e) { - if (!shouldErrorTable) { - throw e; - } - avroRecords.add(Either.right(HoodieAvroUtils.avroToJsonString(genRec, false))); + avroRecords.add(generateErrorRecordOrThrowException(genRec, e, shouldErrorTable)); } } return avroRecords.iterator(); @@ -136,14 +135,7 @@ public static Option> createHoodieRecords(HoodieStreamer.C return Either.left(new HoodieSparkRecord(new HoodieKey(recordKey, partitionPath), HoodieInternalRowUtils.getCachedUnsafeProjection(baseStructType, targetStructType).apply(row), targetStructType, false)); } catch (Exception e) { - if (!shouldErrorTable) { - throw e; - } - try { - return Either.right(HoodieAvroUtils.avroToJsonString(rec, false)); - } catch (IOException ex) { - throw new HoodieIOException("Failed to convert illegal record to json", ex); - } + return generateErrorRecordOrThrowException(rec, e, shouldErrorTable); } }); @@ -159,6 +151,28 @@ public static Option> createHoodieRecords(HoodieStreamer.C }); } + /** + * @param genRec Avro {@link GenericRecord} instance. + * @return the representation of error record (empty {@link HoodieRecord} and the error record + * String) for writing to error table. + */ + private static Either generateErrorRecordOrThrowException(GenericRecord genRec, Exception e, boolean shouldErrorTable) { + if (!shouldErrorTable) { + if (e instanceof HoodieKeyException) { + throw (HoodieKeyException) e; + } else if (e instanceof HoodieKeyGeneratorException) { + throw (HoodieKeyGeneratorException) e; + } else { + throw new HoodieRecordCreationException("Failed to create Hoodie Record", e); + } + } + try { + return Either.right(HoodieAvroUtils.safeAvroToJsonString(genRec)); + } catch (Exception ex) { + throw new HoodieException("Failed to convert illegal record to json", ex); + } + } + /** * Set based on hoodie.datasource.write.drop.partition.columns config. * When set to true, will not write the partition columns into the table. 
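The new HoodieStreamer and StreamSyncService constructors above accept an Option<SourceProfileSupplier> and forward it, via DefaultStreamContext, to the Source created inside StreamSync. A wiring sketch under illustrative settings; KafkaIngestionProfileSupplier is a hypothetical implementation (see the sketch at the end of this diff), and a real job would still need the usual source, schema, and table properties:

```java
import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.utilities.sources.JsonKafkaSource;
import org.apache.hudi.utilities.streamer.HoodieStreamer;
import org.apache.hudi.utilities.streamer.SourceProfileSupplier;
import org.apache.spark.api.java.JavaSparkContext;

public class StreamerWiringSketch {
  public static void main(String[] args) throws Exception {
    JavaSparkContext jssc = new JavaSparkContext("local[2]", "profile-sketch");
    FileSystem fs = FileSystem.get(jssc.hadoopConfiguration());

    HoodieStreamer.Config cfg = new HoodieStreamer.Config();
    cfg.targetBasePath = "file:///tmp/hudi/target_table";   // illustrative
    cfg.targetTableName = "target_table";
    cfg.tableType = "MERGE_ON_READ";
    cfg.sourceClassName = JsonKafkaSource.class.getName();

    // Hypothetical supplier controlling how much the next sync round reads.
    Option<SourceProfileSupplier> profileSupplier = Option.of(new KafkaIngestionProfileSupplier());

    // New overload: (cfg, jssc, fs, conf, propsOverride, sourceProfileSupplier).
    HoodieStreamer streamer =
        new HoodieStreamer(cfg, jssc, fs, jssc.hadoopConfiguration(), Option.empty(), profileSupplier);
    streamer.sync();
  }
}
```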
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SourceFormatAdapter.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SourceFormatAdapter.java index f29404701db9..c379472b26eb 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SourceFormatAdapter.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SourceFormatAdapter.java @@ -23,8 +23,10 @@ import org.apache.hudi.HoodieSparkUtils; import org.apache.hudi.avro.MercifulJsonConverter; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.util.ConfigUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.exception.SchemaCompatibilityException; import org.apache.hudi.utilities.UtilHelpers; import org.apache.hudi.utilities.schema.FilebasedSchemaProvider; import org.apache.hudi.utilities.schema.SchemaProvider; @@ -53,6 +55,7 @@ import scala.util.Either; +import static org.apache.hudi.utilities.config.HoodieStreamerConfig.ROW_THROW_EXPLICIT_EXCEPTIONS; import static org.apache.hudi.utilities.config.HoodieStreamerConfig.SANITIZE_SCHEMA_FIELD_NAMES; import static org.apache.hudi.utilities.config.HoodieStreamerConfig.SCHEMA_FIELD_NAME_INVALID_CHAR_MASK; import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE; @@ -62,10 +65,12 @@ /** * Adapts data-format provided by the source to the data-format required by the client (DeltaStreamer). */ -public final class SourceFormatAdapter implements Closeable { +public class SourceFormatAdapter implements Closeable { private final Source source; private boolean shouldSanitize = SANITIZE_SCHEMA_FIELD_NAMES.defaultValue(); + + private boolean wrapWithException = ROW_THROW_EXPLICIT_EXCEPTIONS.defaultValue(); private String invalidCharMask = SCHEMA_FIELD_NAME_INVALID_CHAR_MASK.defaultValue(); private Option errorTableWriter = Option.empty(); @@ -80,6 +85,7 @@ public SourceFormatAdapter(Source source, Option errorTabl if (props.isPresent()) { this.shouldSanitize = SanitizationUtils.shouldSanitize(props.get()); this.invalidCharMask = SanitizationUtils.getInvalidCharMask(props.get()); + this.wrapWithException = ConfigUtils.getBooleanWithAltKeys(props.get(), ROW_THROW_EXPLICIT_EXCEPTIONS); } if (this.shouldSanitize && source.getSourceType() == Source.SourceType.PROTO) { throw new IllegalArgumentException("PROTO cannot be sanitized"); @@ -244,7 +250,8 @@ public InputBatch> fetchNewDataInRowFormat(Option lastCkptS StructType dataType = AvroConversionUtils.convertAvroSchemaToStructType(sourceSchema); return new InputBatch<>( Option.ofNullable( - r.getBatch().map(rdd -> source.getSparkSession().read().schema(dataType).json(rdd)).orElse(null)), + r.getBatch().map(rdd -> HoodieSparkUtils.maybeWrapDataFrameWithException(source.getSparkSession().read().schema(dataType).json(rdd), + SchemaCompatibilityException.class.getName(), "Schema does not match json data", wrapWithException)).orElse(null)), r.getCheckpointForNextBatch(), r.getSchemaProvider()); } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SourceProfile.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SourceProfile.java new file mode 100644 index 000000000000..d830cf5dee3c --- /dev/null +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SourceProfile.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.utilities.streamer; + +import org.apache.hudi.ApiMaturityLevel; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; + +/** + * A profile containing details about how the next input batch in StreamSync should be consumed and written. + * For eg: KafkaSourceProfile contains number of events to consume in this sync round. + * S3SourceProfile contains the list of files to consume in this sync round. + * HudiIncrementalSourceProfile contains the beginInstant and endInstant commit times to consume in this sync round etc. + * + * @param The type for source context, varies based on sourceType as described above. + */ +@PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING) +public interface SourceProfile { + + /** + * @return The maxBytes that will be consumed from the source in this sync round. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + long getMaxSourceBytes(); + + /** + * @return The number of output partitions required in source RDD. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + int getSourcePartitions(); + + /** + * @return The source specific context based on sourceType as described above. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + T getSourceSpecificContext(); +} diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SourceProfileSupplier.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SourceProfileSupplier.java new file mode 100644 index 000000000000..34bfb8dff945 --- /dev/null +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SourceProfileSupplier.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.utilities.streamer; + +import org.apache.hudi.ApiMaturityLevel; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; + +/** + * Supplier for SourceProfile + */ +@PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING) +public interface SourceProfileSupplier { + @SuppressWarnings("rawtypes") + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + SourceProfile getSourceProfile(); +} diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamContext.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamContext.java new file mode 100644 index 000000000000..bfe337ee3f25 --- /dev/null +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamContext.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.utilities.streamer; + +import org.apache.hudi.ApiMaturityLevel; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.utilities.schema.SchemaProvider; + +/** + * The context required to sync one batch of data to hoodie table using StreamSync. + */ +@PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING) +public interface StreamContext { + + /** + * The schema provider used for reading data from source and also writing to hoodie table. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + SchemaProvider getSchemaProvider(); + + /** + * An optional stream profile supplying details regarding how the next input batch in StreamSync should be consumed and written. 
+ */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + Option getSourceProfileSupplier(); +} diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java index 63eccbf5dc64..ded5348ed8f9 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java @@ -55,6 +55,7 @@ import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.common.util.VisibleForTesting; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieClusteringConfig; import org.apache.hudi.config.HoodieCompactionConfig; @@ -255,25 +256,50 @@ public class StreamSync implements Serializable, Closeable { private final boolean useRowWriter; + @VisibleForTesting + StreamSync(HoodieStreamer.Config cfg, SparkSession sparkSession, + TypedProperties props, HoodieSparkEngineContext hoodieSparkContext, FileSystem fs, Configuration conf, + Function onInitializingHoodieWriteClient, SchemaProvider userProvidedSchemaProvider, + Option errorTableWriter, SourceFormatAdapter formatAdapter, Option transformer, + boolean useRowWriter, boolean autoGenerateRecordKeys) { + this.cfg = cfg; + this.hoodieSparkContext = hoodieSparkContext; + this.sparkSession = sparkSession; + this.fs = fs; + this.onInitializingHoodieWriteClient = onInitializingHoodieWriteClient; + this.props = props; + this.userProvidedSchemaProvider = userProvidedSchemaProvider; + this.processedSchema = new SchemaSet(); + this.autoGenerateRecordKeys = autoGenerateRecordKeys; + this.keyGenClassName = getKeyGeneratorClassName(new TypedProperties(props)); + this.conf = conf; + + this.errorTableWriter = errorTableWriter; + this.formatAdapter = formatAdapter; + this.transformer = transformer; + this.useRowWriter = useRowWriter; + + } + @Deprecated public StreamSync(HoodieStreamer.Config cfg, SparkSession sparkSession, SchemaProvider schemaProvider, TypedProperties props, JavaSparkContext jssc, FileSystem fs, Configuration conf, Function onInitializingHoodieWriteClient) throws IOException { - this(cfg, sparkSession, schemaProvider, props, new HoodieSparkEngineContext(jssc), fs, conf, onInitializingHoodieWriteClient); + this(cfg, sparkSession, props, new HoodieSparkEngineContext(jssc), fs, conf, onInitializingHoodieWriteClient, new DefaultStreamContext(schemaProvider, Option.empty())); } - public StreamSync(HoodieStreamer.Config cfg, SparkSession sparkSession, SchemaProvider schemaProvider, + public StreamSync(HoodieStreamer.Config cfg, SparkSession sparkSession, TypedProperties props, HoodieSparkEngineContext hoodieSparkContext, FileSystem fs, Configuration conf, - Function onInitializingHoodieWriteClient) throws IOException { + Function onInitializingHoodieWriteClient, StreamContext streamContext) throws IOException { this.cfg = cfg; this.hoodieSparkContext = hoodieSparkContext; this.sparkSession = sparkSession; this.fs = fs; this.onInitializingHoodieWriteClient = onInitializingHoodieWriteClient; this.props = props; - this.userProvidedSchemaProvider = schemaProvider; + this.userProvidedSchemaProvider = streamContext.getSchemaProvider(); this.processedSchema = new SchemaSet(); - this.autoGenerateRecordKeys = KeyGenUtils.enableAutoGenerateRecordKeys(props); + this.autoGenerateRecordKeys = 
KeyGenUtils.isAutoGeneratedRecordKeysEnabled(props); this.keyGenClassName = getKeyGeneratorClassName(new TypedProperties(props)); this.conf = conf; @@ -285,7 +311,7 @@ public StreamSync(HoodieStreamer.Config cfg, SparkSession sparkSession, SchemaPr this.errorWriteFailureStrategy = ErrorTableUtils.getErrorWriteFailureStrategy(props); } refreshTimeline(); - Source source = UtilHelpers.createSource(cfg.sourceClassName, props, hoodieSparkContext.jsc(), sparkSession, schemaProvider, metrics); + Source source = UtilHelpers.createSource(cfg.sourceClassName, props, hoodieSparkContext.jsc(), sparkSession, metrics, streamContext); this.formatAdapter = new SourceFormatAdapter(source, this.errorTableWriter, Option.of(props)); Supplier> schemaSupplier = schemaProvider == null ? Option::empty : () -> Option.ofNullable(schemaProvider.getSourceSchema()); @@ -414,7 +440,7 @@ public Pair, JavaRDD> syncOnce() throws IOException || (newTargetSchema != null && !processedSchema.isSchemaPresent(newTargetSchema))) { String sourceStr = newSourceSchema == null ? NULL_PLACEHOLDER : newSourceSchema.toString(true); String targetStr = newTargetSchema == null ? NULL_PLACEHOLDER : newTargetSchema.toString(true); - LOG.info("Seeing new schema. Source: {0}, Target: {1}", sourceStr, targetStr); + LOG.info("Seeing new schema. Source: {}, Target: {}", sourceStr, targetStr); // We need to recreate write client with new schema and register them. reInitWriteClient(newSourceSchema, newTargetSchema, inputBatch.getBatch()); if (newSourceSchema != null) { @@ -513,6 +539,7 @@ public InputBatch readFromSource(String instantTime, HoodieTableMetaClient metaC private InputBatch fetchFromSourceAndPrepareRecords(Option resumeCheckpointStr, String instantTime, HoodieTableMetaClient metaClient) { + hoodieSparkContext.setJobStatus(this.getClass().getSimpleName(), "Fetching next batch: " + cfg.targetTableName); HoodieRecordType recordType = createRecordMerger(props).getRecordType(); if (recordType == HoodieRecordType.SPARK && HoodieTableType.valueOf(cfg.tableType) == HoodieTableType.MERGE_ON_READ && !cfg.operation.equals(WriteOperationType.BULK_INSERT) @@ -535,7 +562,7 @@ private InputBatch fetchFromSourceAndPrepareRecords(Option resumeCheckpo } // handle empty batch with change in checkpoint - hoodieSparkContext.setJobStatus(this.getClass().getSimpleName(), "Checking if input is empty"); + hoodieSparkContext.setJobStatus(this.getClass().getSimpleName(), "Checking if input is empty: " + cfg.targetTableName); if (useRowWriter) { // no additional processing required for row writer. @@ -552,7 +579,8 @@ private InputBatch fetchFromSourceAndPrepareRecords(Option resumeCheckpo * @param resumeCheckpointStr checkpoint to resume from source. * @return {@link InputBatch} containing the new batch of data from source along with new checkpoint and schema provider instance to use. 
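The refactored constructor above accepts a StreamContext instead of a bare SchemaProvider, and the deprecated constructor adapts the old signature through DefaultStreamContext with an empty profile supplier. A caller that wants to pass a source profile as well only needs to pair the two accessors; a minimal, hypothetical implementation (the class name and wiring are illustrative, not taken from this patch):

// Illustrative StreamContext implementation; the patch itself routes the legacy
// arguments through DefaultStreamContext.
import org.apache.hudi.common.util.Option;
import org.apache.hudi.utilities.schema.SchemaProvider;

public class ExampleStreamContext implements StreamContext {

  private final SchemaProvider schemaProvider;
  private final Option<SourceProfileSupplier> sourceProfileSupplier;

  public ExampleStreamContext(SchemaProvider schemaProvider,
                              Option<SourceProfileSupplier> sourceProfileSupplier) {
    this.schemaProvider = schemaProvider;
    this.sourceProfileSupplier = sourceProfileSupplier;
  }

  @Override
  public SchemaProvider getSchemaProvider() {
    return schemaProvider;
  }

  @Override
  public Option<SourceProfileSupplier> getSourceProfileSupplier() {
    return sourceProfileSupplier;
  }
}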
*/ - private InputBatch fetchNextBatchFromSource(Option resumeCheckpointStr, HoodieTableMetaClient metaClient) { + @VisibleForTesting + InputBatch fetchNextBatchFromSource(Option resumeCheckpointStr, HoodieTableMetaClient metaClient) { Option> avroRDDOptional = null; String checkpointStr = null; SchemaProvider schemaProvider = null; @@ -573,12 +601,12 @@ private InputBatch fetchNextBatchFromSource(Option resumeCheckpointStr, checkpointStr = dataAndCheckpoint.getCheckpointForNextBatch(); if (this.userProvidedSchemaProvider != null && this.userProvidedSchemaProvider.getTargetSchema() != null && this.userProvidedSchemaProvider.getTargetSchema() != InputBatch.NULL_SCHEMA) { + // Let's deduce the schema provider for writer side first! + schemaProvider = getDeducedSchemaProvider(this.userProvidedSchemaProvider.getTargetSchema(), this.userProvidedSchemaProvider, metaClient); if (useRowWriter) { - inputBatchForWriter = new InputBatch(transformed, checkpointStr, this.userProvidedSchemaProvider); + inputBatchForWriter = new InputBatch(transformed, checkpointStr, schemaProvider); } else { // non row writer path - // Let's deduce the schema provider for writer side first! - schemaProvider = getDeducedSchemaProvider(this.userProvidedSchemaProvider.getTargetSchema(), this.userProvidedSchemaProvider, metaClient); SchemaProvider finalSchemaProvider = schemaProvider; // If the target schema is specified through Avro schema, // pass in the schema for the Row-to-Avro conversion @@ -606,11 +634,10 @@ private InputBatch fetchNextBatchFromSource(Option resumeCheckpointStr, } else { // Deduce proper target (writer's) schema for the input dataset, reconciling its // schema w/ the table's one - Option incomingSchemaOpt = transformed.map(df -> - AvroConversionUtils.convertStructTypeToAvroSchema(df.schema(), getAvroRecordQualifiedName(cfg.targetTableName))); - - schemaProvider = incomingSchemaOpt.map(incomingSchema -> getDeducedSchemaProvider(incomingSchema, dataAndCheckpoint.getSchemaProvider(), metaClient)) - .orElseGet(dataAndCheckpoint::getSchemaProvider); + Schema incomingSchema = transformed.map(df -> + AvroConversionUtils.convertStructTypeToAvroSchema(df.schema(), getAvroRecordQualifiedName(cfg.targetTableName))) + .orElseGet(dataAndCheckpoint.getSchemaProvider()::getTargetSchema); + schemaProvider = getDeducedSchemaProvider(incomingSchema, dataAndCheckpoint.getSchemaProvider(), metaClient); if (useRowWriter) { inputBatchForWriter = new InputBatch(transformed, checkpointStr, schemaProvider); @@ -622,7 +649,9 @@ private InputBatch fetchNextBatchFromSource(Option resumeCheckpointStr, } } else { if (useRowWriter) { - inputBatchForWriter = formatAdapter.fetchNewDataInRowFormat(resumeCheckpointStr, cfg.sourceLimit); + InputBatch inputBatchNeedsDeduceSchema = formatAdapter.fetchNewDataInRowFormat(resumeCheckpointStr, cfg.sourceLimit); + inputBatchForWriter = new InputBatch<>(inputBatchNeedsDeduceSchema.getBatch(), inputBatchNeedsDeduceSchema.getCheckpointForNextBatch(), + getDeducedSchemaProvider(inputBatchNeedsDeduceSchema.getSchemaProvider().getTargetSchema(), inputBatchNeedsDeduceSchema.getSchemaProvider(), metaClient)); } else { // Pull the data from the source & prepare the write InputBatch> dataAndCheckpoint = formatAdapter.fetchNewDataInAvroFormat(resumeCheckpointStr, cfg.sourceLimit); @@ -661,17 +690,17 @@ private InputBatch fetchNextBatchFromSource(Option resumeCheckpointStr, * @param sourceSchemaProvider Source schema provider. * @return the SchemaProvider that can be used as writer schema. 
*/ - private SchemaProvider getDeducedSchemaProvider(Schema incomingSchema, SchemaProvider sourceSchemaProvider, HoodieTableMetaClient metaClient) { + @VisibleForTesting + SchemaProvider getDeducedSchemaProvider(Schema incomingSchema, SchemaProvider sourceSchemaProvider, HoodieTableMetaClient metaClient) { Option latestTableSchemaOpt = UtilHelpers.getLatestTableSchema(hoodieSparkContext.jsc(), fs, cfg.targetBasePath, metaClient); Option internalSchemaOpt = HoodieConversionUtils.toJavaOption( HoodieSchemaUtils.getLatestTableInternalSchema( new HoodieConfig(HoodieStreamer.Config.getProps(fs, cfg)), metaClient)); // Deduce proper target (writer's) schema for the input dataset, reconciling its // schema w/ the table's one - Schema targetSchema = HoodieSparkSqlWriter.deduceWriterSchema( - HoodieAvroUtils.removeMetadataFields(incomingSchema), - HoodieConversionUtils.toScalaOption(latestTableSchemaOpt), - HoodieConversionUtils.toScalaOption(internalSchemaOpt), props); + Schema targetSchema = HoodieSchemaUtils.deduceWriterSchema( + HoodieAvroUtils.removeMetadataFields(incomingSchema), + latestTableSchemaOpt, internalSchemaOpt, props); // Override schema provider with the reconciled target schema return new DelegatingSchemaProvider(props, hoodieSparkContext.jsc(), sourceSchemaProvider, @@ -865,10 +894,10 @@ private Pair, JavaRDD> writeToSinkAndDoMetaSync(Stri writeClient.rollback(instantTime); throw new HoodieStreamerWriteException("Commit " + instantTime + " failed and rolled-back !"); } - long overallTimeMs = overallTimerContext != null ? overallTimerContext.stop() : 0; + long overallTimeNanos = overallTimerContext != null ? overallTimerContext.stop() : 0; // Send DeltaStreamer Metrics - metrics.updateStreamerMetrics(overallTimeMs); + metrics.updateStreamerMetrics(overallTimeNanos); return Pair.of(scheduledCompactionInstant, writeStatusRDD); } @@ -988,13 +1017,14 @@ public void runMetaSync() { SyncUtilHelpers.runHoodieMetaSync(impl.trim(), metaProps, conf, fs, cfg.targetBasePath, cfg.baseFileFormat); success = true; } catch (HoodieMetaSyncException e) { - LOG.error("SyncTool class {0} failed with exception {1}", impl.trim(), e); + LOG.error("SyncTool class {} failed with exception {}", impl.trim(), e); failedMetaSyncs.put(impl, e); } - long metaSyncTimeMs = syncContext != null ? syncContext.stop() : 0; - metrics.updateStreamerMetaSyncMetrics(getSyncClassShortName(impl), metaSyncTimeMs); + long metaSyncTimeNanos = syncContext != null ? 
syncContext.stop() : 0; + metrics.updateStreamerMetaSyncMetrics(getSyncClassShortName(impl), metaSyncTimeNanos); if (success) { - LOG.info("[MetaSync] SyncTool class {0} completed successfully and took {1} ", impl.trim(), metaSyncTimeMs); + long timeMs = metaSyncTimeNanos / 1000000L; + LOG.info("[MetaSync] SyncTool class {} completed successfully and took {} s {} ms ", impl.trim(), timeMs / 1000L, timeMs % 1000L); } } if (!failedMetaSyncs.isEmpty()) { diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java index e3724aee48a7..f637413b63d8 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java @@ -38,6 +38,8 @@ import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.metadata.HoodieBackedTableMetadata; import org.apache.hudi.metadata.HoodieTableMetadataUtil; import org.apache.hudi.metadata.MetadataPartitionType; @@ -76,6 +78,7 @@ import static org.apache.hudi.utilities.UtilHelpers.SCHEDULE_AND_EXECUTE; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; public class TestHoodieIndexer extends SparkClientFunctionalTestHarness implements SparkProvider { @@ -289,7 +292,10 @@ public void testIndexerWithWriterFinishingLast() throws IOException { // start the indexer and validate files index is completely built out HoodieIndexer indexer = new HoodieIndexer(jsc(), config); // The catchup won't finish due to inflight delta commit, and this is expected - assertEquals(-1, indexer.start(0)); + Throwable cause = assertThrows(RuntimeException.class, () -> indexer.start(0)) + .getCause(); + assertTrue(cause instanceof HoodieMetadataException); + assertTrue(cause.getMessage().contains("Failed to index partition")); // Now, make sure that the inflight delta commit happened before the async indexer // is intact @@ -365,7 +371,10 @@ public void testIndexerForExceptionWithNonFilesPartition() { config.propsFilePath = propsPath; // start the indexer and validate index building fails HoodieIndexer indexer = new HoodieIndexer(jsc(), config); - assertEquals(-1, indexer.start(0)); + Throwable cause = assertThrows(RuntimeException.class, () -> indexer.start(0)) + .getCause(); + assertTrue(cause instanceof HoodieException); + assertTrue(cause.getMessage().contains("Metadata table is not yet initialized")); // validate table config metaClient = reload(metaClient); @@ -373,7 +382,7 @@ public void testIndexerForExceptionWithNonFilesPartition() { assertFalse(metaClient.getTableConfig().getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath())); assertFalse(metaClient.getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); // validate metadata partitions actually exist - assertFalse(metadataPartitionExists(basePath(), context(), FILES)); + assertFalse(metadataPartitionExists(basePath(), context(), FILES.getPartitionPath())); // trigger FILES partition and indexing should succeed. 
indexMetadataPartitionsAndAssert(FILES, Collections.emptyList(), Arrays.asList(new MetadataPartitionType[] {COLUMN_STATS, BLOOM_FILTERS}), tableName, "streamer-config/indexer.properties"); @@ -432,8 +441,8 @@ private void indexMetadataPartitionsAndAssert(MetadataPartitionType partitionTyp nonExistentPartitions.forEach(entry -> assertFalse(completedPartitions.contains(entry.getPartitionPath()))); // validate metadata partitions actually exist - assertTrue(metadataPartitionExists(basePath(), context(), partitionTypeToIndex)); - alreadyCompletedPartitions.forEach(entry -> assertTrue(metadataPartitionExists(basePath(), context(), entry))); + assertTrue(metadataPartitionExists(basePath(), context(), partitionTypeToIndex.getPartitionPath())); + alreadyCompletedPartitions.forEach(entry -> assertTrue(metadataPartitionExists(basePath(), context(), entry.getPartitionPath()))); } @Test @@ -455,9 +464,9 @@ public void testIndexerDropPartitionDeletesInstantFromTimeline() { // validate partitions built successfully assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath())); - assertTrue(metadataPartitionExists(basePath(), context(), FILES)); + assertTrue(metadataPartitionExists(basePath(), context(), FILES.getPartitionPath())); assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); - assertTrue(metadataPartitionExists(basePath(), context(), BLOOM_FILTERS)); + assertTrue(metadataPartitionExists(basePath(), context(), BLOOM_FILTERS.getPartitionPath())); // build indexer config which has only column_stats enabled (files is enabled by default) HoodieIndexer.Config config = new HoodieIndexer.Config(); @@ -481,13 +490,13 @@ public void testIndexerDropPartitionDeletesInstantFromTimeline() { assertEquals(0, indexer.start(0)); indexInstantInTimeline = metaClient.reloadActiveTimeline().filterPendingIndexTimeline().lastInstant(); assertFalse(indexInstantInTimeline.isPresent()); - assertFalse(metadataPartitionExists(basePath(), context(), COLUMN_STATS)); + assertFalse(metadataPartitionExists(basePath(), context(), COLUMN_STATS.getPartitionPath())); // check other partitions are intact assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath())); - assertTrue(metadataPartitionExists(basePath(), context(), FILES)); + assertTrue(metadataPartitionExists(basePath(), context(), FILES.getPartitionPath())); assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); - assertTrue(metadataPartitionExists(basePath(), context(), BLOOM_FILTERS)); + assertTrue(metadataPartitionExists(basePath(), context(), BLOOM_FILTERS.getPartitionPath())); } @Test @@ -509,7 +518,7 @@ public void testTwoIndexersOneCreateOneDropPartition() { // validate files partition built successfully assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath())); - assertTrue(metadataPartitionExists(basePath(), context(), FILES)); + assertTrue(metadataPartitionExists(basePath(), context(), FILES.getPartitionPath())); // build indexer config which has only bloom_filters enabled HoodieIndexer.Config config = getHoodieIndexConfig(BLOOM_FILTERS.name(), SCHEDULE_AND_EXECUTE, "streamer-config/indexer-only-bloom.properties", tableName); @@ -517,7 +526,7 @@ public void testTwoIndexersOneCreateOneDropPartition() { HoodieIndexer indexer = new HoodieIndexer(jsc(), config); assertEquals(0, indexer.start(0)); 
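The indexer tests above now assert on the cause wrapped inside the RuntimeException thrown by HoodieIndexer.start rather than on a -1 return code. Since the same unwrap-and-check appears in more than one test, it could be captured in a small JUnit 5 helper along these lines (a sketch under the assumption that assertThrows and assertTrue are statically imported as in this test class; this helper is not something the patch adds):

// Hypothetical helper for the repeated cause assertions; not part of this patch.
private static void assertStartFailsWithCause(org.junit.jupiter.api.function.Executable call,
                                              Class<? extends Throwable> expectedCauseType,
                                              String expectedMessageFragment) {
  Throwable cause = assertThrows(RuntimeException.class, call).getCause();
  assertTrue(expectedCauseType.isInstance(cause), "unexpected cause type: " + cause);
  assertTrue(cause.getMessage().contains(expectedMessageFragment));
}

// Usage mirroring the assertions above:
// assertStartFailsWithCause(() -> indexer.start(0), HoodieMetadataException.class, "Failed to index partition");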
assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); - assertTrue(metadataPartitionExists(basePath(), context(), BLOOM_FILTERS)); + assertTrue(metadataPartitionExists(basePath(), context(), BLOOM_FILTERS.getPartitionPath())); // completed index timeline for later validation Option bloomIndexInstant = metaClient.reloadActiveTimeline().filterCompletedIndexTimeline().lastInstant(); @@ -540,9 +549,9 @@ public void testTwoIndexersOneCreateOneDropPartition() { // check other partitions are intact assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath())); - assertTrue(metadataPartitionExists(basePath(), context(), FILES)); + assertTrue(metadataPartitionExists(basePath(), context(), FILES.getPartitionPath())); assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); - assertTrue(metadataPartitionExists(basePath(), context(), BLOOM_FILTERS)); + assertTrue(metadataPartitionExists(basePath(), context(), BLOOM_FILTERS.getPartitionPath())); // drop bloom filter partition. timeline files should not be deleted since the index building is complete. dropIndexAndAssert(BLOOM_FILTERS, "streamer-config/indexer-only-bloom.properties", bloomIndexInstant, tableName); @@ -554,7 +563,7 @@ private void dropIndexAndAssert(MetadataPartitionType indexType, String resource assertEquals(0, indexer.start(0)); Option pendingFlights = metaClient.reloadActiveTimeline().filterPendingIndexTimeline().lastInstant(); assertFalse(pendingFlights.isPresent()); - assertFalse(metadataPartitionExists(basePath(), context(), indexType)); + assertFalse(metadataPartitionExists(basePath(), context(), indexType.getPartitionPath())); if (completedIndexInstant.isPresent()) { assertEquals(completedIndexInstant, metaClient.reloadActiveTimeline().filterCompletedIndexTimeline().lastInstant()); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieMetadataTableValidator.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieMetadataTableValidator.java new file mode 100644 index 000000000000..e87f6257c54b --- /dev/null +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieMetadataTableValidator.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.utilities; + +import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.common.model.WriteOperationType; +import org.apache.hudi.testutils.HoodieSparkClientTestBase; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static org.apache.hudi.common.testutils.RawTripTestPayload.recordToString; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestHoodieMetadataTableValidator extends HoodieSparkClientTestBase { + + @Test + public void testMetadataTableValidation() { + + Map writeOptions = new HashMap<>(); + writeOptions.put(DataSourceWriteOptions.TABLE_NAME().key(), "test_table"); + writeOptions.put("hoodie.table.name", "test_table"); + writeOptions.put(DataSourceWriteOptions.TABLE_TYPE().key(), "MERGE_ON_READ"); + writeOptions.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key"); + writeOptions.put(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp"); + writeOptions.put(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition_path"); + + Dataset inserts = makeInsertDf("000", 5).cache(); + inserts.write().format("hudi").options(writeOptions) + .option(DataSourceWriteOptions.OPERATION().key(), WriteOperationType.BULK_INSERT.value()) + .mode(SaveMode.Overwrite) + .save(basePath); + Dataset updates = makeUpdateDf("001", 5).cache(); + updates.write().format("hudi").options(writeOptions) + .option(DataSourceWriteOptions.OPERATION().key(), WriteOperationType.UPSERT.value()) + .mode(SaveMode.Append) + .save(basePath); + + // validate MDT + HoodieMetadataTableValidator.Config config = new HoodieMetadataTableValidator.Config(); + config.basePath = basePath; + config.validateLatestFileSlices = true; + config.validateAllFileGroups = true; + HoodieMetadataTableValidator validator = new HoodieMetadataTableValidator(jsc, config); + assertTrue(validator.run()); + assertFalse(validator.hasValidationFailure()); + assertTrue(validator.getThrowables().isEmpty()); + } + + protected Dataset makeInsertDf(String instantTime, Integer n) { + List records = dataGen.generateInserts(instantTime, n).stream() + .map(r -> recordToString(r).get()).collect(Collectors.toList()); + JavaRDD rdd = jsc.parallelize(records); + return sparkSession.read().json(rdd); + } + + protected Dataset makeUpdateDf(String instantTime, Integer n) { + try { + List records = dataGen.generateUpdates(instantTime, n).stream() + .map(r -> recordToString(r).get()).collect(Collectors.toList()); + JavaRDD rdd = jsc.parallelize(records); + return sparkSession.read().json(rdd); + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/checkpointing/TestKafkaConnectHdfsProvider.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/checkpointing/TestKafkaConnectHdfsProvider.java index fb6f5d649cba..e90cfdb6856c 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/checkpointing/TestKafkaConnectHdfsProvider.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/checkpointing/TestKafkaConnectHdfsProvider.java @@ -62,7 +62,7 @@ public void testValidKafkaConnectPath() throws Exception { new File(topicPath + 
"/year=2016/month=05/day=02/" + "random_snappy_2" + BASE_FILE_EXTENSION).createNewFile(); final TypedProperties props = new TypedProperties(); - props.put("hoodie.deltastreamer.checkpoint.provider.path", topicPath.toString()); + props.put("hoodie.streamer.checkpoint.provider.path", topicPath.toString()); final InitialCheckPointProvider provider = new KafkaConnectHdfsProvider(props); provider.init(HoodieTestUtils.getDefaultHadoopConf()); assertEquals("topic1,0:300,1:200", provider.getCheckpoint()); @@ -83,7 +83,7 @@ public void testMissingPartition() throws Exception { new File(topicPath + "/year=2016/month=05/day=02/" + "topic1+0+201+300" + BASE_FILE_EXTENSION).createNewFile(); final TypedProperties props = new TypedProperties(); - props.put("hoodie.deltastreamer.checkpoint.provider.path", topicPath.toString()); + props.put("hoodie.streamer.checkpoint.provider.path", topicPath.toString()); final InitialCheckPointProvider provider = new KafkaConnectHdfsProvider(props); provider.init(HoodieTestUtils.getDefaultHadoopConf()); assertThrows(HoodieException.class, provider::getCheckpoint); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/config/SourceTestConfig.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/config/SourceTestConfig.java index 450d6e8dc3ae..760e7ed7ff41 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/config/SourceTestConfig.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/config/SourceTestConfig.java @@ -21,29 +21,36 @@ import org.apache.hudi.common.config.ConfigProperty; +import static org.apache.hudi.common.util.ConfigUtils.DELTA_STREAMER_CONFIG_PREFIX; +import static org.apache.hudi.common.util.ConfigUtils.STREAMER_CONFIG_PREFIX; + /** * Configurations for Test Data Sources. */ public class SourceTestConfig { public static final ConfigProperty NUM_SOURCE_PARTITIONS_PROP = ConfigProperty - .key("hoodie.deltastreamer.source.test.num_partitions") + .key(STREAMER_CONFIG_PREFIX + "source.test.num_partitions") .defaultValue(10) + .withAlternatives(DELTA_STREAMER_CONFIG_PREFIX + "source.test.num_partitions") .withDocumentation("Used by DistributedTestDataSource only. 
Number of partitions where each partitions generates test-data"); public static final ConfigProperty MAX_UNIQUE_RECORDS_PROP = ConfigProperty - .key("hoodie.deltastreamer.source.test.max_unique_records") + .key(STREAMER_CONFIG_PREFIX + "source.test.max_unique_records") .defaultValue(Integer.MAX_VALUE) + .withAlternatives(DELTA_STREAMER_CONFIG_PREFIX + "source.test.max_unique_records") .withDocumentation("Maximum number of unique records generated for the run"); public static final ConfigProperty USE_ROCKSDB_FOR_TEST_DATAGEN_KEYS = ConfigProperty - .key("hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys") + .key(STREAMER_CONFIG_PREFIX + "source.test.datagen.use_rocksdb_for_storing_existing_keys") .defaultValue(false) + .withAlternatives(DELTA_STREAMER_CONFIG_PREFIX + "source.test.datagen.use_rocksdb_for_storing_existing_keys") .withDocumentation("If true, uses Rocks DB for storing datagen keys"); public static final ConfigProperty ROCKSDB_BASE_DIR_FOR_TEST_DATAGEN_KEYS = ConfigProperty - .key("hoodie.deltastreamer.source.test.datagen.rocksdb_base_dir") + .key(STREAMER_CONFIG_PREFIX + "source.test.datagen.rocksdb_base_dir") .noDefaultValue() + .withAlternatives(DELTA_STREAMER_CONFIG_PREFIX + "source.test.datagen.rocksdb_base_dir") .withDocumentation("Base Dir for storing datagen keys"); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java index 864c3502825b..2b2013d04cd0 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java @@ -19,6 +19,7 @@ package org.apache.hudi.utilities.deltastreamer; +import org.apache.hudi.common.config.HoodieCommonConfig; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieRecord; @@ -26,11 +27,15 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.config.HoodieCleanConfig; import org.apache.hudi.config.HoodieClusteringConfig; +import org.apache.hudi.hive.HiveSyncConfigHolder; import org.apache.hudi.hive.MultiPartKeysValueExtractor; +import org.apache.hudi.hive.testutils.HiveTestService; +import org.apache.hudi.sync.common.HoodieSyncConfig; import org.apache.hudi.utilities.config.HoodieStreamerConfig; import org.apache.hudi.utilities.config.KafkaSourceConfig; import org.apache.hudi.utilities.config.SourceTestConfig; @@ -38,6 +43,7 @@ import org.apache.hudi.utilities.sources.HoodieIncrSource; import org.apache.hudi.utilities.sources.TestDataSource; import org.apache.hudi.utilities.sources.TestParquetDFSSourceEmptyBatch; +import org.apache.hudi.utilities.streamer.HoodieStreamer; import org.apache.hudi.utilities.testutils.UtilitiesTestBase; import org.apache.avro.Schema; @@ -68,17 +74,6 @@ import java.util.concurrent.TimeUnit; import java.util.function.Function; -import static org.apache.hudi.common.config.HoodieCommonConfig.SET_NULL_FOR_MISSING_COLUMNS; -import static 
org.apache.hudi.common.table.timeline.TimelineMetadataUtils.serializeCommitMetadata; -import static org.apache.hudi.common.util.StringUtils.nonEmpty; -import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL; -import static org.apache.hudi.hive.testutils.HiveTestService.HS2_JDBC_URL; -import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME; -import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS; -import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_PARTITION_FIELDS; -import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME; -import static org.apache.hudi.utilities.config.KafkaSourceConfig.KAFKA_AVRO_VALUE_DESERIALIZER_CLASS; -import static org.apache.hudi.utilities.streamer.HoodieStreamer.CHECKPOINT_KEY; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -137,9 +132,7 @@ public class HoodieDeltaStreamerTestBase extends UtilitiesTestBase { @BeforeEach protected void prepareTestSetup() throws IOException { - PARQUET_SOURCE_ROOT = basePath + "/parquetFiles"; - ORC_SOURCE_ROOT = basePath + "/orcFiles"; - JSON_KAFKA_SOURCE_ROOT = basePath + "/jsonKafkaFiles"; + setupTest(); testUtils = new KafkaTestUtils(); testUtils.setup(); topicName = "topic" + testNum; @@ -148,6 +141,36 @@ protected void prepareTestSetup() throws IOException { prepareORCDFSFiles(ORC_NUM_RECORDS, ORC_SOURCE_ROOT); } + @AfterEach + public void cleanupKafkaTestUtils() { + if (testUtils != null) { + testUtils.teardown(); + testUtils = null; + } + if (hudiOpts != null) { + hudiOpts = null; + } + } + + @BeforeAll + public static void initClass() throws Exception { + UtilitiesTestBase.initTestServices(false, true, false); + // basePath is defined in UtilitiesTestBase.initTestServices + PARQUET_SOURCE_ROOT = basePath + "/parquetFiles"; + ORC_SOURCE_ROOT = basePath + "/orcFiles"; + JSON_KAFKA_SOURCE_ROOT = basePath + "/jsonKafkaFiles"; + } + + @AfterAll + public static void tearDown() { + UtilitiesTestBase.cleanUpUtilitiesTestServices(); + } + + public void setupTest() { + TestDataSource.returnEmptyBatch = false; + hudiOpts = new HashMap<>(); + } + protected static void prepareInitialConfigs(FileSystem dfs, String dfsBasePath, String brokerAddress) throws IOException { // prepare the configs. 
UtilitiesTestBase.Helpers.copyToDFS("streamer-config/base.properties", dfs, dfsBasePath + "/base.properties"); @@ -181,8 +204,8 @@ protected static void prepareInitialConfigs(FileSystem dfs, String dfsBasePath, downstreamProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); // Source schema is the target schema of upstream table - downstreamProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/target.avsc"); - downstreamProps.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc"); + downstreamProps.setProperty("hoodie.streamer.schemaprovider.source.schema.file", dfsBasePath + "/target.avsc"); + downstreamProps.setProperty("hoodie.streamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc"); UtilitiesTestBase.Helpers.savePropsToDFS(downstreamProps, dfs, dfsBasePath + "/test-downstream-source.properties"); // Properties used for testing invalid key generator @@ -191,8 +214,8 @@ protected static void prepareInitialConfigs(FileSystem dfs, String dfsBasePath, invalidProps.setProperty("hoodie.datasource.write.keygenerator.class", "invalid"); invalidProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); invalidProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); - invalidProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc"); - invalidProps.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc"); + invalidProps.setProperty("hoodie.streamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc"); + invalidProps.setProperty("hoodie.streamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc"); UtilitiesTestBase.Helpers.savePropsToDFS(invalidProps, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_INVALID); // Properties used for testing inferring key generator for complex key generator @@ -200,8 +223,8 @@ protected static void prepareInitialConfigs(FileSystem dfs, String dfsBasePath, inferKeygenProps.setProperty("include", "base.properties"); inferKeygenProps.setProperty("hoodie.datasource.write.recordkey.field", "timestamp,_row_key"); inferKeygenProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); - inferKeygenProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc"); - inferKeygenProps.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc"); + inferKeygenProps.setProperty("hoodie.streamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc"); + inferKeygenProps.setProperty("hoodie.streamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc"); UtilitiesTestBase.Helpers.savePropsToDFS(inferKeygenProps, dfs, dfsBasePath + "/" + PROPS_FILENAME_INFER_COMPLEX_KEYGEN); // Properties used for testing inferring key generator for non-partitioned key generator @@ -217,8 +240,8 @@ protected static void prepareInitialConfigs(FileSystem dfs, String dfsBasePath, UtilitiesTestBase.Helpers.savePropsToDFS(properties, dfs, dfsBasePath + "/" + PROPS_INVALID_TABLE_CONFIG_FILE); TypedProperties invalidHiveSyncProps = new TypedProperties(); - invalidHiveSyncProps.setProperty("hoodie.deltastreamer.ingestion.tablesToBeIngested", "uber_db.dummy_table_uber"); - invalidHiveSyncProps.setProperty("hoodie.deltastreamer.ingestion.uber_db.dummy_table_uber.configFile", dfsBasePath + 
"/config/invalid_hive_sync_uber_config.properties"); + invalidHiveSyncProps.setProperty("hoodie.streamer.ingestion.tablesToBeIngested", "uber_db.dummy_table_uber"); + invalidHiveSyncProps.setProperty("hoodie.streamer.ingestion.uber_db.dummy_table_uber.configFile", dfsBasePath + "/config/invalid_hive_sync_uber_config.properties"); UtilitiesTestBase.Helpers.savePropsToDFS(invalidHiveSyncProps, dfs, dfsBasePath + "/" + PROPS_INVALID_HIVE_SYNC_TEST_SOURCE1); } @@ -228,47 +251,24 @@ protected static void writeCommonPropsToFile(FileSystem dfs, String dfsBasePath) props.setProperty("hoodie.datasource.write.keygenerator.class", TestHoodieDeltaStreamer.TestGenerator.class.getName()); props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); props.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); - props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc"); - props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc"); + props.setProperty("hoodie.streamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc"); + props.setProperty("hoodie.streamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc"); // Hive Configs - props.setProperty(HIVE_URL.key(), HS2_JDBC_URL); - props.setProperty(META_SYNC_DATABASE_NAME.key(), "testdb1"); - props.setProperty(META_SYNC_TABLE_NAME.key(), "hive_trips"); - props.setProperty(META_SYNC_PARTITION_FIELDS.key(), "datestr"); - props.setProperty(META_SYNC_PARTITION_EXTRACTOR_CLASS.key(), + props.setProperty(HiveSyncConfigHolder.HIVE_URL.key(), HiveTestService.HS2_JDBC_URL); + props.setProperty(HoodieSyncConfig.META_SYNC_DATABASE_NAME.key(), "testdb1"); + props.setProperty(HoodieSyncConfig.META_SYNC_TABLE_NAME.key(), "hive_trips"); + props.setProperty(HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key(), "datestr"); + props.setProperty(HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key(), MultiPartKeysValueExtractor.class.getName()); UtilitiesTestBase.Helpers.savePropsToDFS(props, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_SOURCE); } - @BeforeAll - public static void initClass() throws Exception { - UtilitiesTestBase.initTestServices(false, true, false); - } - - @AfterAll - public static void tearDown() throws IOException { - UtilitiesTestBase.cleanUpUtilitiesTestServices(); - } - - @AfterEach - public void cleanupKafkaTestUtils() { - if (testUtils != null) { - testUtils.teardown(); - } - } - - @BeforeEach - public void setupTest() { - TestDataSource.returnEmptyBatch = false; - hudiOpts = new HashMap<>(); - } - protected static void populateInvalidTableConfigFilePathProps(TypedProperties props, String dfsBasePath) { props.setProperty("hoodie.datasource.write.keygenerator.class", TestHoodieDeltaStreamer.TestGenerator.class.getName()); - props.setProperty("hoodie.deltastreamer.keygen.timebased.output.dateformat", "yyyyMMdd"); - props.setProperty("hoodie.deltastreamer.ingestion.tablesToBeIngested", "uber_db.dummy_table_uber"); - props.setProperty("hoodie.deltastreamer.ingestion.uber_db.dummy_table_uber.configFile", dfsBasePath + "/config/invalid_uber_config.properties"); + props.setProperty("hoodie.keygen.timebased.output.dateformat", "yyyyMMdd"); + props.setProperty("hoodie.streamer.ingestion.tablesToBeIngested", "uber_db.dummy_table_uber"); + props.setProperty("hoodie.streamer.ingestion.uber_db.dummy_table_uber.configFile", dfsBasePath + "/config/invalid_uber_config.properties"); } protected static void 
populateAllCommonProps(TypedProperties props, String dfsBasePath, String brokerAddress) { @@ -279,10 +279,10 @@ protected static void populateAllCommonProps(TypedProperties props, String dfsBa protected static void populateCommonProps(TypedProperties props, String dfsBasePath) { props.setProperty("hoodie.datasource.write.keygenerator.class", TestHoodieDeltaStreamer.TestGenerator.class.getName()); - props.setProperty("hoodie.deltastreamer.keygen.timebased.output.dateformat", "yyyyMMdd"); - props.setProperty("hoodie.deltastreamer.ingestion.tablesToBeIngested", "short_trip_db.dummy_table_short_trip,uber_db.dummy_table_uber"); - props.setProperty("hoodie.deltastreamer.ingestion.uber_db.dummy_table_uber.configFile", dfsBasePath + "/config/uber_config.properties"); - props.setProperty("hoodie.deltastreamer.ingestion.short_trip_db.dummy_table_short_trip.configFile", dfsBasePath + "/config/short_trip_uber_config.properties"); + props.setProperty("hoodie.keygen.timebased.output.dateformat", "yyyyMMdd"); + props.setProperty("hoodie.streamer.ingestion.tablesToBeIngested", "short_trip_db.dummy_table_short_trip,uber_db.dummy_table_uber"); + props.setProperty("hoodie.streamer.ingestion.uber_db.dummy_table_uber.configFile", dfsBasePath + "/config/uber_config.properties"); + props.setProperty("hoodie.streamer.ingestion.short_trip_db.dummy_table_short_trip.configFile", dfsBasePath + "/config/short_trip_uber_config.properties"); } protected static void populateCommonKafkaProps(TypedProperties props, String brokerAddress) { @@ -291,15 +291,15 @@ protected static void populateCommonKafkaProps(TypedProperties props, String bro props.setProperty("auto.offset.reset", "earliest"); props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); - props.setProperty("hoodie.deltastreamer.kafka.source.maxEvents", String.valueOf(5000)); + props.setProperty("hoodie.streamer.kafka.source.maxEvents", String.valueOf(5000)); } protected static void populateCommonHiveProps(TypedProperties props) { // Hive Configs - props.setProperty(HIVE_URL.key(), HS2_JDBC_URL); - props.setProperty(META_SYNC_DATABASE_NAME.key(), "testdb2"); - props.setProperty(META_SYNC_PARTITION_FIELDS.key(), "datestr"); - props.setProperty(META_SYNC_PARTITION_EXTRACTOR_CLASS.key(), + props.setProperty(HiveSyncConfigHolder.HIVE_URL.key(), HiveTestService.HS2_JDBC_URL); + props.setProperty(HoodieSyncConfig.META_SYNC_DATABASE_NAME.key(), "testdb2"); + props.setProperty(HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key(), "datestr"); + props.setProperty(HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key(), MultiPartKeysValueExtractor.class.getName()); } @@ -312,9 +312,14 @@ protected static void prepareParquetDFSFiles(int numRecords, String baseParquetP } protected static HoodieTestDataGenerator prepareParquetDFSFiles(int numRecords, String baseParquetPath, String fileName, boolean useCustomSchema, - String schemaStr, Schema schema) throws IOException { + String schemaStr, Schema schema) throws IOException { + return prepareParquetDFSFiles(numRecords, baseParquetPath, fileName, useCustomSchema, schemaStr, schema, false); + } + + protected static HoodieTestDataGenerator prepareParquetDFSFiles(int numRecords, String baseParquetPath, String fileName, boolean useCustomSchema, + String schemaStr, Schema schema, boolean makeDatesAmbiguous) throws IOException { String path = baseParquetPath + "/" + fileName; - HoodieTestDataGenerator 
dataGenerator = new HoodieTestDataGenerator(); + HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(makeDatesAmbiguous); if (useCustomSchema) { Helpers.saveParquetToDFS(Helpers.toGenericRecords( dataGenerator.generateInsertsAsPerSchema("000", numRecords, schemaStr), @@ -379,12 +384,12 @@ protected void prepareParquetDFSSource(boolean useSchemaProvider, boolean hasTra parquetProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); parquetProps.setProperty("hoodie.datasource.write.partitionpath.field", partitionPath); if (useSchemaProvider) { - parquetProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", basePath + "/" + sourceSchemaFile); + parquetProps.setProperty("hoodie.streamer.schemaprovider.source.schema.file", basePath + "/" + sourceSchemaFile); if (hasTransformer) { - parquetProps.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", basePath + "/" + targetSchemaFile); + parquetProps.setProperty("hoodie.streamer.schemaprovider.target.schema.file", basePath + "/" + targetSchemaFile); } } - parquetProps.setProperty("hoodie.deltastreamer.source.dfs.root", parquetSourceRoot); + parquetProps.setProperty("hoodie.streamer.source.dfs.root", parquetSourceRoot); if (!StringUtils.isNullOrEmpty(emptyBatchParam)) { parquetProps.setProperty(TestParquetDFSSourceEmptyBatch.RETURN_EMPTY_BATCH, emptyBatchParam); } @@ -400,11 +405,11 @@ protected void prepareAvroKafkaDFSSource(String propsFileName, Long maxEventsTo props.setProperty("hoodie.embed.timeline.server", "false"); props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); props.setProperty("hoodie.datasource.write.partitionpath.field", partitionPath); - props.setProperty("hoodie.deltastreamer.source.kafka.topic", topicName); - props.setProperty("hoodie.deltastreamer.kafka.source.maxEvents", String.valueOf(5000)); + props.setProperty("hoodie.streamer.source.kafka.topic", topicName); + props.setProperty("hoodie.streamer.kafka.source.maxEvents", String.valueOf(5000)); props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); - props.setProperty(KAFKA_AVRO_VALUE_DESERIALIZER_CLASS.key(), ByteArrayDeserializer.class.getName()); - props.setProperty("hoodie.deltastreamer.kafka.source.maxEvents", + props.setProperty(KafkaSourceConfig.KAFKA_AVRO_VALUE_DESERIALIZER_CLASS.key(), ByteArrayDeserializer.class.getName()); + props.setProperty("hoodie.streamer.kafka.source.maxEvents", maxEventsToReadFromKafkaSource != null ? 
String.valueOf(maxEventsToReadFromKafkaSource) : String.valueOf(KafkaSourceConfig.MAX_EVENTS_FROM_KAFKA_SOURCE.defaultValue())); props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString()); @@ -437,19 +442,19 @@ static List getTableServicesConfigs(int totalRecords, String autoClean, String inlineClusterMaxCommit, String asyncCluster, String asyncClusterMaxCommit) { List configs = new ArrayList<>(); configs.add(String.format("%s=%d", SourceTestConfig.MAX_UNIQUE_RECORDS_PROP.key(), totalRecords)); - if (nonEmpty(autoClean)) { + if (StringUtils.nonEmpty(autoClean)) { configs.add(String.format("%s=%s", HoodieCleanConfig.AUTO_CLEAN.key(), autoClean)); } - if (nonEmpty(inlineCluster)) { + if (StringUtils.nonEmpty(inlineCluster)) { configs.add(String.format("%s=%s", HoodieClusteringConfig.INLINE_CLUSTERING.key(), inlineCluster)); } - if (nonEmpty(inlineClusterMaxCommit)) { + if (StringUtils.nonEmpty(inlineClusterMaxCommit)) { configs.add(String.format("%s=%s", HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMITS.key(), inlineClusterMaxCommit)); } - if (nonEmpty(asyncCluster)) { + if (StringUtils.nonEmpty(asyncCluster)) { configs.add(String.format("%s=%s", HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE.key(), asyncCluster)); } - if (nonEmpty(asyncClusterMaxCommit)) { + if (StringUtils.nonEmpty(asyncClusterMaxCommit)) { configs.add(String.format("%s=%s", HoodieClusteringConfig.ASYNC_CLUSTERING_MAX_COMMITS.key(), asyncClusterMaxCommit)); } return configs; @@ -477,7 +482,7 @@ static void addCommitToTimeline(HoodieTableMetaClient metaClient, WriteOperation metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(HoodieInstant.State.INFLIGHT, commitActiontype, commitTime)); metaClient.getActiveTimeline().saveAsComplete( new HoodieInstant(HoodieInstant.State.INFLIGHT, commitActiontype, commitTime), - serializeCommitMetadata(commitMetadata)); + TimelineMetadataUtils.serializeCommitMetadata(commitMetadata)); } void assertRecordCount(long expected, String tablePath, SQLContext sqlContext) { @@ -611,11 +616,11 @@ static HoodieDeltaStreamer.Config makeConfigForHudiIncrSrc(String srcBasePath, S cfg.schemaProviderClassName = schemaProviderClassName; } List cfgs = new ArrayList<>(); - cfgs.add(SET_NULL_FOR_MISSING_COLUMNS.key() + "=true"); - cfgs.add("hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt=" + addReadLatestOnMissingCkpt); - cfgs.add("hoodie.deltastreamer.source.hoodieincr.path=" + srcBasePath); + cfgs.add(HoodieCommonConfig.SET_NULL_FOR_MISSING_COLUMNS.key() + "=true"); + cfgs.add("hoodie.streamer.source.hoodieincr.read_latest_on_missing_ckpt=" + addReadLatestOnMissingCkpt); + cfgs.add("hoodie.streamer.source.hoodieincr.path=" + srcBasePath); // No partition - cfgs.add("hoodie.deltastreamer.source.hoodieincr.partition.fields=datestr"); + cfgs.add("hoodie.streamer.source.hoodieincr.partition.fields=datestr"); cfg.configs = cfgs; return cfg; } @@ -660,7 +665,7 @@ static String assertCommitMetadata(String expected, String tablePath, FileSystem HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(lastInstant).get(), HoodieCommitMetadata.class); assertEquals(totalCommits, timeline.countInstants()); - assertEquals(expected, commitMetadata.getMetadata(CHECKPOINT_KEY)); + assertEquals(expected, commitMetadata.getMetadata(HoodieStreamer.CHECKPOINT_KEY)); return lastInstant.getTimestamp(); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java index 5294ae1b4c4a..34486a07ab8b 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java @@ -167,6 +167,10 @@ import static org.apache.hudi.utilities.schema.KafkaOffsetPostProcessor.KAFKA_SOURCE_OFFSET_COLUMN; import static org.apache.hudi.utilities.schema.KafkaOffsetPostProcessor.KAFKA_SOURCE_PARTITION_COLUMN; import static org.apache.hudi.utilities.schema.KafkaOffsetPostProcessor.KAFKA_SOURCE_TIMESTAMP_COLUMN; +import static org.apache.hudi.utilities.testutils.JdbcTestUtils.JDBC_DRIVER; +import static org.apache.hudi.utilities.testutils.JdbcTestUtils.JDBC_PASS; +import static org.apache.hudi.utilities.testutils.JdbcTestUtils.JDBC_URL; +import static org.apache.hudi.utilities.testutils.JdbcTestUtils.JDBC_USER; import static org.apache.hudi.utilities.testutils.UtilitiesTestBase.Helpers.jsonifyRecordsByPartitions; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -372,7 +376,7 @@ public void testKafkaConnectCheckpointProvider() throws IOException { HoodieDeltaStreamer.Config cfg = TestHelpers.makeDropAllConfig(tableBasePath, WriteOperationType.UPSERT); TypedProperties props = new DFSPropertiesConfiguration(fs.getConf(), new Path(basePath + "/" + PROPS_FILENAME_TEST_SOURCE)).getProps(); - props.put("hoodie.deltastreamer.checkpoint.provider.path", bootstrapPath); + props.put("hoodie.streamer.checkpoint.provider.path", bootstrapPath); cfg.initialCheckpointProvider = checkpointProviderClass; // create regular kafka connect hdfs dirs fs.mkdirs(new Path(bootstrapPath)); @@ -564,8 +568,8 @@ public void testSchemaEvolution(String tableType, boolean useUserProvidedSchema, HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT, Collections.singletonList(TestIdentityTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false, true, false, null, tableType); addRecordMerger(recordType, cfg.configs); - cfg.configs.add("hoodie.deltastreamer.schemaprovider.source.schema.file=" + basePath + "/source.avsc"); - cfg.configs.add("hoodie.deltastreamer.schemaprovider.target.schema.file=" + basePath + "/source.avsc"); + cfg.configs.add("hoodie.streamer.schemaprovider.source.schema.file=" + basePath + "/source.avsc"); + cfg.configs.add("hoodie.streamer.schemaprovider.target.schema.file=" + basePath + "/source.avsc"); cfg.configs.add(DataSourceWriteOptions.RECONCILE_SCHEMA().key() + "=true"); if (!useSchemaPostProcessor) { cfg.configs.add(HoodieSchemaProviderConfig.SPARK_AVRO_POST_PROCESSOR_ENABLE.key() + "=false"); @@ -578,8 +582,8 @@ public void testSchemaEvolution(String tableType, boolean useUserProvidedSchema, cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT, Collections.singletonList(TripsWithEvolvedOptionalFieldTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false, true, false, null, tableType); addRecordMerger(recordType, cfg.configs); - cfg.configs.add("hoodie.deltastreamer.schemaprovider.source.schema.file=" + basePath + "/source.avsc"); - cfg.configs.add("hoodie.deltastreamer.schemaprovider.target.schema.file=" + basePath + "/source_evolved.avsc"); + cfg.configs.add("hoodie.streamer.schemaprovider.source.schema.file=" + basePath + "/source.avsc"); + 
cfg.configs.add("hoodie.streamer.schemaprovider.target.schema.file=" + basePath + "/source_evolved.avsc"); cfg.configs.add(DataSourceWriteOptions.RECONCILE_SCHEMA().key() + "=true"); if (!useSchemaPostProcessor) { cfg.configs.add(HoodieSchemaProviderConfig.SPARK_AVRO_POST_PROCESSOR_ENABLE.key() + "=false"); @@ -603,9 +607,9 @@ public void testSchemaEvolution(String tableType, boolean useUserProvidedSchema, cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT, Collections.singletonList(TestIdentityTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false, true, false, null, tableType); addRecordMerger(recordType, cfg.configs); - cfg.configs.add("hoodie.deltastreamer.schemaprovider.source.schema.file=" + basePath + "/source.avsc"); + cfg.configs.add("hoodie.streamer.schemaprovider.source.schema.file=" + basePath + "/source.avsc"); if (useUserProvidedSchema) { - cfg.configs.add("hoodie.deltastreamer.schemaprovider.target.schema.file=" + basePath + "/source_evolved.avsc"); + cfg.configs.add("hoodie.streamer.schemaprovider.target.schema.file=" + basePath + "/source_evolved.avsc"); } if (!useSchemaPostProcessor) { cfg.configs.add(HoodieSchemaProviderConfig.SPARK_AVRO_POST_PROCESSOR_ENABLE.key() + "=false"); @@ -644,12 +648,10 @@ public void testUpsertsCOWContinuousMode(HoodieRecordType recordType) throws Exc testUpsertsContinuousMode(HoodieTableType.COPY_ON_WRITE, "continuous_cow", recordType); } - @ParameterizedTest - @EnumSource(value = HoodieRecordType.class, names = {"AVRO", "SPARK"}) - public void testUpsertsCOW_ContinuousModeDisabled(HoodieRecordType recordType) throws Exception { + @Test + public void testUpsertsCOW_ContinuousModeDisabled() throws Exception { String tableBasePath = basePath + "/non_continuous_cow"; HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT); - addRecordMerger(recordType, cfg.configs); cfg.tableType = HoodieTableType.COPY_ON_WRITE.name(); cfg.configs.add(String.format("%s=%s", TURN_METRICS_ON.key(), "true")); cfg.configs.add(String.format("%s=%s", METRICS_REPORTER_TYPE_VALUE.key(), MetricsReporterType.INMEMORY.name())); @@ -675,12 +677,10 @@ public void testUpsertsMORContinuousMode(HoodieRecordType recordType) throws Exc testUpsertsContinuousMode(HoodieTableType.MERGE_ON_READ, "continuous_mor", recordType); } - @ParameterizedTest - @EnumSource(value = HoodieRecordType.class, names = {"AVRO", "SPARK"}) - public void testUpsertsMOR_ContinuousModeDisabled(HoodieRecordType recordType) throws Exception { + @Test + public void testUpsertsMOR_ContinuousModeDisabled() throws Exception { String tableBasePath = basePath + "/non_continuous_mor"; HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT); - addRecordMerger(recordType, cfg.configs); cfg.tableType = HoodieTableType.MERGE_ON_READ.name(); cfg.configs.add(String.format("%s=%s", TURN_METRICS_ON.key(), "true")); cfg.configs.add(String.format("%s=%s", METRICS_REPORTER_TYPE_VALUE.key(), MetricsReporterType.INMEMORY.name())); @@ -878,8 +878,8 @@ public void testDeltaSyncWithPendingCompaction() throws Exception { } @ParameterizedTest - @CsvSource(value = {"true, AVRO", "true, SPARK", "false, AVRO", "false, SPARK"}) - public void testCleanerDeleteReplacedDataWithArchive(Boolean asyncClean, HoodieRecordType recordType) throws Exception { + @ValueSource(booleans = {true, false}) + public void testCleanerDeleteReplacedDataWithArchive(Boolean asyncClean) throws Exception { String tableBasePath = basePath + 
"/cleanerDeleteReplacedDataWithArchive" + asyncClean; int totalRecords = 3000; @@ -887,7 +887,7 @@ public void testCleanerDeleteReplacedDataWithArchive(Boolean asyncClean, HoodieR // Step 1 : Prepare and insert data without archival and cleaner. // Make sure that there are 6 commits including 2 replacecommits completed. HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT); - addRecordMerger(recordType, cfg.configs); + addRecordMerger(HoodieRecordType.AVRO, cfg.configs); cfg.continuousMode = true; cfg.tableType = HoodieTableType.COPY_ON_WRITE.name(); cfg.configs.addAll(getTableServicesConfigs(totalRecords, "false", "true", "2", "", "")); @@ -956,7 +956,7 @@ public void testCleanerDeleteReplacedDataWithArchive(Boolean asyncClean, HoodieR configs.add(String.format("%s=%s", HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), InProcessLockProvider.class.getName())); } - addRecordMerger(recordType, configs); + addRecordMerger(HoodieRecordType.AVRO, configs); cfg.configs = configs; cfg.continuousMode = false; // timeline as of now. no cleaner and archival kicked in. @@ -1137,6 +1137,7 @@ private void testAsyncClusteringService(HoodieRecordType recordType) throws Exce cfg.tableType = HoodieTableType.COPY_ON_WRITE.name(); cfg.configs.addAll(getTableServicesConfigs(totalRecords, "false", "", "", "true", "3")); cfg.configs.add(String.format("%s=%s", "hoodie.datasource.write.row.writer.enable", "false")); + cfg.configs.add(String.format("%s=%s", "hoodie.merge.allow.duplicate.on.inserts", "false")); HoodieDeltaStreamer ds = new HoodieDeltaStreamer(cfg, jsc); deltaStreamerTestRunner(ds, cfg, (r) -> { TestHelpers.assertAtLeastNReplaceCommits(1, tableBasePath, fs); @@ -1187,19 +1188,19 @@ private void testAsyncClusteringServiceWithConflicts(HoodieRecordType recordType } @Timeout(600) - @ParameterizedTest - @EnumSource(value = HoodieRecordType.class, names = {"AVRO", "SPARK"}) - public void testAsyncClusteringServiceWithCompaction(HoodieRecordType recordType) throws Exception { + @Test + public void testAsyncClusteringServiceWithCompaction() throws Exception { String tableBasePath = basePath + "/asyncClusteringCompaction"; // Keep it higher than batch-size to test continuous mode int totalRecords = 2000; // Initial bulk insert HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT); - addRecordMerger(recordType, cfg.configs); + addRecordMerger(HoodieRecordType.AVRO, cfg.configs); cfg.continuousMode = true; cfg.tableType = HoodieTableType.MERGE_ON_READ.name(); cfg.configs.addAll(getTableServicesConfigs(totalRecords, "false", "", "", "true", "3")); + cfg.configs.add(String.format("%s=%s", "hoodie.merge.allow.duplicate.on.inserts", "false")); HoodieDeltaStreamer ds = new HoodieDeltaStreamer(cfg, jsc); deltaStreamerTestRunner(ds, cfg, (r) -> { TestHelpers.assertAtleastNCompactionCommits(2, tableBasePath, fs); @@ -1214,14 +1215,14 @@ public void testAsyncClusteringServiceWithCompaction(HoodieRecordType recordType } @ParameterizedTest - @CsvSource(value = {"true, AVRO", "true, SPARK", "false, AVRO", "false, SPARK"}) - public void testAsyncClusteringJobWithRetry(boolean retryLastFailedClusteringJob, HoodieRecordType recordType) throws Exception { + @ValueSource(booleans = {true, false}) + public void testAsyncClusteringJobWithRetry(boolean retryLastFailedClusteringJob) throws Exception { String tableBasePath = basePath + "/asyncClustering3"; // ingest data int totalRecords = 3000; HoodieDeltaStreamer.Config cfg = 
TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT); - addRecordMerger(recordType, cfg.configs); + addRecordMerger(HoodieRecordType.AVRO, cfg.configs); cfg.continuousMode = false; cfg.tableType = HoodieTableType.COPY_ON_WRITE.name(); cfg.configs.addAll(getTableServicesConfigs(totalRecords, "false", "false", "0", "false", "0")); @@ -1249,7 +1250,7 @@ public void testAsyncClusteringJobWithRetry(boolean retryLastFailedClusteringJob // trigger a scheduleAndExecute clustering job // when retryFailedClustering true => will rollback and re-execute failed clustering plan with same instant timestamp. // when retryFailedClustering false => will make and execute a new clustering plan with new instant timestamp. - HoodieClusteringJob scheduleAndExecute = initialHoodieClusteringJob(tableBasePath, null, false, "scheduleAndExecute", retryLastFailedClusteringJob, recordType); + HoodieClusteringJob scheduleAndExecute = initialHoodieClusteringJob(tableBasePath, null, false, "scheduleAndExecute", retryLastFailedClusteringJob, HoodieRecordType.AVRO); scheduleAndExecute.cluster(0); String completeClusteringTimeStamp = meta.getActiveTimeline().reload().getCompletedReplaceTimeline().lastInstant().get().getTimestamp(); @@ -1263,11 +1264,11 @@ public void testAsyncClusteringJobWithRetry(boolean retryLastFailedClusteringJob } @ParameterizedTest - @CsvSource(value = {"execute, AVRO", "schedule, AVRO", "scheduleAndExecute, AVRO", "execute, SPARK", "schedule, SPARK", "scheduleAndExecute, SPARK"}) - public void testHoodieAsyncClusteringJobWithScheduleAndExecute(String runningMode, HoodieRecordType recordType) throws Exception { + @ValueSource(strings = {"execute", "schedule", "scheduleAndExecute"}) + public void testHoodieAsyncClusteringJobWithScheduleAndExecute(String runningMode) throws Exception { String tableBasePath = basePath + "/asyncClustering2"; - HoodieDeltaStreamer ds = initialHoodieDeltaStreamer(tableBasePath, 3000, "false", recordType, WriteOperationType.BULK_INSERT); - HoodieClusteringJob scheduleClusteringJob = initialHoodieClusteringJob(tableBasePath, null, true, runningMode, recordType); + HoodieDeltaStreamer ds = initialHoodieDeltaStreamer(tableBasePath, 3000, "false", HoodieRecordType.AVRO, WriteOperationType.BULK_INSERT); + HoodieClusteringJob scheduleClusteringJob = initialHoodieClusteringJob(tableBasePath, null, true, runningMode, HoodieRecordType.AVRO); deltaStreamerTestRunner(ds, (r) -> { Exception exception = null; @@ -1406,20 +1407,34 @@ private void testBulkInsertRowWriterMultiBatches(Boolean useSchemaProvider, List @Test public void testBulkInsertRowWriterContinuousModeWithAsyncClustering() throws Exception { testBulkInsertRowWriterContinuousMode(false, null, false, - getTableServicesConfigs(2000, "false", "", "", "true", "3")); + getTableServicesConfigs(2000, "false", "", "", "true", "3"), false); } @Test public void testBulkInsertRowWriterContinuousModeWithInlineClustering() throws Exception { testBulkInsertRowWriterContinuousMode(false, null, false, - getTableServicesConfigs(2000, "false", "true", "3", "false", "")); + getTableServicesConfigs(2000, "false", "true", "3", "false", ""), false); } - private void testBulkInsertRowWriterContinuousMode(Boolean useSchemaProvider, List transformerClassNames, boolean testEmptyBatch, List customConfigs) throws Exception { + @Test + public void testBulkInsertRowWriterContinuousModeWithInlineClusteringAmbiguousDates() throws Exception { + sparkSession.sqlContext().setConf("spark.sql.parquet.datetimeRebaseModeInWrite", "LEGACY"); + 
sparkSession.sqlContext().setConf("spark.sql.avro.datetimeRebaseModeInWrite", "LEGACY"); + sparkSession.sqlContext().setConf("spark.sql.parquet.int96RebaseModeInWrite", "LEGACY"); + sparkSession.sqlContext().setConf("spark.sql.parquet.datetimeRebaseModeInRead", "LEGACY"); + sparkSession.sqlContext().setConf("spark.sql.avro.datetimeRebaseModeInRead", "LEGACY"); + sparkSession.sqlContext().setConf("spark.sql.parquet.int96RebaseModeInRead", "LEGACY"); + testBulkInsertRowWriterContinuousMode(false, null, false, + getTableServicesConfigs(2000, "false", "true", "3", + "false", ""), true); + } + + private void testBulkInsertRowWriterContinuousMode(Boolean useSchemaProvider, List transformerClassNames, + boolean testEmptyBatch, List customConfigs, boolean makeDatesAmbiguous) throws Exception { PARQUET_SOURCE_ROOT = basePath + "/parquetFilesDfs" + testNum; int parquetRecordsCount = 100; boolean hasTransformer = transformerClassNames != null && !transformerClassNames.isEmpty(); - prepareParquetDFSFiles(parquetRecordsCount, PARQUET_SOURCE_ROOT, FIRST_PARQUET_FILE_NAME, false, null, null); + prepareParquetDFSFiles(parquetRecordsCount, PARQUET_SOURCE_ROOT, FIRST_PARQUET_FILE_NAME, false, null, null, makeDatesAmbiguous); prepareParquetDFSSource(useSchemaProvider, hasTransformer, "source.avsc", "target.avsc", PROPS_FILENAME_TEST_PARQUET, PARQUET_SOURCE_ROOT, false, "partition_path", testEmptyBatch ? "1" : ""); @@ -1429,7 +1444,7 @@ private void testBulkInsertRowWriterContinuousMode(Boolean useSchemaProvider, Li int counter = 2; while (counter < 100) { // lets keep going. if the test times out, we will cancel the future within finally. So, safe to generate 100 batches. LOG.info("Generating data for batch " + counter); - prepareParquetDFSFiles(100, PARQUET_SOURCE_ROOT, Integer.toString(counter) + ".parquet", false, null, null); + prepareParquetDFSFiles(100, PARQUET_SOURCE_ROOT, Integer.toString(counter) + ".parquet", false, null, null, makeDatesAmbiguous); counter++; Thread.sleep(2000); } @@ -1473,9 +1488,9 @@ private void testBulkInsertRowWriterContinuousMode(Boolean useSchemaProvider, Li * step involves using a SQL template to transform a source TEST-DATA-SOURCE ============================> HUDI TABLE * 1 ===============> HUDI TABLE 2 (incr-pull with transform) (incr-pull) Hudi Table 1 is synced with Hive. 
*/ - @ParameterizedTest - @EnumSource(value = HoodieRecordType.class, names = {"AVRO", "SPARK"}) - public void testBulkInsertsAndUpsertsWithSQLBasedTransformerFor2StepPipeline(HoodieRecordType recordType) throws Exception { + @Test + public void testBulkInsertsAndUpsertsWithSQLBasedTransformerFor2StepPipeline() throws Exception { + HoodieRecordType recordType = HoodieRecordType.AVRO; String tableBasePath = basePath + "/" + recordType.toString() + "/test_table2"; String downstreamTableBasePath = basePath + "/" + recordType.toString() + "/test_downstream_table2"; @@ -1635,14 +1650,12 @@ public void testPayloadClassUpdateWithCOWTable() throws Exception { assertFalse(props.containsKey(HoodieTableConfig.PAYLOAD_CLASS_NAME.key())); } - @ParameterizedTest - @EnumSource(value = HoodieRecordType.class, names = {"AVRO", "SPARK"}) - public void testFilterDupes(HoodieRecordType recordType) throws Exception { + @Test + public void testFilterDupes() throws Exception { String tableBasePath = basePath + "/test_dupes_table"; // Initial bulk insert HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.BULK_INSERT); - addRecordMerger(recordType, cfg.configs); new HoodieDeltaStreamer(cfg, jsc).sync(); assertRecordCount(1000, tableBasePath, sqlContext); TestHelpers.assertCommitMetadata("00000", tableBasePath, fs, 1); @@ -1663,7 +1676,7 @@ public void testFilterDupes(HoodieRecordType recordType) throws Exception { HoodieTableMetaClient mClient = HoodieTableMetaClient.builder().setConf(jsc.hadoopConfiguration()).setBasePath(tableBasePath).setLoadActiveTimelineOnLoad(true).build(); HoodieInstant lastFinished = mClient.getCommitsTimeline().filterCompletedInstants().lastInstant().get(); HoodieDeltaStreamer.Config cfg2 = TestHelpers.makeDropAllConfig(tableBasePath, WriteOperationType.UPSERT); - addRecordMerger(recordType, cfg2.configs); + addRecordMerger(HoodieRecordType.AVRO, cfg2.configs); cfg2.filterDupes = false; cfg2.sourceLimit = 2000; cfg2.operation = WriteOperationType.UPSERT; @@ -1809,12 +1822,12 @@ private void testORCDFSSource(boolean useSchemaProvider, List transforme orcProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); orcProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); if (useSchemaProvider) { - orcProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", basePath + "/" + "source.avsc"); + orcProps.setProperty("hoodie.streamer.schemaprovider.source.schema.file", basePath + "/" + "source.avsc"); if (transformerClassNames != null) { - orcProps.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", basePath + "/" + "target.avsc"); + orcProps.setProperty("hoodie.streamer.schemaprovider.target.schema.file", basePath + "/" + "target.avsc"); } } - orcProps.setProperty("hoodie.deltastreamer.source.dfs.root", ORC_SOURCE_ROOT); + orcProps.setProperty("hoodie.streamer.source.dfs.root", ORC_SOURCE_ROOT); UtilitiesTestBase.Helpers.savePropsToDFS(orcProps, fs, basePath + "/" + PROPS_FILENAME_TEST_ORC); String tableBasePath = basePath + "/test_orc_source_table" + testNum; @@ -1839,11 +1852,11 @@ private void prepareJsonKafkaDFSSource(String propsFileName, String autoResetVal props.setProperty("hoodie.embed.timeline.server", "false"); props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); props.setProperty("hoodie.datasource.write.partitionpath.field", "driver"); - props.setProperty("hoodie.deltastreamer.source.dfs.root", JSON_KAFKA_SOURCE_ROOT); - 
props.setProperty("hoodie.deltastreamer.source.kafka.topic", topicName); - props.setProperty("hoodie.deltastreamer.source.kafka.checkpoint.type", kafkaCheckpointType); - props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", basePath + "/source_uber.avsc"); - props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", basePath + "/target_uber.avsc"); + props.setProperty("hoodie.streamer.source.dfs.root", JSON_KAFKA_SOURCE_ROOT); + props.setProperty("hoodie.streamer.source.kafka.topic", topicName); + props.setProperty("hoodie.streamer.source.kafka.checkpoint.type", kafkaCheckpointType); + props.setProperty("hoodie.streamer.schemaprovider.source.schema.file", basePath + "/source_uber.avsc"); + props.setProperty("hoodie.streamer.schemaprovider.target.schema.file", basePath + "/target_uber.avsc"); props.setProperty("auto.offset.reset", autoResetValue); if (extraProps != null && !extraProps.isEmpty()) { extraProps.forEach(props::setProperty); @@ -2121,8 +2134,8 @@ public void testEmptyBatchWithNullSchemaFirstBatch() throws Exception { String tableBasePath = basePath + "/test_parquet_table" + testNum; HoodieDeltaStreamer.Config config = TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT, ParquetDFSSource.class.getName(), - null, PROPS_FILENAME_TEST_PARQUET, false, - false, 100000, false, null, null, "timestamp", null); + Collections.singletonList(TestIdentityTransformer.class.getName()), PROPS_FILENAME_TEST_PARQUET, false, + false, 100000, false, null, "MERGE_ON_READ", "timestamp", null); config.schemaProviderClassName = NullValueSchemaProvider.class.getName(); config.sourceClassName = TestParquetDFSSourceEmptyBatch.class.getName(); @@ -2242,22 +2255,22 @@ private void prepareCsvDFSSource( csvProps.setProperty("hoodie.datasource.write.recordkey.field", recordKeyField); csvProps.setProperty("hoodie.datasource.write.partitionpath.field", partitionPath); if (useSchemaProvider) { - csvProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", basePath + "/source-flattened.avsc"); + csvProps.setProperty("hoodie.streamer.schemaprovider.source.schema.file", basePath + "/source-flattened.avsc"); if (hasTransformer) { - csvProps.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", basePath + "/target-flattened.avsc"); + csvProps.setProperty("hoodie.streamer.schemaprovider.target.schema.file", basePath + "/target-flattened.avsc"); } } - csvProps.setProperty("hoodie.deltastreamer.source.dfs.root", sourceRoot); + csvProps.setProperty("hoodie.streamer.source.dfs.root", sourceRoot); if (sep != ',') { if (sep == '\t') { - csvProps.setProperty("hoodie.deltastreamer.csv.sep", "\\t"); + csvProps.setProperty("hoodie.streamer.csv.sep", "\\t"); } else { - csvProps.setProperty("hoodie.deltastreamer.csv.sep", Character.toString(sep)); + csvProps.setProperty("hoodie.streamer.csv.sep", Character.toString(sep)); } } if (hasHeader) { - csvProps.setProperty("hoodie.deltastreamer.csv.header", Boolean.toString(hasHeader)); + csvProps.setProperty("hoodie.streamer.csv.header", Boolean.toString(hasHeader)); } UtilitiesTestBase.Helpers.savePropsToDFS(csvProps, fs, basePath + "/" + PROPS_FILENAME_TEST_CSV); @@ -2378,7 +2391,7 @@ private void prepareSqlSource() throws IOException { sqlSourceProps.setProperty("hoodie.embed.timeline.server", "false"); sqlSourceProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); sqlSourceProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); - 
sqlSourceProps.setProperty("hoodie.deltastreamer.source.sql.sql.query", "select * from test_sql_table"); + sqlSourceProps.setProperty("hoodie.streamer.source.sql.sql.query", "select * from test_sql_table"); UtilitiesTestBase.Helpers.savePropsToDFS(sqlSourceProps, fs, basePath + "/" + PROPS_FILENAME_TEST_SQL_SOURCE); @@ -2406,21 +2419,19 @@ public void testSqlSourceSource() throws Exception { assertRecordCount(SQL_SOURCE_NUM_RECORDS, tableBasePath, sqlContext); } - @Disabled @Test public void testJdbcSourceIncrementalFetchInContinuousMode() { - try (Connection connection = DriverManager.getConnection("jdbc:h2:mem:test_mem", "test", "jdbc")) { + try (Connection connection = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASS)) { TypedProperties props = new TypedProperties(); - props.setProperty("hoodie.deltastreamer.jdbc.url", "jdbc:h2:mem:test_mem"); - props.setProperty("hoodie.deltastreamer.jdbc.driver.class", "org.h2.Driver"); - props.setProperty("hoodie.deltastreamer.jdbc.user", "test"); - props.setProperty("hoodie.deltastreamer.jdbc.password", "jdbc"); - props.setProperty("hoodie.deltastreamer.jdbc.table.name", "triprec"); - props.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "true"); - props.setProperty("hoodie.deltastreamer.jdbc.table.incr.column.name", "id"); + props.setProperty("hoodie.streamer.jdbc.url", JDBC_URL); + props.setProperty("hoodie.streamer.jdbc.driver.class", JDBC_DRIVER); + props.setProperty("hoodie.streamer.jdbc.user", JDBC_USER); + props.setProperty("hoodie.streamer.jdbc.password", JDBC_PASS); + props.setProperty("hoodie.streamer.jdbc.table.name", "triprec"); + props.setProperty("hoodie.streamer.jdbc.incr.pull", "true"); + props.setProperty("hoodie.streamer.jdbc.table.incr.column.name", "id"); props.setProperty("hoodie.datasource.write.recordkey.field", "ID"); - props.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); UtilitiesTestBase.Helpers.savePropsToDFS(props, fs, basePath + "/test-jdbc-source.properties"); @@ -2454,7 +2465,7 @@ public void testHoodieIncrFallback() throws Exception { HoodieDeltaStreamer.Config downstreamCfg = TestHelpers.makeConfigForHudiIncrSrc(tableBasePath, downstreamTableBasePath, WriteOperationType.BULK_INSERT, true, null); - downstreamCfg.configs.add("hoodie.deltastreamer.source.hoodieincr.num_instants=1"); + downstreamCfg.configs.add("hoodie.streamer.source.hoodieincr.num_instants=1"); new HoodieDeltaStreamer(downstreamCfg, jsc).sync(); insertInTable(tableBasePath, 9, WriteOperationType.UPSERT); @@ -2470,7 +2481,7 @@ public void testHoodieIncrFallback() throws Exception { downstreamCfg.configs.remove(downstreamCfg.configs.size() - 1); downstreamCfg.configs.add(DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN().key() + "=true"); //Adding this conf to make testing easier :) - downstreamCfg.configs.add("hoodie.deltastreamer.source.hoodieincr.num_instants=10"); + downstreamCfg.configs.add("hoodie.streamer.source.hoodieincr.num_instants=10"); downstreamCfg.operation = WriteOperationType.UPSERT; new HoodieDeltaStreamer(downstreamCfg, jsc).sync(); new HoodieDeltaStreamer(downstreamCfg, jsc).sync(); @@ -2825,9 +2836,9 @@ public void testAutoGenerateRecordKeys() throws Exception { } @ParameterizedTest - @CsvSource(value = {"COPY_ON_WRITE, AVRO", "MERGE_ON_READ, AVRO", - "COPY_ON_WRITE, SPARK", "MERGE_ON_READ, SPARK"}) - public void testConfigurationHotUpdate(HoodieTableType tableType, HoodieRecordType recordType) throws Exception { + @EnumSource(HoodieTableType.class) + public void 
testConfigurationHotUpdate(HoodieTableType tableType) throws Exception { + HoodieRecordType recordType = HoodieRecordType.AVRO; String tableBasePath = basePath + String.format("/configurationHotUpdate_%s_%s", tableType.name(), recordType.name()); HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionQuick.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionQuick.java index 096ddf14cc76..5bbeb006029f 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionQuick.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionQuick.java @@ -24,7 +24,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.Option; -import org.apache.hudi.exception.SchemaCompatibilityException; +import org.apache.hudi.exception.MissingSchemaFieldException; import org.apache.hudi.utilities.UtilHelpers; import org.apache.hudi.utilities.streamer.HoodieStreamer; @@ -126,6 +126,7 @@ protected static Stream testParamsWithSchemaTransformer() { b.add(Arguments.of("COPY_ON_WRITE", true, true, true, true, true)); b.add(Arguments.of("COPY_ON_WRITE", true, false, false, false, true)); b.add(Arguments.of("MERGE_ON_READ", true, true, true, false, false)); + b.add(Arguments.of("MERGE_ON_READ", true, true, false, false, false)); b.add(Arguments.of("MERGE_ON_READ", true, false, true, true, false)); } return b.build(); @@ -221,8 +222,7 @@ public void testBase(String tableType, addData(df, false); deltaStreamer.sync(); assertTrue(allowNullForDeletedCols); - } catch (SchemaCompatibilityException e) { - assertTrue(e.getMessage().contains("Incoming batch schema is not compatible with the table's one")); + } catch (MissingSchemaFieldException e) { assertFalse(allowNullForDeletedCols); return; } @@ -405,10 +405,8 @@ public void testDroppedColumn(String tableType, assertTrue(latestTableSchemaOpt.get().getField("rider").schema().getTypes() .stream().anyMatch(t -> t.getType().equals(Schema.Type.STRING))); assertTrue(metaClient.reloadActiveTimeline().lastInstant().get().compareTo(lastInstant) > 0); - } catch (SchemaCompatibilityException e) { + } catch (MissingSchemaFieldException e) { assertFalse(allowNullForDeletedCols || targetSchemaSameAsTableSchema); - assertTrue(e.getMessage().contains("Incoming batch schema is not compatible with the table's one")); - assertFalse(allowNullForDeletedCols); } } @@ -488,7 +486,7 @@ public void testNonNullableColumnDrop(String tableType, assertTrue(metaClient.reloadActiveTimeline().lastInstant().get().compareTo(lastInstant) > 0); } catch (Exception e) { assertTrue(containsErrorMessage(e, "java.lang.NullPointerException", - "Incoming batch schema is not compatible with the table's one")); + "Schema validation failed due to missing field.")); } } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerWithMultiWriter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerWithMultiWriter.java index 2745edef5846..635b57c9fa67 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerWithMultiWriter.java +++ 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerWithMultiWriter.java @@ -320,8 +320,8 @@ private static TypedProperties prepareMultiWriterProps(FileSystem fs, String bas props.setProperty("hoodie.datasource.write.keygenerator.class", TestHoodieDeltaStreamer.TestGenerator.class.getName()); props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); props.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); - props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", basePath + "/source.avsc"); - props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", basePath + "/target.avsc"); + props.setProperty("hoodie.streamer.schemaprovider.source.schema.file", basePath + "/source.avsc"); + props.setProperty("hoodie.streamer.schemaprovider.target.schema.file", basePath + "/target.avsc"); props.setProperty("include", "base.properties"); props.setProperty("hoodie.write.concurrency.mode", "optimistic_concurrency_control"); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieMultiTableDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieMultiTableDeltaStreamer.java index 26ea61e31fe6..0c5de8634368 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieMultiTableDeltaStreamer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieMultiTableDeltaStreamer.java @@ -178,16 +178,16 @@ public void testMultiTableExecutionWithKafkaSource() throws IOException { HoodieMultiTableDeltaStreamer streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc); List executionContexts = streamer.getTableExecutionContexts(); TypedProperties properties = executionContexts.get(1).getProperties(); - properties.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", basePath + "/source_uber.avsc"); - properties.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", basePath + "/target_uber.avsc"); + properties.setProperty("hoodie.streamer.schemaprovider.source.schema.file", basePath + "/source_uber.avsc"); + properties.setProperty("hoodie.streamer.schemaprovider.target.schema.file", basePath + "/target_uber.avsc"); properties.setProperty("hoodie.datasource.write.partitionpath.field", "timestamp"); - properties.setProperty("hoodie.deltastreamer.source.kafka.topic", topicName2); + properties.setProperty("hoodie.streamer.source.kafka.topic", topicName2); executionContexts.get(1).setProperties(properties); TypedProperties properties1 = executionContexts.get(0).getProperties(); - properties1.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", basePath + "/source_short_trip_uber.avsc"); - properties1.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", basePath + "/target_short_trip_uber.avsc"); + properties1.setProperty("hoodie.streamer.schemaprovider.source.schema.file", basePath + "/source_short_trip_uber.avsc"); + properties1.setProperty("hoodie.streamer.schemaprovider.target.schema.file", basePath + "/target_short_trip_uber.avsc"); properties1.setProperty("hoodie.datasource.write.partitionpath.field", "timestamp"); - properties1.setProperty("hoodie.deltastreamer.source.kafka.topic", topicName1); + properties1.setProperty("hoodie.streamer.source.kafka.topic", topicName1); executionContexts.get(0).setProperties(properties1); String targetBasePath1 = executionContexts.get(0).getConfig().targetBasePath; 
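// Editorial sketch (not part of the patch): the hunks above and below apply the same
// mechanical rename used throughout this diff, swapping the deprecated
// "hoodie.deltastreamer.*" property prefix for "hoodie.streamer.*". The class below is a
// minimal, self-contained illustration of the new-style keys exactly as they appear in
// these hunks; the import path for TypedProperties and the class/method names are
// assumptions made for the example only.
import org.apache.hudi.common.config.TypedProperties;

final class StreamerPropertyRenameSketch {

  // Builds schema-provider and Kafka source properties using the renamed keys.
  static TypedProperties newStyleProps(String basePath, String topicName) {
    TypedProperties props = new TypedProperties();
    // previously: hoodie.deltastreamer.schemaprovider.source.schema.file
    props.setProperty("hoodie.streamer.schemaprovider.source.schema.file", basePath + "/source_uber.avsc");
    // previously: hoodie.deltastreamer.schemaprovider.target.schema.file
    props.setProperty("hoodie.streamer.schemaprovider.target.schema.file", basePath + "/target_uber.avsc");
    // previously: hoodie.deltastreamer.source.kafka.topic
    props.setProperty("hoodie.streamer.source.kafka.topic", topicName);
    return props;
  }
}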
String targetBasePath2 = executionContexts.get(1).getConfig().targetBasePath; @@ -288,7 +288,7 @@ private TypedProperties getParquetProps(String parquetSourceRoot) { props.setProperty("include", "base.properties"); props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); props.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); - props.setProperty("hoodie.deltastreamer.source.dfs.root", parquetSourceRoot); + props.setProperty("hoodie.streamer.source.dfs.root", parquetSourceRoot); return props; } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSourceFormatAdapter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSourceFormatAdapter.java index 1d6f2f110b2b..788105c20284 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSourceFormatAdapter.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSourceFormatAdapter.java @@ -49,6 +49,7 @@ import java.util.stream.Stream; +import static org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -64,9 +65,7 @@ public class TestSourceFormatAdapter { public static void start() { spark = SparkSession .builder() - .master("local[*]") - .appName(TestSourceFormatAdapter.class.getName()) - .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .config(getSparkConfForTest(TestSourceFormatAdapter.class.getName())) .getOrCreate(); jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHiveSchemaProvider.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHiveSchemaProvider.java index e2ae67aae23c..75e812acf374 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHiveSchemaProvider.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHiveSchemaProvider.java @@ -55,8 +55,8 @@ public class TestHiveSchemaProvider extends SparkClientFunctionalTestHarnessWith @BeforeAll public static void init() { Pair dbAndTableName = paresDBAndTableName(SOURCE_SCHEMA_TABLE_NAME); - PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.hive.database", dbAndTableName.getLeft()); - PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.hive.table", dbAndTableName.getRight()); + PROPS.setProperty("hoodie.streamer.schemaprovider.source.schema.hive.database", dbAndTableName.getLeft()); + PROPS.setProperty("hoodie.streamer.schemaprovider.source.schema.hive.table", dbAndTableName.getRight()); } @Disabled @@ -84,8 +84,8 @@ public void testSourceSchema() throws Exception { public void testTargetSchema() throws Exception { try { Pair dbAndTableName = paresDBAndTableName(TARGET_SCHEMA_TABLE_NAME); - PROPS.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.hive.database", dbAndTableName.getLeft()); - PROPS.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.hive.table", dbAndTableName.getRight()); + PROPS.setProperty("hoodie.streamer.schemaprovider.target.schema.hive.database", dbAndTableName.getLeft()); + PROPS.setProperty("hoodie.streamer.schemaprovider.target.schema.hive.table", dbAndTableName.getRight()); createSchemaTable(SOURCE_SCHEMA_TABLE_NAME); createSchemaTable(TARGET_SCHEMA_TABLE_NAME); Schema targetSchema = 
UtilHelpers.createSchemaProvider(HiveSchemaProvider.class.getName(), PROPS, jsc()).getTargetSchema(); @@ -105,7 +105,7 @@ public void testTargetSchema() throws Exception { @Test public void testNotExistTable() { String wrongName = "wrong_schema_tab"; - PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.hive.table", wrongName); + PROPS.setProperty("hoodie.streamer.schemaprovider.source.schema.hive.table", wrongName); Assertions.assertThrows(NoSuchTableException.class, () -> { try { UtilHelpers.createSchemaProvider(HiveSchemaProvider.class.getName(), PROPS, jsc()).getSourceSchema(); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java index eb1ac22d69ad..9facedb81413 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java @@ -20,6 +20,7 @@ import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.hadoop.fs.HadoopFSUtils; @@ -45,6 +46,7 @@ @Tag("functional") public class TestHoodieSnapshotCopier extends FunctionalTestHarness { + private static final String BASE_FILE_EXTENSION = HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension(); private static final String TEST_WRITE_TOKEN = "1-0-1"; private String basePath; @@ -100,27 +102,27 @@ public void testSnapshotCopy() throws Exception { HoodieTestDataGenerator.writePartitionMetadataDeprecated(fs, new String[] {"2016/05/01", "2016/05/02", "2016/05/06"}, basePath); // Make commit1 - File file11 = new File(basePath + "/2016/05/01/" + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, "id11")); + File file11 = new File(basePath + "/2016/05/01/" + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, "id11", BASE_FILE_EXTENSION)); file11.createNewFile(); - File file12 = new File(basePath + "/2016/05/02/" + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, "id12")); + File file12 = new File(basePath + "/2016/05/02/" + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, "id12", BASE_FILE_EXTENSION)); file12.createNewFile(); - File file13 = new File(basePath + "/2016/05/06/" + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, "id13")); + File file13 = new File(basePath + "/2016/05/06/" + FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN, "id13", BASE_FILE_EXTENSION)); file13.createNewFile(); // Make commit2 - File file21 = new File(basePath + "/2016/05/01/" + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, "id21")); + File file21 = new File(basePath + "/2016/05/01/" + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, "id21", BASE_FILE_EXTENSION)); file21.createNewFile(); - File file22 = new File(basePath + "/2016/05/02/" + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, "id22")); + File file22 = new File(basePath + "/2016/05/02/" + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, "id22", BASE_FILE_EXTENSION)); file22.createNewFile(); - File file23 = new File(basePath + "/2016/05/06/" + FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, "id23")); + File file23 = new File(basePath + "/2016/05/06/" + 
FSUtils.makeBaseFileName(commitTime2, TEST_WRITE_TOKEN, "id23", BASE_FILE_EXTENSION)); file23.createNewFile(); // Make commit3 - File file31 = new File(basePath + "/2016/05/01/" + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, "id31")); + File file31 = new File(basePath + "/2016/05/01/" + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, "id31", BASE_FILE_EXTENSION)); file31.createNewFile(); - File file32 = new File(basePath + "/2016/05/02/" + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, "id32")); + File file32 = new File(basePath + "/2016/05/02/" + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, "id32", BASE_FILE_EXTENSION)); file32.createNewFile(); - File file33 = new File(basePath + "/2016/05/06/" + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, "id33")); + File file33 = new File(basePath + "/2016/05/06/" + FSUtils.makeBaseFileName(commitTime3, TEST_WRITE_TOKEN, "id33", BASE_FILE_EXTENSION)); file33.createNewFile(); // Do a snapshot copy diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestJdbcbasedSchemaProvider.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestJdbcbasedSchemaProvider.java index 46400dda48da..82588429db5c 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestJdbcbasedSchemaProvider.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestJdbcbasedSchemaProvider.java @@ -37,6 +37,10 @@ import java.sql.PreparedStatement; import java.sql.SQLException; +import static org.apache.hudi.utilities.testutils.JdbcTestUtils.JDBC_DRIVER; +import static org.apache.hudi.utilities.testutils.JdbcTestUtils.JDBC_PASS; +import static org.apache.hudi.utilities.testutils.JdbcTestUtils.JDBC_URL; +import static org.apache.hudi.utilities.testutils.JdbcTestUtils.JDBC_USER; import static org.junit.jupiter.api.Assertions.assertEquals; @Tag("functional") @@ -47,13 +51,13 @@ public class TestJdbcbasedSchemaProvider extends SparkClientFunctionalTestHarnes @BeforeAll public static void init() { - PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.connection.url", "jdbc:h2:mem:test_mem"); - PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.driver.type", "org.h2.Driver"); - PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.username", "sa"); - PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.password", ""); - PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.dbtable", "triprec"); - PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.timeout", "0"); - PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.nullable", "false"); + PROPS.setProperty("hoodie.streamer.schemaprovider.source.schema.jdbc.connection.url", JDBC_URL); + PROPS.setProperty("hoodie.streamer.schemaprovider.source.schema.jdbc.driver.type", JDBC_DRIVER); + PROPS.setProperty("hoodie.streamer.schemaprovider.source.schema.jdbc.username", JDBC_USER); + PROPS.setProperty("hoodie.streamer.schemaprovider.source.schema.jdbc.password", JDBC_PASS); + PROPS.setProperty("hoodie.streamer.schemaprovider.source.schema.jdbc.dbtable", "triprec"); + PROPS.setProperty("hoodie.streamer.schemaprovider.source.schema.jdbc.timeout", "0"); + PROPS.setProperty("hoodie.streamer.schemaprovider.source.schema.jdbc.nullable", "false"); } @Test @@ -73,7 +77,7 @@ public void testJdbcbasedSchemaProvider() throws Exception { * @throws SQLException */ private void 
initH2Database() throws SQLException { - try (Connection conn = DriverManager.getConnection("jdbc:h2:mem:test_mem", "sa", "")) { + try (Connection conn = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASS)) { PreparedStatement ps = conn.prepareStatement(UtilitiesTestBase.Helpers.readFile("streamer-config/triprec.sql")); ps.executeUpdate(); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestKafkaOffsetPostProcessor.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestKafkaOffsetPostProcessor.java new file mode 100644 index 000000000000..aac441609ca3 --- /dev/null +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestKafkaOffsetPostProcessor.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.utilities.schema; + +import org.apache.avro.Schema; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class TestKafkaOffsetPostProcessor { + private static final List + EXPECTED_FIELD_NAMES = Arrays.asList("existing_field", "_hoodie_kafka_source_offset", "_hoodie_kafka_source_partition", "_hoodie_kafka_source_timestamp", "_hoodie_kafka_source_key"); + + @ParameterizedTest + @MethodSource("cases") + void testProcessSchema(Schema inputSchema) { + KafkaOffsetPostProcessor kafkaOffsetPostProcessor = new KafkaOffsetPostProcessor(null, null); + Schema actual = kafkaOffsetPostProcessor.processSchema(inputSchema); + List actualFieldNames = actual.getFields().stream().map(Schema.Field::name).collect(Collectors.toList()); + assertEquals(EXPECTED_FIELD_NAMES, actualFieldNames); + } + + private static Stream cases() { + String offsetField = "{\"name\": \"_hoodie_kafka_source_offset\", \"type\": \"long\", \"doc\": \"offset column\", \"default\": 0}"; + String partitionField = "{\"name\": \"_hoodie_kafka_source_partition\", \"type\": \"int\", \"doc\": \"partition column\", \"default\": 0}"; + String timestampField = "{\"name\": \"_hoodie_kafka_source_timestamp\", \"type\": \"long\", \"doc\": \"timestamp column\", \"default\": 0}"; + String keyField = "{\"name\": \"_hoodie_kafka_source_key\", \"type\": [\"null\", \"string\"], \"doc\": \"kafka key column\", \"default\": null}"; + return Stream.of( + Arguments.of(new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"example\", \"fields\": [{\"name\": \"existing_field\", \"type\": \"string\"}]}")), + Arguments.of(new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"example\", \"fields\": 
[{\"name\": \"existing_field\", \"type\": \"string\"}, " + + offsetField + "]}")), + Arguments.of(new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"example\", \"fields\": [{\"name\": \"existing_field\", \"type\": \"string\"}, " + + offsetField + ", " + partitionField + "]}")), + Arguments.of( + new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"example\", \"fields\": [{\"name\": \"existing_field\", \"type\": \"string\"}, " + + offsetField + ", " + partitionField + ", " + timestampField + "]}")), + Arguments.of( + new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"example\", \"fields\": [{\"name\": \"existing_field\", \"type\": \"string\"}, " + + offsetField + ", " + partitionField + ", " + timestampField + ", " + keyField + "]}")) + ); + } +} diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestSchemaRegistryProvider.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestSchemaRegistryProvider.java index 397e72a0ec4a..88f67723c858 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestSchemaRegistryProvider.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestSchemaRegistryProvider.java @@ -64,10 +64,10 @@ private static Schema getExpectedConvertedSchema() { private static TypedProperties getProps() { return new TypedProperties() { { - put("hoodie.deltastreamer.schemaprovider.registry.baseUrl", "http://" + BASIC_AUTH + "@localhost"); - put("hoodie.deltastreamer.schemaprovider.registry.urlSuffix", "-value"); - put("hoodie.deltastreamer.schemaprovider.registry.url", "http://foo:bar@localhost"); - put("hoodie.deltastreamer.source.kafka.topic", "foo"); + put("hoodie.streamer.schemaprovider.registry.baseUrl", "http://" + BASIC_AUTH + "@localhost"); + put("hoodie.streamer.schemaprovider.registry.urlSuffix", "-value"); + put("hoodie.streamer.schemaprovider.registry.url", "http://foo:bar@localhost"); + put("hoodie.streamer.source.kafka.topic", "foo"); } }; } @@ -102,8 +102,8 @@ public void testGetTargetSchemaShouldRequestSchemaWithCreds() throws IOException @Test public void testGetSourceSchemaShouldRequestSchemaWithoutCreds() throws IOException { TypedProperties props = getProps(); - props.put("hoodie.deltastreamer.schemaprovider.registry.url", "http://localhost"); - props.put("hoodie.deltastreamer.schemaprovider.registry.schemaconverter", DummySchemaConverter.class.getName()); + props.put("hoodie.streamer.schemaprovider.registry.url", "http://localhost"); + props.put("hoodie.streamer.schemaprovider.registry.schemaconverter", DummySchemaConverter.class.getName()); SchemaRegistryProvider spyUnderTest = getUnderTest(props); Schema actual = spyUnderTest.getSourceSchema(); assertNotNull(actual); @@ -114,8 +114,8 @@ public void testGetSourceSchemaShouldRequestSchemaWithoutCreds() throws IOExcept @Test public void testGetTargetSchemaShouldRequestSchemaWithoutCreds() throws IOException { TypedProperties props = getProps(); - props.put("hoodie.deltastreamer.schemaprovider.registry.url", "http://localhost"); - props.put("hoodie.deltastreamer.schemaprovider.registry.schemaconverter", DummySchemaConverter.class.getName()); + props.put("hoodie.streamer.schemaprovider.registry.url", "http://localhost"); + props.put("hoodie.streamer.schemaprovider.registry.schemaconverter", DummySchemaConverter.class.getName()); SchemaRegistryProvider spyUnderTest = getUnderTest(props); Schema actual = spyUnderTest.getTargetSchema(); assertNotNull(actual); diff --git 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/BaseTestKafkaSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/BaseTestKafkaSource.java index b5cbf2738f65..c5fc7bfaafae 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/BaseTestKafkaSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/BaseTestKafkaSource.java @@ -28,6 +28,8 @@ import org.apache.hudi.utilities.ingestion.HoodieIngestionMetrics; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.streamer.SourceFormatAdapter; +import org.apache.hudi.utilities.streamer.SourceProfile; +import org.apache.hudi.utilities.streamer.SourceProfileSupplier; import org.apache.avro.generic.GenericRecord; import org.apache.kafka.clients.consumer.ConsumerConfig; @@ -52,6 +54,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; /** * Generic tests for all {@link KafkaSource} to ensure all implementations properly handle offsets, fetch limits, failure modes, etc. @@ -60,6 +63,7 @@ abstract class BaseTestKafkaSource extends SparkClientFunctionalTestHarness { protected static final String TEST_TOPIC_PREFIX = "hoodie_test_"; protected final HoodieIngestionMetrics metrics = mock(HoodieIngestionMetrics.class); + protected final Option sourceProfile = Option.of(mock(SourceProfileSupplier.class)); protected SchemaProvider schemaProvider; protected KafkaTestUtils testUtils; @@ -165,7 +169,7 @@ public void testProtoKafkaSourceInsertRecordsLessSourceLimit() { testUtils.createTopic(topic, 2); TypedProperties props = createPropsForKafkaSource(topic, Long.MAX_VALUE, "earliest"); SourceFormatAdapter kafkaSource = createSource(props); - props.setProperty("hoodie.deltastreamer.kafka.source.maxEvents", "500"); + props.setProperty("hoodie.streamer.kafka.source.maxEvents", "500"); /* 1. maxEventsFromKafkaSourceProp set to more than generated insert records @@ -277,4 +281,51 @@ public void testFailOnDataLoss() throws Exception { + " either the data was aged out by Kafka or the topic may have been deleted before all the data in the topic was processed.", t.getMessage()); } + + @Test + public void testKafkaSourceWithOffsetsFromSourceProfile() { + // topic setup. + final String topic = TEST_TOPIC_PREFIX + "testKafkaSourceWithOffsetRanges"; + testUtils.createTopic(topic, 2); + TypedProperties props = createPropsForKafkaSource(topic, null, "earliest"); + + when(sourceProfile.get().getSourceProfile()).thenReturn(new TestSourceProfile(Long.MAX_VALUE, 4, 500)); + SourceFormatAdapter kafkaSource = createSource(props); + + // Test for empty data. + assertEquals(Option.empty(), kafkaSource.fetchNewDataInAvroFormat(Option.empty(), Long.MAX_VALUE).getBatch()); + + // Publish messages and assert source has picked up all messages in offsetRanges supplied by input batch profile. 
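// Editorial note (not part of the patch): the TestSourceProfile mocked above caps the
// batch at 500 events across 4 source partitions with an effectively unlimited byte
// budget (Long.MAX_VALUE). That is why the fetch below is expected to return exactly
// 500 records even though 1000 messages are published and a 900-record source limit is
// passed in, which is consistent with the profile's source-specific context, rather
// than the caller-supplied limit, driving the offset ranges read from Kafka.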
+ sendMessagesToKafka(topic, 1000, 2); + InputBatch> fetch1 = kafkaSource.fetchNewDataInAvroFormat(Option.empty(), 900); + assertEquals(500, fetch1.getBatch().get().count()); + } + + static class TestSourceProfile implements SourceProfile { + + private final long maxSourceBytes; + private final int sourcePartitions; + private final long numEvents; + + public TestSourceProfile(long maxSourceBytes, int sourcePartitions, long numEvents) { + this.maxSourceBytes = maxSourceBytes; + this.sourcePartitions = sourcePartitions; + this.numEvents = numEvents; + } + + @Override + public long getMaxSourceBytes() { + return maxSourceBytes; + } + + @Override + public int getSourcePartitions() { + return sourcePartitions; + } + + @Override + public Long getSourceSpecificContext() { + return numEvents; + } + } } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestAvroDFSSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestAvroDFSSource.java index 1cda910b707b..808a4ca57cea 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestAvroDFSSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestAvroDFSSource.java @@ -39,9 +39,8 @@ public void setup() throws Exception { } @Override - protected Source prepareDFSSource() { - TypedProperties props = new TypedProperties(); - props.setProperty("hoodie.deltastreamer.source.dfs.root", dfsRoot); + protected Source prepareDFSSource(TypedProperties props) { + props.setProperty("hoodie.streamer.source.dfs.root", dfsRoot); try { return new AvroDFSSource(props, jsc, sparkSession, schemaProvider); } catch (IOException e) { diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestAvroKafkaSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestAvroKafkaSource.java index 558181f42586..497757ab3787 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestAvroKafkaSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestAvroKafkaSource.java @@ -97,11 +97,11 @@ public void tearDown() { protected TypedProperties createPropsForKafkaSource(String topic, Long maxEventsToReadFromKafkaSource, String resetStrategy) { TypedProperties props = new TypedProperties(); - props.setProperty("hoodie.deltastreamer.source.kafka.topic", topic); + props.setProperty("hoodie.streamer.source.kafka.topic", topic); props.setProperty("bootstrap.servers", testUtils.brokerAddress()); props.setProperty("auto.offset.reset", resetStrategy); props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); - props.setProperty("hoodie.deltastreamer.kafka.source.maxEvents", + props.setProperty("hoodie.streamer.kafka.source.maxEvents", maxEventsToReadFromKafkaSource != null ? 
String.valueOf(maxEventsToReadFromKafkaSource) : String.valueOf(KafkaSourceConfig.MAX_EVENTS_FROM_KAFKA_SOURCE.defaultValue())); props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString()); @@ -160,8 +160,8 @@ public void testAppendKafkaOffsets() throws IOException { "test", dataGen.generateGenericRecord()); JavaRDD> rdd = jsc().parallelize(Arrays.asList(recordConsumerRecord)); TypedProperties props = new TypedProperties(); - props.put("hoodie.deltastreamer.source.kafka.topic", "test"); - props.put("hoodie.deltastreamer.schemaprovider.source.schema.file", SCHEMA_PATH); + props.put("hoodie.streamer.source.kafka.topic", "test"); + props.put("hoodie.streamer.schemaprovider.source.schema.file", SCHEMA_PATH); SchemaProvider schemaProvider = UtilHelpers.wrapSchemaProviderWithPostProcessor( UtilHelpers.createSchemaProvider(FilebasedSchemaProvider.class.getName(), props, jsc()), props, jsc(), new ArrayList<>()); @@ -191,11 +191,11 @@ public void testAppendKafkaOffsetsSourceFormatAdapter() throws IOException { final String topic = TEST_TOPIC_PREFIX + "testKafkaOffsetAppend"; TypedProperties props = createPropsForKafkaSource(topic, null, "earliest"); - props.put("hoodie.deltastreamer.schemaprovider.source.schema.file", SCHEMA_PATH); + props.put("hoodie.streamer.schemaprovider.source.schema.file", SCHEMA_PATH); SchemaProvider schemaProvider = UtilHelpers.wrapSchemaProviderWithPostProcessor( UtilHelpers.createSchemaProvider(FilebasedSchemaProvider.class.getName(), props, jsc()), props, jsc(), new ArrayList<>()); - props.put("hoodie.deltastreamer.source.kafka.value.deserializer.class", ByteArrayDeserializer.class.getName()); + props.put("hoodie.streamer.source.kafka.value.deserializer.class", ByteArrayDeserializer.class.getName()); int numPartitions = 2; int numMessages = 30; testUtils.createTopic(topic,numPartitions); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestCsvDFSSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestCsvDFSSource.java index 8eaa1d95b239..c4bb59ff812f 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestCsvDFSSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestCsvDFSSource.java @@ -46,11 +46,10 @@ public void setup() throws Exception { } @Override - public Source prepareDFSSource() { - TypedProperties props = new TypedProperties(); - props.setProperty("hoodie.deltastreamer.source.dfs.root", dfsRoot); - props.setProperty("hoodie.deltastreamer.csv.header", Boolean.toString(true)); - props.setProperty("hoodie.deltastreamer.csv.sep", "\t"); + public Source prepareDFSSource(TypedProperties props) { + props.setProperty("hoodie.streamer.source.dfs.root", dfsRoot); + props.setProperty("hoodie.streamer.csv.header", Boolean.toString(true)); + props.setProperty("hoodie.streamer.csv.sep", "\t"); return new CsvDFSSource(props, jsc, sparkSession, schemaProvider); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGcsEventsHoodieIncrSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGcsEventsHoodieIncrSource.java index bc2906d251fc..3b018473dc4b 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGcsEventsHoodieIncrSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGcsEventsHoodieIncrSource.java @@ -36,6 +36,7 @@ import org.apache.hudi.config.HoodieCleanConfig; import org.apache.hudi.config.HoodieWriteConfig; import 
org.apache.hudi.testutils.SparkClientFunctionalTestHarness; +import org.apache.hudi.utilities.config.CloudSourceConfig; import org.apache.hudi.utilities.schema.FilebasedSchemaProvider; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.CloudDataFetcher; @@ -59,6 +60,7 @@ import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.ValueSource; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.MockitoAnnotations; @@ -85,6 +87,7 @@ public class TestGcsEventsHoodieIncrSource extends SparkClientFunctionalTestHarn private static final Schema GCS_METADATA_SCHEMA = SchemaTestUtil.getSchemaFromResource( TestGcsEventsHoodieIncrSource.class, "/streamer-config/gcs-metadata.avsc", true); + private static final String IGNORE_FILE_EXTENSION = ".ignore"; private ObjectMapper mapper = new ObjectMapper(); @@ -111,8 +114,8 @@ public void setUp() throws IOException { jsc = JavaSparkContext.fromSparkContext(spark().sparkContext()); String schemaFilePath = TestGcsEventsHoodieIncrSource.class.getClassLoader().getResource("schema/sample_gcs_data.avsc").getPath(); TypedProperties props = new TypedProperties(); - props.put("hoodie.deltastreamer.schemaprovider.source.schema.file", schemaFilePath); - props.put("hoodie.deltastreamer.schema.provider.class.name", FilebasedSchemaProvider.class.getName()); + props.put("hoodie.streamer.schemaprovider.source.schema.file", schemaFilePath); + props.put("hoodie.streamer.schema.provider.class.name", FilebasedSchemaProvider.class.getName()); this.schemaProvider = Option.of(new FilebasedSchemaProvider(props, jsc)); MockitoAnnotations.initMocks(this); } @@ -195,28 +198,44 @@ public void largeBootstrapWithFilters() throws IOException { readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file10006.json"), 250L, "1#path/to/file10007.json"); } - @Test - public void testTwoFilesAndContinueAcrossCommits() throws IOException { + @ParameterizedTest + @ValueSource(strings = { + ".json", + ".gz" + }) + public void testTwoFilesAndContinueAcrossCommits(String extension) throws IOException { String commitTimeForWrites = "2"; String commitTimeForReads = "1"; Pair> inserts = writeGcsMetadataRecords(commitTimeForWrites); + + TypedProperties typedProperties = setProps(READ_UPTO_LATEST_COMMIT); + // In the case the extension is explicitly set to something other than the file format. + if (!extension.endsWith("json")) { + typedProperties.setProperty(CloudSourceConfig.CLOUD_DATAFILE_EXTENSION.key(), extension); + } + List> filePathSizeAndCommitTime = new ArrayList<>(); - // Add file paths and sizes to the list - filePathSizeAndCommitTime.add(Triple.of("path/to/file1.json", 100L, "1")); - filePathSizeAndCommitTime.add(Triple.of("path/to/file3.json", 200L, "1")); - filePathSizeAndCommitTime.add(Triple.of("path/to/file2.json", 150L, "1")); - filePathSizeAndCommitTime.add(Triple.of("path/to/file4.json", 50L, "2")); - filePathSizeAndCommitTime.add(Triple.of("path/to/file5.json", 150L, "2")); + // Add file paths and sizes to the list. + // Check with a couple of invalid file extensions to ensure they are filtered out. 
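// Editorial note (not part of the patch): entries ending in IGNORE_FILE_EXTENSION
// (".ignore") are expected to be dropped by the object-metadata filtering, while the
// parameterized extension (".json" or ".gz") is what the checkpoints asserted below are
// built from; for non-json extensions the test explicitly sets
// CloudSourceConfig.CLOUD_DATAFILE_EXTENSION above so the fetcher still selects the
// intended files.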
+ filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file1%s", extension), 100L, "1")); + filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file2%s", IGNORE_FILE_EXTENSION), 800L, "1")); + filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file3%s", extension), 200L, "1")); + filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file2%s", extension), 150L, "1")); + filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file4%s", extension), 50L, "2")); + filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file4%s", IGNORE_FILE_EXTENSION), 200L, "2")); + filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file5%s", extension), 150L, "2")); Dataset inputDs = generateDataset(filePathSizeAndCommitTime); setMockQueryRunner(inputDs); - readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of(commitTimeForReads), 100L, "1#path/to/file1.json"); - readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file1.json"), 100L, "1#path/to/file2.json"); - readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file2.json"), 1000L, "2#path/to/file5.json"); - readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of(commitTimeForReads), 100L, "1#path/to/file1.json"); + readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of("1"), 100L, + "1#path/to/file1" + extension, typedProperties); + readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file1" + extension), 100L, + "1#path/to/file2" + extension, typedProperties); + readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file2" + extension), 1000L, + "2#path/to/file5" + extension, typedProperties); } @ParameterizedTest @@ -244,14 +263,14 @@ public void testSplitSnapshotLoad(String snapshotCheckPoint, String exptected1, setMockQueryRunner(inputDs, Option.of(snapshotCheckPoint)); TypedProperties typedProperties = setProps(READ_UPTO_LATEST_COMMIT); - typedProperties.setProperty("hoodie.deltastreamer.source.cloud.data.ignore.relpath.prefix", "path/to/skip"); + typedProperties.setProperty("hoodie.streamer.source.cloud.data.ignore.relpath.prefix", "path/to/skip"); //1. snapshot query, read all records readAndAssert(READ_UPTO_LATEST_COMMIT, Option.empty(), 50000L, exptected1, typedProperties); //2. incremental query, as commit is present in timeline readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of(exptected1), 10L, exptected2, typedProperties); //3. snapshot query with source limit less than first commit size readAndAssert(READ_UPTO_LATEST_COMMIT, Option.empty(), 50L, exptected3, typedProperties); - typedProperties.setProperty("hoodie.deltastreamer.source.cloud.data.ignore.relpath.prefix", "path/to"); + typedProperties.setProperty("hoodie.streamer.source.cloud.data.ignore.relpath.prefix", "path/to"); //4. As snapshotQuery will return 1 -> same would be return as nextCheckpoint (dataset is empty due to ignore prefix). 
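// Illustrative aside, not part of the patch: a minimal sketch of the extension-based
// filtering these parameterized tests exercise. Paths whose suffix does not match the
// configured data-file extension (".json" or ".gz" here) are expected to be dropped, which
// is why the ".ignore" entries above should never surface in a checkpoint. The helper and
// class names are hypothetical; this is not the Hudi cloud-source implementation.
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class ExtensionFilterSketch {

  // Keep only object paths that end with the configured data-file extension.
  static List<String> filterByExtension(List<String> paths, String configuredExtension) {
    return paths.stream()
        .filter(path -> path.endsWith(configuredExtension))
        .collect(Collectors.toList());
  }

  public static void main(String[] args) {
    List<String> paths = Arrays.asList(
        "path/to/file1.json", "path/to/file2.ignore", "path/to/file3.json");
    // Prints [path/to/file1.json, path/to/file3.json]; the .ignore entry is filtered out.
    System.out.println(filterByExtension(paths, ".json"));
  }
}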
readAndAssert(READ_UPTO_LATEST_COMMIT, Option.empty(), 50L, exptected4, typedProperties); } @@ -283,7 +302,7 @@ private void readAndAssert(IncrSourceHelper.MissingCheckpointStrategy missingChe TypedProperties typedProperties) { GcsEventsHoodieIncrSource incrSource = new GcsEventsHoodieIncrSource(typedProperties, jsc(), - spark(), schemaProvider.orElse(null), new GcsObjectMetadataFetcher(typedProperties, "json"), gcsObjectDataFetcher, queryRunner); + spark(), schemaProvider.orElse(null), new GcsObjectMetadataFetcher(typedProperties), gcsObjectDataFetcher, queryRunner); Pair>, String> dataAndCheckpoint = incrSource.fetchNextBatch(checkpointToPull, sourceLimit); @@ -297,7 +316,7 @@ private void readAndAssert(IncrSourceHelper.MissingCheckpointStrategy missingChe private void readAndAssert(IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy, Option checkpointToPull, long sourceLimit, String expectedCheckpoint) { TypedProperties typedProperties = setProps(missingCheckpointStrategy); - typedProperties.put("hoodie.deltastreamer.source.hoodieincr.file.format", "json"); + typedProperties.put("hoodie.streamer.source.hoodieincr.file.format", "json"); readAndAssert(missingCheckpointStrategy, checkpointToPull, sourceLimit, expectedCheckpoint, typedProperties); } @@ -369,12 +388,12 @@ private Pair> writeGcsMetadataRecords(String commitTi private TypedProperties setProps(IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy) { Properties properties = new Properties(); //String schemaFilePath = TestGcsEventsHoodieIncrSource.class.getClassLoader().getResource("schema/sample_gcs_data.avsc").getPath(); - //properties.put("hoodie.deltastreamer.schemaprovider.source.schema.file", schemaFilePath); - properties.put("hoodie.deltastreamer.schema.provider.class.name", FilebasedSchemaProvider.class.getName()); - properties.setProperty("hoodie.deltastreamer.source.hoodieincr.path", basePath()); - properties.setProperty("hoodie.deltastreamer.source.hoodieincr.missing.checkpoint.strategy", + //properties.put("hoodie.streamer.schemaprovider.source.schema.file", schemaFilePath); + properties.put("hoodie.streamer.schema.provider.class.name", FilebasedSchemaProvider.class.getName()); + properties.setProperty("hoodie.streamer.source.hoodieincr.path", basePath()); + properties.setProperty("hoodie.streamer.source.hoodieincr.missing.checkpoint.strategy", missingCheckpointStrategy.name()); - properties.setProperty("hoodie.deltastreamer.source.gcsincr.datafile.format", "json"); + properties.setProperty(CloudSourceConfig.DATAFILE_FORMAT.key(), "json"); return new TypedProperties(properties); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestHoodieIncrSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestHoodieIncrSource.java index b9e20fb3a192..3d9f3362a15e 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestHoodieIncrSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestHoodieIncrSource.java @@ -337,8 +337,8 @@ private void readAndAssert(IncrSourceHelper.MissingCheckpointStrategy missingChe String expectedCheckpoint, Option snapshotCheckPointImplClassOpt) { Properties properties = new Properties(); - properties.setProperty("hoodie.deltastreamer.source.hoodieincr.path", basePath()); - properties.setProperty("hoodie.deltastreamer.source.hoodieincr.missing.checkpoint.strategy", missingCheckpointStrategy.name()); + properties.setProperty("hoodie.streamer.source.hoodieincr.path", 
basePath()); + properties.setProperty("hoodie.streamer.source.hoodieincr.missing.checkpoint.strategy", missingCheckpointStrategy.name()); // TODO: [HUDI-7081] get rid of this properties.setProperty(HoodieReaderConfig.FILE_GROUP_READER_ENABLED.key(), "false"); snapshotCheckPointImplClassOpt.map(className -> diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJdbcSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJdbcSource.java index 4c8b264fe168..ade781e6c8bd 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJdbcSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJdbcSource.java @@ -46,6 +46,10 @@ import java.sql.SQLException; import java.util.stream.Collectors; +import static org.apache.hudi.utilities.testutils.JdbcTestUtils.JDBC_DRIVER; +import static org.apache.hudi.utilities.testutils.JdbcTestUtils.JDBC_PASS; +import static org.apache.hudi.utilities.testutils.JdbcTestUtils.JDBC_URL; +import static org.apache.hudi.utilities.testutils.JdbcTestUtils.JDBC_USER; import static org.apache.hudi.utilities.testutils.JdbcTestUtils.clearAndInsert; import static org.apache.hudi.utilities.testutils.JdbcTestUtils.close; import static org.apache.hudi.utilities.testutils.JdbcTestUtils.count; @@ -73,12 +77,12 @@ public static void beforeAll() throws Exception { @BeforeEach public void setup() throws Exception { super.setup(); - PROPS.setProperty("hoodie.deltastreamer.jdbc.url", "jdbc:h2:mem:test_mem"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.driver.class", "org.h2.Driver"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.user", "test"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.password", "jdbc"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.table.name", "triprec"); - connection = DriverManager.getConnection("jdbc:h2:mem:test_mem", "test", "jdbc"); + PROPS.setProperty("hoodie.streamer.jdbc.url", JDBC_URL); + PROPS.setProperty("hoodie.streamer.jdbc.driver.class", JDBC_DRIVER); + PROPS.setProperty("hoodie.streamer.jdbc.user", JDBC_USER); + PROPS.setProperty("hoodie.streamer.jdbc.password", JDBC_PASS); + PROPS.setProperty("hoodie.streamer.jdbc.table.name", "triprec"); + connection = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASS); } @AfterEach @@ -89,8 +93,8 @@ public void teardown() throws Exception { @Test public void testSingleCommit() { - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "true"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.table.incr.column.name", "last_insert"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "true"); + PROPS.setProperty("hoodie.streamer.jdbc.table.incr.column.name", "last_insert"); try { int numRecords = 100; @@ -112,8 +116,8 @@ public void testSingleCommit() { @Test public void testInsertAndUpdate() { - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "true"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.table.incr.column.name", "last_insert"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "true"); + PROPS.setProperty("hoodie.streamer.jdbc.table.incr.column.name", "last_insert"); try { final String commitTime = "000"; @@ -146,8 +150,8 @@ public void testInsertAndUpdate() { @Test public void testTwoCommits() { - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "true"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.table.incr.column.name", "last_insert"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "true"); + PROPS.setProperty("hoodie.streamer.jdbc.table.incr.column.name", 
"last_insert"); try { // Add 10 records with commit time "000" @@ -174,8 +178,8 @@ public void testTwoCommits() { @Test public void testIncrementalFetchWithCommitTime() { - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "true"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.table.incr.column.name", "last_insert"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "true"); + PROPS.setProperty("hoodie.streamer.jdbc.table.incr.column.name", "last_insert"); try { // Add 10 records with commit time "000" @@ -200,8 +204,8 @@ public void testIncrementalFetchWithCommitTime() { @Test public void testIncrementalFetchWithNoMatchingRows() { - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "true"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.table.incr.column.name", "last_insert"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "true"); + PROPS.setProperty("hoodie.streamer.jdbc.table.incr.column.name", "last_insert"); try { // Add 10 records with commit time "000" @@ -222,8 +226,8 @@ public void testIncrementalFetchWithNoMatchingRows() { @Test public void testIncrementalFetchWhenTableRecordsMoreThanSourceLimit() { - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "true"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.table.incr.column.name", "id"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "true"); + PROPS.setProperty("hoodie.streamer.jdbc.table.incr.column.name", "id"); try { // Add 100 records with commit time "000" @@ -253,8 +257,8 @@ public void testIncrementalFetchWhenTableRecordsMoreThanSourceLimit() { @Test public void testIncrementalFetchWhenLastCheckpointMoreThanTableRecords() { - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "true"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.table.incr.column.name", "id"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "true"); + PROPS.setProperty("hoodie.streamer.jdbc.table.incr.column.name", "id"); try { // Add 100 records with commit time "000" @@ -280,8 +284,8 @@ public void testIncrementalFetchWhenLastCheckpointMoreThanTableRecords() { @Test public void testIncrementalFetchFallbackToFullFetchWhenError() { - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "true"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.table.incr.column.name", "last_insert"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "true"); + PROPS.setProperty("hoodie.streamer.jdbc.table.incr.column.name", "last_insert"); try { // Add 10 records with commit time "000" @@ -295,14 +299,14 @@ public void testIncrementalFetchFallbackToFullFetchWhenError() { // Add 10 records with commit time "001" insert("001", 10, connection, DATA_GENERATOR, PROPS); - PROPS.setProperty("hoodie.deltastreamer.jdbc.table.incr.column.name", "dummy_col"); + PROPS.setProperty("hoodie.streamer.jdbc.table.incr.column.name", "dummy_col"); assertThrows(HoodieException.class, () -> { // Start incremental scan with a dummy column that does not exist. // This will throw an exception as the default behavior is to not fallback to full fetch. runSource(Option.of(batch.getCheckpointForNextBatch()), -1); }); - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.fallback.to.full.fetch", "true"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.fallback.to.full.fetch", "true"); // Start incremental scan with a dummy column that does not exist. // This will fallback to full fetch mode but still throw an exception checkpointing will fail. 
@@ -317,7 +321,7 @@ public void testIncrementalFetchFallbackToFullFetchWhenError() { @Test public void testFullFetchWithCommitTime() { - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "false"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "false"); try { // Add 10 records with commit time "000" @@ -341,8 +345,8 @@ public void testFullFetchWithCommitTime() { @Test public void testFullFetchWithCheckpoint() { - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "false"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.table.incr.column.name", "last_insert"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "false"); + PROPS.setProperty("hoodie.streamer.jdbc.table.incr.column.name", "last_insert"); try { // Add 10 records with commit time "000" @@ -356,7 +360,7 @@ public void testFullFetchWithCheckpoint() { // Get max of incremental column Column incrementalColumn = rowDataset - .col(PROPS.getString("hoodie.deltastreamer.jdbc.table.incr.column.name")); + .col(PROPS.getString("hoodie.streamer.jdbc.table.incr.column.name")); final String max = rowDataset.agg(functions.max(incrementalColumn).cast(DataTypes.StringType)).first() .getString(0); @@ -378,10 +382,10 @@ public void testSourceWithPasswordOnFs() { // Write secret string to fs in a file writeSecretToFs(); // Remove secret string from props - PROPS.remove("hoodie.deltastreamer.jdbc.password"); + PROPS.remove("hoodie.streamer.jdbc.password"); // Set property to read secret from fs file - PROPS.setProperty("hoodie.deltastreamer.jdbc.password.file", "file:///tmp/hudi/config/secret"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "false"); + PROPS.setProperty("hoodie.streamer.jdbc.password.file", "file:///tmp/hudi/config/secret"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "false"); // Add 10 records with commit time 000 clearAndInsert("000", 10, connection, DATA_GENERATOR, PROPS); Dataset rowDataset = runSource(Option.empty(), 10).getBatch().get(); @@ -397,8 +401,8 @@ public void testSourceWithNoPasswordThrowsException() { // Write secret string to fs in a file writeSecretToFs(); // Remove secret string from props - PROPS.remove("hoodie.deltastreamer.jdbc.password"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "false"); + PROPS.remove("hoodie.streamer.jdbc.password"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "false"); // Add 10 records with commit time 000 clearAndInsert("000", 10, connection, DATA_GENERATOR, PROPS); runSource(Option.empty(), 10); @@ -407,9 +411,9 @@ public void testSourceWithNoPasswordThrowsException() { @Test public void testSourceWithExtraOptions() { - PROPS.setProperty("hoodie.deltastreamer.jdbc.extra.options.fetchsize", "10"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "false"); - PROPS.remove("hoodie.deltastreamer.jdbc.table.incr.column.name"); + PROPS.setProperty("hoodie.streamer.jdbc.extra.options.fetchsize", "10"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "false"); + PROPS.remove("hoodie.streamer.jdbc.table.incr.column.name"); try { // Add 20 records with commit time 000 clearAndInsert("000", 20, connection, DATA_GENERATOR, PROPS); @@ -422,8 +426,8 @@ public void testSourceWithExtraOptions() { @Test public void testSourceWithStorageLevel() { - PROPS.setProperty("hoodie.deltastreamer.jdbc.storage.level", "NONE"); - PROPS.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "false"); + PROPS.setProperty("hoodie.streamer.jdbc.storage.level", "NONE"); + PROPS.setProperty("hoodie.streamer.jdbc.incr.pull", "false"); 
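// A small sketch of the secret-file mechanism testSourceWithPasswordOnFs covers above:
// hoodie.streamer.jdbc.password is removed and hoodie.streamer.jdbc.password.file points at
// a filesystem path whose contents are used as the password. The reader below is a
// hypothetical stand-in, not Hudi's own property resolution.
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class PasswordFileSketch {

  // Read the secret stored at the configured password file location.
  static String readPassword(String passwordFileUri) throws IOException {
    Path path = new Path(passwordFileUri);
    FileSystem fs = path.getFileSystem(new Configuration());
    try (FSDataInputStream in = fs.open(path);
         ByteArrayOutputStream out = new ByteArrayOutputStream()) {
      IOUtils.copyBytes(in, out, 4096, false);
      return new String(out.toByteArray(), StandardCharsets.UTF_8).trim();
    }
  }

  public static void main(String[] args) throws IOException {
    // Same location the test writes its secret to via writeSecretToFs().
    System.out.println(readPassword("file:///tmp/hudi/config/secret"));
  }
}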
try { // Add 10 records with commit time 000 clearAndInsert("000", 10, connection, DATA_GENERATOR, PROPS); @@ -438,7 +442,7 @@ public void testSourceWithStorageLevel() { private void writeSecretToFs() throws IOException { FileSystem fs = FileSystem.get(new Configuration()); FSDataOutputStream outputStream = fs.create(new Path("file:///tmp/hudi/config/secret")); - outputStream.writeBytes("jdbc"); + outputStream.writeBytes(JDBC_PASS); outputStream.close(); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJsonDFSSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJsonDFSSource.java index fde10b2d9a59..ae134e862bea 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJsonDFSSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJsonDFSSource.java @@ -20,15 +20,29 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.SchemaCompatibilityException; +import org.apache.hudi.utilities.config.HoodieStreamerConfig; +import org.apache.hudi.utilities.streamer.SourceFormatAdapter; import org.apache.hudi.utilities.testutils.UtilitiesTestBase; import org.apache.hudi.utilities.testutils.sources.AbstractDFSSourceTestBase; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import java.io.IOException; +import java.io.PrintStream; import java.util.List; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + /** * Basic tests for {@link JsonDFSSource}. 
*/ @@ -42,9 +56,8 @@ public void setup() throws Exception { } @Override - public Source prepareDFSSource() { - TypedProperties props = new TypedProperties(); - props.setProperty("hoodie.deltastreamer.source.dfs.root", dfsRoot); + public Source prepareDFSSource(TypedProperties props) { + props.setProperty("hoodie.streamer.source.dfs.root", dfsRoot); return new JsonDFSSource(props, jsc, sparkSession, schemaProvider); } @@ -53,4 +66,36 @@ public void writeNewDataToFile(List records, Path path) throws IOE UtilitiesTestBase.Helpers.saveStringsToDFS( Helpers.jsonifyRecords(records), fs, path.toString()); } + + @Test + public void testCorruptedSourceFile() throws IOException { + fs.mkdirs(new Path(dfsRoot)); + TypedProperties props = new TypedProperties(); + props.setProperty(HoodieStreamerConfig.ROW_THROW_EXPLICIT_EXCEPTIONS.key(), "true"); + SourceFormatAdapter sourceFormatAdapter = new SourceFormatAdapter(prepareDFSSource(props), Option.empty(), Option.of(props)); + generateOneFile("1", "000", 10); + generateOneFile("2", "000", 10); + RemoteIterator files = fs.listFiles(generateOneFile("3", "000", 10), true); + + FileStatus file1Status = files.next(); + InputBatch> batch = sourceFormatAdapter.fetchNewDataInRowFormat(Option.empty(), Long.MAX_VALUE); + corruptFile(file1Status.getPath()); + assertTrue(batch.getBatch().isPresent()); + Throwable t = assertThrows(Exception.class, + () -> batch.getBatch().get().show(30)); + while (t != null) { + if (t instanceof SchemaCompatibilityException) { + return; + } + t = t.getCause(); + } + throw new AssertionError("Exception does not have SchemaCompatibility in its trace", t); + } + + protected void corruptFile(Path path) throws IOException { + PrintStream os = new PrintStream(fs.appendFile(path).build()); + os.println("🤷‍"); + os.flush(); + os.close(); + } } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJsonKafkaSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJsonKafkaSource.java index 14ffd31582a1..4b615c50ee19 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJsonKafkaSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJsonKafkaSource.java @@ -23,14 +23,16 @@ import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.testutils.InProcessTimeGenerator; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.testutils.InProcessTimeGenerator; import org.apache.hudi.common.util.Option; +import org.apache.hudi.utilities.UtilHelpers; import org.apache.hudi.utilities.config.HoodieStreamerConfig; import org.apache.hudi.utilities.config.KafkaSourceConfig; import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer; import org.apache.hudi.utilities.schema.FilebasedSchemaProvider; import org.apache.hudi.utilities.streamer.BaseErrorTableWriter; +import org.apache.hudi.utilities.streamer.DefaultStreamContext; import org.apache.hudi.utilities.streamer.ErrorEvent; import org.apache.hudi.utilities.streamer.SourceFormatAdapter; @@ -60,10 +62,10 @@ import static org.apache.hudi.config.HoodieErrorTableConfig.ERROR_TABLE_BASE_PATH; import static org.apache.hudi.config.HoodieErrorTableConfig.ERROR_TARGET_TABLE; import static org.apache.hudi.utilities.config.KafkaSourceConfig.ENABLE_KAFKA_COMMIT_OFFSET; +import static 
org.apache.hudi.utilities.schema.KafkaOffsetPostProcessor.KAFKA_SOURCE_KEY_COLUMN; import static org.apache.hudi.utilities.schema.KafkaOffsetPostProcessor.KAFKA_SOURCE_OFFSET_COLUMN; import static org.apache.hudi.utilities.schema.KafkaOffsetPostProcessor.KAFKA_SOURCE_PARTITION_COLUMN; import static org.apache.hudi.utilities.schema.KafkaOffsetPostProcessor.KAFKA_SOURCE_TIMESTAMP_COLUMN; -import static org.apache.hudi.utilities.schema.KafkaOffsetPostProcessor.KAFKA_SOURCE_KEY_COLUMN; import static org.apache.hudi.utilities.testutils.UtilitiesTestBase.Helpers.jsonifyRecords; import static org.apache.hudi.utilities.testutils.UtilitiesTestBase.Helpers.jsonifyRecordsByPartitions; import static org.apache.hudi.utilities.testutils.UtilitiesTestBase.Helpers.jsonifyRecordsByPartitionsWithNullKafkaKey; @@ -80,7 +82,7 @@ public class TestJsonKafkaSource extends BaseTestKafkaSource { public void init() throws Exception { String schemaFilePath = Objects.requireNonNull(SCHEMA_FILE_URL).toURI().getPath(); TypedProperties props = new TypedProperties(); - props.put("hoodie.deltastreamer.schemaprovider.source.schema.file", schemaFilePath); + props.put("hoodie.streamer.schemaprovider.source.schema.file", schemaFilePath); schemaProvider = new FilebasedSchemaProvider(props, jsc()); } @@ -91,11 +93,11 @@ TypedProperties createPropsForKafkaSource(String topic, Long maxEventsToReadFrom static TypedProperties createPropsForJsonKafkaSource(String brokerAddress, String topic, Long maxEventsToReadFromKafkaSource, String resetStrategy) { TypedProperties props = new TypedProperties(); - props.setProperty("hoodie.deltastreamer.source.kafka.topic", topic); + props.setProperty("hoodie.streamer.source.kafka.topic", topic); props.setProperty("bootstrap.servers", brokerAddress); props.setProperty("auto.offset.reset", resetStrategy); props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); - props.setProperty("hoodie.deltastreamer.kafka.source.maxEvents", + props.setProperty("hoodie.streamer.kafka.source.maxEvents", maxEventsToReadFromKafkaSource != null ? String.valueOf(maxEventsToReadFromKafkaSource) : String.valueOf(KafkaSourceConfig.MAX_EVENTS_FROM_KAFKA_SOURCE.defaultValue())); props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString()); @@ -104,7 +106,7 @@ static TypedProperties createPropsForJsonKafkaSource(String brokerAddress, Strin @Override SourceFormatAdapter createSource(TypedProperties props) { - return new SourceFormatAdapter(new JsonKafkaSource(props, jsc(), spark(), schemaProvider, metrics)); + return new SourceFormatAdapter(new JsonKafkaSource(props, jsc(), spark(), metrics, new DefaultStreamContext(schemaProvider, sourceProfile))); } // test whether empty messages can be filtered @@ -350,10 +352,22 @@ public void testAppendKafkaOffset() { jsonSource = new JsonKafkaSource(props, jsc(), spark(), schemaProvider, metrics); kafkaSource = new SourceFormatAdapter(jsonSource); Dataset dfWithOffsetInfoAndNullKafkaKey = kafkaSource.fetchNewDataInRowFormat(Option.empty(), Long.MAX_VALUE).getBatch().get().cache(); + // total of 2 * numMessages are in the topic at this point, half with a key and half with a null key. All should have the source offset. 
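// The counts asserted just below come from filtering on the Kafka metadata columns that the
// KafkaOffsetPostProcessor appends. A minimal sketch of that kind of inspection on any batch
// fetched in row format; the column names match the filters used in this test, while the
// helper class itself is hypothetical.
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

public class KafkaOffsetColumnsSketch {

  // Count rows with and without a Kafka key, and confirm every row carries a source offset.
  static void summarize(Dataset<Row> batch) {
    long nullKeys = batch.filter("_hoodie_kafka_source_key is null").count();
    long withKeys = batch.filter("_hoodie_kafka_source_key is not null").count();
    long withOffsets = batch.filter("_hoodie_kafka_source_offset is not null").count();
    System.out.printf("null keys=%d, keys=%d, offsets=%d%n", nullKeys, withKeys, withOffsets);
  }
}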
assertEquals(numMessages, dfWithOffsetInfoAndNullKafkaKey.toDF().filter("_hoodie_kafka_source_key is null").count()); + assertEquals(numMessages, dfWithOffsetInfoAndNullKafkaKey.toDF().filter("_hoodie_kafka_source_key is not null").count()); + assertEquals(numMessages * 2, dfWithOffsetInfoAndNullKafkaKey.toDF().filter("_hoodie_kafka_source_offset is not null").count()); dfNoOffsetInfo.unpersist(); dfWithOffsetInfo.unpersist(); dfWithOffsetInfoAndNullKafkaKey.unpersist(); } + + @Test + public void testCreateSource() throws IOException { + final String topic = TEST_TOPIC_PREFIX + "testJsonKafkaSourceCreation"; + testUtils.createTopic(topic, 2); + TypedProperties props = createPropsForKafkaSource(topic, null, "earliest"); + Source jsonKafkaSource = UtilHelpers.createSource(JsonKafkaSource.class.getName(), props, jsc(), spark(), metrics, new DefaultStreamContext(schemaProvider, sourceProfile)); + assertEquals(Source.SourceType.JSON, jsonKafkaSource.getSourceType()); + } } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJsonKafkaSourcePostProcessor.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJsonKafkaSourcePostProcessor.java index b6bc3480e3d2..1f1a4e2b5c1f 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJsonKafkaSourcePostProcessor.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestJsonKafkaSourcePostProcessor.java @@ -80,7 +80,7 @@ public static void cleanupClass() { public void init() throws Exception { String schemaFilePath = Objects.requireNonNull(TestJsonKafkaSource.SCHEMA_FILE_URL).toURI().getPath(); TypedProperties props = new TypedProperties(); - props.put("hoodie.deltastreamer.schemaprovider.source.schema.file", schemaFilePath); + props.put("hoodie.streamer.schemaprovider.source.schema.file", schemaFilePath); schemaProvider = new FilebasedSchemaProvider(props, jsc()); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestParquetDFSSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestParquetDFSSource.java index 44489037e823..a9c448748c91 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestParquetDFSSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestParquetDFSSource.java @@ -41,9 +41,8 @@ public void setup() throws Exception { } @Override - public Source prepareDFSSource() { - TypedProperties props = new TypedProperties(); - props.setProperty("hoodie.deltastreamer.source.dfs.root", dfsRoot); + public Source prepareDFSSource(TypedProperties props) { + props.setProperty("hoodie.streamer.source.dfs.root", dfsRoot); return new ParquetDFSSource(props, jsc, sparkSession, schemaProvider); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestProtoKafkaSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestProtoKafkaSource.java index 52376f897419..f96792111445 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestProtoKafkaSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestProtoKafkaSource.java @@ -25,6 +25,7 @@ import org.apache.hudi.utilities.config.ProtoClassBasedSchemaProviderConfig; import org.apache.hudi.utilities.schema.ProtoClassBasedSchemaProvider; import org.apache.hudi.utilities.schema.SchemaProvider; +import org.apache.hudi.utilities.streamer.DefaultStreamContext; import org.apache.hudi.utilities.streamer.SourceFormatAdapter; import 
org.apache.hudi.utilities.test.proto.Nested; import org.apache.hudi.utilities.test.proto.Sample; @@ -74,11 +75,11 @@ public class TestProtoKafkaSource extends BaseTestKafkaSource { protected TypedProperties createPropsForKafkaSource(String topic, Long maxEventsToReadFromKafkaSource, String resetStrategy) { TypedProperties props = new TypedProperties(); - props.setProperty("hoodie.deltastreamer.source.kafka.topic", topic); + props.setProperty("hoodie.streamer.source.kafka.topic", topic); props.setProperty("bootstrap.servers", testUtils.brokerAddress()); props.setProperty("auto.offset.reset", resetStrategy); props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); - props.setProperty("hoodie.deltastreamer.kafka.source.maxEvents", + props.setProperty("hoodie.streamer.kafka.source.maxEvents", maxEventsToReadFromKafkaSource != null ? String.valueOf(maxEventsToReadFromKafkaSource) : String.valueOf(KafkaSourceConfig.MAX_EVENTS_FROM_KAFKA_SOURCE.defaultValue())); props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString()); @@ -89,7 +90,7 @@ protected TypedProperties createPropsForKafkaSource(String topic, Long maxEvents @Override SourceFormatAdapter createSource(TypedProperties props) { this.schemaProvider = new ProtoClassBasedSchemaProvider(props, jsc()); - Source protoKafkaSource = new ProtoKafkaSource(props, jsc(), spark(), schemaProvider, metrics); + Source protoKafkaSource = new ProtoKafkaSource(props, jsc(), spark(), metrics, new DefaultStreamContext(schemaProvider, sourceProfile)); return new SourceFormatAdapter(protoKafkaSource); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestS3EventsHoodieIncrSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestS3EventsHoodieIncrSource.java index 33faac5361f7..a9dd11c55440 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestS3EventsHoodieIncrSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestS3EventsHoodieIncrSource.java @@ -87,6 +87,7 @@ public class TestS3EventsHoodieIncrSource extends SparkClientFunctionalTestHarne private ObjectMapper mapper = new ObjectMapper(); private static final String MY_BUCKET = "some-bucket"; + private static final String IGNORE_FILE_EXTENSION = ".ignore"; private Option schemaProvider; @Mock @@ -104,8 +105,8 @@ public void setUp() throws IOException { metaClient = getHoodieMetaClient(hadoopConf(), basePath()); String schemaFilePath = TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_gcs_data.avsc").getPath(); TypedProperties props = new TypedProperties(); - props.put("hoodie.deltastreamer.schemaprovider.source.schema.file", schemaFilePath); - props.put("hoodie.deltastreamer.schema.provider.class.name", FilebasedSchemaProvider.class.getName()); + props.put("hoodie.streamer.schemaprovider.source.schema.file", schemaFilePath); + props.put("hoodie.streamer.schema.provider.class.name", FilebasedSchemaProvider.class.getName()); this.schemaProvider = Option.of(new FilebasedSchemaProvider(props, jsc)); } @@ -185,10 +186,10 @@ private HoodieRecord generateS3EventMetadata(String commitTime, String bucketNam private TypedProperties setProps(IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy) { Properties properties = new Properties(); - properties.setProperty("hoodie.deltastreamer.source.hoodieincr.path", basePath()); - properties.setProperty("hoodie.deltastreamer.source.hoodieincr.missing.checkpoint.strategy", + 
properties.setProperty("hoodie.streamer.source.hoodieincr.path", basePath()); + properties.setProperty("hoodie.streamer.source.hoodieincr.missing.checkpoint.strategy", missingCheckpointStrategy.name()); - properties.setProperty("hoodie.deltastreamer.source.hoodieincr.file.format", "json"); + properties.setProperty("hoodie.streamer.source.hoodieincr.file.format", "json"); return new TypedProperties(properties); } @@ -308,11 +309,14 @@ public void testTwoFilesAndContinueAcrossCommits(String extension) throws IOExce } List> filePathSizeAndCommitTime = new ArrayList<>(); - // Add file paths and sizes to the list + // Add file paths and sizes to the list. + // Check with a couple of invalid file extensions to ensure they are filtered out. filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file1%s", extension), 100L, "1")); + filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file2%s", IGNORE_FILE_EXTENSION), 800L, "1")); filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file3%s", extension), 200L, "1")); filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file2%s", extension), 150L, "1")); filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file4%s", extension), 50L, "2")); + filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file4%s", IGNORE_FILE_EXTENSION), 200L, "2")); filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file5%s", extension), 150L, "2")); Dataset inputDs = generateDataset(filePathSizeAndCommitTime); @@ -350,7 +354,7 @@ public void testEmptyDataAfterFilter() throws IOException { setMockQueryRunner(inputDs); TypedProperties typedProperties = setProps(READ_UPTO_LATEST_COMMIT); - typedProperties.setProperty("hoodie.deltastreamer.source.s3incr.ignore.key.prefix", "path/to/skip"); + typedProperties.setProperty("hoodie.streamer.source.s3incr.ignore.key.prefix", "path/to/skip"); readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of("1"), 1000L, "2", typedProperties); readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file3.json"), 1000L, "2", typedProperties); @@ -384,7 +388,7 @@ public void testFilterAnEntireCommit() throws IOException { when(mockCloudDataFetcher.getCloudObjectDataDF(Mockito.any(), Mockito.any(), Mockito.any(), eq(schemaProvider))) .thenReturn(Option.empty()); TypedProperties typedProperties = setProps(READ_UPTO_LATEST_COMMIT); - typedProperties.setProperty("hoodie.deltastreamer.source.s3incr.ignore.key.prefix", "path/to/skip"); + typedProperties.setProperty("hoodie.streamer.source.s3incr.ignore.key.prefix", "path/to/skip"); readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of("1"), 50L, "2#path/to/file4.json", typedProperties); } @@ -416,7 +420,7 @@ public void testFilterAnEntireMiddleCommit() throws IOException { when(mockCloudDataFetcher.getCloudObjectDataDF(Mockito.any(), Mockito.any(), Mockito.any(), eq(schemaProvider))) .thenReturn(Option.empty()); TypedProperties typedProperties = setProps(READ_UPTO_LATEST_COMMIT); - typedProperties.setProperty("hoodie.deltastreamer.source.s3incr.ignore.key.prefix", "path/to/skip"); + typedProperties.setProperty("hoodie.streamer.source.s3incr.ignore.key.prefix", "path/to/skip"); readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file3.json"), 50L, "3#path/to/file4.json", typedProperties); @@ -453,14 +457,14 @@ public void testSplitSnapshotLoad(String snapshotCheckPoint, String exptected1, when(mockCloudDataFetcher.getCloudObjectDataDF(Mockito.any(), Mockito.any(), Mockito.any(), eq(schemaProvider))) .thenReturn(Option.empty()); 
TypedProperties typedProperties = setProps(READ_UPTO_LATEST_COMMIT); - typedProperties.setProperty("hoodie.deltastreamer.source.s3incr.ignore.key.prefix", "path/to/skip"); + typedProperties.setProperty("hoodie.streamer.source.s3incr.ignore.key.prefix", "path/to/skip"); //1. snapshot query, read all records readAndAssert(READ_UPTO_LATEST_COMMIT, Option.empty(), 50000L, exptected1, typedProperties); //2. incremental query, as commit is present in timeline readAndAssert(READ_UPTO_LATEST_COMMIT, Option.of(exptected1), 10L, exptected2, typedProperties); //3. snapshot query with source limit less than first commit size readAndAssert(READ_UPTO_LATEST_COMMIT, Option.empty(), 50L, exptected3, typedProperties); - typedProperties.setProperty("hoodie.deltastreamer.source.s3incr.ignore.key.prefix", "path/to"); + typedProperties.setProperty("hoodie.streamer.source.s3incr.ignore.key.prefix", "path/to"); //4. As snapshotQuery will return 1 -> same would be return as nextCheckpoint (dataset is empty due to ignore prefix). readAndAssert(READ_UPTO_LATEST_COMMIT, Option.empty(), 50L, exptected4, typedProperties); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestSqlFileBasedSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestSqlFileBasedSource.java index 89769954d386..ee488e38c6ac 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestSqlFileBasedSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestSqlFileBasedSource.java @@ -51,8 +51,8 @@ public class TestSqlFileBasedSource extends UtilitiesTestBase { private final boolean useFlattenedSchema = false; - private final String sqlFileSourceConfig = "hoodie.deltastreamer.source.sql.file"; - private final String sqlFileSourceConfigEmitChkPointConf = "hoodie.deltastreamer.source.sql.checkpoint.emit"; + private final String sqlFileSourceConfig = "hoodie.streamer.source.sql.file"; + private final String sqlFileSourceConfigEmitChkPointConf = "hoodie.streamer.source.sql.checkpoint.emit"; protected FilebasedSchemaProvider schemaProvider; protected HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(); private String dfsRoot; diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestSqlSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestSqlSource.java index 64578f3bae36..a738003a3fcd 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestSqlSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestSqlSource.java @@ -50,7 +50,7 @@ public class TestSqlSource extends UtilitiesTestBase { private final boolean useFlattenedSchema = false; - private final String sqlSourceConfig = "hoodie.deltastreamer.source.sql.sql.query"; + private final String sqlSourceConfig = "hoodie.streamer.source.sql.sql.query"; protected FilebasedSchemaProvider schemaProvider; protected HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(); private String dfsRoot; diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/debezium/TestAbstractDebeziumSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/debezium/TestAbstractDebeziumSource.java index c9f46144e96a..a57383c43b24 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/debezium/TestAbstractDebeziumSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/debezium/TestAbstractDebeziumSource.java @@ -86,12 +86,12 
@@ public static void cleanupClass() throws IOException { private TypedProperties createPropsForJsonSource() { TypedProperties props = new TypedProperties(); - props.setProperty("hoodie.deltastreamer.source.kafka.topic", testTopicName); + props.setProperty("hoodie.streamer.source.kafka.topic", testTopicName); props.setProperty("bootstrap.servers", testUtils.brokerAddress()); props.setProperty("auto.offset.reset", "earliest"); props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); - props.setProperty("hoodie.deltastreamer.schemaprovider.registry.url", "localhost"); - props.setProperty("hoodie.deltastreamer.source.kafka.value.deserializer.class", StringDeserializer.class.getName()); + props.setProperty("hoodie.streamer.schemaprovider.registry.url", "localhost"); + props.setProperty("hoodie.streamer.source.kafka.value.deserializer.class", StringDeserializer.class.getName()); props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString()); return props; diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestCheckpointUtils.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestCheckpointUtils.java index 49e27d0191bd..7e8b263de331 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestCheckpointUtils.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestCheckpointUtils.java @@ -24,11 +24,13 @@ import org.apache.spark.streaming.kafka010.OffsetRange; import org.junit.jupiter.api.Test; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; /** @@ -103,26 +105,35 @@ public void testComputeOffsetRangesWithoutMinPartitions() { ranges = CheckpointUtils.computeOffsetRanges(makeOffsetMap(new int[] {0, 1}, new long[] {200000, 250000}), makeOffsetMap(new int[] {0, 1, 2}, new long[] {200010, 350000, 10000}), 100000, 0); assertEquals(100000, CheckpointUtils.totalNewMessages(ranges)); + assertEquals(5, ranges.length); + assertEquals(0, ranges[0].partition()); assertEquals(10, ranges[0].count()); - assertEquals(89990, ranges[1].count()); - assertEquals(10000, ranges[2].count()); + assertEquals(1, ranges[1].partition()); + assertEquals(33333, ranges[1].count()); + assertEquals(33333, ranges[2].count()); + assertEquals(23324, ranges[3].count()); + assertEquals(2, ranges[4].partition()); + assertEquals(10000, ranges[4].count()); ranges = CheckpointUtils.computeOffsetRanges(makeOffsetMap(new int[] {0, 1}, new long[] {200000, 250000}), makeOffsetMap(new int[] {0, 1, 2}, new long[] {200010, 350000, 10000}), 1000000, 0); assertEquals(110010, CheckpointUtils.totalNewMessages(ranges)); assertEquals(10, ranges[0].count()); - assertEquals(100000, ranges[1].count()); - assertEquals(10000, ranges[2].count()); + assertEquals(36670, ranges[1].count()); + assertEquals(36670, ranges[2].count()); + assertEquals(26660, ranges[3].count()); + assertEquals(10000, ranges[4].count()); // not all partitions consume same entries. 
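// A hedged sketch of the allocation behavior the updated expectations in this file encode,
// including testNumAllocatedEventsGreaterThanNumActualEvents further down: with minPartitions
// set, each emitted range holds at most roughly numEvents / minPartitions messages, so one
// Kafka partition with a large backlog is split into several consecutive ranges. The splitter
// below is a simplified stand-in for CheckpointUtils.computeOffsetRanges, not its implementation.
import java.util.ArrayList;
import java.util.List;

public class OffsetRangeSplitSketch {

  // Split [from, until) for a single topic partition into chunks of at most maxPerRange offsets.
  static List<long[]> split(long from, long until, long maxPerRange) {
    List<long[]> ranges = new ArrayList<>();
    long start = from;
    while (start < until) {
      long end = Math.min(start + maxPerRange, until);
      ranges.add(new long[] {start, end});
      start = end;
    }
    return ranges;
  }

  public static void main(String[] args) {
    long eventsPerPartition = 400000 / 20; // numEvents / minPartitions in the new test below
    // Partition 0 there is allotted 100000 events (76888767 to 76988767), which splits into
    // the five 20000-message ranges listed in expectedRanges.
    for (long[] r : split(76888767L, 76988767L, eventsPerPartition)) {
      System.out.println(r[0] + " -> " + r[1]);
    }
  }
}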
ranges = CheckpointUtils.computeOffsetRanges(makeOffsetMap(new int[] {0, 1, 2, 3, 4}, new long[] {0, 0, 0, 0, 0}), makeOffsetMap(new int[] {0, 1, 2, 3, 4}, new long[] {100, 1000, 1000, 1000, 1000}), 1001, 0); assertEquals(1001, CheckpointUtils.totalNewMessages(ranges)); assertEquals(100, ranges[0].count()); - assertEquals(226, ranges[1].count()); - assertEquals(225, ranges[2].count()); - assertEquals(225, ranges[3].count()); - assertEquals(225, ranges[4].count()); + assertEquals(200, ranges[1].count()); + assertEquals(101, ranges[2].count()); + assertEquals(200, ranges[3].count()); + assertEquals(200, ranges[4].count()); + assertEquals(200, ranges[5].count()); } @Test @@ -166,38 +177,44 @@ public void testComputeOffsetRangesWithMinPartitions() { // N skewed TopicPartitions to M offset ranges ranges = CheckpointUtils.computeOffsetRanges(makeOffsetMap(new int[] {0, 1}, new long[] {0, 0}), makeOffsetMap(new int[] {0, 1}, new long[] {100, 500}), 600, 3); - assertEquals(3, ranges.length); + assertEquals(4, ranges.length); assertEquals(0, ranges[0].fromOffset()); assertEquals(100, ranges[0].untilOffset()); assertEquals(0, ranges[1].fromOffset()); - assertEquals(250, ranges[1].untilOffset()); - assertEquals(250, ranges[2].fromOffset()); - assertEquals(500, ranges[2].untilOffset()); + assertEquals(200, ranges[1].untilOffset()); + assertEquals(200, ranges[2].fromOffset()); + assertEquals(400, ranges[2].untilOffset()); + assertEquals(400, ranges[3].fromOffset()); + assertEquals(500, ranges[3].untilOffset()); // range inexact multiple of minPartitions ranges = CheckpointUtils.computeOffsetRanges(makeOffsetMap(new int[] {0}, new long[] {0}), makeOffsetMap(new int[] {0}, new long[] {100}), 600, 3); - assertEquals(3, ranges.length); + assertEquals(4, ranges.length); assertEquals(0, ranges[0].fromOffset()); - assertEquals(34, ranges[0].untilOffset()); - assertEquals(34, ranges[1].fromOffset()); - assertEquals(67, ranges[1].untilOffset()); - assertEquals(67, ranges[2].fromOffset()); - assertEquals(100, ranges[2].untilOffset()); + assertEquals(33, ranges[0].untilOffset()); + assertEquals(33, ranges[1].fromOffset()); + assertEquals(66, ranges[1].untilOffset()); + assertEquals(66, ranges[2].fromOffset()); + assertEquals(99, ranges[2].untilOffset()); + assertEquals(99, ranges[3].fromOffset()); + assertEquals(100, ranges[3].untilOffset()); // do not ignore empty ranges ranges = CheckpointUtils.computeOffsetRanges(makeOffsetMap(new int[] {0, 1}, new long[] {100, 0}), makeOffsetMap(new int[] {0, 1}, new long[] {100, 600}), 600, 3); - assertEquals(3, ranges.length); + assertEquals(4, ranges.length); assertEquals(0, ranges[0].partition()); assertEquals(100, ranges[0].fromOffset()); assertEquals(100, ranges[0].untilOffset()); assertEquals(1, ranges[1].partition()); assertEquals(0, ranges[1].fromOffset()); - assertEquals(300, ranges[1].untilOffset()); + assertEquals(200, ranges[1].untilOffset()); assertEquals(1, ranges[2].partition()); - assertEquals(300, ranges[2].fromOffset()); - assertEquals(600, ranges[2].untilOffset()); + assertEquals(200, ranges[2].fromOffset()); + assertEquals(400, ranges[2].untilOffset()); + assertEquals(400, ranges[3].fromOffset()); + assertEquals(600, ranges[3].untilOffset()); // all empty ranges, do not ignore empty ranges ranges = CheckpointUtils.computeOffsetRanges(makeOffsetMap(new int[] {0, 1}, new long[] {100, 0}), @@ -226,7 +243,7 @@ public void testSplitAndMergeRanges() { OffsetRange range = OffsetRange.apply(TEST_TOPIC_NAME, 0, 0, 100); OffsetRange[] ranges = 
CheckpointUtils.computeOffsetRanges(makeOffsetMap(new int[] {0, 1}, new long[] {0, 0}), makeOffsetMap(new int[] {0, 1}, new long[] {100, 500}), 600, 4); - assertEquals(4, ranges.length); + assertEquals(5, ranges.length); OffsetRange[] mergedRanges = CheckpointUtils.mergeRangesByTopicPartition(ranges); assertEquals(2, mergedRanges.length); assertEquals(0, mergedRanges[0].partition()); @@ -245,6 +262,134 @@ public void testSplitAndMergeRanges() { assertEquals(300, mergedRanges[0].untilOffset()); } + @Test + public void testNumAllocatedEventsGreaterThanNumActualEvents() { + int[] partitions = new int[] {0, 1, 2, 3, 4}; + long[] committedOffsets = + new long[] {76888767, 76725043, 76899767, 76833267, 76952055}; + long[] latestOffsets = + new long[] {77005407, 76768151, 76985456, 76917973, 77080447}; + long numEvents = 400000; + long minPartitions = 20; + OffsetRange[] ranges = + KafkaOffsetGen.CheckpointUtils.computeOffsetRanges( + makeOffsetMap(partitions, committedOffsets), + makeOffsetMap(partitions, latestOffsets), + numEvents, + minPartitions); + + long totalNewMsgs = KafkaOffsetGen.CheckpointUtils.totalNewMessages(ranges); + assertEquals(400000, totalNewMsgs); + for (OffsetRange range : ranges) { + if (range.fromOffset() > range.untilOffset()) { + throw new IllegalArgumentException("Invalid offset range " + range); + } + } + long eventPerPartition = numEvents / minPartitions; + long rangesWhereDiffIsLessThanEventsPerPartition = Arrays.stream(ranges).filter(offsetRange -> offsetRange.untilOffset() - offsetRange.fromOffset() <= eventPerPartition).count(); + assertEquals(ranges.length, rangesWhereDiffIsLessThanEventsPerPartition); + OffsetRange[] expectedRanges = new OffsetRange[] { + OffsetRange.apply(TEST_TOPIC_NAME, 0, 76888767, 76908767), + OffsetRange.apply(TEST_TOPIC_NAME, 0, 76908767, 76928767), + OffsetRange.apply(TEST_TOPIC_NAME, 0, 76928767, 76948767), + OffsetRange.apply(TEST_TOPIC_NAME, 0, 76948767, 76968767), + OffsetRange.apply(TEST_TOPIC_NAME, 0, 76968767, 76988767), + OffsetRange.apply(TEST_TOPIC_NAME, 1, 76725043, 76745043), + OffsetRange.apply(TEST_TOPIC_NAME, 1, 76745043, 76765043), + OffsetRange.apply(TEST_TOPIC_NAME, 1, 76765043, 76768151), + OffsetRange.apply(TEST_TOPIC_NAME, 2, 76899767, 76919767), + OffsetRange.apply(TEST_TOPIC_NAME, 2, 76919767, 76939767), + OffsetRange.apply(TEST_TOPIC_NAME, 2, 76939767, 76959767), + OffsetRange.apply(TEST_TOPIC_NAME, 2, 76959767, 76979767), + OffsetRange.apply(TEST_TOPIC_NAME, 2, 76979767, 76985456), + OffsetRange.apply(TEST_TOPIC_NAME, 3, 76833267, 76853267), + OffsetRange.apply(TEST_TOPIC_NAME, 3, 76853267, 76873267), + OffsetRange.apply(TEST_TOPIC_NAME, 3, 76873267, 76893267), + OffsetRange.apply(TEST_TOPIC_NAME, 3, 76893267, 76913267), + OffsetRange.apply(TEST_TOPIC_NAME, 3, 76913267, 76917973), + OffsetRange.apply(TEST_TOPIC_NAME, 4, 76952055, 76972055), + OffsetRange.apply(TEST_TOPIC_NAME, 4, 76972055, 76992055), + OffsetRange.apply(TEST_TOPIC_NAME, 4, 76992055, 77012055), + OffsetRange.apply(TEST_TOPIC_NAME, 4, 77012055, 77032055), + OffsetRange.apply(TEST_TOPIC_NAME, 4, 77032055, 77038552), + }; + assertArrayEquals(expectedRanges, ranges); + } + + @Test + public void testNumAllocatedEventsLesserThanNumActualEvents() { + int[] partitions = new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + long[] committedOffsets = + new long[] {788543084, 787746335, 788016034, 788171708, 788327954, 788055939, 788179691, 788004145, 788105897, 788496138, 788317057, 788325907, 788287519, 787958075, 788403560, 
788118894, + 788383733, 787273821}; + long[] latestOffsets = + new long[] {788946534, 788442557, 788712188, 788867819, 789023943, 788752030, 788875648, 788700234, 788802091, 789192155, 789013192, 789021874, 788983544, 788654092, 789099516, 788814985, + 789079650, 787273821}; + long numEvents = 10000000; + long minPartitions = 36; + + OffsetRange[] ranges = + KafkaOffsetGen.CheckpointUtils.computeOffsetRanges( + makeOffsetMap(partitions, committedOffsets), + makeOffsetMap(partitions, latestOffsets), + numEvents, + minPartitions); + for (OffsetRange range : ranges) { + if (range.fromOffset() > range.untilOffset()) { + throw new IllegalArgumentException("Invalid offset range " + range); + } + } + assertEquals(10000000, KafkaOffsetGen.CheckpointUtils.totalNewMessages(ranges)); + assertEquals(41, ranges.length); + long eventPerPartition = numEvents / minPartitions; + long rangesWhereDiffIsLessThanEventsPerPartition = Arrays.stream(ranges).filter(offsetRange -> offsetRange.untilOffset() - offsetRange.fromOffset() <= eventPerPartition).count(); + assertEquals(ranges.length, rangesWhereDiffIsLessThanEventsPerPartition); + OffsetRange[] expectedRanges = new OffsetRange[] { + OffsetRange.apply(TEST_TOPIC_NAME, 0, 788543084, 788820861), + OffsetRange.apply(TEST_TOPIC_NAME, 0, 788820861, 788946534), + OffsetRange.apply(TEST_TOPIC_NAME, 1, 787746335, 788024112), + OffsetRange.apply(TEST_TOPIC_NAME, 1, 788024112, 788301889), + OffsetRange.apply(TEST_TOPIC_NAME, 1, 788301889, 788442557), + OffsetRange.apply(TEST_TOPIC_NAME, 2, 788016034, 788293811), + OffsetRange.apply(TEST_TOPIC_NAME, 2, 788293811, 788571588), + OffsetRange.apply(TEST_TOPIC_NAME, 2, 788571588, 788712188), + OffsetRange.apply(TEST_TOPIC_NAME, 3, 788171708, 788449485), + OffsetRange.apply(TEST_TOPIC_NAME, 3, 788449485, 788727262), + OffsetRange.apply(TEST_TOPIC_NAME, 3, 788727262, 788867819), + OffsetRange.apply(TEST_TOPIC_NAME, 4, 788327954, 788605731), + OffsetRange.apply(TEST_TOPIC_NAME, 4, 788605731, 788883508), + OffsetRange.apply(TEST_TOPIC_NAME, 4, 788883508, 789023943), + OffsetRange.apply(TEST_TOPIC_NAME, 5, 788055939, 788333716), + OffsetRange.apply(TEST_TOPIC_NAME, 5, 788333716, 788611493), + OffsetRange.apply(TEST_TOPIC_NAME, 5, 788611493, 788752030), + OffsetRange.apply(TEST_TOPIC_NAME, 6, 788179691, 788457468), + OffsetRange.apply(TEST_TOPIC_NAME, 6, 788457468, 788735245), + OffsetRange.apply(TEST_TOPIC_NAME, 6, 788735245, 788740134), + OffsetRange.apply(TEST_TOPIC_NAME, 7, 788004145, 788281922), + OffsetRange.apply(TEST_TOPIC_NAME, 7, 788281922, 788559699), + OffsetRange.apply(TEST_TOPIC_NAME, 8, 788105897, 788383674), + OffsetRange.apply(TEST_TOPIC_NAME, 8, 788383674, 788661451), + OffsetRange.apply(TEST_TOPIC_NAME, 9, 788496138, 788773915), + OffsetRange.apply(TEST_TOPIC_NAME, 9, 788773915, 789051692), + OffsetRange.apply(TEST_TOPIC_NAME, 10, 788317057, 788594834), + OffsetRange.apply(TEST_TOPIC_NAME, 10, 788594834, 788872611), + OffsetRange.apply(TEST_TOPIC_NAME, 11, 788325907, 788603684), + OffsetRange.apply(TEST_TOPIC_NAME, 11, 788603684, 788881461), + OffsetRange.apply(TEST_TOPIC_NAME, 12, 788287519, 788565296), + OffsetRange.apply(TEST_TOPIC_NAME, 12, 788565296, 788843073), + OffsetRange.apply(TEST_TOPIC_NAME, 13, 787958075, 788235852), + OffsetRange.apply(TEST_TOPIC_NAME, 13, 788235852, 788513629), + OffsetRange.apply(TEST_TOPIC_NAME, 14, 788403560, 788681337), + OffsetRange.apply(TEST_TOPIC_NAME, 14, 788681337, 788959114), + OffsetRange.apply(TEST_TOPIC_NAME, 15, 788118894, 788396671), + 
OffsetRange.apply(TEST_TOPIC_NAME, 15, 788396671, 788674448), + OffsetRange.apply(TEST_TOPIC_NAME, 16, 788383733, 788661510), + OffsetRange.apply(TEST_TOPIC_NAME, 16, 788661510, 788939287), + OffsetRange.apply(TEST_TOPIC_NAME, 17, 787273821, 787273821), + }; + assertArrayEquals(expectedRanges, ranges); + } + private static Map makeOffsetMap(int[] partitions, long[] offsets) { Map map = new HashMap<>(); for (int i = 0; i < partitions.length; i++) { diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestCloudObjectsSelectorCommon.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestCloudObjectsSelectorCommon.java index b4b6507e074c..79f15975cb51 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestCloudObjectsSelectorCommon.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestCloudObjectsSelectorCommon.java @@ -21,18 +21,19 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.util.Option; import org.apache.hudi.testutils.HoodieSparkClientTestHarness; - import org.apache.hudi.utilities.schema.FilebasedSchemaProvider; + import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; - import org.apache.spark.sql.RowFactory; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.List; public class TestCloudObjectsSelectorCommon extends HoodieSparkClientTestHarness { @@ -68,7 +69,7 @@ public void partitionValueAddedToRow() { List input = Collections.singletonList(new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=CA/data.json", 1)); TypedProperties properties = new TypedProperties(); - properties.put("hoodie.deltastreamer.source.cloud.data.partition.fields.from.path", "country,state"); + properties.put("hoodie.streamer.source.cloud.data.partition.fields.from.path", "country,state"); Option> result = CloudObjectsSelectorCommon.loadAsDataset(sparkSession, input, properties, "json"); Assertions.assertTrue(result.isPresent()); Assertions.assertEquals(1, result.get().count()); @@ -81,9 +82,9 @@ public void loadDatasetWithSchema() { TypedProperties props = new TypedProperties(); TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_data_schema.avsc"); String schemaFilePath = TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_data_schema.avsc").getPath(); - props.put("hoodie.deltastreamer.schemaprovider.source.schema.file", schemaFilePath); - props.put("hoodie.deltastreamer.schema.provider.class.name", FilebasedSchemaProvider.class.getName()); - props.put("hoodie.deltastreamer.source.cloud.data.partition.fields.from.path", "country,state"); + props.put("hoodie.streamer.schemaprovider.source.schema.file", schemaFilePath); + props.put("hoodie.streamer.schema.provider.class.name", FilebasedSchemaProvider.class.getName()); + props.put("hoodie.streamer.source.cloud.data.partition.fields.from.path", "country,state"); List input = Collections.singletonList(new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=CA/data.json", 1)); Option> result = CloudObjectsSelectorCommon.loadAsDataset(sparkSession, input, props, "json", Option.of(new FilebasedSchemaProvider(props, jsc))); Assertions.assertTrue(result.isPresent()); @@ -96,12 +97,34 @@ 
public void loadDatasetWithSchema() { public void partitionKeyNotPresentInPath() { List input = Collections.singletonList(new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=CA/data.json", 1)); TypedProperties properties = new TypedProperties(); - properties.put("hoodie.deltastreamer.source.cloud.data.reader.comma.separated.path.format", "false"); - properties.put("hoodie.deltastreamer.source.cloud.data.partition.fields.from.path", "unknown"); + properties.put("hoodie.streamer.source.cloud.data.reader.comma.separated.path.format", "false"); + properties.put("hoodie.streamer.source.cloud.data.partition.fields.from.path", "unknown"); Option> result = CloudObjectsSelectorCommon.loadAsDataset(sparkSession, input, properties, "json"); Assertions.assertTrue(result.isPresent()); Assertions.assertEquals(1, result.get().count()); Row expected = RowFactory.create("some data", null); Assertions.assertEquals(Collections.singletonList(expected), result.get().collectAsList()); } + + @Test + public void loadDatasetWithSchemaAndRepartition() { + TypedProperties props = new TypedProperties(); + TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_data_schema.avsc"); + String schemaFilePath = TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_data_schema.avsc").getPath(); + props.put("hoodie.streamer.schemaprovider.source.schema.file", schemaFilePath); + props.put("hoodie.streamer.schema.provider.class.name", FilebasedSchemaProvider.class.getName()); + props.put("hoodie.streamer.source.cloud.data.partition.fields.from.path", "country,state"); + // Setting this config so that dataset repartition happens inside `loadAsDataset` + props.put("hoodie.streamer.source.cloud.data.partition.max.size", "1"); + List input = Arrays.asList( + new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=CA/data.json", 1000), + new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=TX/data.json", 1000), + new CloudObjectMetadata("src/test/resources/data/partitioned/country=IND/state=TS/data.json", 1000) + ); + Option> result = CloudObjectsSelectorCommon.loadAsDataset(sparkSession, input, props, "json", Option.of(new FilebasedSchemaProvider(props, jsc))); + Assertions.assertTrue(result.isPresent()); + List expected = Arrays.asList(RowFactory.create("some data", "US", "CA"), RowFactory.create("some data", "US", "TX"), RowFactory.create("some data", "IND", "TS")); + List actual = result.get().collectAsList(); + Assertions.assertEquals(new HashSet<>(expected), new HashSet<>(actual)); + } } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestKafkaOffsetGen.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestKafkaOffsetGen.java index 6ad6a4c09dbf..fc3ab90a0364 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestKafkaOffsetGen.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestKafkaOffsetGen.java @@ -65,9 +65,9 @@ public void teardown() throws Exception { private TypedProperties getConsumerConfigs(String autoOffsetReset, String kafkaCheckpointType) { TypedProperties props = new TypedProperties(); - props.put("hoodie.deltastreamer.source.kafka.checkpoint.type", kafkaCheckpointType); + props.put("hoodie.streamer.source.kafka.checkpoint.type", kafkaCheckpointType); props.put("auto.offset.reset", autoOffsetReset); - props.put("hoodie.deltastreamer.source.kafka.topic", 
testTopicName); + props.put("hoodie.streamer.source.kafka.topic", testTopicName); props.setProperty("bootstrap.servers", testUtils.brokerAddress()); props.setProperty("key.deserializer", StringDeserializer.class.getName()); props.setProperty("value.deserializer", StringDeserializer.class.getName()); @@ -140,11 +140,13 @@ public void testGetNextOffsetRangesFromMultiplePartitions() { testUtils.sendMessages(testTopicName, Helpers.jsonifyRecords(dataGenerator.generateInserts("000", 1000))); KafkaOffsetGen kafkaOffsetGen = new KafkaOffsetGen(getConsumerConfigs("earliest", "string")); OffsetRange[] nextOffsetRanges = kafkaOffsetGen.getNextOffsetRanges(Option.empty(), 499, metrics); - assertEquals(2, nextOffsetRanges.length); + assertEquals(3, nextOffsetRanges.length); assertEquals(0, nextOffsetRanges[0].fromOffset()); - assertEquals(250, nextOffsetRanges[0].untilOffset()); - assertEquals(0, nextOffsetRanges[1].fromOffset()); - assertEquals(249, nextOffsetRanges[1].untilOffset()); + assertEquals(249, nextOffsetRanges[0].untilOffset()); + assertEquals(249, nextOffsetRanges[1].fromOffset()); + assertEquals(250, nextOffsetRanges[1].untilOffset()); + assertEquals(0, nextOffsetRanges[2].fromOffset()); + assertEquals(249, nextOffsetRanges[2].untilOffset()); } @Test @@ -248,7 +250,7 @@ public void testCheckTopicExists() { testUtils.createTopic(testTopicName, 1); boolean topicExists = kafkaOffsetGen.checkTopicExists(new KafkaConsumer(props)); assertTrue(topicExists); - props.put("hoodie.deltastreamer.source.kafka.topic", "random"); + props.put("hoodie.streamer.source.kafka.topic", "random"); kafkaOffsetGen = new KafkaOffsetGen(props); topicExists = kafkaOffsetGen.checkTopicExists(new KafkaConsumer(props)); assertFalse(topicExists); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java index 1a660ac71353..39dfa430268e 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java @@ -22,7 +22,6 @@ import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; -import org.apache.hudi.utilities.deltastreamer.TestSourceFormatAdapter; import org.apache.hudi.utilities.testutils.SanitizationTestUtils; import org.apache.avro.Schema; @@ -45,6 +44,7 @@ import java.io.InputStream; import java.util.stream.Stream; +import static org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest; import static org.apache.hudi.utilities.testutils.SanitizationTestUtils.generateProperFormattedSchema; import static org.apache.hudi.utilities.testutils.SanitizationTestUtils.generateRenamedSchemaWithConfiguredReplacement; import static org.apache.hudi.utilities.testutils.SanitizationTestUtils.generateRenamedSchemaWithDefaultReplacement; @@ -61,9 +61,7 @@ public class TestSanitizationUtils { public static void start() { spark = SparkSession .builder() - .master("local[*]") - .appName(TestSourceFormatAdapter.class.getName()) - .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .config(getSparkConfForTest(TestSanitizationUtils.class.getName())) .getOrCreate(); jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); } diff --git 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieStreamerUtils.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieStreamerUtils.java new file mode 100644 index 000000000000..e6c388b3e3b1 --- /dev/null +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieStreamerUtils.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.utilities.streamer; + +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.utilities.schema.SchemaProvider; +import org.apache.hudi.utilities.schema.SimpleSchemaProvider; +import org.apache.hudi.utilities.testutils.UtilitiesTestBase; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.apache.spark.api.java.JavaRDD; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; + +import java.util.Collections; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.doNothing; + +/** + * Tests {@link HoodieStreamerUtils}. 
+ */ +public class TestHoodieStreamerUtils extends UtilitiesTestBase { + private static final String SCHEMA_STRING = "{\"type\": \"record\"," + "\"name\": \"rec\"," + "\"fields\": [ " + + "{\"name\": \"timestamp\",\"type\": \"long\"}," + "{\"name\": \"_row_key\", \"type\": \"string\"}," + + "{\"name\": \"partition_path\", \"type\": [\"null\", \"string\"], \"default\": null }," + + "{\"name\": \"rider\", \"type\": \"string\"}," + "{\"name\": \"driver\", \"type\": \"string\"}]}"; + + @BeforeAll + public static void setupOnce() throws Exception { + initTestServices(); + } + + @ParameterizedTest + @EnumSource(HoodieRecordType.class) + public void testCreateHoodieRecordsWithError(HoodieRecordType recordType) { + Schema schema = new Schema.Parser().parse(SCHEMA_STRING); + JavaRDD recordRdd = jsc.parallelize(Collections.singletonList(1)).map(i -> { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i * 1000L); + record.put(1, "key" + i); + record.put(2, "path" + i); + // The field is non-null in schema but the value is null, so this fails the Hudi record creation + record.put(3, null); + record.put(4, "driver"); + return record; + }); + HoodieStreamer.Config cfg = new HoodieStreamer.Config(); + TypedProperties props = new TypedProperties(); + SchemaProvider schemaProvider = new SimpleSchemaProvider(jsc, schema, props); + BaseErrorTableWriter errorTableWriter = Mockito.mock(BaseErrorTableWriter.class); + ArgumentCaptor> errorEventCaptor = ArgumentCaptor.forClass(JavaRDD.class); + doNothing().when(errorTableWriter).addErrorEvents(errorEventCaptor.capture()); + HoodieStreamerUtils.createHoodieRecords(cfg, props, Option.of(recordRdd), + schemaProvider, recordType, false, "000", Option.of(errorTableWriter)); + List> actualErrorEvents = (List>) errorEventCaptor.getValue().collect(); + ErrorEvent expectedErrorEvent = new ErrorEvent<>("{\"timestamp\": 1000, \"_row_key\": \"key1\", \"partition_path\": \"path1\", \"rider\": null, \"driver\": \"driver\"}", + ErrorEvent.ErrorReason.RECORD_CREATION); + assertEquals(Collections.singletonList(expectedErrorEvent), actualErrorEvents); + } +} diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestStreamSyncUnitTests.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestStreamSyncUnitTests.java new file mode 100644 index 000000000000..99148eb4b072 --- /dev/null +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestStreamSyncUnitTests.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.utilities.streamer; + +import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieErrorTableConfig; +import org.apache.hudi.utilities.schema.SchemaProvider; +import org.apache.hudi.utilities.sources.InputBatch; +import org.apache.hudi.utilities.transform.Transformer; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.stream.Stream; + +import static org.apache.hudi.config.HoodieErrorTableConfig.ERROR_ENABLE_VALIDATE_TARGET_SCHEMA; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class TestStreamSyncUnitTests { + + @ParameterizedTest + @MethodSource("testCasesFetchNextBatchFromSource") + void testFetchNextBatchFromSource(Boolean useRowWriter, Boolean hasTransformer, Boolean hasSchemaProvider, + Boolean isNullTargetSchema, Boolean hasErrorTable, Boolean shouldTryWriteToErrorTable) { + //basic deltastreamer inputs + HoodieSparkEngineContext hoodieSparkEngineContext = mock(HoodieSparkEngineContext.class); + FileSystem fs = mock(FileSystem.class); + SparkSession sparkSession = mock(SparkSession.class); + Configuration configuration = mock(Configuration.class); + HoodieStreamer.Config cfg = new HoodieStreamer.Config(); + cfg.targetTableName = "testTableName"; + cfg.targetBasePath = "/fake/table/name"; + cfg.tableType = "MERGE_ON_READ"; + + //Source format adapter + SourceFormatAdapter sourceFormatAdapter = mock(SourceFormatAdapter.class); + SchemaProvider inputBatchSchemaProvider = getSchemaProvider("InputBatch", false); + Option> fakeDataFrame = Option.of(mock(Dataset.class)); + InputBatch> fakeRowInputBatch = new InputBatch<>(fakeDataFrame, "chkpt", inputBatchSchemaProvider); + when(sourceFormatAdapter.fetchNewDataInRowFormat(any(), 
anyLong())).thenReturn(fakeRowInputBatch); + //batch is empty because we don't want getBatch().map() to do anything because it calls static method we can't mock + InputBatch> fakeAvroInputBatch = new InputBatch<>(Option.empty(), "chkpt", inputBatchSchemaProvider); + when(sourceFormatAdapter.fetchNewDataInAvroFormat(any(),anyLong())).thenReturn(fakeAvroInputBatch); + + //transformer + //return empty because we don't want .map() to do anything because it calls static method we can't mock + when(sourceFormatAdapter.processErrorEvents(any(), any())).thenReturn(Option.empty()); + Option transformerOption = Option.empty(); + if (hasTransformer) { + transformerOption = Option.of(mock(Transformer.class)); + } + + //user provided schema provider + SchemaProvider schemaProvider = null; + if (hasSchemaProvider) { + schemaProvider = getSchemaProvider("UserProvided", isNullTargetSchema); + } + + //error table + TypedProperties props = new TypedProperties(); + props.put(DataSourceWriteOptions.RECONCILE_SCHEMA().key(), false); + Option errorTableWriterOption = Option.empty(); + if (hasErrorTable) { + errorTableWriterOption = Option.of(mock(BaseErrorTableWriter.class)); + props.put(ERROR_ENABLE_VALIDATE_TARGET_SCHEMA.key(), true); + } + TypedProperties propsSpy = spy(props); + + + //Actually create the deltastreamer + StreamSync streamSync = new StreamSync(cfg, sparkSession, propsSpy, hoodieSparkEngineContext, + fs, configuration, client -> true, schemaProvider, errorTableWriterOption, sourceFormatAdapter, transformerOption, useRowWriter, false); + StreamSync spy = spy(streamSync); + SchemaProvider deducedSchemaProvider; + deducedSchemaProvider = getSchemaProvider("deduced", false); + doReturn(deducedSchemaProvider).when(spy).getDeducedSchemaProvider(any(), any(), any()); + + //run the method we are unit testing: + InputBatch batch = spy.fetchNextBatchFromSource(Option.empty(), mock(HoodieTableMetaClient.class)); + + //make sure getDeducedSchemaProvider is always called once + verify(spy, times(1)).getDeducedSchemaProvider(any(), any(), any()); + + //make sure the deduced schema is actually used + assertEquals(deducedSchemaProvider.getTargetSchema(), batch.getSchemaProvider().getTargetSchema()); + + //make sure we use error table when we should + verify(propsSpy, shouldTryWriteToErrorTable ? times(1) : never()) + .getBoolean(HoodieErrorTableConfig.ERROR_ENABLE_VALIDATE_TARGET_SCHEMA.key(), + HoodieErrorTableConfig.ERROR_ENABLE_VALIDATE_TARGET_SCHEMA.defaultValue()); + } + + private SchemaProvider getSchemaProvider(String name, boolean isNullTargetSchema) { + SchemaProvider schemaProvider = mock(SchemaProvider.class); + Schema sourceSchema = mock(Schema.class); + Schema targetSchema = isNullTargetSchema ? 
InputBatch.NULL_SCHEMA : mock(Schema.class); + when(schemaProvider.getSourceSchema()).thenReturn(sourceSchema); + when(schemaProvider.getTargetSchema()).thenReturn(targetSchema); + when(sourceSchema.toString()).thenReturn(name + "SourceSchema"); + if (!isNullTargetSchema) { + when(targetSchema.toString()).thenReturn(name + "TargetSchema"); + } + return schemaProvider; + } + + static Stream testCasesFetchNextBatchFromSource() { + Stream.Builder b = Stream.builder(); + + //no transformer + for (Boolean useRowWriter : new Boolean[]{false, true}) { + for (Boolean hasErrorTable : new Boolean[]{false, true}) { + boolean errorTableEnabled = hasErrorTable && !useRowWriter; + b.add(Arguments.of(useRowWriter, false, false, false, + hasErrorTable, errorTableEnabled)); + } + } + + //with transformer + for (Boolean useRowWriter : new Boolean[]{false, true}) { + for (Boolean hasSchemaProvider : new Boolean[]{false, true}) { + for (Boolean isNullTargetSchema : new Boolean[]{false, true}) { + for (Boolean hasErrorTable : new Boolean[]{false, true}) { + boolean errorTableEnabled = hasErrorTable && !useRowWriter; + boolean schemaProviderNullOrMissing = isNullTargetSchema || !hasSchemaProvider; + boolean shouldTryWriteToErrorTable = errorTableEnabled && !schemaProviderNullOrMissing; + b.add(Arguments.of(useRowWriter, true, hasSchemaProvider, isNullTargetSchema, + hasErrorTable, shouldTryWriteToErrorTable)); + } + } + } + } + return b.build(); + } +} diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/JdbcTestUtils.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/JdbcTestUtils.java index 79047794f979..227013b05481 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/JdbcTestUtils.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/JdbcTestUtils.java @@ -44,6 +44,11 @@ public class JdbcTestUtils { private static final Logger LOG = LoggerFactory.getLogger(JdbcTestUtils.class); + public static final String JDBC_URL = "jdbc:h2:mem:test_mem"; + public static final String JDBC_DRIVER = "org.h2.Driver"; + public static final String JDBC_USER = "test"; + public static final String JDBC_PASS = "jdbc"; + public static List clearAndInsert(String commitTime, int numRecords, Connection connection, HoodieTestDataGenerator dataGenerator, TypedProperties props) throws SQLException { execute(connection, "DROP TABLE triprec", "Table does not exists"); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java index 0dcba5e5fa27..a200f3a51513 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java @@ -158,7 +158,7 @@ public static void initTestServices(boolean needsHdfs, boolean needsHive, boolea zookeeperTestService.start(); } - jsc = UtilHelpers.buildSparkContext(UtilitiesTestBase.class.getName() + "-hoodie", "local[4]"); + jsc = UtilHelpers.buildSparkContext(UtilitiesTestBase.class.getName() + "-hoodie", "local[8]"); context = new HoodieSparkEngineContext(jsc); sqlContext = new SQLContext(jsc); sparkSession = SparkSession.builder().config(jsc.getConf()).getOrCreate(); @@ -450,14 +450,14 @@ public static TypedProperties setupSchemaOnDFS() throws IOException { public static TypedProperties setupSchemaOnDFS(String scope, String filename) throws IOException { 
UtilitiesTestBase.Helpers.copyToDFS(scope + "/" + filename, fs, basePath + "/" + filename); TypedProperties props = new TypedProperties(); - props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", basePath + "/" + filename); + props.setProperty("hoodie.streamer.schemaprovider.source.schema.file", basePath + "/" + filename); return props; } public static TypedProperties setupSchemaOnDFSWithAbsoluteScope(String scope, String filename) throws IOException { UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(scope + "/" + filename, fs, basePath + "/" + filename); TypedProperties props = new TypedProperties(); - props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", basePath + "/" + filename); + props.setProperty("hoodie.streamer.schemaprovider.source.schema.file", basePath + "/" + filename); return props; } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/sources/AbstractBaseTestSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/sources/AbstractBaseTestSource.java index 56d435ddf0f1..08e73d36bc04 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/sources/AbstractBaseTestSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/sources/AbstractBaseTestSource.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.RawTripTestPayload; +import org.apache.hudi.common.util.ConfigUtils; import org.apache.hudi.common.util.collection.RocksDBBasedMap; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.utilities.config.SourceTestConfig; @@ -63,11 +64,10 @@ public static void initDataGen() { public static void initDataGen(TypedProperties props, int partition) { try { - boolean useRocksForTestDataGenKeys = props.getBoolean(SourceTestConfig.USE_ROCKSDB_FOR_TEST_DATAGEN_KEYS.key(), - SourceTestConfig.USE_ROCKSDB_FOR_TEST_DATAGEN_KEYS.defaultValue()); - String baseStoreDir = props.getString(SourceTestConfig.ROCKSDB_BASE_DIR_FOR_TEST_DATAGEN_KEYS.key(), + boolean useRocksForTestDataGenKeys = ConfigUtils.getBooleanWithAltKeys(props, SourceTestConfig.USE_ROCKSDB_FOR_TEST_DATAGEN_KEYS); + String baseStoreDir = ConfigUtils.getStringWithAltKeys(props, SourceTestConfig.ROCKSDB_BASE_DIR_FOR_TEST_DATAGEN_KEYS, File.createTempFile("test_data_gen", ".keys").getParent()) + "/" + partition; - LOG.info("useRocksForTestDataGenKeys=" + useRocksForTestDataGenKeys + ", BaseStoreDir=" + baseStoreDir); + LOG.info("useRocksForTestDataGenKeys={}, BaseStoreDir={}", useRocksForTestDataGenKeys, baseStoreDir); dataGeneratorMap.put(partition, new HoodieTestDataGenerator(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, useRocksForTestDataGenKeys ? new RocksDBBasedMap<>(baseStoreDir) : new HashMap<>())); } catch (IOException e) { @@ -106,18 +106,17 @@ protected AbstractBaseTestSource(TypedProperties props, JavaSparkContext sparkCo protected static Stream fetchNextBatch(TypedProperties props, int sourceLimit, String instantTime, int partition) { - int maxUniqueKeys = - props.getInteger(SourceTestConfig.MAX_UNIQUE_RECORDS_PROP.key(), SourceTestConfig.MAX_UNIQUE_RECORDS_PROP.defaultValue()); + int maxUniqueKeys = ConfigUtils.getIntWithAltKeys(props, SourceTestConfig.MAX_UNIQUE_RECORDS_PROP); HoodieTestDataGenerator dataGenerator = dataGeneratorMap.get(partition); // generate `sourceLimit` number of upserts each time. 
int numExistingKeys = dataGenerator.getNumExistingKeys(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA); - LOG.info("NumExistingKeys=" + numExistingKeys); + LOG.info("NumExistingKeys={}", numExistingKeys); int numUpdates = Math.min(numExistingKeys, sourceLimit / 2); int numInserts = sourceLimit - numUpdates; - LOG.info("Before adjustments => numInserts=" + numInserts + ", numUpdates=" + numUpdates); + LOG.info("Before adjustments => numInserts={}, numUpdates={}", numInserts, numUpdates); boolean reachedMax = false; if (numInserts + numExistingKeys > maxUniqueKeys) { @@ -134,17 +133,16 @@ protected static Stream fetchNextBatch(TypedProperties props, int Stream deleteStream = Stream.empty(); Stream updateStream; long memoryUsage1 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); - LOG.info("Before DataGen. Memory Usage=" + memoryUsage1 + ", Total Memory=" + Runtime.getRuntime().totalMemory() - + ", Free Memory=" + Runtime.getRuntime().freeMemory()); + LOG.info("Before DataGen. Memory Usage={}, Total Memory={}, Free Memory={}", memoryUsage1, Runtime.getRuntime().totalMemory(), + Runtime.getRuntime().freeMemory()); if (!reachedMax && numUpdates >= 50) { - LOG.info("After adjustments => NumInserts=" + numInserts + ", NumUpdates=" + (numUpdates - 50) + ", NumDeletes=50, maxUniqueRecords=" - + maxUniqueKeys); + LOG.info("After adjustments => NumInserts={}, NumUpdates={}, NumDeletes=50, maxUniqueRecords={}", numInserts, (numUpdates - 50), maxUniqueKeys); // if we generate update followed by deletes -> some keys in update batch might be picked up for deletes. Hence generating delete batch followed by updates deleteStream = dataGenerator.generateUniqueDeleteRecordStream(instantTime, 50).map(AbstractBaseTestSource::toGenericRecord); updateStream = dataGenerator.generateUniqueUpdatesStream(instantTime, numUpdates - 50, HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) .map(AbstractBaseTestSource::toGenericRecord); } else { - LOG.info("After adjustments => NumInserts=" + numInserts + ", NumUpdates=" + numUpdates + ", maxUniqueRecords=" + maxUniqueKeys); + LOG.info("After adjustments => NumInserts={}, NumUpdates={}, maxUniqueRecords={}", numInserts, numUpdates, maxUniqueKeys); updateStream = dataGenerator.generateUniqueUpdatesStream(instantTime, numUpdates, HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) .map(AbstractBaseTestSource::toGenericRecord); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/sources/AbstractDFSSourceTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/sources/AbstractDFSSourceTestBase.java index 0de087ece73e..76a1a6453670 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/sources/AbstractDFSSourceTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/sources/AbstractDFSSourceTestBase.java @@ -19,6 +19,7 @@ package org.apache.hudi.utilities.testutils.sources; import org.apache.hudi.AvroConversionUtils; +import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.Option; @@ -74,7 +75,11 @@ public void setup() throws Exception { * * @return A {@link Source} using DFS as the file system. 
*/ - protected abstract Source prepareDFSSource(); + protected final Source prepareDFSSource() { + return prepareDFSSource(new TypedProperties()); + } + + protected abstract Source prepareDFSSource(TypedProperties props); /** * Writes test data, i.e., a {@link List} of {@link HoodieRecord}, to a file on DFS. diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/sources/DistributedTestDataSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/sources/DistributedTestDataSource.java index 4bcbdbbe874b..808a8efb8a4e 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/sources/DistributedTestDataSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/sources/DistributedTestDataSource.java @@ -19,6 +19,7 @@ package org.apache.hudi.utilities.testutils.sources; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.util.ConfigUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.utilities.config.SourceTestConfig; import org.apache.hudi.utilities.schema.SchemaProvider; @@ -46,15 +47,14 @@ public class DistributedTestDataSource extends AbstractBaseTestSource { public DistributedTestDataSource(TypedProperties props, JavaSparkContext sparkContext, SparkSession sparkSession, SchemaProvider schemaProvider) { super(props, sparkContext, sparkSession, schemaProvider); - this.numTestSourcePartitions = - props.getInteger(SourceTestConfig.NUM_SOURCE_PARTITIONS_PROP.key(), SourceTestConfig.NUM_SOURCE_PARTITIONS_PROP.defaultValue()); + this.numTestSourcePartitions = ConfigUtils.getIntWithAltKeys(props, SourceTestConfig.NUM_SOURCE_PARTITIONS_PROP); } @Override protected InputBatch> fetchNewData(Option lastCkptStr, long sourceLimit) { int nextCommitNum = lastCkptStr.map(s -> Integer.parseInt(s) + 1).orElse(0); String instantTime = String.format("%05d", nextCommitNum); - LOG.info("Source Limit is set to " + sourceLimit); + LOG.info("Source Limit is set to {}", sourceLimit); // No new data. 
if (sourceLimit <= 0) { @@ -65,15 +65,14 @@ protected InputBatch> fetchNewData(Option lastCkp newProps.putAll(props); // Set the maxUniqueRecords per partition for TestDataSource - int maxUniqueRecords = - props.getInteger(SourceTestConfig.MAX_UNIQUE_RECORDS_PROP.key(), SourceTestConfig.MAX_UNIQUE_RECORDS_PROP.defaultValue()); + int maxUniqueRecords = ConfigUtils.getIntWithAltKeys(props, SourceTestConfig.MAX_UNIQUE_RECORDS_PROP); String maxUniqueRecordsPerPartition = String.valueOf(Math.max(1, maxUniqueRecords / numTestSourcePartitions)); newProps.setProperty(SourceTestConfig.MAX_UNIQUE_RECORDS_PROP.key(), maxUniqueRecordsPerPartition); int perPartitionSourceLimit = Math.max(1, (int) (sourceLimit / numTestSourcePartitions)); JavaRDD avroRDD = sparkContext.parallelize(IntStream.range(0, numTestSourcePartitions).boxed().collect(Collectors.toList()), numTestSourcePartitions).mapPartitionsWithIndex((p, idx) -> { - LOG.info("Initializing source with newProps=" + newProps); + LOG.info("Initializing source with newProps={}", newProps); if (!dataGeneratorMap.containsKey(p)) { initDataGen(newProps, p); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/transform/TestSqlFileBasedTransformer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/transform/TestSqlFileBasedTransformer.java index 1b0cc7f52a6d..ea2ce8ed86f9 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/transform/TestSqlFileBasedTransformer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/transform/TestSqlFileBasedTransformer.java @@ -87,7 +87,7 @@ public void testSqlFileBasedTransformerIllegalArguments() { public void testSqlFileBasedTransformerIncorrectConfig() { // Test if the class throws hoodie IO exception correctly when given a incorrect config. props.setProperty( - "hoodie.deltastreamer.transformer.sql.file", + "hoodie.streamer.transformer.sql.file", UtilitiesTestBase.basePath + "/non-exist-sql-file.sql"); assertThrows( HoodieTransformException.class, @@ -103,7 +103,7 @@ public void testSqlFileBasedTransformerInvalidSQL() throws IOException { // Test if the SQL file based transformer works as expected for the invalid SQL statements. props.setProperty( - "hoodie.deltastreamer.transformer.sql.file", + "hoodie.streamer.transformer.sql.file", UtilitiesTestBase.basePath + "/sql-file-transformer-invalid.sql"); assertThrows( ParseException.class, @@ -119,7 +119,7 @@ public void testSqlFileBasedTransformerEmptyDataset() throws IOException { // Test if the SQL file based transformer works as expected for the empty SQL statements. props.setProperty( - "hoodie.deltastreamer.transformer.sql.file", + "hoodie.streamer.transformer.sql.file", UtilitiesTestBase.basePath + "/sql-file-transformer-empty.sql"); Dataset emptyRow = sqlFileTransformer.apply(jsc, sparkSession, inputDatasetRows, props); String[] actualRows = emptyRow.as(Encoders.STRING()).collectAsList().toArray(new String[0]); @@ -136,7 +136,7 @@ public void testSqlFileBasedTransformer() throws IOException { // Test if the SQL file based transformer works as expected for the correct input. 
props.setProperty( - "hoodie.deltastreamer.transformer.sql.file", + "hoodie.streamer.transformer.sql.file", UtilitiesTestBase.basePath + "/sql-file-transformer.sql"); Dataset transformedRow = sqlFileTransformer.apply(jsc, sparkSession, inputDatasetRows, props); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/transform/TestSqlQueryBasedTransformer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/transform/TestSqlQueryBasedTransformer.java index b6fdc2582422..e9f6f9e4fd39 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/transform/TestSqlQueryBasedTransformer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/transform/TestSqlQueryBasedTransformer.java @@ -29,6 +29,7 @@ import java.util.Collections; +import static org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -39,8 +40,7 @@ public void testSqlQuery() { SparkSession spark = SparkSession .builder() - .master("local[2]") - .appName(TestSqlQueryBasedTransformer.class.getName()) + .config(getSparkConfForTest(TestSqlQueryBasedTransformer.class.getName())) .getOrCreate(); JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -78,7 +78,7 @@ public void testSqlQuery() { + "from\n" + "\t"; TypedProperties props = new TypedProperties(); - props.put("hoodie.deltastreamer.transformer.sql", transSql); + props.put("hoodie.streamer.transformer.sql", transSql); // transform SqlQueryBasedTransformer transformer = new SqlQueryBasedTransformer(); diff --git a/hudi-utilities/src/test/resources/data/partitioned/country=IND/state=TS/data.json b/hudi-utilities/src/test/resources/data/partitioned/country=IND/state=TS/data.json new file mode 100644 index 000000000000..9fb29b4dcf47 --- /dev/null +++ b/hudi-utilities/src/test/resources/data/partitioned/country=IND/state=TS/data.json @@ -0,0 +1 @@ +{"data": "some data"} \ No newline at end of file diff --git a/hudi-utilities/src/test/resources/data/partitioned/country=US/state=TX/data.json b/hudi-utilities/src/test/resources/data/partitioned/country=US/state=TX/data.json new file mode 100644 index 000000000000..9fb29b4dcf47 --- /dev/null +++ b/hudi-utilities/src/test/resources/data/partitioned/country=US/state=TX/data.json @@ -0,0 +1 @@ +{"data": "some data"} \ No newline at end of file diff --git a/hudi-utilities/src/test/resources/streamer-config/dfs-source.properties b/hudi-utilities/src/test/resources/streamer-config/dfs-source.properties index 3a5edb2b6f23..35beefab7b22 100644 --- a/hudi-utilities/src/test/resources/streamer-config/dfs-source.properties +++ b/hudi-utilities/src/test/resources/streamer-config/dfs-source.properties @@ -20,8 +20,8 @@ include=base.properties hoodie.datasource.write.recordkey.field=_row_key hoodie.datasource.write.partitionpath.field=driver # Schema provider props (change to absolute path based on your installation) -hoodie.deltastreamer.filebased.schemaprovider.source.schema.file=file:///path/to/hoodie/hoodie-utilities/src/main/resources/streamer-props/source.avsc -hoodie.deltastreamer.filebased.schemaprovider.target.schema.file=file:///path/to/hoodie/hoodie-utilities/src/main/resources/streamer-props/target.avsc +hoodie.streamer.filebased.schemaprovider.source.schema.file=file:///path/to/hoodie/hoodie-utilities/src/main/resources/streamer-props/source.avsc 
+hoodie.streamer.filebased.schemaprovider.target.schema.file=file:///path/to/hoodie/hoodie-utilities/src/main/resources/streamer-props/target.avsc # DFS Source -hoodie.deltastreamer.source.dfs.root=file:///tmp/hoodie-dfs-input +hoodie.streamer.source.dfs.root=file:///tmp/hoodie-dfs-input diff --git a/hudi-utilities/src/test/resources/streamer-config/invalid_hive_sync_uber_config.properties b/hudi-utilities/src/test/resources/streamer-config/invalid_hive_sync_uber_config.properties index 5c569c5d0a0d..248de399272e 100644 --- a/hudi-utilities/src/test/resources/streamer-config/invalid_hive_sync_uber_config.properties +++ b/hudi-utilities/src/test/resources/streamer-config/invalid_hive_sync_uber_config.properties @@ -18,6 +18,6 @@ include=base.properties hoodie.datasource.write.recordkey.field=_row_key hoodie.datasource.write.partitionpath.field=created_at -hoodie.deltastreamer.source.kafka.topic=test_topic -hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP -hoodie.deltastreamer.keygen.timebased.input.dateformat=yyyy-MM-dd \ No newline at end of file +hoodie.streamer.source.kafka.topic=test_topic +hoodie.keygen.timebased.timestamp.type=UNIX_TIMESTAMP +hoodie.keygen.timebased.input.dateformat=yyyy-MM-dd \ No newline at end of file diff --git a/hudi-utilities/src/test/resources/streamer-config/kafka-source.properties b/hudi-utilities/src/test/resources/streamer-config/kafka-source.properties index e256b8c77fbb..87edb1a1df7d 100644 --- a/hudi-utilities/src/test/resources/streamer-config/kafka-source.properties +++ b/hudi-utilities/src/test/resources/streamer-config/kafka-source.properties @@ -20,10 +20,10 @@ include=base.properties hoodie.datasource.write.recordkey.field=impressionid hoodie.datasource.write.partitionpath.field=userid # schema provider configs -hoodie.deltastreamer.schemaprovider.registry.url=http://localhost:8081/subjects/impressions-value/versions/latest +hoodie.streamer.schemaprovider.registry.url=http://localhost:8081/subjects/impressions-value/versions/latest # Kafka Source -#hoodie.deltastreamer.source.kafka.topic=uber_trips -hoodie.deltastreamer.source.kafka.topic=impressions +#hoodie.streamer.source.kafka.topic=uber_trips +hoodie.streamer.source.kafka.topic=impressions #Kafka props bootstrap.servers=localhost:9092 auto.offset.reset=earliest diff --git a/hudi-utilities/src/test/resources/streamer-config/short_trip_uber_config.properties b/hudi-utilities/src/test/resources/streamer-config/short_trip_uber_config.properties index d415e19eb20a..b74f5a080f3d 100644 --- a/hudi-utilities/src/test/resources/streamer-config/short_trip_uber_config.properties +++ b/hudi-utilities/src/test/resources/streamer-config/short_trip_uber_config.properties @@ -18,11 +18,11 @@ include=base.properties hoodie.datasource.write.recordkey.field=_row_key hoodie.datasource.write.partitionpath.field=created_at -hoodie.deltastreamer.source.kafka.topic=topic2 -hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP -hoodie.deltastreamer.keygen.timebased.input.dateformat=yyyy-MM-dd HH:mm:ss.S +hoodie.streamer.source.kafka.topic=topic2 +hoodie.keygen.timebased.timestamp.type=UNIX_TIMESTAMP +hoodie.keygen.timebased.input.dateformat=yyyy-MM-dd HH:mm:ss.S hoodie.datasource.hive_sync.table=short_trip_uber_hive_dummy_table hoodie.datasource.write.keygenerator.class=org.apache.hudi.utilities.deltastreamer.TestHoodieDeltaStreamer$TestGenerator -hoodie.deltastreamer.schemaprovider.registry.baseUrl=http://localhost:8081/subjects/ 
-hoodie.deltastreamer.schemaprovider.registry.urlSuffix=-value/versions/latest -hoodie.deltastreamer.transformer.class=org.apache.hudi.utilities.deltastreamer.TestHoodieDeltaStreamer$TestIdentityTransformer +hoodie.streamer.schemaprovider.registry.baseUrl=http://localhost:8081/subjects/ +hoodie.streamer.schemaprovider.registry.urlSuffix=-value/versions/latest +hoodie.streamer.transformer.class=org.apache.hudi.utilities.deltastreamer.TestHoodieDeltaStreamer$TestIdentityTransformer diff --git a/hudi-utilities/src/test/resources/streamer-config/sql-transformer.properties b/hudi-utilities/src/test/resources/streamer-config/sql-transformer.properties index 9172337d0389..9bfbd889de98 100644 --- a/hudi-utilities/src/test/resources/streamer-config/sql-transformer.properties +++ b/hudi-utilities/src/test/resources/streamer-config/sql-transformer.properties @@ -16,4 +16,4 @@ # limitations under the License. ### include=base.properties -hoodie.deltastreamer.transformer.sql=SELECT a.timestamp, a._row_key, a.partition_path, a.trip_type, a.rider, a.driver, a.begin_lat, a.begin_lon, a.end_lat, a.end_lon, a.distance_in_meters, a.seconds_since_epoch, a.weight, a.nation, a.current_date, a.current_ts, a.height, a.city_to_state, a.fare, a.tip_history, a.`_hoodie_is_deleted`, CAST(1.0 AS DOUBLE) AS haversine_distance FROM a +hoodie.streamer.transformer.sql=SELECT a.timestamp, a._row_key, a.partition_path, a.trip_type, a.rider, a.driver, a.begin_lat, a.begin_lon, a.end_lat, a.end_lon, a.distance_in_meters, a.seconds_since_epoch, a.weight, a.nation, a.current_date, a.current_ts, a.height, a.city_to_state, a.fare, a.tip_history, a.`_hoodie_is_deleted`, CAST(1.0 AS DOUBLE) AS haversine_distance FROM a diff --git a/hudi-utilities/src/test/resources/streamer-config/uber_config.properties b/hudi-utilities/src/test/resources/streamer-config/uber_config.properties index f5b079265d43..a8e278249e86 100644 --- a/hudi-utilities/src/test/resources/streamer-config/uber_config.properties +++ b/hudi-utilities/src/test/resources/streamer-config/uber_config.properties @@ -18,10 +18,10 @@ include=base.properties hoodie.datasource.write.recordkey.field=_row_key hoodie.datasource.write.partitionpath.field=created_at -hoodie.deltastreamer.source.kafka.topic=topic1 -hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP -hoodie.deltastreamer.keygen.timebased.input.dateformat=yyyy-MM-dd HH:mm:ss.S +hoodie.streamer.source.kafka.topic=topic1 +hoodie.keygen.timebased.timestamp.type=UNIX_TIMESTAMP +hoodie.keygen.timebased.input.dateformat=yyyy-MM-dd HH:mm:ss.S hoodie.datasource.hive_sync.database=uber_hive_db hoodie.datasource.hive_sync.table=uber_hive_dummy_table -hoodie.deltastreamer.schemaprovider.registry.url=http://localhost:8081/subjects/random-value/versions/latest -hoodie.deltastreamer.schemaprovider.registry.targetUrl=http://localhost:8081/subjects/random-value/versions/latest \ No newline at end of file +hoodie.streamer.schemaprovider.registry.url=http://localhost:8081/subjects/random-value/versions/latest +hoodie.streamer.schemaprovider.registry.targetUrl=http://localhost:8081/subjects/random-value/versions/latest \ No newline at end of file diff --git a/packaging/hudi-aws-bundle/pom.xml b/packaging/hudi-aws-bundle/pom.xml index 1ab4d9c82ede..f55a97d67e1a 100644 --- a/packaging/hudi-aws-bundle/pom.xml +++ b/packaging/hudi-aws-bundle/pom.xml @@ -139,9 +139,6 @@ src/main/resources - - src/test/resources - diff --git a/packaging/hudi-datahub-sync-bundle/pom.xml b/packaging/hudi-datahub-sync-bundle/pom.xml index 
460c48d7258b..f9f4c813eecd 100644 --- a/packaging/hudi-datahub-sync-bundle/pom.xml +++ b/packaging/hudi-datahub-sync-bundle/pom.xml @@ -115,9 +115,6 @@ src/main/resources - - src/test/resources - diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml index 23a21371d6d3..064ce639e818 100644 --- a/packaging/hudi-flink-bundle/pom.xml +++ b/packaging/hudi-flink-bundle/pom.xml @@ -127,6 +127,8 @@ io.prometheus:simpleclient_dropwizard io.prometheus:simpleclient_pushgateway io.prometheus:simpleclient_common + com.uber.m3:tally-m3 + com.uber.m3:tally-core org.eclipse.jetty:* @@ -194,10 +196,6 @@ com.beust.jcommander. ${flink.bundle.shade.prefix}com.beust.jcommander. - - com.codahale.metrics. - ${flink.bundle.shade.prefix}com.codahale.metrics. - org.apache.commons.codec. ${flink.bundle.shade.prefix}org.apache.commons.codec. @@ -214,6 +212,10 @@ org.openjdk.jol. org.apache.hudi.org.openjdk.jol. + + com.uber.m3. + org.apache.hudi.com.uber.m3. + @@ -241,9 +243,6 @@ src/main/resources - - src/test/resources - diff --git a/packaging/hudi-gcp-bundle/pom.xml b/packaging/hudi-gcp-bundle/pom.xml index eeb1ce7ff842..01e722e85926 100644 --- a/packaging/hudi-gcp-bundle/pom.xml +++ b/packaging/hudi-gcp-bundle/pom.xml @@ -39,7 +39,7 @@ com.google.cloud libraries-bom - 25.1.0 + ${gcp-libraries-bom.version} pom import @@ -139,9 +139,6 @@ src/main/resources - - src/test/resources - diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml index c95a0c0742ac..1994b9951885 100644 --- a/packaging/hudi-hadoop-mr-bundle/pom.xml +++ b/packaging/hudi-hadoop-mr-bundle/pom.xml @@ -160,9 +160,6 @@ src/main/resources - - src/test/resources - diff --git a/packaging/hudi-hive-sync-bundle/pom.xml b/packaging/hudi-hive-sync-bundle/pom.xml index a042c9439934..580b4e96eaa0 100644 --- a/packaging/hudi-hive-sync-bundle/pom.xml +++ b/packaging/hudi-hive-sync-bundle/pom.xml @@ -136,9 +136,6 @@ src/main/resources - - src/test/resources - diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml index 5b3974ad08de..a21b0ac110a2 100644 --- a/packaging/hudi-integ-test-bundle/pom.xml +++ b/packaging/hudi-integ-test-bundle/pom.xml @@ -164,6 +164,8 @@ io.prometheus:simpleclient_dropwizard io.prometheus:simpleclient_pushgateway io.prometheus:simpleclient_common + com.uber.m3:tally-m3 + com.uber.m3:tally-core org.openjdk.jol:jol-core @@ -272,6 +274,10 @@ org.eclipse.jetty. org.apache.hudi.org.eclipse.jetty. + + com.uber.m3. + org.apache.hudi.com.uber.m3. + diff --git a/packaging/hudi-kafka-connect-bundle/pom.xml b/packaging/hudi-kafka-connect-bundle/pom.xml index bfbafc7672fc..3cc55e6dbeaf 100644 --- a/packaging/hudi-kafka-connect-bundle/pom.xml +++ b/packaging/hudi-kafka-connect-bundle/pom.xml @@ -125,6 +125,8 @@ io.prometheus:simpleclient_dropwizard io.prometheus:simpleclient_pushgateway io.prometheus:simpleclient_common + com.uber.m3:tally-m3 + com.uber.m3:tally-core com.google.protobuf:protobuf-java org.scala-lang:* @@ -182,6 +184,10 @@ com.fasterxml.jackson. org.apache.hudi.com.fasterxml.jackson. + + com.uber.m3. + org.apache.hudi.com.uber.m3. 
+ @@ -206,9 +212,6 @@ src/main/resources - - src/test/resources - diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml index e498a459782b..275d7ae8e4e5 100644 --- a/packaging/hudi-presto-bundle/pom.xml +++ b/packaging/hudi-presto-bundle/pom.xml @@ -173,9 +173,6 @@ src/main/resources - - src/test/resources - diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml index 58e2d802e64a..9f60db19e029 100644 --- a/packaging/hudi-spark-bundle/pom.xml +++ b/packaging/hudi-spark-bundle/pom.xml @@ -112,6 +112,9 @@ io.prometheus:simpleclient_dropwizard io.prometheus:simpleclient_pushgateway io.prometheus:simpleclient_common + com.uber.m3:tally-m3 + com.uber.m3:tally-core + com.yammer.metrics:metrics-core org.apache.hive:hive-common @@ -201,6 +204,10 @@ org.roaringbitmap. org.apache.hudi.org.roaringbitmap. + + com.uber.m3. + org.apache.hudi.com.uber.m3. + @@ -228,9 +235,6 @@ src/main/resources - - src/test/resources - diff --git a/packaging/hudi-trino-bundle/pom.xml b/packaging/hudi-trino-bundle/pom.xml index b56f86cde7d4..f1d7e685dbb0 100644 --- a/packaging/hudi-trino-bundle/pom.xml +++ b/packaging/hudi-trino-bundle/pom.xml @@ -166,9 +166,6 @@ src/main/resources - - src/test/resources - diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml index 7bd698580cad..b992e5bbeb8c 100644 --- a/packaging/hudi-utilities-bundle/pom.xml +++ b/packaging/hudi-utilities-bundle/pom.xml @@ -141,6 +141,8 @@ io.prometheus:simpleclient_dropwizard io.prometheus:simpleclient_pushgateway io.prometheus:simpleclient_common + com.uber.m3:tally-m3 + com.uber.m3:tally-core org.apache.spark:spark-streaming-kafka-0-10_${scala.binary.version} org.apache.spark:spark-token-provider-kafka-0-10_${scala.binary.version} org.apache.kafka:kafka_${scala.binary.version} @@ -237,6 +239,10 @@ org.roaringbitmap. org.apache.hudi.org.roaringbitmap. + + com.uber.m3. + org.apache.hudi.com.uber.m3. + @@ -261,9 +267,6 @@ src/main/resources - - src/test/resources - diff --git a/packaging/hudi-utilities-slim-bundle/pom.xml b/packaging/hudi-utilities-slim-bundle/pom.xml index ed10d2ac8773..3919b103465c 100644 --- a/packaging/hudi-utilities-slim-bundle/pom.xml +++ b/packaging/hudi-utilities-slim-bundle/pom.xml @@ -127,6 +127,8 @@ io.prometheus:simpleclient_dropwizard io.prometheus:simpleclient_pushgateway io.prometheus:simpleclient_common + com.uber.m3:tally-m3 + com.uber.m3:tally-core org.apache.spark:spark-streaming-kafka-0-10_${scala.binary.version} org.apache.spark:spark-token-provider-kafka-0-10_${scala.binary.version} org.apache.kafka:kafka_${scala.binary.version} @@ -196,6 +198,10 @@ com.google.protobuf. org.apache.hudi.com.google.protobuf. + + com.uber.m3. + org.apache.hudi.com.uber.m3. 
+ @@ -220,9 +226,6 @@ src/main/resources - - src/test/resources - diff --git a/pom.xml b/pom.xml index 3eeed340178b..34c741f3466e 100644 --- a/pom.xml +++ b/pom.xml @@ -113,7 +113,7 @@ 5.7.2 5.7.2 1.7.2 - 3.3.3 + 3.12.4 2.17.2 1.7.36 2.9.9 @@ -130,6 +130,7 @@ 1.5.6 0.9.47 0.25 + 0.13.0 0.8.0 4.5.13 4.4.13 @@ -166,7 +167,7 @@ 3.2.3 3.3.1 3.4.1 - 3.5.0 + 3.5.1 hudi-spark3.2.x com.fasterxml.jackson.module:jackson-module-afterburner + com.fasterxml.jackson.module:jackson-module-scala_${scala.binary.version} com.google.protobuf:protobuf-java @@ -1109,7 +1112,6 @@ metrics-jmx ${metrics.version} - io.prometheus simpleclient @@ -1130,6 +1132,16 @@ simpleclient_pushgateway ${prometheus.version} + + com.uber.m3 + tally-m3 + ${tally.version} + + + com.uber.m3 + tally-core + ${tally.version} + com.beust diff --git a/rfc/rfc-60/read_flow.png b/rfc/rfc-60/read_flow.png new file mode 100644 index 000000000000..4ef464f41e74 Binary files /dev/null and b/rfc/rfc-60/read_flow.png differ diff --git a/rfc/rfc-60/rfc-60.md b/rfc/rfc-60/rfc-60.md index d509aec1f208..bdfaa58b8990 100644 --- a/rfc/rfc-60/rfc-60.md +++ b/rfc/rfc-60/rfc-60.md @@ -15,7 +15,7 @@ limitations under the License. --> -# RFC-60: Federated Storage Layer +# RFC-60: Federated Storage Layout ## Proposers - @umehrot2 @@ -52,7 +52,10 @@ but there can be a 30 - 60 minute wait time before new partitions are created. T same table path prefix could result in these request limits being hit for the table prefix, specially as workloads scale, and there are several thousands of files being written/updated concurrently. This hurts performance due to re-trying of failed requests affecting throughput, and result in occasional failures if the retries are not able to -succeed either and continue to be throttled. +succeed either and continue to be throttled. Note an exception would be non-partitioned tables +reside directly under S3 buckets (using S3 buckets as their table paths), and those tables would be free +from the throttling problem. However, this exception cannot invalidate the necessity of addressing the throttling +problem for partitioned tables. The traditional storage layout also tightly couples the partitions as folders under the table path. However, some users want flexibility to be able to distribute files/partitions under multiple different paths across cloud stores, @@ -97,22 +100,21 @@ public interface HoodieStorageStrategy extends Serializable { } ``` -### Generating file paths for object store optimized layout +### Generating File Paths for Object Store Optimized Layout We want to distribute files evenly across multiple random prefixes, instead of following the traditional Hive storage layout of keeping them under a common table path/prefix. In addition to the `Table Path`, for this new layout user will configure another `Table Storage Path` under which the actual data files will be distributed. The original `Table Path` will be used to maintain the table/partitions Hudi metadata. -For the purpose of this documentation lets assume: +For the purpose of this documentation let's assume: ``` Table Path => s3://// Table Storage Path => s3:/// ``` -Note: `Table Storage Path` can be a path in the same Amazon S3 bucket or a different bucket. For best results, -`Table Storage Path` should be a top-level bucket instead of a prefix under the bucket to avoid multiple -tables sharing the prefix. +`Table Storage Path` should be a top-level bucket instead of a prefix under the bucket for the best results. 
+So that we can avoid multiple tables sharing the prefix causing throttling. We will use a Hashing function on the `Partition Path/File ID` to map them to a prefix generated under `Table Storage Path`: ``` @@ -148,7 +150,7 @@ s3:///0bfb3d6e//.075f3295-def8-4a42-a927- ... ``` -Note: Storage strategy would only return a storage location instead of a full path. In the above example, +Storage strategy would only return a storage location instead of a full path. In the above example, the storage location is `s3:///0bfb3d6e/`, and the lower-level folder structure would be appended later automatically to get the actual file path. In another word, users would only be able to customize upper-level folder structure (storage location). @@ -176,7 +178,7 @@ The hashing function should be made user configurable for use cases like bucketi sub-partitioning/re-hash to reduce the number of hash prefixes. Having too many unique hash prefixes would make files too dispersed, and affect performance on other operations such as listing. -### Maintain mapping to files +### Maintaining Mapping to Files with Metadata Table In [RFC-15](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=147427331), we introduced an internal Metadata Table with a `files` partition that maintains mapping from partitions to list of files in the partition stored @@ -196,13 +198,75 @@ for metadata table to be populated. 4. If there is an error reading from Metadata table, we will not fall back listing from file system. -5. In case of metadata table getting corrupted or lost, we need to have a solution here to reconstruct metadata table -from the files which distributed using federated storage. We will likely have to implement a file system listing -logic, that can get all the partition to files mapping by listing all the prefixes under the `Table Storage Path`. -Following the folder structure of adding table name/partitions under the prefix will help in getting the listing and -identifying the table/partition they belong to. +### Integration +This section mainly describes how storage strategy is integrated with other components and how read/write +would look like from Hudi side with object storage layout. + +We propose integrating the storage strategy at the filesystem level, specifically within `HoodieWrapperFileSystem`. +This way, only file read/write operations undergo path conversion and we can limit the usage of +storage strategy to only filesystem level so other upper-level components don't need to be aware of physical paths. + +This also mandates that `HoodieWrapperFileSystem` is the filesystem of choice for all upper-level Hudi components. +Getting filesystem from `Path` or such won't be allowed anymore as using raw filesystem may not reach +to physical locations without storage strategy. Hudi components can simply call `HoodieMetaClient#getFs` +to get `HoodieWrapperFileSystem`, and this needs to be the only allowed way for any filesystem-related operation. +The only exception is when we need to interact with metadata that's still stored under the original table path, +and we should call `HoodieMetaClient#getRawFs` in this case so `HoodieMetaClient` can still be the single entry +for getting filesystem. + +![](wrapper_fs.png) + +When conducting a read operation, Hudi would: +1. Access filesystem view, `HoodieMetadataFileSystemView` specifically +2. Scan metadata table via filesystem view to compose `HoodieMetadataPayload` +3. 
+
+#### Considerations
+- Path conversion happens on the fly when reading/writing files. This saves Hudi from storing physical locations
+and adds the cost of hashing, but the performance burden should be negligible.
+- Since the table path and data path will most likely have different top-level folders/authorities,
+`HoodieWrapperFileSystem` should maintain at least two `FileSystem` objects: one to access the table path and another
+to access the storage path. `HoodieWrapperFileSystem` should intelligently tell whether it needs
+to convert a path by checking the path on the fly.
+- When using a Hudi file reader/writer implementation, we will need to pass `HoodieWrapperFileSystem` down
+to the parent reader. For instance, when using `HoodieAvroHFileReader`, we will need to pass `HoodieWrapperFileSystem`
+to `HFile.Reader` so it can have access to the storage strategy. If the reader/writer doesn't take a filesystem
+directly (e.g. `ParquetFileReader` only takes `Configuration` and `Path` for reading), then we will
+need to register `HoodieWrapperFileSystem` in the `Configuration` so it can be initialized/used later.
+
+### Repair Tool
+In case the metadata table gets corrupted or lost, we need a way to reconstruct it
+from the files that are distributed using federated storage. We will need a repair tool
+that obtains the full partition-to-files mapping by listing all the prefixes under the `Table Storage Path`
+and then reconstructs the metadata table.
+
+In Hudi we already have `HoodieBackedTableMetadataWriter`, which lists existing data files to initialize/construct the
+metadata table. We can extract its file-listing and partition-info logic into a dedicated method,
+then extend `HoodieBackedTableMetadataWriter` and override that method so
+the repair tool lists data files stored under the storage path instead of the table path.
 
-### Query Side Integration
+```java
+  public class StorageRepairMetadataWriter extends SparkHoodieBackedTableMetadataWriter {
+    StorageRepairMetadataWriter(Configuration hadoopConf,
+                                HoodieWriteConfig writeConfig,
+                                HoodieEngineContext engineContext,
+                                Option<String> inflightInstantTimestamp) {
+      super(hadoopConf, writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, engineContext, inflightInstantTimestamp);
+    }
+
+    @Override
+    protected Map<String, Map<String, Long>> getPartitionToFilesMap() {
+      // Hypothetical helper: lists data files under the hashed prefixes of the Table Storage Path.
+      return listFilesUnderStoragePath();
+    }
+  }
+```
+
+### Query Engine Side Integration
 
 Spark, Hive, [Presto](https://github.com/prestodb/presto/commit/ef1fd25c582631513ccdd097e0a654cda44ec3dc), and
 [Trino](https://github.com/trinodb/trino/pull/10228) are already integrated to use metadata based listing.
@@ -224,4 +288,7 @@ should not be user's responsibility to enable metadata listing from query engine
 
 - We need a tool to bootstrap existing Hudi table to switch to another storage strategy.
 - Partition-level storage strategy: Each partition can have its own storage strategy for users to have finer grasp
 on how data is stored. It would also make new storage strategies more accessible for
-existing Hudi tables as they would only need to re-construct the metadata table.
\ No newline at end of file
+existing Hudi tables as they would only need to re-construct the metadata table.
+- For the first cut, we would only have 2 `FileSystem` objects in `HoodieWrapperFileSystem`, and this +prevents users from distributing their data across multiple different buckets. We'll need to support +this in the future. \ No newline at end of file diff --git a/rfc/rfc-60/wrapper_fs.png b/rfc/rfc-60/wrapper_fs.png new file mode 100644 index 000000000000..179d41b9c296 Binary files /dev/null and b/rfc/rfc-60/wrapper_fs.png differ diff --git a/rfc/rfc-76/rfc-76.md b/rfc/rfc-76/rfc-76.md index 1ddc107b5ce7..e9f176f1d5f7 100644 --- a/rfc/rfc-76/rfc-76.md +++ b/rfc/rfc-76/rfc-76.md @@ -61,7 +61,7 @@ Let's consider following scenario: while persisting the dataset, writing one of To provide for aforementioned requirement of the records obtaining globally unique synthetic keys either of the 2 following properties have to hold true: Key generation has to be deterministic and reproducible (so that upon Spark retries we could be certain same records will be obtaining the identity value they did during previous pass) Records have to be getting globally unique identity value every time (such that key collisions are simply impossible) -Note that, deterministic and reproducible identity value association is is only feasible for the incoming datasets represented as "determinate" RDDs. However, It's worth pointing out that other RDD classes (such as "unordered", "indeterminate") are very rare occurrences involving some inherent non-determinism (varying content, order, etc), and pose challenges in terms of their respective handling by Hudi even w/o auto-generation (for ex, for such RDDs Hudi can't provide for uniqueness guarantee even for "insert" operation in the presence of failures). +Note that, deterministic and reproducible identity value association is only feasible for the incoming datasets represented as "determinate" RDDs. However, It's worth pointing out that other RDD classes (such as "unordered", "indeterminate") are very rare occurrences involving some inherent non-determinism (varying content, order, etc), and pose challenges in terms of their respective handling by Hudi even w/o auto-generation (for ex, for such RDDs Hudi can't provide for uniqueness guarantee even for "insert" operation in the presence of failures). For achieving our goal of providing globally unique keys we're planning on relying on the following synthetic key format comprised of 2 components (Reserved) Commit timestamp: Use reserved commit timestamp as prefix (to provide for global uniqueness of rows) Row id: unique identifier of the row (record) w/in the provided batch diff --git a/scripts/ci/move_surefire_reports.sh b/scripts/ci/move_surefire_reports.sh new file mode 100755 index 000000000000..a4b9b2869bda --- /dev/null +++ b/scripts/ci/move_surefire_reports.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Check if two arguments were provided +if [ "$#" -ne 2 ]; then + echo "Usage: $0 " + exit 1 +fi + +# Assign the first and second argument to SOURCE and DEST variables +SOURCE="$1" +DEST="$2" + +# Ensure the source directory exists +if [ ! -d "$SOURCE" ]; then + echo "Source directory does not exist: $SOURCE" + exit 1 +fi + +# Create the destination directory if it doesn't exist +if [ ! -d "$DEST" ]; then + mkdir -p "$DEST" +fi + +find "$SOURCE" -type f -name "TEST-*.xml" | while IFS= read -r file; do + # Extract the relative directory path + relative_path="${file#$SOURCE}" + destination_path="$DEST$relative_path" + destination_dir=$(dirname "$destination_path") + + if [[ "$relative_path" == *"scripts/ci"* ]]; then + continue # Skip this file + fi + + # Create the destination directory if it doesn't exist + mkdir -p "$destination_dir" + + # Move the file to the new location, preserving the directory structure + mv "$file" "$destination_path" +done diff --git a/scripts/pr_compliance.py b/scripts/pr_compliance.py index af7d9454f70f..dcd3c4c0caf4 100644 --- a/scripts/pr_compliance.py +++ b/scripts/pr_compliance.py @@ -108,7 +108,7 @@ def test_title(): # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # -#Enums for the the outcome of parsing a single line +#Enums for the outcome of parsing a single line class Outcomes: #error was found so we should stop parsing and exit with error ERROR = 0 @@ -389,21 +389,29 @@ def validate(self): #Generate the validator for the current template. #needs to be manually updated def make_default_validator(body, debug=False): - changelogs = ParseSectionData("CHANGELOGS", + changelogs = ParseSectionData("CHANGE_LOGS", "### Change Logs", {"_Describe context and summary for this change. Highlight if any code was copied._"}) impact = ParseSectionData("IMPACT", "### Impact", {"_Describe any public API or user-facing feature change or any performance impact._"}) - risklevel = RiskLevelData("RISKLEVEL", + risklevel = RiskLevelData("RISK_LEVEL", "### Risk level", {"_If medium or high, explain what verification was done to mitigate the risks._"}) + docsUpdate = ParseSectionData("DOCUMENTATION_UPDATE", + "### Documentation Update", + {"_Describe any necessary documentation update if there is any new feature, config, or user-facing change_", + "", + "- _The config description must be updated if new configs are added or the default value of the configs are changed. If not, put \"none\"._", + "- _Any new feature or user-facing change requires updating the Hudi website. 
Please create a Jira ticket, attach the", + " ticket number here and follow the [instruction](https://hudi.apache.org/contribute/developer-setup#website) to make", + " changes to the website._"}) checklist = ParseSectionData("CHECKLIST", "### Contributor's checklist", {}) - parseSections = ParseSections([changelogs, impact, risklevel, checklist]) + parseSections = ParseSections([changelogs, impact, risklevel, docsUpdate, checklist]) - return ValidateBody(body, "CHANGELOGS", parseSections, debug) + return ValidateBody(body, "CHANGE_LOGS", parseSections, debug) #takes a list of strings and returns a string of those lines separated by \n @@ -466,6 +474,21 @@ def test_body(): good_risklevel = template_risklevel.copy() good_risklevel[1] = "none" + template_docs_update = [ + "### Documentation Update", + "", + "_Describe any necessary documentation update if there is any new feature, config, or user-facing change_", + "", + "- _The config description must be updated if new configs are added or the default value of the configs are changed. If not, put \"none\"._", + "- _Any new feature or user-facing change requires updating the Hudi website. Please create a Jira ticket, attach the", + " ticket number here and follow the [instruction](https://hudi.apache.org/contribute/developer-setup#website) to make", + " changes to the website._", + "" + ] + + good_docs_update = template_docs_update.copy() + good_docs_update[1] = "update docs" + template_checklist = [ "### Contributor's checklist", "", @@ -476,10 +499,10 @@ def test_body(): ] #list of sections that when combined form a valid body - good_sections = [good_changelogs, good_impact, good_risklevel, template_checklist] + good_sections = [good_changelogs, good_impact, good_risklevel, good_docs_update, template_checklist] #list of sections that when combined form the template - template_sections = [template_changelogs, template_impact, template_risklevel, template_checklist] + template_sections = [template_changelogs, template_impact, template_risklevel, template_docs_update, template_checklist] tests_passed = True #Test section not filled out @@ -532,9 +555,6 @@ def test_body(): return tests_passed - - - if __name__ == '__main__': if len(sys.argv) > 1: title_tests = test_title()