From d5eddf52ab706bc68799b92ca10b183871a7a9a0 Mon Sep 17 00:00:00 2001
From: Pedro Silva <pedro.cls93@gmail.com>
Date: Thu, 2 Dec 2021 20:42:43 +0000
Subject: [PATCH 1/6] fix(test): Adds docker engine configuration checks before
 running docker-based tests for metadata-io (#3654)

---
 .../linkedin/metadata/DockerTestUtils.java    | 21 +++++++++++++++++++
 .../graph/DgraphGraphServiceTest.java         |  3 ++-
 .../graph/ElasticSearchGraphServiceTest.java  |  2 ++
 .../metadata/search/SearchServiceTest.java    |  2 ++
 .../ElasticSearchServiceTest.java             |  2 ++
 ...lasticSearchSystemMetadataServiceTest.java |  2 ++
 ...sticSearchTimeseriesAspectServiceTest.java |  2 ++
 7 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/DockerTestUtils.java

diff --git a/metadata-io/src/test/java/com/linkedin/metadata/DockerTestUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/DockerTestUtils.java
new file mode 100644
index 0000000000000..364ccd86d45fd
--- /dev/null
+++ b/metadata-io/src/test/java/com/linkedin/metadata/DockerTestUtils.java
@@ -0,0 +1,21 @@
+package com.linkedin.metadata;
+
+import com.github.dockerjava.api.DockerClient;
+
+public class DockerTestUtils {
+    private static final int MIN_MEMORY_NEEDED_GB = 7; // decimal gigabytes (10^9 bytes)
+
+    /** Fails fast (IllegalStateException) if Docker's total memory is under the minimum; unreported counts as 0. */
+    public static void checkContainerEngine(DockerClient dockerClient) {
+        final Long reportedBytes = dockerClient.infoCmd().exec().getMemTotal();
+        final long memoryBytes = reportedBytes == null ? 0L : reportedBytes; // boxed Long: avoid NPE on unboxing
+        final long memoryGb = memoryBytes / 1000 / 1000 / 1000;
+        if (memoryGb < MIN_MEMORY_NEEDED_GB) {
+            throw new IllegalStateException(String.format("Total Docker memory configured: %s GB (%d bytes) "
+                    + "is below the minimum threshold of %d GB", memoryGb, memoryBytes, MIN_MEMORY_NEEDED_GB));
+        }
+    }
+
+    private DockerTestUtils() {
+    }
+}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/DgraphGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/DgraphGraphServiceTest.java
index 894cb00a103ce..249a3b31b1857 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/graph/DgraphGraphServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/DgraphGraphServiceTest.java
@@ -27,6 +27,7 @@
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
+import static com.linkedin.metadata.DockerTestUtils.checkContainerEngine;
 import static com.linkedin.metadata.search.utils.QueryUtils.EMPTY_FILTER;
 import static com.linkedin.metadata.search.utils.QueryUtils.newFilter;
 import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter;
@@ -52,8 +53,8 @@ public void setup() {
                 .withTmpFs(Collections.singletonMap("/dgraph", "rw,noexec,nosuid,size=1g"))
                 .withStartupTimeout(Duration.ofMinutes(1))
                 .withStartupAttempts(3);
+        checkContainerEngine(_container.getDockerClient());
         _container.start();
-
         Slf4jLogConsumer logConsumer = new Slf4jLogConsumer(log);
         _container.followOutput(logConsumer);
     }
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/ElasticSearchGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/ElasticSearchGraphServiceTest.java
index af76422f3a98e..81c03bf4b21fb 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/graph/ElasticSearchGraphServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/ElasticSearchGraphServiceTest.java
@@ -27,6 +27,7 @@
 import java.util.HashSet;
 import java.util.List;
 
+import static com.linkedin.metadata.DockerTestUtils.checkContainerEngine;
 import static org.testng.Assert.assertEquals;
 
 import static com.linkedin.metadata.graph.elastic.ElasticSearchGraphService.INDEX_NAME;
@@ -45,6 +46,7 @@ public class ElasticSearchGraphServiceTest extends GraphServiceTestBase {
   @BeforeTest
   public void setup() {
     _elasticsearchContainer = new ElasticsearchContainer(IMAGE_NAME);
+    checkContainerEngine(_elasticsearchContainer.getDockerClient());
     _elasticsearchContainer.start();
     _searchClient = buildRestClient();
     _client = buildService();
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java
index f99d1a6ade0a0..f9836273ce434 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java
@@ -32,6 +32,7 @@
 import org.testng.annotations.BeforeTest;
 import org.testng.annotations.Test;
 
+import static com.linkedin.metadata.DockerTestUtils.checkContainerEngine;
 import static com.linkedin.metadata.ElasticSearchTestUtils.syncAfterWrite;
 import static org.testng.Assert.assertEquals;
 
@@ -57,6 +58,7 @@ public void setup() {
     _indexConvention = new IndexConventionImpl(null);
     _elasticsearchContainer = new ElasticsearchContainer(IMAGE_NAME);
     _settingsBuilder = new SettingsBuilder(Collections.emptyList());
+    checkContainerEngine(_elasticsearchContainer.getDockerClient());
     _elasticsearchContainer.start();
     _searchClient = buildRestClient();
     _elasticSearchService = buildEntitySearchService();
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java
index a6ce63fb5c401..ed0fa2c60281b 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java
@@ -30,6 +30,7 @@
 import org.testng.annotations.BeforeTest;
 import org.testng.annotations.Test;
 
+import static com.linkedin.metadata.DockerTestUtils.checkContainerEngine;
 import static com.linkedin.metadata.ElasticSearchTestUtils.syncAfterWrite;
 import static org.testng.Assert.assertEquals;
 
@@ -53,6 +54,7 @@ public void setup() {
     _indexConvention = new IndexConventionImpl(null);
     _elasticsearchContainer = new ElasticsearchContainer(IMAGE_NAME);
     _settingsBuilder = new SettingsBuilder(Collections.emptyList());
+    checkContainerEngine(_elasticsearchContainer.getDockerClient());
     _elasticsearchContainer.start();
     _searchClient = buildRestClient();
     _elasticSearchService = buildService();
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataServiceTest.java
index 9b0154d4d7d56..82e6936b9fe48 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataServiceTest.java
@@ -18,6 +18,7 @@
 import org.testng.annotations.BeforeTest;
 import org.testng.annotations.Test;
 
+import static com.linkedin.metadata.DockerTestUtils.checkContainerEngine;
 import static com.linkedin.metadata.ElasticSearchTestUtils.syncAfterWrite;
 import static com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService.INDEX_NAME;
 import static org.testng.Assert.*;
@@ -36,6 +37,7 @@ public class ElasticSearchSystemMetadataServiceTest {
   @BeforeTest
   public void setup() {
     _elasticsearchContainer = new ElasticsearchContainer(IMAGE_NAME);
+    checkContainerEngine(_elasticsearchContainer.getDockerClient());
     _elasticsearchContainer.start();
     _searchClient = buildRestClient();
     _client = buildService();
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectServiceTest.java
index 445bcaaa66935..0813414d133f1 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectServiceTest.java
@@ -50,6 +50,7 @@
 import org.testng.annotations.BeforeTest;
 import org.testng.annotations.Test;
 
+import static com.linkedin.metadata.DockerTestUtils.checkContainerEngine;
 import static com.linkedin.metadata.ElasticSearchTestUtils.*;
 import static org.testng.Assert.*;
 
@@ -89,6 +90,7 @@ public void setup() {
         TestEntityProfile.class.getClassLoader().getResourceAsStream("test-entity-registry.yml"));
     _indexConvention = new IndexConventionImpl(null);
     _elasticsearchContainer = new ElasticsearchContainer(IMAGE_NAME);
+    checkContainerEngine(_elasticsearchContainer.getDockerClient());
     _elasticsearchContainer.start();
     _searchClient = buildRestClient();
     _elasticSearchTimeseriesAspectService = buildService();

From 8757543be813d46485098793198cab7e6420af72 Mon Sep 17 00:00:00 2001
From: Dexter Lee <dexter@acryl.io>
Date: Thu, 2 Dec 2021 20:22:19 -0800
Subject: [PATCH 2/6] refactor: remove unused PDL files (#3659)

---
 .../main/java/com/linkedin/mxe/Configs.java   |   4 -
 .../linkedin/metadata/entity/BaseEntity.pdl   |  12 ---
 .../linkedin/metadata/entity/ChartEntity.pdl  |  24 -----
 .../metadata/entity/CorpGroupEntity.pdl       |  19 ----
 .../metadata/entity/CorpUserEntity.pdl        |  19 ----
 .../metadata/entity/DashboardEntity.pdl       |  24 -----
 .../metadata/entity/DataFlowEntity.pdl        |  30 ------
 .../metadata/entity/DataJobEntity.pdl         |  27 -----
 .../metadata/entity/DataProcessEntity.pdl     |  31 ------
 .../metadata/entity/DatasetEntity.pdl         |  31 ------
 .../com/linkedin/metadata/entity/Entity.pdl   |   6 --
 .../metadata/entity/GlossaryNodeEntity.pdl    |  19 ----
 .../metadata/entity/GlossaryTermEntity.pdl    |  19 ----
 .../metadata/entity/MLModelEntity.pdl         |  31 ------
 .../linkedin/metadata/entity/TagEntity.pdl    |  19 ----
 .../relationship/BaseRelationship.pdl         |  19 ----
 .../metadata/relationship/Consumes.pdl        |  11 --
 .../metadata/relationship/Contains.pdl        |  17 ---
 .../metadata/relationship/DownstreamOf.pdl    |  21 ----
 .../metadata/relationship/EvaluatedOn.pdl     |  12 ---
 .../metadata/relationship/IsPartOf.pdl        |  12 ---
 .../metadata/relationship/OwnedBy.pdl         |  36 -------
 .../metadata/relationship/Produces.pdl        |  11 --
 .../metadata/relationship/Relationship.pdl    |   6 --
 .../metadata/relationship/ReportsTo.pdl       |  11 --
 .../metadata/relationship/RunsBefore.pdl      |  11 --
 .../metadata/relationship/TrainedOn.pdl       |  12 ---
 .../linkedin/metadata/search/BaseDocument.pdl |  17 ---
 .../metadata/search/ChartDocument.pdl         |  57 ----------
 .../metadata/search/CorpGroupDocument.pdl     |  34 ------
 .../metadata/search/CorpUserInfoDocument.pdl  |  59 -----------
 .../metadata/search/DashboardDocument.pdl     |  45 --------
 .../metadata/search/DataFlowDocument.pdl      |  60 -----------
 .../metadata/search/DataJobDocument.pdl       |  77 --------------
 .../metadata/search/DataProcessDocument.pdl   |  61 -----------
 .../metadata/search/DatasetDocument.pdl       | 100 ------------------
 .../com/linkedin/metadata/search/Document.pdl |   6 --
 .../search/GlossaryNodeInfoDocument.pdl       |  29 -----
 .../search/GlossaryTermInfoDocument.pdl       |  39 -------
 .../metadata/search/MLModelDocument.pdl       |  71 -------------
 .../linkedin/metadata/search/TagDocument.pdl  |  19 ----
 .../com/linkedin/mxe/MetadataGraphEvent.pdl   |  31 ------
 .../com/linkedin/mxe/MetadataSearchEvent.pdl  |  20 ----
 .../linkedin/metadata/ModelValidation.java    |  27 +----
 .../metadata/ModelValidationConstants.java    |  10 --
 45 files changed, 4 insertions(+), 1252 deletions(-)
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/BaseEntity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/ChartEntity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/CorpGroupEntity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/CorpUserEntity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DashboardEntity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DataFlowEntity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DataJobEntity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DataProcessEntity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DatasetEntity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/Entity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/GlossaryNodeEntity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/GlossaryTermEntity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/MLModelEntity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/entity/TagEntity.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/BaseRelationship.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Consumes.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Contains.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/DownstreamOf.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/EvaluatedOn.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/IsPartOf.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/OwnedBy.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Produces.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Relationship.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/ReportsTo.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/RunsBefore.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/TrainedOn.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/BaseDocument.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/ChartDocument.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/CorpGroupDocument.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/CorpUserInfoDocument.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/DashboardDocument.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/DataFlowDocument.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/DataJobDocument.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/DataProcessDocument.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/DatasetDocument.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/Document.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/GlossaryNodeInfoDocument.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/GlossaryTermInfoDocument.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/MLModelDocument.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/search/TagDocument.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/mxe/MetadataGraphEvent.pdl
 delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/mxe/MetadataSearchEvent.pdl

diff --git a/metadata-events/mxe-registration/src/main/java/com/linkedin/mxe/Configs.java b/metadata-events/mxe-registration/src/main/java/com/linkedin/mxe/Configs.java
index 1364edd89a9b3..df06d1bae28e0 100644
--- a/metadata-events/mxe-registration/src/main/java/com/linkedin/mxe/Configs.java
+++ b/metadata-events/mxe-registration/src/main/java/com/linkedin/mxe/Configs.java
@@ -3,8 +3,6 @@
 import com.linkedin.pegasus2avro.mxe.FailedMetadataChangeEvent;
 import com.linkedin.pegasus2avro.mxe.MetadataAuditEvent;
 import com.linkedin.pegasus2avro.mxe.MetadataChangeEvent;
-import com.linkedin.pegasus2avro.mxe.MetadataGraphEvent;
-import com.linkedin.pegasus2avro.mxe.MetadataSearchEvent;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
@@ -26,8 +24,6 @@ public class Configs {
       put(Topics.METADATA_AUDIT_EVENT, MetadataAuditEvent.SCHEMA$);
       put(Topics.METADATA_CHANGE_EVENT, MetadataChangeEvent.SCHEMA$);
       put(Topics.FAILED_METADATA_CHANGE_EVENT, FailedMetadataChangeEvent.SCHEMA$);
-      put(Topics.METADATA_GRAPH_EVENT, MetadataGraphEvent.SCHEMA$);
-      put(Topics.METADATA_SEARCH_EVENT, MetadataSearchEvent.SCHEMA$);
 
       put(Topics.DEV_METADATA_AUDIT_EVENT, MetadataAuditEvent.SCHEMA$);
       put(Topics.DEV_METADATA_CHANGE_EVENT, MetadataChangeEvent.SCHEMA$);
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/BaseEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/BaseEntity.pdl
deleted file mode 100644
index 50f6eac371459..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/BaseEntity.pdl
+++ /dev/null
@@ -1,12 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-/**
- * Common fields that apply to all entities
- */
-record BaseEntity {
-
-  /**
-   * Whether the entity has been removed or not
-   */
-  removed: optional boolean = false
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/ChartEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/ChartEntity.pdl
deleted file mode 100644
index 977097ed564ac..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/ChartEntity.pdl
+++ /dev/null
@@ -1,24 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-import com.linkedin.common.ChartUrn
-
-/**
- * Data model for a Chart entity
- */
-record ChartEntity includes BaseEntity {
-
-  /**
-   * Urn for the chart
-   */
-  urn: ChartUrn
-
-  /**
-   * Dashboard tool
-   */
-  dashboardTool: optional string
-
-  /**
-   * Chart Id
-   */
-  chartId: optional string
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/CorpGroupEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/CorpGroupEntity.pdl
deleted file mode 100644
index 7833b210850f4..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/CorpGroupEntity.pdl
+++ /dev/null
@@ -1,19 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-import com.linkedin.common.CorpGroupUrn
-
-/**
- * Data model for a CorpGroup entity(go/groupId)
- */
-record CorpGroupEntity includes BaseEntity {
-
-  /**
-   * Urn for the LDAP Group
-   */
-  urn: CorpGroupUrn
-
-  /**
-   * name of the group, e.g. wherehows-dev, ask_metadata
-   */
-  name: optional string
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/CorpUserEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/CorpUserEntity.pdl
deleted file mode 100644
index 8a01900b29525..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/CorpUserEntity.pdl
+++ /dev/null
@@ -1,19 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-import com.linkedin.common.CorpuserUrn
-
-/**
- * Data model for a CorpUser entity
- */
-record CorpUserEntity includes BaseEntity {
-
-  /**
-   * Urn for the LDAP User
-   */
-  urn: CorpuserUrn
-
-  /**
-   * LDAP name(id) : e.g. hzhang2, ywang5 ..
-   */
-  name: optional string
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DashboardEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DashboardEntity.pdl
deleted file mode 100644
index 655cf0341fbec..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DashboardEntity.pdl
+++ /dev/null
@@ -1,24 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-import com.linkedin.common.DashboardUrn
-
-/**
- * Data model for a Dashboard entity
- */
-record DashboardEntity includes BaseEntity {
-
-  /**
-   * Urn for the dashboard
-   */
-  urn: DashboardUrn
-
-  /**
-   * Dashboard tool
-   */
-  dashboardTool: optional string
-
-  /**
-   * Dashboard Id
-   */
-  dashboardId: optional string
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DataFlowEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DataFlowEntity.pdl
deleted file mode 100644
index 1c9e777b52307..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DataFlowEntity.pdl
+++ /dev/null
@@ -1,30 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-import com.linkedin.common.DataFlowUrn
-
-/**
- * Data model for a DataFlow entity
- */
-record DataFlowEntity includes BaseEntity {
-
-  /**
-   * Urn for the DataFlow
-   */
-  urn: DataFlowUrn
-
-  /**
-   * Workflow orchestrator ex: Azkaban, Airflow
-   */
-  orchestrator: optional string
-
-  /**
-   * Id of the flow
-   */
-  flowId: optional string
-
-  /**
-   * Cluster of the flow
-   */
-  cluster: optional string
-
-}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DataJobEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DataJobEntity.pdl
deleted file mode 100644
index 91dfdeacecbf7..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DataJobEntity.pdl
+++ /dev/null
@@ -1,27 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-import com.linkedin.common.DataJobUrn
-import com.linkedin.common.DataFlowUrn
-
-
-/**
- * Data model for a DataJob entity
- */
-record DataJobEntity includes BaseEntity {
-
-  /**
-   * Urn for the DataJob
-   */
-  urn: DataJobUrn
-
-  /**
-   * Urn of the associated DataFlow
-   */
-  flow: optional DataFlowUrn
-
-  /**
-   * Id of the job
-   */
-  jobId: optional string
-
-}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DataProcessEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DataProcessEntity.pdl
deleted file mode 100644
index 346e2665a3874..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DataProcessEntity.pdl
+++ /dev/null
@@ -1,31 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-import com.linkedin.common.DataPlatformUrn
-import com.linkedin.common.FabricType
-import com.linkedin.common.DataProcessUrn
-
-/**
- * Data model for a Data Process entity
- */
-record DataProcessEntity {
-
-  /**
-   * Urn for the Data Process
-   */
-  urn: DataProcessUrn
-
-  /**
-   * Data Process name(id)
-   */
-  name: optional string
-
-  /**
-   * Process Orchestrator for this process in the form. Options can be Airflow, Azkaban, Azure Data Factory
-   */
-   orchestrator: optional string
-
-  /**
-   * Fabric type where dataset belongs to or where it was generated.
-   */
-  origin: optional FabricType
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DatasetEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DatasetEntity.pdl
deleted file mode 100644
index 23c1ab0aa3514..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DatasetEntity.pdl
+++ /dev/null
@@ -1,31 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-import com.linkedin.common.DataPlatformUrn
-import com.linkedin.common.DatasetUrn
-import com.linkedin.common.FabricType
-
-/**
- * Data model for a dataset entity
- */
-record DatasetEntity includes BaseEntity {
-
-  /**
-   * Urn for the dataset
-   */
-  urn: DatasetUrn
-
-  /**
-   * Dataset native name e.g. {db}.{table}, /dir/subdir/{name}, or {name}
-   */
-  name: optional string
-
-  /**
-   * Platform urn for the dataset in the form of urn:li:platform:{platform_name}
-   */
-  platform: optional DataPlatformUrn
-
-  /**
-   * Fabric type where dataset belongs to or where it was generated.
-   */
-  origin: optional FabricType
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/Entity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/Entity.pdl
deleted file mode 100644
index 6ba41ac73940c..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/Entity.pdl
+++ /dev/null
@@ -1,6 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-/**
- * A union of all supported entity types.
- */
-typeref Entity = union[CorpUserEntity, DatasetEntity, DataProcessEntity, MLModelEntity, DataFlowEntity, DataJobEntity]
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/GlossaryNodeEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/GlossaryNodeEntity.pdl
deleted file mode 100644
index e0ea997bf75ab..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/GlossaryNodeEntity.pdl
+++ /dev/null
@@ -1,19 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-import com.linkedin.common.GlossaryNodeUrn
-
-/**
- * Data model for a GlossaryNode entity
- */
-record GlossaryNodeEntity includes BaseEntity {
-
-  /**
-   * Urn for the GlossaryNode
-   */
-  urn: GlossaryNodeUrn
-
-  /**
-   * Business node name
-   */
-  name: optional string
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/GlossaryTermEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/GlossaryTermEntity.pdl
deleted file mode 100644
index 4e3dd82a5f8a9..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/GlossaryTermEntity.pdl
+++ /dev/null
@@ -1,19 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-import com.linkedin.common.GlossaryTermUrn
-
-/**
- * Data model for a GlossaryTerm entity
- */
-record GlossaryTermEntity includes BaseEntity {
-
-  /**
-   * Urn for the GlossaryTerm
-   */
-  urn: GlossaryTermUrn
-
-  /**
-   * Business term name
-   */
-  name: optional string
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/MLModelEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/MLModelEntity.pdl
deleted file mode 100644
index 5b5e25814567d..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/MLModelEntity.pdl
+++ /dev/null
@@ -1,31 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-import com.linkedin.common.DataPlatformUrn
-import com.linkedin.common.FabricType
-import com.linkedin.common.MLModelUrn
-
-/**
- * Data model for a ML Model entity
- */
-record MLModelEntity includes BaseEntity {
-
-  /**
-   * Urn for the ML Model
-   */
-  urn: MLModelUrn
-
-  /**
-   * ML Model native name
-   */
-  name: optional string
-
-  /**
-   * Platform urn for the ML Model in the form of urn:li:platform:{platform_name}
-   */
-  platform: optional DataPlatformUrn
-
-  /**
-   * Fabric type where ML Model belongs to or where it was generated.
-   */
-  origin: optional FabricType
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/TagEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/TagEntity.pdl
deleted file mode 100644
index 154d623dded69..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/TagEntity.pdl
+++ /dev/null
@@ -1,19 +0,0 @@
-namespace com.linkedin.metadata.entity
-
-import com.linkedin.common.TagUrn
-
-/**
- * Data model for a tag entity
- */
-record TagEntity includes BaseEntity {
-
-  /**
-   * Urn for the tag
-   */
-  urn: TagUrn
-
-  /**
-   * Name of the tag
-   */
-  name: optional string
-}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/BaseRelationship.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/BaseRelationship.pdl
deleted file mode 100644
index 9a787f150176c..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/BaseRelationship.pdl
+++ /dev/null
@@ -1,19 +0,0 @@
-namespace com.linkedin.metadata.relationship
-
-import com.linkedin.common.Urn
-
-/**
- * Common fields that apply to all relationships
- */
-record BaseRelationship {
-
-  /**
-   * Urn for the source of the relationship
-   */
-  source: Urn
-
-  /**
-   * Urn for the destination of the relationship
-   */
-  destination: Urn
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Consumes.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Consumes.pdl
deleted file mode 100644
index 1386f0a42c3d5..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Consumes.pdl
+++ /dev/null
@@ -1,11 +0,0 @@
-namespace com.linkedin.metadata.relationship
-
-/**
- * A generic model for the
- */
-@pairings = [ {
-  "destination" : "com.linkedin.common.urn.DatasetUrn",
-  "source" : "com.linkedin.common.urn.DataJobUrn"
-} ]
-record Consumes includes BaseRelationship {
-}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Contains.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Contains.pdl
deleted file mode 100644
index 8de23b7ed02d9..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Contains.pdl
+++ /dev/null
@@ -1,17 +0,0 @@
-namespace com.linkedin.metadata.relationship
-
-/**
- * A generic model for the Has-A relationship
- */
-@pairings = [ {
-  "destination" : "com.linkedin.common.urn.ChartUrn",
-  "source" : "com.linkedin.common.urn.DashboardUrn"
-},{
-  "destination" : "com.linkedin.common.urn.GlossaryTermUrn",
-  "source" : "com.linkedin.common.urn.GlossaryNodeUrn"
-},{
-  "destination" : "com.linkedin.common.urn.GlossaryNodeUrn",
-  "source" : "com.linkedin.common.urn.GlossaryNodeUrn"
-} ]
-record Contains includes BaseRelationship {
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/DownstreamOf.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/DownstreamOf.pdl
deleted file mode 100644
index 0036b3cb1d720..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/DownstreamOf.pdl
+++ /dev/null
@@ -1,21 +0,0 @@
-namespace com.linkedin.metadata.relationship
-
-import com.linkedin.dataset.DatasetLineageType
-
-/**
- * A generic model for the DownstreamOf relationship
- */
-@pairings = [ {
-  "destination" : "com.linkedin.common.urn.DatasetUrn",
-  "source" : "com.linkedin.common.urn.DatasetUrn"
-}, {
-  "destination" : "com.linkedin.common.urn.DatasetUrn",
-  "source" : "com.linkedin.common.urn.ChartUrn"
-} ]
-record DownstreamOf includes BaseRelationship {
-
-  /**
-   * The type of the lineage
-   */
-  type: optional DatasetLineageType
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/EvaluatedOn.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/EvaluatedOn.pdl
deleted file mode 100644
index a8ffb5755b814..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/EvaluatedOn.pdl
+++ /dev/null
@@ -1,12 +0,0 @@
-namespace com.linkedin.metadata.relationship
-
-/**
- * A generic model for the Evaluated-On relationship
- */
-@pairings = [ {
-  "destination" : "com.linkedin.common.urn.DatasetUrn",
-  "source" : "com.linkedin.common.urn.MLModelUrn"
-}]
-record EvaluatedOn includes BaseRelationship {
-
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/IsPartOf.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/IsPartOf.pdl
deleted file mode 100644
index b60b06e655a7a..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/IsPartOf.pdl
+++ /dev/null
@@ -1,12 +0,0 @@
-namespace com.linkedin.metadata.relationship
-
-/**
- * A generic model for the Is-Part-Of relationship
- */
-@pairings = [
-{
-  "destination" : "com.linkedin.common.urn.DataFlowUrn",
-  "source" : "com.linkedin.common.urn.DataJobUrn"
-} ]
-record IsPartOf includes BaseRelationship {
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/OwnedBy.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/OwnedBy.pdl
deleted file mode 100644
index 22d9e1ffbab20..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/OwnedBy.pdl
+++ /dev/null
@@ -1,36 +0,0 @@
-namespace com.linkedin.metadata.relationship
-
-import com.linkedin.common.OwnershipType
-
-/**
- * A generic model for the Owned-By relationship
- */
-@pairings = [ {
-  "destination" : "com.linkedin.common.urn.CorpuserUrn",
-  "source" : "com.linkedin.common.urn.DatasetUrn"
-}, {
-   "destination" : "com.linkedin.common.urn.CorpuserUrn",
-   "source" : "com.linkedin.common.urn.DataProcessUrn"
-}, {
-   "destination" : "com.linkedin.common.urn.CorpuserUrn",
-   "source" : "com.linkedin.common.urn.MLModelUrn"
- }, {
-   "destination" : "com.linkedin.common.urn.CorpuserUrn",
-   "source" : "com.linkedin.common.urn.DataJobUrn"
- }, {
-   "destination" : "com.linkedin.common.urn.CorpuserUrn",
-   "source" : "com.linkedin.common.urn.DataFlowUrn"
-}, {
-    "destination" : "com.linkedin.common.urn.CorpuserUrn",
-    "source" : "com.linkedin.common.urn.GlossaryTermUrn"
-}, {
-    "destination" : "com.linkedin.common.urn.CorpuserUrn",
-    "source" : "com.linkedin.common.urn.GlossaryNodeUrn"
- } ]
-record OwnedBy includes BaseRelationship {
-
-  /**
-   * The type of the ownership
-   */
-  type: OwnershipType
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Produces.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Produces.pdl
deleted file mode 100644
index 7f7001e3e6c43..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Produces.pdl
+++ /dev/null
@@ -1,11 +0,0 @@
-namespace com.linkedin.metadata.relationship
-
-/**
- * A generic model for the
- */
-@pairings = [ {
-  "destination" : "com.linkedin.common.urn.DatasetUrn",
-  "source" : "com.linkedin.common.urn.DataJobUrn"
-} ]
-record Produces includes BaseRelationship {
-}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Relationship.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Relationship.pdl
deleted file mode 100644
index 982c3e888e9b9..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Relationship.pdl
+++ /dev/null
@@ -1,6 +0,0 @@
-namespace com.linkedin.metadata.relationship
-
-/**
- * A union of all supported relationship types.
- */
-typeref Relationship = union[Contains, IsPartOf, OwnedBy, Consumes, Produces]
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/ReportsTo.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/ReportsTo.pdl
deleted file mode 100644
index 6c9f3b9df458d..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/ReportsTo.pdl
+++ /dev/null
@@ -1,11 +0,0 @@
-namespace com.linkedin.metadata.relationship
-
-/**
- * A generic model for the Reports-To relationship
- */
-@pairings = [ {
-  "destination" : "com.linkedin.common.urn.CorpuserUrn",
-  "source" : "com.linkedin.common.urn.CorpuserUrn"
-} ]
-record ReportsTo includes BaseRelationship {
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/RunsBefore.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/RunsBefore.pdl
deleted file mode 100644
index e00d39e11d603..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/RunsBefore.pdl
+++ /dev/null
@@ -1,11 +0,0 @@
-namespace com.linkedin.metadata.relationship
-
-/**
- * A generic model for the
- */
-@pairings = [ {
-  "destination" : "com.linkedin.common.urn.DataJobUrn",
-  "source" : "com.linkedin.common.urn.DataJobUrn"
-} ]
-record RunsBefore includes BaseRelationship {
-}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/TrainedOn.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/TrainedOn.pdl
deleted file mode 100644
index 0b52b11600462..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/TrainedOn.pdl
+++ /dev/null
@@ -1,12 +0,0 @@
-namespace com.linkedin.metadata.relationship
-
-/**
- * A generic model for the Trained-On relationship
- */
-@pairings = [ {
-  "destination" : "com.linkedin.common.urn.DatasetUrn",
-  "source" : "com.linkedin.common.urn.MLModelUrn"
-}]
-record TrainedOn includes BaseRelationship {
-
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/BaseDocument.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/BaseDocument.pdl
deleted file mode 100644
index 48535aaac2e94..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/BaseDocument.pdl
+++ /dev/null
@@ -1,17 +0,0 @@
-namespace com.linkedin.metadata.search
-
-/**
- * Common fields that may apply to all documents
- */
-record BaseDocument {
-
-  /**
-   * Whether the entity has been removed or not
-   */
-  removed: optional boolean = false
-
-  /**
-   * All paths representing the hierarchy of this entity. This is essential for browsing various paths leading to this entity.
-   */
-  browsePaths: optional array[string]
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/ChartDocument.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/ChartDocument.pdl
deleted file mode 100644
index 8365755a70b05..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/ChartDocument.pdl
+++ /dev/null
@@ -1,57 +0,0 @@
-namespace com.linkedin.metadata.search
-
-import com.linkedin.chart.ChartQueryType
-import com.linkedin.chart.ChartType
-import com.linkedin.common.AccessLevel
-import com.linkedin.common.ChartUrn
-
-/**
- * Data model for Chart entity search
- */
-record ChartDocument includes BaseDocument {
-
-  /**
-   * Urn for the Chart
-   */
-  urn: ChartUrn
-
-  /**
-   * Title of the chart
-   */
-  title: optional string
-
-  /**
-   * Detailed description about the chart
-   */
-  description: optional string
-
-  /**
-   * Dashboard tool ex: Looker, Redash
-   */
-  tool: optional string
-
-  /**
-   * Chart query type
-   */
-  queryType: optional ChartQueryType
-
-  /**
-   * LDAP usernames of corp users who are the owners of this chart
-   */
-  owners: optional array[string]
-
-  /**
-   * Type of the chart
-   */
-  type: optional ChartType
-
-  /**
-   * Access level for the chart
-   */
-  access: optional AccessLevel
-
-  /**
-   * List of tags for this dataset
-   */
-  tags: optional array[string]
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/CorpGroupDocument.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/CorpGroupDocument.pdl
deleted file mode 100644
index 75acb19f4a811..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/CorpGroupDocument.pdl
+++ /dev/null
@@ -1,34 +0,0 @@
-namespace com.linkedin.metadata.search
-
-import com.linkedin.common.CorpGroupUrn
-
-/**
- * Data model for Corp Group entity search
- */
-record CorpGroupDocument includes BaseDocument {
-
-  /**
-   * Urn for the Corp group.
-   */
-  urn: CorpGroupUrn
-
-  /**
-   * Email of the corp group
-   */
-  email: optional string
-
-  /**
-   * ldap usernames of corp users who are direct members of this group
-   */
-  members: optional array[string]
-
-  /**
-   * ldap usernames of corp users who are direct admins of this group
-   */
-  admins: optional array[string]
-
-  /**
-   * List of group names who are part of this group
-   */
-  groups: optional array[string]
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/CorpUserInfoDocument.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/CorpUserInfoDocument.pdl
deleted file mode 100644
index 9ab24cf19a486..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/CorpUserInfoDocument.pdl
+++ /dev/null
@@ -1,59 +0,0 @@
-namespace com.linkedin.metadata.search
-
-import com.linkedin.common.CorpuserUrn
-
-/**
- * Data model for CorpUserInfo entity search
- */
-record CorpUserInfoDocument includes BaseDocument {
-
-  /**
-   * Urn for the CorpUser.
-   */
-  urn: CorpuserUrn
-
-  /**
-   * ldap of the CorpUser
-   */
-  ldap: optional string
-
-  /**
-   * title of the CorpUser
-   */
-  title: optional string
-
-  /**
-   * direct manager's ldap of the CorpUser
-   */
-  managerLdap: optional string
-
-  /**
-   * Common name of the CorpUser, format is firstName + lastName (split by a whitespace)
-   */
-  fullName: optional string
-
-  /**
-   * About me section of the user
-   */
-  aboutMe: optional string
-
-  /**
-   * Teams that the user belongs to e.g. Metadata
-   */
-  teams: optional array[string]
-
-  /**
-   * Skills that the user possesses e.g. Machine Learning
-   */
-  skills: optional array[string]
-
-  /**
-   * Whether the corpUser is active, ref: https://iwww.corp.linkedin.com/wiki/cf/display/GTSD/Accessing+Active+Directory+via+LDAP+tools
-   */
-  active: optional boolean
-
-  /**
-   * The user's full email(s).
-   */
-  emails: optional array[string]
-}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DashboardDocument.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DashboardDocument.pdl
deleted file mode 100644
index a18adda7c998f..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DashboardDocument.pdl
+++ /dev/null
@@ -1,45 +0,0 @@
-namespace com.linkedin.metadata.search
-
-import com.linkedin.common.AccessLevel
-import com.linkedin.common.DashboardUrn
-
-/**
- * Data model for Chart entity search
- */
-record DashboardDocument includes BaseDocument {
-
-  /**
-   * Urn for the Dashboard
-   */
-  urn: DashboardUrn
-
-  /**
-   * Title of the dashboard
-   */
-  title: optional string
-
-  /**
-   * Detailed description about the dashboard
-   */
-  description: optional string
-
-  /**
-   * Dashboard tool ex: Looker, Redash
-   */
-  tool: optional string
-
-  /**
-   * LDAP usernames of corp users who are the owners of this dashboard
-   */
-  owners: optional array[string]
-
-  /**
-   * Access level for the dashboard
-   */
-  access: optional AccessLevel
-
-  /**
-   * List of tags for this dataset
-   */
-  tags: optional array[string]
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DataFlowDocument.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DataFlowDocument.pdl
deleted file mode 100644
index fec19a88c3b8c..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DataFlowDocument.pdl
+++ /dev/null
@@ -1,60 +0,0 @@
-namespace com.linkedin.metadata.search
-
-import com.linkedin.common.AccessLevel
-import com.linkedin.common.DataFlowUrn
-
-/**
- * Data model for DataFlow entity search
- */
-record DataFlowDocument includes BaseDocument {
-
-  /**
-   * Urn for the DataFlow
-   */
-  urn: DataFlowUrn
-
-  /**
-   * Id of the flow
-   */
-  flowId: optional string
-
-  /**
-   * Name of the flow
-   */
-  name: optional string
-
-  /**
-   * Description of the flow
-   */
-  description: optional string
-
-  /**
-   * Workflow orchestrator ex: Azkaban, Airflow
-   */
-  orchestrator: optional string
-
-  /**
-   * Cluster of the flow
-   */
-  cluster: optional string
-
-  /**
-   * Project of the flow
-   */
-  project: optional string
-
-  /**
-   * LDAP usernames of corp users who are the owners of this flow
-   */
-  owners: optional array[string]
-
-  /**
-   * Flag to indicate if the flow has non empty corp users as owners or not.
-   */
-  hasOwners: optional boolean
-
-  /**
-   * List of tags for this dataset
-   */
-  tags: optional array[string]
-}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DataJobDocument.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DataJobDocument.pdl
deleted file mode 100644
index d0b1d3a0c1c3b..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DataJobDocument.pdl
+++ /dev/null
@@ -1,77 +0,0 @@
-namespace com.linkedin.metadata.search
-
-import com.linkedin.common.AccessLevel
-import com.linkedin.common.DataJobUrn
-import com.linkedin.common.DatasetUrn
-
-
-/**
- * Data model for DataJob entity search
- */
-record DataJobDocument includes BaseDocument {
-
-  /*
-   * Urn for the DataJob
-   */
-  urn: DataJobUrn
-
-  /**
-   * Optional description of the job
-   */
-  description: optional string
-
-  /**
-   * Optional name of the job
-   */
-  name: optional string
-
-  /**
-   * Name of the associated data flow
-   */
-  dataFlow: optional string
-
-  /**
-   * Id of the job
-   */
-  jobId: optional string
-
-  /**
-   * LDAP usernames of corp users who are the owners of this job
-   */
-  owners: optional array[string]
-
-  /**
-   * Flag to indicate if the job has non empty corp users as owners or not.
-   */
-  hasOwners: optional boolean
-
-  /**
-   * Lineage information represented by the number of immediate input datasets of this job.
-   */
-  numInputDatasets: optional long
-
-  /**
-   * Lineage information represented by the number of immediate output datasets of this job.
-   */
-  numOutputDatasets: optional long
-
-  /**
-   * List of inputs for this job
-   */
-  inputs: optional array[DatasetUrn]
-
-  /**
-   * List of outputs for this job
-   */
-  outputs: optional array[DatasetUrn]
-
-  /**
-   * Workflow orchestrator ex: Azkaban, Airflow
-   */
-  orchestrator: optional string
-
-  /**
-   * List of tags for this dataset
-   */
-  tags: optional array[string]
-}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DataProcessDocument.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DataProcessDocument.pdl
deleted file mode 100644
index b4179129a5fb5..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DataProcessDocument.pdl
+++ /dev/null
@@ -1,61 +0,0 @@
-namespace com.linkedin.metadata.search
-
-import com.linkedin.common.DataProcessUrn
-import com.linkedin.common.DatasetUrn
-import com.linkedin.common.FabricType
-
-/**
- * Data model for data process entity search
- */
-record DataProcessDocument includes BaseDocument {
-
-  /**
-   * Urn for the data process
-   */
-  urn: DataProcessUrn
-
-  /**
-   * Process native name e.g. a ETL script name
-   */
-  name: optional string
-
-  /**
-   * Orchestrator name for this process, such as Azure Data Factory
-   */
-  orchestrator: optional string
-
-  /**
-   * Fabric type where data process belongs to or where it was generated
-   */
-  origin: optional FabricType
-
-  /**
-   * LDAP usernames of corp users who are the owners of this process
-   */
-  owners: optional array[string]
-
-  /**
-   * Flag to indicate if the process has non empty corp users as owners or not.
-   */
-  hasOwners: optional boolean
-
-  /**
-   * Lineage information represented by the number of immediate input datasets of this process.
-   */
-  numInputDatasets: optional long
-
-  /**
-   * Lineage information represented by the number of immediate output datasets of this process.
-   */
-  numOutputDatasets: optional long
-
-  /**
-   * List of inputs for this process
-   */
-  inputs: optional array[DatasetUrn]
-
-  /**
-   * List of outputs for this process
-   */
-  outputs: optional array[DatasetUrn]
-}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DatasetDocument.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DatasetDocument.pdl
deleted file mode 100644
index b5cc13939ab7e..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/DatasetDocument.pdl
+++ /dev/null
@@ -1,100 +0,0 @@
-namespace com.linkedin.metadata.search
-
-import com.linkedin.common.DatasetUrn
-import com.linkedin.common.FabricType
-
-/**
- * Data model for dataset entity search
- */
-record DatasetDocument includes BaseDocument {
-
-  /**
-   * Urn for the dataset
-   */
-  urn: DatasetUrn
-
-  /**
-   * Dataset native name e.g. {db}.{table}, /dir/subdir/{name}, or {name}
-   */
-  name: optional string
-
-  /**
-   * Platform name for the dataset
-   */
-  platform: optional string
-
-  /**
-   * Fabric type where dataset belongs to or where it was generated
-   */
-  origin: optional FabricType
-
-  /**
-   * LDAP usernames of corp users who are the owners of this dataset
-   */
-  owners: optional array[string]
-
-  /**
-   * Flag to indicate if the dataset is deprecated.
-   */
-  deprecated: optional boolean
-
-  /**
-   * Documentation of the dataset.
-   */
-  description: optional string
-
-  /**
-   * Field paths of the dataset
-   */
-  fieldPaths: optional array[string]
-
-  /**
-   * Flag to indicate if the dataset has non empty corp users as owners or not.
-   */
-  hasOwners: optional boolean
-
-  /**
-   * Flag to indicate if the dataset has non-empty schema or not.
-   */
-  hasSchema: optional boolean
-
-  /**
-   * Lineage information represented by the number of immediate downstream datasets of this dataset.
-   */
-  numDownstreamDatasets: optional long
-
-  /**
-   * List of upstreams for this dataset
-   */
-  upstreams: optional array[DatasetUrn]
-
-  /**
-   * List of tags for this dataset
-   */
-  tags: optional array[string]
-
-  /**
-   * List of field descriptions
-   */
-  fieldDescriptions: optional array[string]
-
-  /**
-   * List of tags applied to fields
-   */
-  fieldTags: optional array[string]
-
-  /**
-   * List of field descriptions
-   */
-  editedFieldDescriptions: optional array[string]
-
-  /**
-   * List of tags applied to fields
-   */
-  editedFieldTags: optional array[string]
-
-  /**
-   * List of terms for this dataset
-   */
-  glossaryTerms: optional array[string]
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/Document.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/Document.pdl
deleted file mode 100644
index d4482e953d36a..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/Document.pdl
+++ /dev/null
@@ -1,6 +0,0 @@
-namespace com.linkedin.metadata.search
-
-/**
- * A union of all supported document types.
- */
-typeref Document = union[CorpUserInfoDocument, DatasetDocument]
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/GlossaryNodeInfoDocument.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/GlossaryNodeInfoDocument.pdl
deleted file mode 100644
index 5205caf45220f..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/GlossaryNodeInfoDocument.pdl
+++ /dev/null
@@ -1,29 +0,0 @@
-namespace com.linkedin.metadata.search
-
-import com.linkedin.common.GlossaryNodeUrn
-
-/**
- * Data model for GlossaryNodeInfo entity search
- */
-record GlossaryNodeInfoDocument includes BaseDocument {
-
-  /**
-   * Urn for the GlossaryNode.
-   */
-  urn: GlossaryNodeUrn
-
-  /**
-   * Name of business node
-   */
-  name: optional string
-
-  /**
-   * Definition of business node
-   */
-  definition: optional string
-
-  /**
-   * LDAP usernames of corp users who are the owners of this business node
-   */
-  owners: optional array[string]
-}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/GlossaryTermInfoDocument.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/GlossaryTermInfoDocument.pdl
deleted file mode 100644
index c21ebaab0b99e..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/GlossaryTermInfoDocument.pdl
+++ /dev/null
@@ -1,39 +0,0 @@
-namespace com.linkedin.metadata.search
-
-import com.linkedin.common.GlossaryTermUrn
-
-/**
- * Data model for GlossaryTermInfo entity search
- */
-record GlossaryTermInfoDocument includes BaseDocument {
-
-  /**
-   * Urn for the GlossaryTerm.
-   */
-  urn: GlossaryTermUrn
-
-  /**
-   * Name of business term
-   */
-  name: optional string
-
-  /**
-   * Definition of business term
-   */
-  definition: optional string
-
-  /**
-   * LDAP usernames of corp users who are the owners of this dataset
-   */
-  owners: optional array[string]
-
-  /**
-   * Source of the Business Term (INTERNAL or EXTERNAL) with default value as INTERNAL
-   */
-  termSource: optional string
-
-  /**
-   * External Reference to the business-term (URL)
-   */
-  sourceRef: optional string
-}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/MLModelDocument.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/MLModelDocument.pdl
deleted file mode 100644
index c3a7fcfed223e..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/MLModelDocument.pdl
+++ /dev/null
@@ -1,71 +0,0 @@
-namespace com.linkedin.metadata.search
-
-import com.linkedin.common.MLModelUrn
-import com.linkedin.common.DatasetUrn
-import com.linkedin.common.FabricType
-
-/**
- * Data model for ML Model entity search
- */
-record MLModelDocument includes BaseDocument {
-
-  /**
-   * Urn for the model
-   */
-  urn: MLModelUrn
-
-  /**
-   * Name of model
-   */
-  name: optional string
-
-  /**
-   * Platform name for the model
-   */
-  platform: optional string
-
-  /**
-   * Fabric type where model belongs to or where it was generated
-   */
-  origin: optional FabricType
-
-  /**
-  * Description of the model
-  */
-  description: optional string
-
-  /**
-  * Timestamp model was created
-  */
-  createdTimestamp: optional long
-
-  /**
-  * Whether or not the Model has owners
-  */
-  hasOwners: optional boolean
-
-  /**
-  * LDAP usernames of corp users who are the owners of this model
-  */
-  owners: optional array[string]
-
-  /**
-  * Type of Algorithm or Model such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc
-  */
-  type: optional string
-
-  /**
-  * What datasets were used to train the model?
-  */
-  trainingDatasets: optional array[DatasetUrn]
-
-  /**
-  * What datasets were used to evaluate the model?
-  */
-  evaluationDatasets: optional array[DatasetUrn]
-
-  /**
-  * Whether or not the model is currently active
-  */
-  active: optional boolean
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/TagDocument.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/TagDocument.pdl
deleted file mode 100644
index abab05f22d303..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/TagDocument.pdl
+++ /dev/null
@@ -1,19 +0,0 @@
-namespace com.linkedin.metadata.search
-
-import com.linkedin.common.TagUrn
-
-/**
- * Data model for tag entity search
- */
-record TagDocument includes BaseDocument {
-
-  /**
-   * Urn for the dataset
-   */
-  urn: TagUrn
-
-  /**
-   * Tag name e.g. `Legacy`
-   */
-  name: optional string
-}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/mxe/MetadataGraphEvent.pdl b/metadata-models/src/main/pegasus/com/linkedin/mxe/MetadataGraphEvent.pdl
deleted file mode 100644
index 66993993a1ec4..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/mxe/MetadataGraphEvent.pdl
+++ /dev/null
@@ -1,31 +0,0 @@
-namespace com.linkedin.mxe
-
-import com.linkedin.avro2pegasus.events.KafkaAuditHeader
-import com.linkedin.metadata.entity.Entity
-import com.linkedin.metadata.relationship.Relationship
-
-/**
- * Kafka event for capturing update made to a list of entities and relationships.
- */
-record MetadataGraphEvent {
-
-  /**
-   * Kafka audit header. See go/kafkaauditheader for more info.
-   */
-  auditHeader: optional KafkaAuditHeader
-
-  /**
-   * A list of entity updates-or-inserts. Only fields updated are set in the case of partial update.
-   */
-  upsertedEntities: array[Entity]
-
-  /**
-   * A list of removed relationships. Only fields used to identify the relationships to remove are set.
-   */
-  removedRelationships: array[Relationship]
-
-  /**
-   * A list of relationship updates-or-inserts.
-   */
-  upsertedRelationships: array[Relationship]
-}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/mxe/MetadataSearchEvent.pdl b/metadata-models/src/main/pegasus/com/linkedin/mxe/MetadataSearchEvent.pdl
deleted file mode 100644
index 79d434ae044bf..0000000000000
--- a/metadata-models/src/main/pegasus/com/linkedin/mxe/MetadataSearchEvent.pdl
+++ /dev/null
@@ -1,20 +0,0 @@
-namespace com.linkedin.mxe
-
-import com.linkedin.avro2pegasus.events.KafkaAuditHeader
-import com.linkedin.metadata.search.Document
-
-/**
- * Kafka event for capturing update made to a list of search documents.
- */
-record MetadataSearchEvent {
-
-  /**
-   * Kafka audit header. See go/kafkaauditheader for more info.
-   */
-  auditHeader: optional KafkaAuditHeader
-
-  /**
-   * A list of search document updates-or-inserts. Only fields updated are set in the case of partial update.
-   */
-  upsertedDocuments: array[Document]
-}
\ No newline at end of file
diff --git a/metadata-models/src/test/java/com/linkedin/metadata/ModelValidation.java b/metadata-models/src/test/java/com/linkedin/metadata/ModelValidation.java
index e3e2bbfbb650e..11be355a7a37b 100644
--- a/metadata-models/src/test/java/com/linkedin/metadata/ModelValidation.java
+++ b/metadata-models/src/test/java/com/linkedin/metadata/ModelValidation.java
@@ -5,8 +5,6 @@
 import com.linkedin.data.template.UnionTemplate;
 import com.linkedin.metadata.validator.AspectValidator;
 import com.linkedin.metadata.validator.DeltaValidator;
-import com.linkedin.metadata.validator.EntityValidator;
-import com.linkedin.metadata.validator.RelationshipValidator;
 import com.linkedin.metadata.validator.SnapshotValidator;
 import java.io.IOException;
 import java.util.List;
@@ -15,30 +13,14 @@
 import javax.annotation.Nonnull;
 import org.testng.annotations.Test;
 
-import static com.linkedin.metadata.ModelValidationConstants.*;
-import static org.testng.AssertJUnit.*;
+import static com.linkedin.metadata.ModelValidationConstants.IGNORED_ASPECT_CLASSES;
+import static com.linkedin.metadata.ModelValidationConstants.IGNORED_DELTA_CLASSES;
+import static com.linkedin.metadata.ModelValidationConstants.IGNORED_SNAPSHOT_CLASSES;
+import static org.testng.AssertJUnit.assertFalse;
 
 
 public class ModelValidation {
 
-  @Test
-  public void validateEntities() throws Exception {
-    List<? extends Class<? extends RecordTemplate>> entities =
-        getRecordTemplatesInPackage("com.linkedin.metadata.entity", IGNORED_ENTITY_CLASSES);
-
-    assertFalse("Failed to find any entities", entities.isEmpty());
-    entities.forEach(EntityValidator::validateEntitySchema);
-  }
-
-  @Test
-  public void validateRelationships() throws Exception {
-    List<? extends Class<? extends RecordTemplate>> relationships =
-        getRecordTemplatesInPackage("com.linkedin.metadata.relationship", IGNORED_RELATIONSHIP_CLASSES);
-
-    assertFalse("Failed to find any relationships", relationships.isEmpty());
-    relationships.forEach(RelationshipValidator::validateRelationshipSchema);
-  }
-
   @Test
   public void validateAspects() throws Exception {
     List<? extends Class<? extends UnionTemplate>> aspects =
@@ -53,7 +35,6 @@ public void validateSnapshots() throws Exception {
     List<? extends Class<? extends RecordTemplate>> snapshots =
         getRecordTemplatesInPackage("com.linkedin.metadata.snapshot", IGNORED_SNAPSHOT_CLASSES);
 
-
     assertFalse("Failed to find any snapshots", snapshots.isEmpty());
     snapshots.forEach(SnapshotValidator::validateSnapshotSchema);
   }
diff --git a/metadata-models/src/test/java/com/linkedin/metadata/ModelValidationConstants.java b/metadata-models/src/test/java/com/linkedin/metadata/ModelValidationConstants.java
index a2117471d3f6d..11fa8cdc965d4 100644
--- a/metadata-models/src/test/java/com/linkedin/metadata/ModelValidationConstants.java
+++ b/metadata-models/src/test/java/com/linkedin/metadata/ModelValidationConstants.java
@@ -3,9 +3,6 @@
 import com.google.common.collect.ImmutableSet;
 import com.linkedin.data.template.RecordTemplate;
 import com.linkedin.data.template.UnionTemplate;
-import com.linkedin.metadata.entity.BaseEntity;
-import com.linkedin.metadata.relationship.BaseRelationship;
-import com.linkedin.metadata.search.BaseDocument;
 import java.util.Set;
 
 
@@ -15,13 +12,6 @@ private ModelValidationConstants() {
     // Util class
   }
 
-  static final Set<Class<? extends RecordTemplate>> IGNORED_ENTITY_CLASSES = ImmutableSet.of(BaseEntity.class);
-
-  static final Set<Class<? extends RecordTemplate>> IGNORED_RELATIONSHIP_CLASSES =
-      ImmutableSet.of(BaseRelationship.class);
-
-  static final Set<Class<? extends RecordTemplate>> IGNORED_DOCUMENT_CLASSES = ImmutableSet.of(BaseDocument.class);
-
   static final Set<Class<? extends UnionTemplate>> IGNORED_ASPECT_CLASSES = ImmutableSet.of();
 
   static final Set<Class<? extends RecordTemplate>> IGNORED_SNAPSHOT_CLASSES = ImmutableSet.of();

From d6ec5e7257ca15b0a0032204ece85781a9201c0b Mon Sep 17 00:00:00 2001
From: Aseem Bansal <asmbansal2@gmail.com>
Date: Fri, 3 Dec 2021 21:00:35 +0530
Subject: [PATCH 3/6] fix(docs): fix build on m1 (#3662)

---
 docs-website/build.gradle | 2 +-
 docs-website/sidebars.js  | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs-website/build.gradle b/docs-website/build.gradle
index 0d591ca615264..b5edf446a6c6e 100644
--- a/docs-website/build.gradle
+++ b/docs-website/build.gradle
@@ -12,7 +12,7 @@ node {
   }
 
   // Version of node to use.
-  version = '14.15.3'
+  version = '16.8.0'
 
   // Version of Yarn to use.
   yarnVersion = '1.22.0'
diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js
index ab265311733f9..36c178058c889 100644
--- a/docs-website/sidebars.js
+++ b/docs-website/sidebars.js
@@ -221,6 +221,8 @@ module.exports = {
           // WIP "docs/advanced/entity-hierarchy",
           // WIP "docs/advanced/partial-update",
           // WIP "docs/advanced/pdl-best-practices",
+          // WIP "docs/introducing-metadata-service-authentication"
+          // WIP "metadata-models-custom/README"
         ],
       },
     ],

From 1afcb661bd5978e1ca58974864429e1f017f998c Mon Sep 17 00:00:00 2001
From: Harshal Sheth <hsheth2@gmail.com>
Date: Fri, 3 Dec 2021 17:07:07 -0500
Subject: [PATCH 4/6] feat(ingest): add --strict-warnings option (#3665)

---
 metadata-ingestion/src/datahub/cli/ingest_cli.py         | 9 +++++++--
 metadata-ingestion/src/datahub/ingestion/run/pipeline.py | 4 ++--
 metadata-ingestion/tests/integration/mysql/test_mysql.py | 4 +++-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py
index 3a55899c30256..2c4317013ef5a 100644
--- a/metadata-ingestion/src/datahub/cli/ingest_cli.py
+++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py
@@ -56,7 +56,12 @@ def ingest() -> None:
     default=False,
     help="Perform limited ingestion from the source to the sink to get a quick preview.",
 )
-def run(config: str, dry_run: bool, preview: bool) -> None:
+@click.option(
+    "--strict-warnings/--no-strict-warnings",
+    default=False,
+    help="If enabled, ingestion runs with warnings will yield a non-zero error code",
+)
+def run(config: str, dry_run: bool, preview: bool, strict_warnings: bool) -> None:
     """Ingest metadata into DataHub."""
     logger.debug("DataHub CLI version: %s", datahub_package.nice_version_name())
 
@@ -73,7 +78,7 @@ def run(config: str, dry_run: bool, preview: bool) -> None:
     logger.info("Starting metadata ingestion")
     pipeline.run()
     logger.info("Finished metadata ingestion")
-    ret = pipeline.pretty_print_summary()
+    ret = pipeline.pretty_print_summary(warnings_as_failure=strict_warnings)
     sys.exit(ret)
 
 
diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
index ab9dfaba2b078..213cd09a965ef 100644
--- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
+++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
@@ -189,7 +189,7 @@ def raise_from_status(self, raise_warnings: bool = False) -> None:
                 "Source reported warnings", self.source.get_report()
             )
 
-    def pretty_print_summary(self) -> int:
+    def pretty_print_summary(self, warnings_as_failure: bool = False) -> int:
         click.echo()
         click.secho(f"Source ({self.config.source.type}) report:", bold=True)
         click.echo(self.source.get_report().as_string())
@@ -201,7 +201,7 @@ def pretty_print_summary(self) -> int:
             return 1
         elif self.source.get_report().warnings or self.sink.get_report().warnings:
             click.secho("Pipeline finished with warnings", fg="yellow", bold=True)
-            return 0
+            return 1 if warnings_as_failure else 0
         else:
             click.secho("Pipeline finished successfully", fg="green", bold=True)
             return 0
diff --git a/metadata-ingestion/tests/integration/mysql/test_mysql.py b/metadata-ingestion/tests/integration/mysql/test_mysql.py
index 2a548ee6c0abe..65b241b51a059 100644
--- a/metadata-ingestion/tests/integration/mysql/test_mysql.py
+++ b/metadata-ingestion/tests/integration/mysql/test_mysql.py
@@ -24,7 +24,9 @@ def test_mysql_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time):
         runner = CliRunner()
         with fs_helpers.isolated_filesystem(tmp_path):
             config_file = (test_resources_dir / "mysql_to_file.yml").resolve()
-            result = runner.invoke(datahub, ["ingest", "-c", f"{config_file}"])
+            result = runner.invoke(
+                datahub, ["ingest", "--strict-warnings", "-c", f"{config_file}"]
+            )
             assert_result_ok(result)
 
             # Verify the output.

From 999e9e5b496a1d8c6d4c0ae449cbd140fbb983f4 Mon Sep 17 00:00:00 2001
From: Dexter Lee <dexter@acryl.io>
Date: Fri, 3 Dec 2021 18:13:11 -0800
Subject: [PATCH 5/6] fix(search): Improve search and recs performance (#3660)

---
 .../resolvers/group/EntityCountsResolver.java |   2 +
 .../ListRecommendationsResolver.java          |   2 +
 .../resolvers/search/SearchResolver.java      |   2 +
 .../datahubusage/DataHubUsageEventType.java   |   4 +-
 .../graph/elastic/ESGraphWriteDAO.java        |  60 +++-------
 .../elastic/ElasticSearchGraphService.java    |   7 +-
 .../RecommendationsService.java               |   2 +
 .../EntitySearchAggregationSource.java        |  28 ++---
 .../candidatesource/MostPopularSource.java    |   2 +
 .../candidatesource/RecentlyViewedSource.java |   2 +
 .../candidatesource/RecommendationSource.java |   2 +
 .../metadata/search/EntitySearchService.java  |   4 +-
 .../elasticsearch/ElasticSearchService.java   |   6 +-
 ...{IndexBuilder.java => ESIndexBuilder.java} |  47 +++++---
 .../indexbuilder/EntityIndexBuilder.java      |   5 +-
 ...Builders.java => EntityIndexBuilders.java} |   7 +-
 .../indexbuilder/SettingsBuilder.java         |   4 +-
 .../elasticsearch/query/ESSearchDAO.java      |  39 ++++---
 .../query/request/SearchRequestHandler.java   |  31 ++++--
 .../elasticsearch/update/ESWriteDAO.java      |  20 +---
 .../SearchDocumentTransformer.java            |  19 ++--
 .../systemmetadata/ESSystemMetadataDAO.java   |  57 ++++------
 .../ElasticSearchSystemMetadataService.java   |  12 +-
 .../ElasticSearchTimeseriesAspectService.java |  24 +---
 .../TimeseriesAspectIndexBuilders.java        |   9 +-
 .../graph/ElasticSearchGraphServiceTest.java  |  83 +++++++-------
 ...ySearchAggregationCandidateSourceTest.java | 104 ++----------------
 .../metadata/search/SearchServiceTest.java    |  11 +-
 .../ElasticSearchServiceTest.java             |  29 ++++-
 .../SearchDocumentTransformerTest.java        |   6 +-
 ...lasticSearchSystemMetadataServiceTest.java |  10 +-
 ...sticSearchTimeseriesAspectServiceTest.java |  40 ++++---
 .../kafka/MetadataAuditEventsProcessor.java   |  10 +-
 .../kafka/MetadataChangeLogProcessor.java     |  24 ++--
 .../common/GlossaryTermAssociation.pdl        |   2 +-
 .../com/linkedin/common/TagAssociation.pdl    |   2 +-
 .../schema/EditableSchemaFieldInfo.pdl        |   4 +-
 .../com/linkedin/schema/SchemaField.pdl       |   6 +-
 .../ElasticSearchGraphServiceFactory.java     |  42 ++-----
 ...ticSearchSystemMetadataServiceFactory.java |  40 ++-----
 .../common/IndexConventionFactory.java        |   2 +-
 .../BaseElasticSearchComponentsFactory.java   |  56 ++++++++++
 .../ElasticSearchBulkProcessorFactory.java    |  52 +++++++++
 .../ElasticSearchIndexBuilderFactory.java     |  36 ++++++
 .../search/ElasticSearchServiceFactory.java   |  43 ++------
 .../SearchDocumentTransformerFactory.java     |  21 ++++
 ...cSearchTimeseriesAspectServiceFactory.java |  34 ++----
 .../src/main/resources/application.yml        |   6 +-
 .../entity/client/JavaEntityClient.java       |   2 +
 49 files changed, 542 insertions(+), 520 deletions(-)
 rename metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/{IndexBuilder.java => ESIndexBuilder.java} (80%)
 rename metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/{ESIndexBuilders.java => EntityIndexBuilders.java} (79%)
 create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/BaseElasticSearchComponentsFactory.java
 create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java
 create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java
 create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchDocumentTransformerFactory.java

diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/EntityCountsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/EntityCountsResolver.java
index 8edce498219af..9fba4b8ca7712 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/EntityCountsResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/EntityCountsResolver.java
@@ -8,6 +8,7 @@
 import com.linkedin.entity.client.EntityClient;
 import graphql.schema.DataFetcher;
 import graphql.schema.DataFetchingEnvironment;
+import io.opentelemetry.extension.annotations.WithSpan;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.CompletableFuture;
@@ -25,6 +26,7 @@ public EntityCountsResolver(final EntityClient entityClient) {
   }
 
   @Override
+  @WithSpan
   public CompletableFuture<EntityCountResults> get(final DataFetchingEnvironment environment) throws Exception {
 
     final QueryContext context = environment.getContext();
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java
index 123ac2a3acc53..2c00eee5f9106 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java
@@ -21,6 +21,7 @@
 import com.linkedin.metadata.recommendation.SearchRequestContext;
 import graphql.schema.DataFetcher;
 import graphql.schema.DataFetchingEnvironment;
+import io.opentelemetry.extension.annotations.WithSpan;
 import java.net.URISyntaxException;
 import java.util.Collections;
 import java.util.List;
@@ -41,6 +42,7 @@ public class ListRecommendationsResolver implements DataFetcher<CompletableFutur
 
   private final RecommendationsService _recommendationsService;
 
+  @WithSpan
   @Override
   public CompletableFuture<ListRecommendationsResult> get(DataFetchingEnvironment environment) {
     final ListRecommendationsInput input =
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java
index b91e98a6ac007..78e39ae346efc 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java
@@ -8,6 +8,7 @@
 import com.linkedin.entity.client.EntityClient;
 import graphql.schema.DataFetcher;
 import graphql.schema.DataFetchingEnvironment;
+import io.opentelemetry.extension.annotations.WithSpan;
 import java.util.concurrent.CompletableFuture;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
@@ -28,6 +29,7 @@ public class SearchResolver implements DataFetcher<CompletableFuture<SearchResul
   private final EntityClient _entityClient;
 
   @Override
+  @WithSpan
   public CompletableFuture<SearchResults> get(DataFetchingEnvironment environment) {
     final SearchInput input = bindArgument(environment.getArgument("input"), SearchInput.class);
     final String entityName = EntityTypeMapper.getName(input.getType());
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java b/metadata-io/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java
index c4beceaf24a35..37ea2bf29bac3 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java
@@ -14,7 +14,9 @@ public enum DataHubUsageEventType {
   BROWSE_RESULT_CLICK_EVENT("BrowseResultClickEvent"),
   ENTITY_VIEW_EVENT("EntityViewEvent"),
   ENTITY_SECTION_VIEW_EVENT("EntitySectionViewEvent"),
-  ENTITY_ACTION_EVENT("EntityActionEvent");
+  ENTITY_ACTION_EVENT("EntityActionEvent"),
+  RECOMMENDATION_IMPRESSION_EVENT("RecommendationImpressionEvent"),
+  RECOMMENDATION_CLICK_EVENT("RecommendationClickEvent");
 
   private final String type;
 
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java
index 765467f2da629..607c3425026b3 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java
@@ -2,50 +2,33 @@
 
 import com.linkedin.metadata.query.filter.Filter;
 import com.linkedin.metadata.query.filter.RelationshipFilter;
-import com.linkedin.metadata.search.elasticsearch.update.BulkListener;
-
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import java.io.IOException;
 import java.util.List;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
+import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.action.bulk.BackoffPolicy;
 import org.elasticsearch.action.bulk.BulkProcessor;
 import org.elasticsearch.action.index.IndexRequest;
 import org.elasticsearch.action.update.UpdateRequest;
 import org.elasticsearch.client.RequestOptions;
 import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.index.query.BoolQueryBuilder;
 import org.elasticsearch.index.reindex.BulkByScrollResponse;
 import org.elasticsearch.index.reindex.DeleteByQueryRequest;
 
-import static com.linkedin.metadata.graph.elastic.ESGraphQueryDAO.*;
-import static com.linkedin.metadata.graph.elastic.ElasticSearchGraphService.*;
+import static com.linkedin.metadata.graph.elastic.ESGraphQueryDAO.buildQuery;
+import static com.linkedin.metadata.graph.elastic.ElasticSearchGraphService.INDEX_NAME;
 
 
 @Slf4j
+@RequiredArgsConstructor
 public class ESGraphWriteDAO {
-  private final BulkProcessor bulkProcessor;
-  private final IndexConvention indexConvention;
   private final RestHighLevelClient client;
-
-  public ESGraphWriteDAO(RestHighLevelClient searchClient, IndexConvention indexConvention, int bulkRequestsLimit, int bulkFlushPeriod, int numRetries,
-      long retryInterval) {
-    this.client = searchClient;
-    this.indexConvention = indexConvention;
-    this.bulkProcessor = BulkProcessor.builder(
-        (request, bulkListener) -> {
-            searchClient.bulkAsync(request, RequestOptions.DEFAULT, bulkListener);
-        },
-        BulkListener.getInstance())
-        .setBulkActions(bulkRequestsLimit)
-        .setFlushInterval(TimeValue.timeValueSeconds(bulkFlushPeriod))
-        .setBackoffPolicy(BackoffPolicy.constantBackoff(TimeValue.timeValueSeconds(retryInterval), numRetries))
-        .build();
-  }
+  private final IndexConvention indexConvention;
+  private final BulkProcessor bulkProcessor;
 
   /**
    * Updates or inserts the given search document.
@@ -54,28 +37,21 @@ public ESGraphWriteDAO(RestHighLevelClient searchClient, IndexConvention indexCo
    * @param docId the ID of the document
    */
   public void upsertDocument(@Nonnull String docId, @Nonnull String document) {
-    final IndexRequest indexRequest = new IndexRequest(indexConvention.getIndexName(INDEX_NAME)).id(docId).source(document, XContentType.JSON);
-    final UpdateRequest updateRequest = new UpdateRequest(indexConvention.getIndexName(INDEX_NAME), docId).doc(document, XContentType.JSON)
-        .detectNoop(false)
-        .upsert(indexRequest);
+    final IndexRequest indexRequest =
+        new IndexRequest(indexConvention.getIndexName(INDEX_NAME)).id(docId).source(document, XContentType.JSON);
+    final UpdateRequest updateRequest =
+        new UpdateRequest(indexConvention.getIndexName(INDEX_NAME), docId).doc(document, XContentType.JSON)
+            .detectNoop(false)
+            .upsert(indexRequest);
     bulkProcessor.add(updateRequest);
   }
 
-  public BulkByScrollResponse deleteByQuery(
-      @Nullable final String sourceType,
-      @Nonnull  final Filter sourceEntityFilter,
-      @Nullable final String destinationType,
-      @Nonnull final Filter destinationEntityFilter,
-      @Nonnull final List<String> relationshipTypes,
-      @Nonnull final RelationshipFilter relationshipFilter) {
-    BoolQueryBuilder finalQuery = buildQuery(
-        sourceType,
-        sourceEntityFilter,
-        destinationType,
-        destinationEntityFilter,
-        relationshipTypes,
-        relationshipFilter
-    );
+  public BulkByScrollResponse deleteByQuery(@Nullable final String sourceType, @Nonnull final Filter sourceEntityFilter,
+      @Nullable final String destinationType, @Nonnull final Filter destinationEntityFilter,
+      @Nonnull final List<String> relationshipTypes, @Nonnull final RelationshipFilter relationshipFilter) {
+    BoolQueryBuilder finalQuery =
+        buildQuery(sourceType, sourceEntityFilter, destinationType, destinationEntityFilter, relationshipTypes,
+            relationshipFilter);
 
     DeleteByQueryRequest deleteByQueryRequest = new DeleteByQueryRequest();
 
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java
index d38a243a403c6..d2b3a1a260230 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java
@@ -16,7 +16,7 @@
 import com.linkedin.metadata.query.filter.Filter;
 import com.linkedin.metadata.query.filter.RelationshipDirection;
 import com.linkedin.metadata.query.filter.RelationshipFilter;
-import com.linkedin.metadata.search.elasticsearch.indexbuilder.IndexBuilder;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder;
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
@@ -51,6 +51,7 @@ public class ElasticSearchGraphService implements GraphService {
   private final IndexConvention _indexConvention;
   private final ESGraphWriteDAO _graphWriteDAO;
   private final ESGraphQueryDAO _graphReadDAO;
+  private final ESIndexBuilder _indexBuilder;
 
   private static final String DOC_DELIMETER = "--";
   public static final String INDEX_NAME = "graph_service_v1";
@@ -206,8 +207,8 @@ public void removeEdgesFromNode(
   public void configure() {
     log.info("Setting up elastic graph index");
     try {
-      new IndexBuilder(searchClient, _indexConvention.getIndexName(INDEX_NAME),
-          GraphRelationshipMappingsBuilder.getMappings(), Collections.emptyMap()).buildIndex();
+      _indexBuilder.buildIndex(_indexConvention.getIndexName(INDEX_NAME),
+          GraphRelationshipMappingsBuilder.getMappings(), Collections.emptyMap());
     } catch (IOException e) {
       e.printStackTrace();
     }
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/RecommendationsService.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/RecommendationsService.java
index ca801e0e29219..27cb7fdec22d3 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/RecommendationsService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/RecommendationsService.java
@@ -4,6 +4,7 @@
 import com.linkedin.metadata.recommendation.candidatesource.RecommendationSource;
 import com.linkedin.metadata.recommendation.ranker.RecommendationModuleRanker;
 import com.linkedin.metadata.utils.ConcurrencyUtils;
+import io.opentelemetry.extension.annotations.WithSpan;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -49,6 +50,7 @@ private void validateRecommendationSources(final List<RecommendationSource> cand
    * @return List of recommendation modules
    */
   @Nonnull
+  @WithSpan
   public List<RecommendationModule> listRecommendations(
       @Nonnull Urn userUrn,
       @Nonnull RecommendationRequestContext requestContext,
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java
index 7470fd0f9024e..d228bd7a1f281 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java
@@ -12,7 +12,7 @@
 import com.linkedin.metadata.recommendation.SearchParams;
 import com.linkedin.metadata.search.EntitySearchService;
 import com.linkedin.metadata.search.cache.NonEmptyEntitiesCache;
-import com.linkedin.metadata.utils.ConcurrencyUtils;
+import io.opentelemetry.extension.annotations.WithSpan;
 import java.net.URISyntaxException;
 import java.util.Collections;
 import java.util.Comparator;
@@ -41,9 +41,7 @@ public abstract class EntitySearchAggregationSource implements RecommendationSou
   private final EntitySearchService _entitySearchService;
   private final NonEmptyEntitiesCache _nonEmptyEntitiesCache;
 
-  protected EntitySearchAggregationSource(
-      EntitySearchService entitySearchService,
-      EntityRegistry entityRegistry,
+  protected EntitySearchAggregationSource(EntitySearchService entitySearchService, EntityRegistry entityRegistry,
       CacheManager cacheManager) {
     _entitySearchService = entitySearchService;
     _nonEmptyEntitiesCache = new NonEmptyEntitiesCache(entityRegistry, entitySearchService, cacheManager);
@@ -90,30 +88,25 @@ protected <T> boolean isValidCandidate(T candidate) {
   }
 
   @Override
-  public List<RecommendationContent> getRecommendations(
-      @Nonnull Urn userUrn,
+  @WithSpan
+  public List<RecommendationContent> getRecommendations(@Nonnull Urn userUrn,
       @Nullable RecommendationRequestContext requestContext) {
-    // Fetch number of documents per platform for each entity type
-    List<Map<String, Long>> resultPerEntity =
-        ConcurrencyUtils.transformAndCollectAsync(_nonEmptyEntitiesCache.getNonEmptyEntities(),
-            entity -> _entitySearchService.aggregateByValue(entity, getSearchFieldName(), null, getMaxContent() * 10));
+    Map<String, Long> aggregationResult =
+        _entitySearchService.aggregateByValue(null, getSearchFieldName(), null, getMaxContent());
 
-    // Merge the aggregated result into one
-    Map<String, Long> mergedResult = resultPerEntity.stream().reduce(this::mergeAggregation).orElse(Collections.emptyMap());
-
-    if (mergedResult.isEmpty()) {
+    if (aggregationResult.isEmpty()) {
       return Collections.emptyList();
     }
 
     // If the aggregated values are not urn, simply get top k values with the most counts
     if (!isValueUrn()) {
-      return getTopKValues(mergedResult).stream()
+      return getTopKValues(aggregationResult).stream()
           .map(entry -> buildRecommendationContent(entry.getKey(), entry.getValue()))
           .collect(Collectors.toList());
     }
 
     // If the aggregated values are urns, convert key into urns
-    Map<Urn, Long> urnCounts = mergedResult.entrySet().stream().map(entry -> {
+    Map<Urn, Long> urnCounts = aggregationResult.entrySet().stream().map(entry -> {
       try {
         Urn tagUrn = Urn.createFromString(entry.getKey());
         return Optional.of(Pair.of(tagUrn, entry.getValue()));
@@ -135,7 +128,8 @@ public List<RecommendationContent> getRecommendations(
 
   // Get top K entries with the most count
   private <T> List<Map.Entry<T, Long>> getTopKValues(Map<T, Long> countMap) {
-    final PriorityQueue<Map.Entry<T, Long>> queue = new PriorityQueue<>(getMaxContent(), Map.Entry.comparingByValue(Comparator.naturalOrder()));
+    final PriorityQueue<Map.Entry<T, Long>> queue =
+        new PriorityQueue<>(getMaxContent(), Map.Entry.comparingByValue(Comparator.naturalOrder()));
     for (Map.Entry<T, Long> entry : countMap.entrySet()) {
       if (queue.size() < getMaxContent() && isValidCandidate(entry.getKey())) {
         queue.add(entry);
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java
index 43c0a77c95678..5bc306c05a487 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java
@@ -14,6 +14,7 @@
 import com.linkedin.metadata.search.utils.ESUtils;
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import com.linkedin.metadata.utils.metrics.MetricUtils;
+import io.opentelemetry.extension.annotations.WithSpan;
 import java.io.IOException;
 import java.net.URISyntaxException;
 import java.util.List;
@@ -73,6 +74,7 @@ public boolean isEligible(@Nonnull Urn userUrn, @Nonnull RecommendationRequestCo
   }
 
   @Override
+  @WithSpan
   public List<RecommendationContent> getRecommendations(@Nonnull Urn userUrn,
       @Nonnull RecommendationRequestContext requestContext) {
     SearchRequest searchRequest = buildSearchRequest(userUrn);
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java
index b1e0ef6ad6e59..b6f744ab3a660 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java
@@ -14,6 +14,7 @@
 import com.linkedin.metadata.search.utils.ESUtils;
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import com.linkedin.metadata.utils.metrics.MetricUtils;
+import io.opentelemetry.extension.annotations.WithSpan;
 import java.io.IOException;
 import java.net.URISyntaxException;
 import java.util.List;
@@ -74,6 +75,7 @@ public boolean isEligible(@Nonnull Urn userUrn, @Nonnull RecommendationRequestCo
   }
 
   @Override
+  @WithSpan
   public List<RecommendationContent> getRecommendations(@Nonnull Urn userUrn,
       @Nonnull RecommendationRequestContext requestContext) {
     SearchRequest searchRequest = buildSearchRequest(userUrn);
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationSource.java
index adb7e8177f592..7d43e3652b492 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationSource.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationSource.java
@@ -6,6 +6,7 @@
 import com.linkedin.metadata.recommendation.RecommendationModule;
 import com.linkedin.metadata.recommendation.RecommendationRenderType;
 import com.linkedin.metadata.recommendation.RecommendationRequestContext;
+import io.opentelemetry.extension.annotations.WithSpan;
 import java.util.List;
 import java.util.Optional;
 import javax.annotation.Nonnull;
@@ -47,6 +48,7 @@ public interface RecommendationSource {
    * @param requestContext Context of where the recommendations are being requested
    * @return list of recommendation candidates
    */
+  @WithSpan
   List<RecommendationContent> getRecommendations(@Nonnull Urn userUrn, @Nonnull RecommendationRequestContext requestContext);
 
   /**
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/EntitySearchService.java
index 8a6ab9c68a4b2..94f1fd965df8c 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/EntitySearchService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/EntitySearchService.java
@@ -93,14 +93,14 @@ AutoCompleteResult autoComplete(@Nonnull String entityName, @Nonnull String quer
   /**
    * Returns number of documents per field value given the field and filters
    *
-   * @param entityName name of the entity
+   * @param entityName name of the entity; if null, aggregates over all entities
    * @param field the field name for aggregate
    * @param requestParams filters to apply before aggregating
    * @param limit the number of aggregations to return
    * @return
    */
   @Nonnull
-  Map<String, Long> aggregateByValue(@Nonnull String entityName, @Nonnull String field, @Nullable Filter requestParams,
+  Map<String, Long> aggregateByValue(@Nullable String entityName, @Nonnull String field, @Nullable Filter requestParams,
       int limit);
 
   /**
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java
index da22ecd561139..243f958bf2aed 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java
@@ -7,7 +7,7 @@
 import com.linkedin.metadata.query.filter.SortCriterion;
 import com.linkedin.metadata.search.EntitySearchService;
 import com.linkedin.metadata.search.SearchResult;
-import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilders;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
 import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO;
 import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO;
 import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO;
@@ -23,7 +23,7 @@
 @RequiredArgsConstructor
 public class ElasticSearchService implements EntitySearchService {
 
-  private final ESIndexBuilders indexBuilders;
+  private final EntityIndexBuilders indexBuilders;
   private final ESSearchDAO esSearchDAO;
   private final ESBrowseDAO esBrowseDAO;
   private final ESWriteDAO esWriteDAO;
@@ -87,7 +87,7 @@ public AutoCompleteResult autoComplete(@Nonnull String entityName, @Nonnull Stri
 
   @Nonnull
   @Override
-  public Map<String, Long> aggregateByValue(@Nonnull String entityName, @Nonnull String field,
+  public Map<String, Long> aggregateByValue(@Nullable String entityName, @Nonnull String field,
       @Nullable Filter requestParams, int limit) {
     log.debug("Aggregating by value: {}, field: {}, requestParams: {}, limit: {}", entityName, field, requestParams,
         limit);
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/IndexBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java
similarity index 80%
rename from metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/IndexBuilder.java
rename to metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java
index e808db22c2875..8777d592379b1 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/IndexBuilder.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java
@@ -1,10 +1,14 @@
 package com.linkedin.metadata.search.elasticsearch.indexbuilder;
 
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.MapDifference;
 import com.google.common.collect.Maps;
 import java.io.IOException;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.concurrent.TimeUnit;
 import javax.annotation.Nonnull;
 import lombok.RequiredArgsConstructor;
@@ -28,22 +32,28 @@
 
 @Slf4j
 @RequiredArgsConstructor
-public class IndexBuilder {
+public class ESIndexBuilder {
 
   private final RestHighLevelClient searchClient;
-  private final String indexName;
-  private final Map<String, Object> mappings;
-  private final Map<String, Object> settings;
+  private final int numShards;
+  private final int numReplicas;
 
   private static final int NUM_RETRIES = 3;
+  private static final List<String> SETTINGS_TO_COMPARE = ImmutableList.of("number_of_shards", "number_of_replicas");
 
-  public void buildIndex() throws IOException {
+  public void buildIndex(String indexName, Map<String, Object> mappings, Map<String, Object> settings)
+      throws IOException {
     // Check if index exists
     boolean exists = searchClient.indices().exists(new GetIndexRequest(indexName), RequestOptions.DEFAULT);
 
+    Map<String, Object> baseSettings = new HashMap<>(settings);
+    baseSettings.put("number_of_shards", numShards);
+    baseSettings.put("number_of_replicas", numReplicas);
+    Map<String, Object> finalSettings = ImmutableMap.of("index", baseSettings);
+
     // If index doesn't exist, create index
     if (!exists) {
-      createIndex(indexName, mappings, settings);
+      createIndex(indexName, mappings, finalSettings);
       return;
     }
 
@@ -65,7 +75,7 @@ public void buildIndex() throws IOException {
         .next();
 
     // If there are no updates to mappings, return
-    if (mappingsDiff.areEqual() && equals(settings, oldSettings)) {
+    if (mappingsDiff.areEqual() && equals(finalSettings, oldSettings)) {
       log.info("No updates to index {}", indexName);
       return;
     }
@@ -77,9 +87,10 @@ public void buildIndex() throws IOException {
     }
 
     String tempIndexName = indexName + "_" + System.currentTimeMillis();
-    createIndex(tempIndexName, mappings, settings);
+    createIndex(tempIndexName, mappings, finalSettings);
     try {
-      searchClient.reindex(new ReindexRequest().setSourceIndices(indexName).setDestIndex(tempIndexName),
+      searchClient.reindex(
+          new ReindexRequest().setSourceIndices(indexName).setDestIndex(tempIndexName),
           RequestOptions.DEFAULT);
     } catch (Exception e) {
       log.info("Failed to reindex {} to {}: Exception {}", indexName, tempIndexName, e.toString());
@@ -151,14 +162,22 @@ private void createIndex(String indexName, Map<String, Object> mappings, Map<Str
   }
 
   private boolean equals(Map<String, Object> newSettings, Settings oldSettings) {
-    if (!newSettings.containsKey("index") || !((Map<String, Object>) newSettings.get("index")).containsKey(
-        "analysis")) {
+    if (!newSettings.containsKey("index")) {
       return true;
     }
-    Map<String, Object> newAnalysis =
-        (Map<String, Object>) ((Map<String, Object>) newSettings.get("index")).get("analysis");
-    Settings oldAnalysis = oldSettings.getByPrefix("index.analysis.");
-    return equalsGroup(newAnalysis, oldAnalysis);
+    Map<String, Object> indexSettings = (Map<String, Object>) newSettings.get("index");
+    // Compare the analysis section only when one is requested; shards/replicas are always compared below
+    if (indexSettings.containsKey("analysis")) {
+      Map<String, Object> newAnalysis = (Map<String, Object>) indexSettings.get("analysis");
+      Settings oldAnalysis = oldSettings.getByPrefix("index.analysis.");
+      if (!equalsGroup(newAnalysis, oldAnalysis)) {
+        return false;
+      }
+    }
+    // Settings are equal only if every tracked key matches. Settings.get returns strings while the new
+    // values are Integers, so compare string forms (an Integer never equals a String).
+    return SETTINGS_TO_COMPARE.stream()
+        .allMatch(key -> Objects.equals(String.valueOf(indexSettings.get(key)), oldSettings.get("index." + key)));
   }
 
   private boolean equalsGroup(Map<String, Object> newSettings, Settings oldSettings) {
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilder.java
index 56eb2b354efa2..6709a1160c03c 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilder.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilder.java
@@ -5,13 +5,12 @@
 import java.util.Map;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.client.RestHighLevelClient;
 
 
 @Slf4j
 @RequiredArgsConstructor
 public class EntityIndexBuilder {
-  private final RestHighLevelClient searchClient;
+  private final ESIndexBuilder indexBuilder;
   private final EntitySpec entitySpec;
   private final SettingsBuilder settingsBuilder;
   private final String indexName;
@@ -21,6 +20,6 @@ public void buildIndex() throws IOException {
     Map<String, Object> mappings = MappingsBuilder.getMappings(entitySpec);
     Map<String, Object> settings = settingsBuilder.getSettings();
 
-    new IndexBuilder(searchClient, indexName, mappings, settings).buildIndex();
+    indexBuilder.buildIndex(indexName, mappings, settings);
   }
 }
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilders.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java
similarity index 79%
rename from metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilders.java
rename to metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java
index 37bcd875e953d..349187bd347a3 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilders.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java
@@ -5,20 +5,19 @@
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import java.io.IOException;
 import lombok.RequiredArgsConstructor;
-import org.elasticsearch.client.RestHighLevelClient;
 
 
 @RequiredArgsConstructor
-public class ESIndexBuilders {
+public class EntityIndexBuilders {
+  private final ESIndexBuilder indexBuilder;
   private final EntityRegistry entityRegistry;
-  private final RestHighLevelClient searchClient;
   private final IndexConvention indexConvention;
   private final SettingsBuilder settingsBuilder;
 
   public void buildAll() {
     for (EntitySpec entitySpec : entityRegistry.getEntitySpecs().values()) {
       try {
-        new EntityIndexBuilder(searchClient, entitySpec, settingsBuilder,
+        new EntityIndexBuilder(indexBuilder, entitySpec, settingsBuilder,
             indexConvention.getIndexName(entitySpec)).buildIndex();
       } catch (IOException e) {
         e.printStackTrace();
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/SettingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/SettingsBuilder.java
index 9643a51fe221a..16118fcecd3c9 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/SettingsBuilder.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/SettingsBuilder.java
@@ -28,7 +28,7 @@ private static Map<String, Object> buildSettings(List<String> urnStopWords) {
         .put("normalizer", buildNormalizers())
         .put("analyzer", buildAnalyzers())
         .build());
-    return ImmutableMap.of("index", settings.build());
+    return settings.build();
   }
 
   private static Map<String, Object> buildFilters(List<String> urnStopWords) {
@@ -88,7 +88,7 @@ private static Map<String, Object> buildAnalyzers() {
 
     // Analyzer for text tokenized into words (split by spaces, periods, and slashes)
     analyzers.put("word_delimited", ImmutableMap.<String, Object>builder().put("tokenizer", "main_tokenizer")
-        .put("filter", ImmutableList.of("custom_delimiter", "lowercase"))
+        .put("filter", ImmutableList.of("custom_delimiter", "lowercase", "stop"))
         .build());
 
     // Analyzer for splitting by slashes (used to get depth of browsePath)
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java
index def29046b4eef..23089d0199efe 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java
@@ -14,8 +14,6 @@
 import com.linkedin.metadata.utils.metrics.MetricUtils;
 import io.opentelemetry.extension.annotations.WithSpan;
 import java.io.IOException;
-import java.util.Collections;
-import java.util.HashMap;
 import java.util.Map;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
@@ -61,7 +59,7 @@ private SearchResult executeAndExtract(@Nonnull EntitySpec entitySpec, @Nonnull
       // extract results, validated against document model as well
       return SearchRequestHandler.getBuilder(entitySpec).extractResult(searchResponse, from, size);
     } catch (Exception e) {
-      log.error("Search query failed:" + e.getMessage());
+      log.error("Search query failed", e);
       throw new ESQueryException("Search query failed:", e);
     }
   }
@@ -84,8 +82,8 @@ public SearchResult search(@Nonnull String entityName, @Nonnull String input, @N
     Timer.Context searchRequestTimer = MetricUtils.timer(this.getClass(), "searchRequest").time();
     EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName);
     // Step 1: construct the query
-    final SearchRequest searchRequest =
-        SearchRequestHandler.getBuilder(entitySpec).getSearchRequest(finalInput, postFilters, sortCriterion, from, size);
+    final SearchRequest searchRequest = SearchRequestHandler.getBuilder(entitySpec)
+        .getSearchRequest(finalInput, postFilters, sortCriterion, from, size);
     searchRequest.indices(indexConvention.getIndexName(entitySpec));
     searchRequestTimer.stop();
     // Step 2: execute the query and extract results, validated against document model as well
@@ -141,23 +139,32 @@ public AutoCompleteResult autoComplete(@Nonnull String entityName, @Nonnull Stri
   /**
    * Returns number of documents per field value given the field and filters
    *
-   * @param entityName name of the entity
+   * @param entityName name of the entity; if null, aggregates over all entity indices
    * @param field the field name for aggregate
    * @param requestParams filters to apply before aggregating
    * @param limit the number of aggregations to return
-   * @return
+   * @return map of field value to document count; empty when the response has no aggregation for the field
    */
   @Nonnull
-  public Map<String, Long> aggregateByValue(@Nonnull String entityName, @Nonnull String field,
+  public Map<String, Long> aggregateByValue(@Nullable String entityName, @Nonnull String field,
       @Nullable Filter requestParams, int limit) {
-    EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName);
-    final SearchRequest searchRequest =
-        SearchRequestHandler.getBuilder(entitySpec).getAggregationRequest(field, requestParams, limit);
-    searchRequest.indices(indexConvention.getIndexName(entitySpec));
-    return executeAndExtract(entitySpec, searchRequest, 0, 0).getMetadata()
-        .getAggregations()
-        .stream()
-        .findFirst().<Map<String, Long>>map(aggregationMetadata -> new HashMap<>(aggregationMetadata.getAggregations()))
-        .orElse(Collections.emptyMap());
+    final SearchRequest searchRequest = SearchRequestHandler.getAggregationRequest(field, requestParams, limit);
+    String indexName;
+    if (entityName == null) {
+      indexName = indexConvention.getAllEntityIndicesPattern();
+    } else {
+      EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName);
+      indexName = indexConvention.getIndexName(entitySpec);
+    }
+    searchRequest.indices(indexName);
+
+    try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "esSearch").time()) {
+      final SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
+      // look up the aggregation by field name and extract its term buckets
+      return SearchRequestHandler.extractTermAggregations(searchResponse, field);
+    } catch (Exception e) {
+      log.error("Aggregation query failed", e);
+      throw new ESQueryException("Aggregation query failed:", e);
+    }
   }
 }
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java
index b5093292d3e3f..548c20057e1fa 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java
@@ -4,7 +4,6 @@
 import com.linkedin.common.urn.Urn;
 import com.linkedin.data.template.DoubleMap;
 import com.linkedin.data.template.LongMap;
-import com.linkedin.metadata.search.utils.ESUtils;
 import com.linkedin.metadata.models.EntitySpec;
 import com.linkedin.metadata.models.SearchableFieldSpec;
 import com.linkedin.metadata.models.annotation.SearchableAnnotation;
@@ -20,11 +19,13 @@
 import com.linkedin.metadata.search.SearchResult;
 import com.linkedin.metadata.search.SearchResultMetadata;
 import com.linkedin.metadata.search.features.Features;
+import com.linkedin.metadata.search.utils.ESUtils;
 import com.linkedin.metadata.utils.SearchUtil;
 import io.opentelemetry.extension.annotations.WithSpan;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -52,6 +53,7 @@
 import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
 import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
 
+
 @Slf4j
 public class SearchRequestHandler {
 
@@ -96,7 +98,7 @@ private Set<String> getDefaultQueryFieldNames() {
         .collect(Collectors.toSet());
   }
 
-  private BoolQueryBuilder getFilterQuery(@Nullable Filter filter) {
+  private static BoolQueryBuilder getFilterQuery(@Nullable Filter filter) {
     BoolQueryBuilder filterQuery = ESUtils.buildFilterQuery(filter);
     // Filter out entities that are marked "removed"
     filterQuery.mustNot(QueryBuilders.matchQuery("removed", true));
@@ -123,8 +125,7 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi
 
     searchSourceBuilder.from(from);
     searchSourceBuilder.size(size);
-
-    searchSourceBuilder.query(getQuery(input));
+    searchSourceBuilder.fetchSource("urn", null);
 
     BoolQueryBuilder filterQuery = getFilterQuery(filter);
     searchSourceBuilder.query(QueryBuilders.boolQuery().must(getQuery(input)).must(filterQuery));
@@ -171,7 +172,7 @@ public SearchRequest getFilterRequest(@Nullable Filter filters, @Nullable SortCr
    * @return {@link SearchRequest} that contains the aggregation query
    */
   @Nonnull
-  public SearchRequest getAggregationRequest(@Nonnull String field, @Nullable Filter filter, int limit) {
+  public static SearchRequest getAggregationRequest(@Nonnull String field, @Nullable Filter filter, int limit) {
     SearchRequest searchRequest = new SearchRequest();
     BoolQueryBuilder filterQuery = getFilterQuery(filter);
 
@@ -292,7 +293,7 @@ private SearchResultMetadata extractSearchResultMetadata(@Nonnull SearchResponse
     final SearchResultMetadata searchResultMetadata =
         new SearchResultMetadata().setAggregations(new AggregationMetadataArray());
 
-    final List<AggregationMetadata> aggregationMetadataList = extractAggregation(searchResponse);
+    final List<AggregationMetadata> aggregationMetadataList = extractAggregationMetadata(searchResponse);
     if (!aggregationMetadataList.isEmpty()) {
       searchResultMetadata.setAggregations(new AggregationMetadataArray(aggregationMetadataList));
     }
@@ -300,7 +301,7 @@ private SearchResultMetadata extractSearchResultMetadata(@Nonnull SearchResponse
     return searchResultMetadata;
   }
 
-  private List<AggregationMetadata> extractAggregation(@Nonnull SearchResponse searchResponse) {
+  private List<AggregationMetadata> extractAggregationMetadata(@Nonnull SearchResponse searchResponse) {
     final List<AggregationMetadata> aggregationMetadataList = new ArrayList<>();
 
     if (searchResponse.getAggregations() == null) {
@@ -322,6 +323,20 @@ private List<AggregationMetadata> extractAggregation(@Nonnull SearchResponse sea
     return aggregationMetadataList;
   }
 
+  @WithSpan
+  public static Map<String, Long> extractTermAggregations(@Nonnull SearchResponse searchResponse,
+      @Nonnull String aggregationName) {
+    if (searchResponse.getAggregations() == null) {
+      return Collections.emptyMap();
+    }
+    // The named aggregation may be absent from the response; that also yields an empty result
+    final Aggregation namedAggregation = searchResponse.getAggregations().get(aggregationName);
+    if (namedAggregation != null) {
+      return extractTermAggregations((ParsedTerms) namedAggregation);
+    }
+    return Collections.emptyMap();
+  }
+
   /**
    * Extracts term aggregations give a parsed term.
    *
@@ -329,7 +344,7 @@ private List<AggregationMetadata> extractAggregation(@Nonnull SearchResponse sea
    * @return a map with aggregation key and corresponding doc counts
    */
   @Nonnull
-  private Map<String, Long> extractTermAggregations(@Nonnull ParsedTerms terms) {
+  private static Map<String, Long> extractTermAggregations(@Nonnull ParsedTerms terms) {
 
     final Map<String, Long> aggResult = new HashMap<>();
     List<? extends Terms.Bucket> bucketList = terms.getBuckets();
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESWriteDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESWriteDAO.java
index 12ba9a84dca10..d557f3227e58d 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESWriteDAO.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESWriteDAO.java
@@ -4,8 +4,8 @@
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import java.io.IOException;
 import javax.annotation.Nonnull;
+import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.action.bulk.BackoffPolicy;
 import org.elasticsearch.action.bulk.BulkProcessor;
 import org.elasticsearch.action.delete.DeleteRequest;
 import org.elasticsearch.action.index.IndexRequest;
@@ -14,33 +14,19 @@
 import org.elasticsearch.client.RestHighLevelClient;
 import org.elasticsearch.client.indices.GetIndexRequest;
 import org.elasticsearch.client.indices.GetIndexResponse;
-import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.index.reindex.DeleteByQueryRequest;
 
 
 @Slf4j
+@RequiredArgsConstructor
 public class ESWriteDAO {
 
   private final EntityRegistry entityRegistry;
   private final RestHighLevelClient searchClient;
-  private final BulkProcessor bulkProcessor;
   private final IndexConvention indexConvention;
-
-  public ESWriteDAO(EntityRegistry entityRegistry, RestHighLevelClient searchClient, IndexConvention indexConvention,
-      int bulkRequestsLimit, int bulkFlushPeriod, int numRetries, long retryInterval) {
-    this.entityRegistry = entityRegistry;
-    this.indexConvention = indexConvention;
-    this.searchClient = searchClient;
-    this.bulkProcessor = BulkProcessor.builder(
-        (request, bulkListener) -> searchClient.bulkAsync(request, RequestOptions.DEFAULT, bulkListener),
-        BulkListener.getInstance())
-        .setBulkActions(bulkRequestsLimit)
-        .setFlushInterval(TimeValue.timeValueSeconds(bulkFlushPeriod))
-        .setBackoffPolicy(BackoffPolicy.constantBackoff(TimeValue.timeValueSeconds(retryInterval), numRetries))
-        .build();
-  }
+  private final BulkProcessor bulkProcessor;
 
   /**
    * Updates or inserts the given search document.
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java
index 5d2c1d7891da1..a61d5622e300b 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java
@@ -1,7 +1,6 @@
 package com.linkedin.metadata.search.transformer;
 
 import com.fasterxml.jackson.databind.JsonNode;
-
 import com.fasterxml.jackson.databind.node.ArrayNode;
 import com.fasterxml.jackson.databind.node.JsonNodeFactory;
 import com.fasterxml.jackson.databind.node.ObjectNode;
@@ -16,6 +15,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 
 
@@ -23,12 +23,14 @@
  * Class that provides a utility function that transforms the snapshot object into a search document
  */
 @Slf4j
+@RequiredArgsConstructor
 public class SearchDocumentTransformer {
 
-  private SearchDocumentTransformer() {
-  }
+  // Number of elements to index for a given array.
+  // The cap improves search speed when having fields with a large number of elements
+  private final int maxArrayLength;
 
-  public static Optional<String> transformSnapshot(
+  public Optional<String> transformSnapshot(
       final RecordTemplate snapshot,
       final EntitySpec entitySpec,
       final Boolean forDelete
@@ -44,7 +46,7 @@ public static Optional<String> transformSnapshot(
     return Optional.of(searchDocument.toString());
   }
 
-  public static Optional<String> transformAspect(
+  public Optional<String> transformAspect(
       final Urn urn,
       final RecordTemplate aspect,
       final AspectSpec aspectSpec,
@@ -61,7 +63,7 @@ public static Optional<String> transformAspect(
     return Optional.of(searchDocument.toString());
   }
 
-  public static void setValue(final SearchableFieldSpec fieldSpec, final List<Object> fieldValues,
+  public void setValue(final SearchableFieldSpec fieldSpec, final List<Object> fieldValues,
       final ObjectNode searchDocument, final Boolean forDelete) {
     DataSchema.Type valueType = fieldSpec.getPegasusSchema().getType();
     Optional<Object> firstValue = fieldValues.stream().findFirst();
@@ -109,14 +111,15 @@ public static void setValue(final SearchableFieldSpec fieldSpec, final List<Obje
 
     if (isArray || valueType == DataSchema.Type.MAP) {
       ArrayNode arrayNode = JsonNodeFactory.instance.arrayNode();
-      fieldValues.forEach(value -> getNodeForValue(valueType, value, fieldType).ifPresent(arrayNode::add));
+      fieldValues.subList(0, Math.min(fieldValues.size(), maxArrayLength))
+          .forEach(value -> getNodeForValue(valueType, value, fieldType).ifPresent(arrayNode::add));
       searchDocument.set(fieldName, arrayNode);
     } else if (!fieldValues.isEmpty()) {
       getNodeForValue(valueType, fieldValues.get(0), fieldType).ifPresent(node -> searchDocument.set(fieldName, node));
     }
   }
 
-  private static Optional<JsonNode> getNodeForValue(final DataSchema.Type schemaFieldType, final Object fieldValue,
+  private Optional<JsonNode> getNodeForValue(final DataSchema.Type schemaFieldType, final Object fieldValue,
       final FieldType fieldType) {
     switch (schemaFieldType) {
       case BOOLEAN:
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java
index ac8f7d970c5fe..799cb7583c8af 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java
@@ -1,15 +1,14 @@
 package com.linkedin.metadata.systemmetadata;
 
 import com.google.common.collect.ImmutableList;
-import com.linkedin.metadata.search.elasticsearch.update.BulkListener;
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import java.io.IOException;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 import javax.annotation.Nonnull;
+import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.action.bulk.BackoffPolicy;
 import org.elasticsearch.action.bulk.BulkProcessor;
 import org.elasticsearch.action.delete.DeleteRequest;
 import org.elasticsearch.action.delete.DeleteResponse;
@@ -19,7 +18,6 @@
 import org.elasticsearch.action.update.UpdateRequest;
 import org.elasticsearch.client.RequestOptions;
 import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.index.query.BoolQueryBuilder;
 import org.elasticsearch.index.query.QueryBuilders;
@@ -33,29 +31,15 @@
 import org.elasticsearch.search.sort.FieldSortBuilder;
 import org.elasticsearch.search.sort.SortOrder;
 
-import static com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService.*;
+import static com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService.INDEX_NAME;
 
 
 @Slf4j
+@RequiredArgsConstructor
 public class ESSystemMetadataDAO {
-  private final BulkProcessor bulkProcessor;
-  private final IndexConvention indexConvention;
   private final RestHighLevelClient client;
-
-  public ESSystemMetadataDAO(RestHighLevelClient searchClient, IndexConvention indexConvention, int bulkRequestsLimit, int bulkFlushPeriod, int numRetries,
-      long retryInterval) {
-    this.client = searchClient;
-    this.indexConvention = indexConvention;
-    this.bulkProcessor = BulkProcessor.builder(
-        (request, bulkListener) -> {
-            searchClient.bulkAsync(request, RequestOptions.DEFAULT, bulkListener);
-        },
-        BulkListener.getInstance())
-        .setBulkActions(bulkRequestsLimit)
-        .setFlushInterval(TimeValue.timeValueSeconds(bulkFlushPeriod))
-        .setBackoffPolicy(BackoffPolicy.constantBackoff(TimeValue.timeValueSeconds(retryInterval), numRetries))
-        .build();
-  }
+  private final IndexConvention indexConvention;
+  private final BulkProcessor bulkProcessor;
 
   /**
    * Updates or inserts the given search document.
@@ -64,16 +48,16 @@ public ESSystemMetadataDAO(RestHighLevelClient searchClient, IndexConvention ind
    * @param docId the ID of the document
    */
   public void upsertDocument(@Nonnull String docId, @Nonnull String document) {
-    final IndexRequest indexRequest = new IndexRequest(indexConvention.getIndexName(INDEX_NAME)).id(docId).source(document, XContentType.JSON);
-    final UpdateRequest updateRequest = new UpdateRequest(indexConvention.getIndexName(INDEX_NAME), docId).doc(document, XContentType.JSON)
-        .detectNoop(false)
-        .upsert(indexRequest);
+    final String indexName = indexConvention.getIndexName(INDEX_NAME);
+    final IndexRequest upsertRequest = new IndexRequest(indexName).id(docId).source(document, XContentType.JSON);
+    // Update the document in place, falling back to indexing it when it does not exist yet
+    final UpdateRequest updateRequest = new UpdateRequest(indexName, docId).doc(document, XContentType.JSON)
+        .detectNoop(false)
+        .upsert(upsertRequest);
     bulkProcessor.add(updateRequest);
   }
 
-  public DeleteResponse deleteByDocId(
-      @Nonnull final String docId
-  ) {
+  public DeleteResponse deleteByDocId(@Nonnull final String docId) {
     DeleteRequest deleteRequest = new DeleteRequest(indexConvention.getIndexName(INDEX_NAME), docId);
 
     try {
@@ -86,9 +70,7 @@ public DeleteResponse deleteByDocId(
     return null;
   }
 
-  public BulkByScrollResponse deleteByUrn(
-      @Nonnull final String urn
-  ) {
+  public BulkByScrollResponse deleteByUrn(@Nonnull final String urn) {
     BoolQueryBuilder finalQuery = QueryBuilders.boolQuery();
     finalQuery.must(QueryBuilders.termQuery("urn", urn));
 
@@ -114,7 +96,8 @@ public SearchResponse findByParams(Map<String, String> searchParams) {
     SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
 
     BoolQueryBuilder finalQuery = QueryBuilders.boolQuery();
-    searchParams.entrySet().forEach(entry -> finalQuery.must(QueryBuilders.termQuery(entry.getKey(), entry.getValue())));
+    searchParams.entrySet()
+        .forEach(entry -> finalQuery.must(QueryBuilders.termQuery(entry.getKey(), entry.getValue())));
     searchSourceBuilder.query(finalQuery);
 
     // this is the max page size elastic will return
@@ -160,12 +143,10 @@ public SearchResponse findRuns(Integer pageOffset, Integer pageSize) {
     bucketSort.size(pageSize);
     bucketSort.from(pageOffset);
 
-    TermsAggregationBuilder aggregation =
-        AggregationBuilders.terms("runId")
-            .field("runId")
-            .subAggregation(AggregationBuilders.max("maxTimestamp").field("lastUpdated"))
-            .subAggregation(bucketSort);
-
+    TermsAggregationBuilder aggregation = AggregationBuilders.terms("runId")
+        .field("runId")
+        .subAggregation(AggregationBuilders.max("maxTimestamp").field("lastUpdated"))
+        .subAggregation(bucketSort);
 
     searchSourceBuilder.aggregation(aggregation);
 
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java
index ce7956481787a..50e5c9c6e5a5a 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java
@@ -4,7 +4,7 @@
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.linkedin.metadata.run.AspectRowSummary;
 import com.linkedin.metadata.run.IngestionRunSummary;
-import com.linkedin.metadata.search.elasticsearch.indexbuilder.IndexBuilder;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder;
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import com.linkedin.mxe.SystemMetadata;
 import java.io.IOException;
@@ -40,9 +40,10 @@
 @RequiredArgsConstructor
 public class ElasticSearchSystemMetadataService implements SystemMetadataService {
 
-  private final RestHighLevelClient searchClient;
+  private final RestHighLevelClient _searchClient;
   private final IndexConvention _indexConvention;
   private final ESSystemMetadataDAO _esDAO;
+  private final ESIndexBuilder _indexBuilder;
 
   private static final String DOC_DELIMETER = "--";
   public static final String INDEX_NAME = "system_metadata_service_v1";
@@ -161,10 +162,9 @@ public List<IngestionRunSummary> listRuns(Integer pageOffset, Integer pageSize)
   @Override
   public void configure() {
     log.info("Setting up system metadata index");
-    IndexBuilder ib = new IndexBuilder(this.searchClient, _indexConvention.getIndexName(INDEX_NAME),
-        SystemMetadataMappingsBuilder.getMappings(), Collections.emptyMap());
     try {
-      ib.buildIndex();
+      _indexBuilder.buildIndex(_indexConvention.getIndexName(INDEX_NAME), SystemMetadataMappingsBuilder.getMappings(),
+          Collections.emptyMap());
     } catch (IOException ie) {
       throw new RuntimeException("Could not configure system metadata index", ie);
     }
@@ -175,7 +175,7 @@ public void clear() {
     DeleteByQueryRequest deleteRequest =
         new DeleteByQueryRequest(_indexConvention.getIndexName(INDEX_NAME)).setQuery(QueryBuilders.matchAllQuery());
     try {
-      searchClient.deleteByQuery(deleteRequest, RequestOptions.DEFAULT);
+      _searchClient.deleteByQuery(deleteRequest, RequestOptions.DEFAULT);
     } catch (Exception e) {
       log.error("Failed to clear system metadata service: {}", e.toString());
     }
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java
index dd7076d23e8f0..fa1606a28e90c 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java
@@ -8,13 +8,12 @@
 import com.linkedin.data.ByteString;
 import com.linkedin.metadata.aspect.EnvelopedAspect;
 import com.linkedin.metadata.dao.exception.ESQueryException;
-import com.linkedin.metadata.search.utils.ESUtils;
 import com.linkedin.metadata.dao.utils.RecordUtils;
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import com.linkedin.metadata.query.filter.Condition;
 import com.linkedin.metadata.query.filter.Criterion;
 import com.linkedin.metadata.query.filter.Filter;
-import com.linkedin.metadata.search.elasticsearch.update.BulkListener;
+import com.linkedin.metadata.search.utils.ESUtils;
 import com.linkedin.metadata.timeseries.TimeseriesAspectService;
 import com.linkedin.metadata.timeseries.elastic.indexbuilder.MappingsBuilder;
 import com.linkedin.metadata.timeseries.elastic.indexbuilder.TimeseriesAspectIndexBuilders;
@@ -34,7 +33,6 @@
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.action.bulk.BackoffPolicy;
 import org.elasticsearch.action.bulk.BulkProcessor;
 import org.elasticsearch.action.index.IndexRequest;
 import org.elasticsearch.action.search.SearchRequest;
@@ -42,7 +40,6 @@
 import org.elasticsearch.action.update.UpdateRequest;
 import org.elasticsearch.client.RequestOptions;
 import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.index.query.BoolQueryBuilder;
 import org.elasticsearch.index.query.QueryBuilders;
@@ -68,18 +65,11 @@ public class ElasticSearchTimeseriesAspectService implements TimeseriesAspectSer
 
   public ElasticSearchTimeseriesAspectService(@Nonnull RestHighLevelClient searchClient,
       @Nonnull IndexConvention indexConvention, @Nonnull TimeseriesAspectIndexBuilders indexBuilders,
-      @Nonnull EntityRegistry entityRegistry, int bulkRequestsLimit, int bulkFlushPeriod, int numRetries,
-      long retryInterval) {
+      @Nonnull EntityRegistry entityRegistry, @Nonnull BulkProcessor bulkProcessor) {
     _indexConvention = indexConvention;
     _indexBuilders = indexBuilders;
     _searchClient = searchClient;
-    _bulkProcessor = BulkProcessor.builder(
-            (request, bulkListener) -> searchClient.bulkAsync(request, RequestOptions.DEFAULT, bulkListener),
-            BulkListener.getInstance())
-        .setBulkActions(bulkRequestsLimit)
-        .setFlushInterval(TimeValue.timeValueSeconds(bulkFlushPeriod))
-        .setBackoffPolicy(BackoffPolicy.constantBackoff(TimeValue.timeValueSeconds(retryInterval), numRetries))
-        .build();
+    _bulkProcessor = bulkProcessor;
 
     _esAggregatedStatsDAO = new ESAggregatedStatsDAO(indexConvention, searchClient, entityRegistry);
   }
@@ -128,12 +118,8 @@ public void upsertDocument(@Nonnull String entityName, @Nonnull String aspectNam
   }
 
   @Override
-  public List<EnvelopedAspect> getAspectValues(
-      @Nonnull final Urn urn,
-      @Nonnull String entityName,
-      @Nonnull String aspectName,
-      @Nullable Long startTimeMillis,
-      @Nullable Long endTimeMillis,
+  public List<EnvelopedAspect> getAspectValues(@Nonnull final Urn urn, @Nonnull String entityName,
+      @Nonnull String aspectName, @Nullable Long startTimeMillis, @Nullable Long endTimeMillis,
       @Nullable Integer limit) {
     final BoolQueryBuilder filterQueryBuilder = ESUtils.buildFilterQuery(null);
     filterQueryBuilder.must(QueryBuilders.matchQuery("urn", urn.toString()));
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java
index f8c48bfbc8baa..d0fd26d737cf0 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java
@@ -3,20 +3,19 @@
 import com.linkedin.metadata.models.AspectSpec;
 import com.linkedin.metadata.models.EntitySpec;
 import com.linkedin.metadata.models.registry.EntityRegistry;
-import com.linkedin.metadata.search.elasticsearch.indexbuilder.IndexBuilder;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder;
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import java.io.IOException;
 import java.util.Collections;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.client.RestHighLevelClient;
 
 
 @Slf4j
 @RequiredArgsConstructor
 public class TimeseriesAspectIndexBuilders {
+  private final ESIndexBuilder _indexBuilder;
   private final EntityRegistry _entityRegistry;
-  private final RestHighLevelClient _searchClient;
   private final IndexConvention _indexConvention;
 
   public void buildAll() {
@@ -24,9 +23,9 @@ public void buildAll() {
       for (AspectSpec aspectSpec : entitySpec.getAspectSpecs()) {
         if (aspectSpec.isTimeseries()) {
           try {
-            new IndexBuilder(_searchClient,
+            _indexBuilder.buildIndex(
                 _indexConvention.getTimeseriesAspectIndexName(entitySpec.getName(), aspectSpec.getName()),
-                MappingsBuilder.getMappings(aspectSpec), Collections.emptyMap()).buildIndex();
+                MappingsBuilder.getMappings(aspectSpec), Collections.emptyMap());
           } catch (IOException e) {
             log.error("Issue while building timeseries field index for entity {} aspect {}", entitySpec.getName(),
                 aspectSpec.getName());
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/ElasticSearchGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/ElasticSearchGraphServiceTest.java
index 81c03bf4b21fb..3d270662014ac 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/graph/ElasticSearchGraphServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/ElasticSearchGraphServiceTest.java
@@ -8,8 +8,13 @@
 import com.linkedin.metadata.query.filter.Filter;
 import com.linkedin.metadata.query.filter.RelationshipDirection;
 import com.linkedin.metadata.query.filter.RelationshipFilter;
+import com.linkedin.metadata.search.elasticsearch.ElasticSearchServiceTest;
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import javax.annotation.Nonnull;
 import org.apache.http.HttpHost;
 import org.apache.http.impl.nio.reactor.IOReactorConfig;
 import org.elasticsearch.client.RestClient;
@@ -22,15 +27,10 @@
 import org.testng.annotations.BeforeTest;
 import org.testng.annotations.Test;
 
-import javax.annotation.Nonnull;
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.List;
-
 import static com.linkedin.metadata.DockerTestUtils.checkContainerEngine;
+import static com.linkedin.metadata.graph.elastic.ElasticSearchGraphService.INDEX_NAME;
 import static org.testng.Assert.assertEquals;
 
-import static com.linkedin.metadata.graph.elastic.ElasticSearchGraphService.INDEX_NAME;
 
 public class ElasticSearchGraphServiceTest extends GraphServiceTestBase {
 
@@ -75,8 +75,10 @@ private RestHighLevelClient buildRestClient() {
   @Nonnull
   private ElasticSearchGraphService buildService() {
     ESGraphQueryDAO readDAO = new ESGraphQueryDAO(_searchClient, _indexConvention);
-    ESGraphWriteDAO writeDAO = new ESGraphWriteDAO(_searchClient, _indexConvention, 1, 1, 1, 1);
-    return new ElasticSearchGraphService(_searchClient, _indexConvention, writeDAO, readDAO);
+    ESGraphWriteDAO writeDAO =
+        new ESGraphWriteDAO(_searchClient, _indexConvention, ElasticSearchServiceTest.getBulkProcessor(_searchClient));
+    return new ElasticSearchGraphService(_searchClient, _indexConvention, writeDAO, readDAO,
+        ElasticSearchServiceTest.getIndexBuilder(_searchClient));
   }
 
   @AfterTest
@@ -85,7 +87,8 @@ public void tearDown() {
   }
 
   @Override
-  protected @Nonnull GraphService getGraphService() {
+  @Nonnull
+  protected GraphService getGraphService() {
     return _client;
   }
 
@@ -107,41 +110,35 @@ protected void assertEqualsAnyOrder(RelatedEntitiesResult actual, RelatedEntitie
   protected <T> void assertEqualsAnyOrder(List<T> actual, List<T> expected, Comparator<T> comparator) {
     // https://github.com/linkedin/datahub/issues/3115
     // ElasticSearchGraphService produces duplicates, which is here ignored until fixed
-    assertEquals(
-            new HashSet<>(actual),
-            new HashSet<>(expected)
-    );
+    assertEquals(new HashSet<>(actual), new HashSet<>(expected));
   }
 
   @Override
-  public void testFindRelatedEntitiesSourceEntityFilter(Filter sourceEntityFilter,
-                                                        List<String> relationshipTypes,
-                                                        RelationshipFilter relationships,
-                                                        List<RelatedEntity> expectedRelatedEntities) throws Exception {
+  public void testFindRelatedEntitiesSourceEntityFilter(Filter sourceEntityFilter, List<String> relationshipTypes,
+      RelationshipFilter relationships, List<RelatedEntity> expectedRelatedEntities) throws Exception {
     if (relationships.getDirection() == RelationshipDirection.UNDIRECTED) {
       // https://github.com/linkedin/datahub/issues/3114
       throw new SkipException("ElasticSearchGraphService does not implement UNDIRECTED relationship filter");
     }
-    super.testFindRelatedEntitiesSourceEntityFilter(sourceEntityFilter, relationshipTypes, relationships, expectedRelatedEntities);
+    super.testFindRelatedEntitiesSourceEntityFilter(sourceEntityFilter, relationshipTypes, relationships,
+        expectedRelatedEntities);
   }
 
   @Override
   public void testFindRelatedEntitiesDestinationEntityFilter(Filter destinationEntityFilter,
-                                                             List<String> relationshipTypes,
-                                                             RelationshipFilter relationships,
-                                                             List<RelatedEntity> expectedRelatedEntities) throws Exception {
+      List<String> relationshipTypes, RelationshipFilter relationships, List<RelatedEntity> expectedRelatedEntities)
+      throws Exception {
     if (relationships.getDirection() == RelationshipDirection.UNDIRECTED) {
       // https://github.com/linkedin/datahub/issues/3114
       throw new SkipException("ElasticSearchGraphService does not implement UNDIRECTED relationship filter");
     }
-    super.testFindRelatedEntitiesDestinationEntityFilter(destinationEntityFilter, relationshipTypes, relationships, expectedRelatedEntities);
+    super.testFindRelatedEntitiesDestinationEntityFilter(destinationEntityFilter, relationshipTypes, relationships,
+        expectedRelatedEntities);
   }
 
   @Override
-  public void testFindRelatedEntitiesSourceType(String datasetType,
-                                                List<String> relationshipTypes,
-                                                RelationshipFilter relationships,
-                                                List<RelatedEntity> expectedRelatedEntities) throws Exception {
+  public void testFindRelatedEntitiesSourceType(String datasetType, List<String> relationshipTypes,
+      RelationshipFilter relationships, List<RelatedEntity> expectedRelatedEntities) throws Exception {
     if (relationships.getDirection() == RelationshipDirection.UNDIRECTED) {
       // https://github.com/linkedin/datahub/issues/3114
       throw new SkipException("ElasticSearchGraphService does not implement UNDIRECTED relationship filter");
@@ -154,10 +151,8 @@ public void testFindRelatedEntitiesSourceType(String datasetType,
   }
 
   @Override
-  public void testFindRelatedEntitiesDestinationType(String datasetType,
-                                                     List<String> relationshipTypes,
-                                                     RelationshipFilter relationships,
-                                                     List<RelatedEntity> expectedRelatedEntities) throws Exception {
+  public void testFindRelatedEntitiesDestinationType(String datasetType, List<String> relationshipTypes,
+      RelationshipFilter relationships, List<RelatedEntity> expectedRelatedEntities) throws Exception {
     if (relationships.getDirection() == RelationshipDirection.UNDIRECTED) {
       // https://github.com/linkedin/datahub/issues/3114
       throw new SkipException("ElasticSearchGraphService does not implement UNDIRECTED relationship filter");
@@ -166,7 +161,8 @@ public void testFindRelatedEntitiesDestinationType(String datasetType,
       // https://github.com/linkedin/datahub/issues/3116
       throw new SkipException("ElasticSearchGraphService does not support empty destination type");
     }
-    super.testFindRelatedEntitiesDestinationType(datasetType, relationshipTypes, relationships, expectedRelatedEntities);
+    super.testFindRelatedEntitiesDestinationType(datasetType, relationshipTypes, relationships,
+        expectedRelatedEntities);
   }
 
   @Test
@@ -177,23 +173,18 @@ public void testFindRelatedEntitiesNoRelationshipTypes() {
   }
 
   @Override
-  public void testRemoveEdgesFromNode(@Nonnull Urn nodeToRemoveFrom,
-                                      @Nonnull List<String> relationTypes,
-                                      @Nonnull RelationshipFilter relationshipFilter,
-                                      List<RelatedEntity> expectedOutgoingRelatedUrnsBeforeRemove,
-                                      List<RelatedEntity> expectedIncomingRelatedUrnsBeforeRemove,
-                                      List<RelatedEntity> expectedOutgoingRelatedUrnsAfterRemove,
-                                      List<RelatedEntity> expectedIncomingRelatedUrnsAfterRemove) throws Exception {
+  public void testRemoveEdgesFromNode(@Nonnull Urn nodeToRemoveFrom, @Nonnull List<String> relationTypes,
+      @Nonnull RelationshipFilter relationshipFilter, List<RelatedEntity> expectedOutgoingRelatedUrnsBeforeRemove,
+      List<RelatedEntity> expectedIncomingRelatedUrnsBeforeRemove,
+      List<RelatedEntity> expectedOutgoingRelatedUrnsAfterRemove,
+      List<RelatedEntity> expectedIncomingRelatedUrnsAfterRemove) throws Exception {
     if (relationshipFilter.getDirection() == RelationshipDirection.UNDIRECTED) {
       // https://github.com/linkedin/datahub/issues/3114
       throw new SkipException("ElasticSearchGraphService does not implement UNDIRECTED relationship filter");
     }
-    super.testRemoveEdgesFromNode(
-            nodeToRemoveFrom,
-            relationTypes, relationshipFilter,
-            expectedOutgoingRelatedUrnsBeforeRemove, expectedIncomingRelatedUrnsBeforeRemove,
-            expectedOutgoingRelatedUrnsAfterRemove, expectedIncomingRelatedUrnsAfterRemove
-    );
+    super.testRemoveEdgesFromNode(nodeToRemoveFrom, relationTypes, relationshipFilter,
+        expectedOutgoingRelatedUrnsBeforeRemove, expectedIncomingRelatedUrnsBeforeRemove,
+        expectedOutgoingRelatedUrnsAfterRemove, expectedIncomingRelatedUrnsAfterRemove);
   }
 
   @Test
@@ -207,7 +198,8 @@ public void testRemoveEdgesFromNodeNoRelationshipTypes() {
   @Override
   public void testConcurrentAddEdge() {
     // https://github.com/linkedin/datahub/issues/3124
-    throw new SkipException("This test is flaky for ElasticSearchGraphService, ~5% of the runs fail on a race condition");
+    throw new SkipException(
+        "This test is flaky for ElasticSearchGraphService, ~5% of the runs fail on a race condition");
   }
 
   @Test
@@ -223,5 +215,4 @@ public void testConcurrentRemoveNodes() {
     // https://github.com/linkedin/datahub/issues/3118
     throw new SkipException("ElasticSearchGraphService produces duplicates");
   }
-
 }
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationCandidateSourceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationCandidateSourceTest.java
index 7083b03580634..2d3fac8b608e1 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationCandidateSourceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationCandidateSourceTest.java
@@ -1,6 +1,5 @@
 package com.linkedin.metadata.recommendation.candidatesource;
 
-import com.google.common.collect.ImmutableList;
 import com.linkedin.common.urn.CorpuserUrn;
 import com.linkedin.common.urn.TestEntityUrn;
 import com.linkedin.common.urn.Urn;
@@ -86,19 +85,9 @@ public boolean isEligible(@Nonnull Urn userUrn, @Nonnull RecommendationRequestCo
     };
   }
 
-  @Test
-  public void testWhenNonEmptyCacheReturnsEmpty() {
-    Mockito.when(_nonEmptyEntitiesCache.getNonEmptyEntities()).thenReturn(Collections.emptyList());
-    List<RecommendationContent> candidates = _valueBasedCandidateSource.getRecommendations(USER, CONTEXT);
-    assertTrue(candidates.isEmpty());
-    Mockito.verifyZeroInteractions(_entitySearchService);
-    assertFalse(_valueBasedCandidateSource.getRecommendationModule(USER, CONTEXT).isPresent());
-  }
-
   @Test
   public void testWhenSearchServiceReturnsEmpty() {
-    Mockito.when(_nonEmptyEntitiesCache.getNonEmptyEntities()).thenReturn(ImmutableList.of("testEntity"));
-    Mockito.when(_entitySearchService.aggregateByValue(eq("testEntity"), eq("testValue"), eq(null), anyInt()))
+    Mockito.when(_entitySearchService.aggregateByValue(eq(null), eq("testValue"), eq(null), anyInt()))
         .thenReturn(Collections.emptyMap());
     List<RecommendationContent> candidates = _valueBasedCandidateSource.getRecommendations(USER, CONTEXT);
     assertTrue(candidates.isEmpty());
@@ -107,9 +96,8 @@ public void testWhenSearchServiceReturnsEmpty() {
 
   @Test
   public void testWhenSearchServiceReturnsValueResults() {
-    // One entity type, one result
-    Mockito.when(_nonEmptyEntitiesCache.getNonEmptyEntities()).thenReturn(ImmutableList.of("testEntity"));
-    Mockito.when(_entitySearchService.aggregateByValue(eq("testEntity"), eq("testValue"), eq(null), anyInt()))
+    // One result
+    Mockito.when(_entitySearchService.aggregateByValue(eq(null), eq("testValue"), eq(null), anyInt()))
         .thenReturn(ImmutableMap.of("value1", 1L));
     List<RecommendationContent> candidates = _valueBasedCandidateSource.getRecommendations(USER, CONTEXT);
     assertEquals(candidates.size(), 1);
@@ -127,8 +115,8 @@ public void testWhenSearchServiceReturnsValueResults() {
     assertEquals(params.getContentParams().getCount().longValue(), 1L);
     assertTrue(_valueBasedCandidateSource.getRecommendationModule(USER, CONTEXT).isPresent());
 
-    // One entity type, multiple result
-    Mockito.when(_entitySearchService.aggregateByValue(eq("testEntity"), eq("testValue"), eq(null), anyInt()))
+    // Multiple results
+    Mockito.when(_entitySearchService.aggregateByValue(eq(null), eq("testValue"), eq(null), anyInt()))
         .thenReturn(ImmutableMap.of("value1", 1L, "value2", 2L, "value3", 3L));
     candidates = _valueBasedCandidateSource.getRecommendations(USER, CONTEXT);
     assertEquals(candidates.size(), 2);
@@ -157,54 +145,15 @@ public void testWhenSearchServiceReturnsValueResults() {
     assertNotNull(params.getContentParams());
     assertEquals(params.getContentParams().getCount().longValue(), 2L);
     assertTrue(_valueBasedCandidateSource.getRecommendationModule(USER, CONTEXT).isPresent());
-
-    // Multiple entity type, multiple result
-    Mockito.when(_nonEmptyEntitiesCache.getNonEmptyEntities())
-        .thenReturn(ImmutableList.of("testEntity", "testEntity2"));
-    Mockito.when(_entitySearchService.aggregateByValue(eq("testEntity"), eq("testValue"), eq(null), anyInt()))
-        .thenReturn(ImmutableMap.of("value1", 1L, "value3", 3L));
-    Mockito.when(_entitySearchService.aggregateByValue(eq("testEntity2"), eq("testValue"), eq(null), anyInt()))
-        .thenReturn(ImmutableMap.of("value1", 3L, "value2", 2L));
-    candidates = _valueBasedCandidateSource.getRecommendations(USER, CONTEXT);
-    assertEquals(candidates.size(), 2);
-    content = candidates.get(0);
-    assertEquals(content.getValue(), "value1");
-    assertNull(content.getEntity());
-    params = content.getParams();
-    assertNotNull(params);
-    assertNotNull(params.getSearchParams());
-    assertTrue(StringUtils.isEmpty(params.getSearchParams().getQuery()));
-    assertEquals(params.getSearchParams().getFilters().size(), 1);
-    assertEquals(params.getSearchParams().getFilters().get(0),
-        new Criterion().setField("testValue").setValue("value1"));
-    assertNotNull(params.getContentParams());
-    assertEquals(params.getContentParams().getCount().longValue(), 4L);
-    content = candidates.get(1);
-    assertEquals(content.getValue(), "value3");
-    assertNull(content.getEntity());
-    params = content.getParams();
-    assertNotNull(params);
-    assertNotNull(params.getSearchParams());
-    assertTrue(StringUtils.isEmpty(params.getSearchParams().getQuery()));
-    assertEquals(params.getSearchParams().getFilters().size(), 1);
-    assertEquals(params.getSearchParams().getFilters().get(0),
-        new Criterion().setField("testValue").setValue("value3"));
-    assertNotNull(params.getContentParams());
-    assertEquals(params.getContentParams().getCount().longValue(), 3L);
-    assertTrue(_valueBasedCandidateSource.getRecommendationModule(USER, CONTEXT).isPresent());
   }
 
   @Test
   public void testWhenSearchServiceReturnsUrnResults() {
-    // One entity type, one result
-    Mockito.when(_nonEmptyEntitiesCache.getNonEmptyEntities()).thenReturn(ImmutableList.of("testEntity"));
+    // One result
     Urn testUrn1 = new TestEntityUrn("testUrn1", "testUrn1", "testUrn1");
     Urn testUrn2 = new TestEntityUrn("testUrn2", "testUrn2", "testUrn2");
     Urn testUrn3 = new TestEntityUrn("testUrn3", "testUrn3", "testUrn3");
-//    Urn testUrn1 = new TestEntityUrn("testUrn1", TestEntityUtil.getTestEntityUrn().toString(), "VALUE_1");
-//    Urn testUrn2 = new TestEntityUrn("testUrn2", TestEntityUtil.getTestEntityUrn().toString(), "VALUE_1");
-//    Urn testUrn3 = new TestEntityUrn("testUrn3", TestEntityUtil.getTestEntityUrn().toString(), "VALUE_1");
-    Mockito.when(_entitySearchService.aggregateByValue(eq("testEntity"), eq("testUrn"), eq(null), anyInt()))
+    Mockito.when(_entitySearchService.aggregateByValue(eq(null), eq("testUrn"), eq(null), anyInt()))
         .thenReturn(ImmutableMap.of(testUrn1.toString(), 1L));
     List<RecommendationContent> candidates = _urnBasedCandidateSource.getRecommendations(USER, CONTEXT);
     assertEquals(candidates.size(), 1);
@@ -222,8 +171,8 @@ public void testWhenSearchServiceReturnsUrnResults() {
     assertEquals(params.getContentParams().getCount().longValue(), 1L);
     assertTrue(_urnBasedCandidateSource.getRecommendationModule(USER, CONTEXT).isPresent());
 
-    // One entity type, multiple result
-    Mockito.when(_entitySearchService.aggregateByValue(eq("testEntity"), eq("testUrn"), eq(null), anyInt()))
+    // Multiple results
+    Mockito.when(_entitySearchService.aggregateByValue(eq(null), eq("testUrn"), eq(null), anyInt()))
         .thenReturn(ImmutableMap.of(testUrn1.toString(), 1L, testUrn2.toString(), 2L, testUrn3.toString(), 3L));
     candidates = _urnBasedCandidateSource.getRecommendations(USER, CONTEXT);
     assertEquals(candidates.size(), 2);
@@ -252,40 +201,5 @@ public void testWhenSearchServiceReturnsUrnResults() {
     assertNotNull(params.getContentParams());
     assertEquals(params.getContentParams().getCount().longValue(), 2L);
     assertTrue(_urnBasedCandidateSource.getRecommendationModule(USER, CONTEXT).isPresent());
-
-    // Multiple entity type, multiple result
-    Mockito.when(_nonEmptyEntitiesCache.getNonEmptyEntities())
-        .thenReturn(ImmutableList.of("testEntity", "testEntity2"));
-    Mockito.when(_entitySearchService.aggregateByValue(eq("testEntity"), eq("testUrn"), eq(null), anyInt()))
-        .thenReturn(ImmutableMap.of(testUrn1.toString(), 1L, testUrn3.toString(), 3L));
-    Mockito.when(_entitySearchService.aggregateByValue(eq("testEntity2"), eq("testUrn"), eq(null), anyInt()))
-        .thenReturn(ImmutableMap.of(testUrn1.toString(), 3L, testUrn2.toString(), 2L));
-    candidates = _urnBasedCandidateSource.getRecommendations(USER, CONTEXT);
-    assertEquals(candidates.size(), 2);
-    content = candidates.get(0);
-    assertEquals(content.getValue(), testUrn1.toString());
-    assertEquals(content.getEntity(), testUrn1);
-    params = content.getParams();
-    assertNotNull(params);
-    assertNotNull(params.getSearchParams());
-    assertTrue(StringUtils.isEmpty(params.getSearchParams().getQuery()));
-    assertEquals(params.getSearchParams().getFilters().size(), 1);
-    assertEquals(params.getSearchParams().getFilters().get(0),
-        new Criterion().setField("testUrn").setValue(testUrn1.toString()));
-    assertNotNull(params.getContentParams());
-    assertEquals(params.getContentParams().getCount().longValue(), 4L);
-    content = candidates.get(1);
-    assertEquals(content.getValue(), testUrn3.toString());
-    assertEquals(content.getEntity(), testUrn3);
-    params = content.getParams();
-    assertNotNull(params);
-    assertNotNull(params.getSearchParams());
-    assertTrue(StringUtils.isEmpty(params.getSearchParams().getQuery()));
-    assertEquals(params.getSearchParams().getFilters().size(), 1);
-    assertEquals(params.getSearchParams().getFilters().get(0),
-        new Criterion().setField("testUrn").setValue(testUrn3.toString()));
-    assertNotNull(params.getContentParams());
-    assertEquals(params.getContentParams().getCount().longValue(), 3L);
-    assertTrue(_urnBasedCandidateSource.getRecommendationModule(USER, CONTEXT).isPresent());
   }
 }
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java
index f9836273ce434..8d7a18313793a 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java
@@ -9,7 +9,8 @@
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import com.linkedin.metadata.models.registry.SnapshotEntityRegistry;
 import com.linkedin.metadata.search.elasticsearch.ElasticSearchService;
-import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilders;
+import com.linkedin.metadata.search.elasticsearch.ElasticSearchServiceTest;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
 import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder;
 import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO;
 import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO;
@@ -88,11 +89,13 @@ private RestHighLevelClient buildRestClient() {
 
   @Nonnull
   private ElasticSearchService buildEntitySearchService() {
-    ESIndexBuilders indexBuilders =
-        new ESIndexBuilders(_entityRegistry, _searchClient, _indexConvention, _settingsBuilder);
+    EntityIndexBuilders indexBuilders =
+        new EntityIndexBuilders(ElasticSearchServiceTest.getIndexBuilder(_searchClient), _entityRegistry,
+            _indexConvention, _settingsBuilder);
     ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, _searchClient, _indexConvention);
     ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, _searchClient, _indexConvention);
-    ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, _searchClient, _indexConvention, 1, 1, 1, 1);
+    ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, _searchClient, _indexConvention,
+        ElasticSearchServiceTest.getBulkProcessor(_searchClient));
     return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO);
   }
 
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java
index ed0fa2c60281b..f5813fd10cf02 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java
@@ -9,10 +9,12 @@
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import com.linkedin.metadata.models.registry.SnapshotEntityRegistry;
 import com.linkedin.metadata.search.SearchResult;
-import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilders;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
 import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder;
 import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO;
 import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO;
+import com.linkedin.metadata.search.elasticsearch.update.BulkListener;
 import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO;
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl;
@@ -20,9 +22,13 @@
 import javax.annotation.Nonnull;
 import org.apache.http.HttpHost;
 import org.apache.http.impl.nio.reactor.IOReactorConfig;
+import org.elasticsearch.action.bulk.BackoffPolicy;
+import org.elasticsearch.action.bulk.BulkProcessor;
+import org.elasticsearch.client.RequestOptions;
 import org.elasticsearch.client.RestClient;
 import org.elasticsearch.client.RestClientBuilder;
 import org.elasticsearch.client.RestHighLevelClient;
+import org.elasticsearch.common.unit.TimeValue;
 import org.testcontainers.elasticsearch.ElasticsearchContainer;
 import org.testcontainers.shaded.com.google.common.collect.ImmutableMap;
 import org.testng.annotations.AfterTest;
@@ -80,13 +86,28 @@ private RestHighLevelClient buildRestClient() {
     return new RestHighLevelClient(builder);
   }
 
+  /** Test-only processor: bulk size 1, 1s flush interval, one retry after a constant 1s backoff. */
+  public static BulkProcessor getBulkProcessor(RestHighLevelClient searchClient) {
+    return BulkProcessor.builder(
+            (request, bulkListener) -> searchClient.bulkAsync(request, RequestOptions.DEFAULT, bulkListener),
+            BulkListener.getInstance())
+        .setBulkActions(1)
+        .setFlushInterval(TimeValue.timeValueSeconds(1))
+        .setBackoffPolicy(BackoffPolicy.constantBackoff(TimeValue.timeValueSeconds(1), 1)).build();
+  }
+
+  public static ESIndexBuilder getIndexBuilder(RestHighLevelClient searchClient) {
+    return new ESIndexBuilder(searchClient, 1, 1);
+  }
+
   @Nonnull
   private ElasticSearchService buildService() {
-    ESIndexBuilders indexBuilders =
-        new ESIndexBuilders(_entityRegistry, _searchClient, _indexConvention, _settingsBuilder);
+    EntityIndexBuilders indexBuilders =
+        new EntityIndexBuilders(getIndexBuilder(_searchClient), _entityRegistry, _indexConvention, _settingsBuilder);
     ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, _searchClient, _indexConvention);
     ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, _searchClient, _indexConvention);
-    ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, _searchClient, _indexConvention, 1, 1, 1, 1);
+    ESWriteDAO writeDAO =
+        new ESWriteDAO(_entityRegistry, _searchClient, _indexConvention, getBulkProcessor(_searchClient));
     return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO);
   }
 
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java
index c9cbc83ebdbea..45c5b9183588f 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java
@@ -22,9 +22,10 @@ public class SearchDocumentTransformerTest {
 
   @Test
   public void testTransform() throws IOException {
+    SearchDocumentTransformer searchDocumentTransformer = new SearchDocumentTransformer(1000);
     TestEntitySnapshot snapshot = TestEntityUtil.getSnapshot();
     EntitySpec testEntitySpec = TestEntitySpecBuilder.getSpec();
-    Optional<String> result = SearchDocumentTransformer.transformSnapshot(snapshot, testEntitySpec, false);
+    Optional<String> result = searchDocumentTransformer.transformSnapshot(snapshot, testEntitySpec, false);
     assertTrue(result.isPresent());
     ObjectNode parsedJson = (ObjectNode) OBJECT_MAPPER.readTree(result.get());
     assertEquals(parsedJson.get("urn").asText(), snapshot.getUrn().toString());
@@ -51,9 +52,10 @@ public void testTransform() throws IOException {
 
   @Test
   public void testTransformForDelete() throws IOException {
+    SearchDocumentTransformer searchDocumentTransformer = new SearchDocumentTransformer(1000);
     TestEntitySnapshot snapshot = TestEntityUtil.getSnapshot();
     EntitySpec testEntitySpec = TestEntitySpecBuilder.getSpec();
-    Optional<String> result = SearchDocumentTransformer.transformSnapshot(snapshot, testEntitySpec, true);
+    Optional<String> result = searchDocumentTransformer.transformSnapshot(snapshot, testEntitySpec, true);
     assertTrue(result.isPresent());
     ObjectNode parsedJson = (ObjectNode) OBJECT_MAPPER.readTree(result.get());
     assertEquals(parsedJson.get("urn").asText(), snapshot.getUrn().toString());
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataServiceTest.java
index 82e6936b9fe48..ffe9b366dd170 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataServiceTest.java
@@ -2,6 +2,7 @@
 
 import com.linkedin.metadata.run.AspectRowSummary;
 import com.linkedin.metadata.run.IngestionRunSummary;
+import com.linkedin.metadata.search.elasticsearch.ElasticSearchServiceTest;
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl;
 import com.linkedin.mxe.SystemMetadata;
@@ -21,7 +22,8 @@
 import static com.linkedin.metadata.DockerTestUtils.checkContainerEngine;
 import static com.linkedin.metadata.ElasticSearchTestUtils.syncAfterWrite;
 import static com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService.INDEX_NAME;
-import static org.testng.Assert.*;
+import static org.testng.Assert.assertEquals;
+
 
 public class ElasticSearchSystemMetadataServiceTest {
 
@@ -65,8 +67,13 @@ private RestHighLevelClient buildRestClient() {
 
+  /**
+   * Builds the service under test, taking the bulk processor and index builder from ElasticSearchServiceTest.
+   */
   @Nonnull
   private ElasticSearchSystemMetadataService buildService() {
-    ESSystemMetadataDAO dao = new ESSystemMetadataDAO(_searchClient, _indexConvention, 1, 1, 1, 1);
-    return new ElasticSearchSystemMetadataService(_searchClient, _indexConvention, dao);
+    return new ElasticSearchSystemMetadataService(_searchClient, _indexConvention,
+        new ESSystemMetadataDAO(_searchClient, _indexConvention,
+            ElasticSearchServiceTest.getBulkProcessor(_searchClient)),
+        ElasticSearchServiceTest.getIndexBuilder(_searchClient));
   }
 
   @AfterTest
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectServiceTest.java
index 0813414d133f1..77d81c4f84a76 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectServiceTest.java
@@ -20,6 +20,7 @@
 import com.linkedin.metadata.query.filter.Condition;
 import com.linkedin.metadata.query.filter.Criterion;
 import com.linkedin.metadata.query.filter.Filter;
+import com.linkedin.metadata.search.elasticsearch.ElasticSearchServiceTest;
 import com.linkedin.metadata.search.utils.QueryUtils;
 import com.linkedin.metadata.timeseries.elastic.indexbuilder.TimeseriesAspectIndexBuilders;
 import com.linkedin.metadata.timeseries.transformer.TimeseriesAspectTransformer;
@@ -51,8 +52,9 @@
 import org.testng.annotations.Test;
 
 import static com.linkedin.metadata.DockerTestUtils.checkContainerEngine;
-import static com.linkedin.metadata.ElasticSearchTestUtils.*;
-import static org.testng.Assert.*;
+import static com.linkedin.metadata.ElasticSearchTestUtils.syncAfterWrite;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
 
 
 public class ElasticSearchTimeseriesAspectServiceTest {
@@ -114,8 +116,8 @@ private RestHighLevelClient buildRestClient() {
   @Nonnull
   private ElasticSearchTimeseriesAspectService buildService() {
     return new ElasticSearchTimeseriesAspectService(_searchClient, _indexConvention,
-        new TimeseriesAspectIndexBuilders(_entityRegistry, _searchClient, _indexConvention), _entityRegistry, 1, 1, 3,
-        1);
+        new TimeseriesAspectIndexBuilders(ElasticSearchServiceTest.getIndexBuilder(_searchClient), _entityRegistry,
+            _indexConvention), _entityRegistry, ElasticSearchServiceTest.getBulkProcessor(_searchClient));
   }
 
   @AfterTest
@@ -265,7 +267,8 @@ public void testGetAggregatedStatsLatestStatForDay1() {
         .setCondition(Condition.LESS_THAN_OR_EQUAL_TO)
         .setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT));
 
-    Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
+    Filter filter =
+        QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
 
     // Aggregate on latest stat value
     AggregationSpec latestStatAggregationSpec =
@@ -301,7 +304,8 @@ public void testGetAggregatedStatsLatestStrArrayDay1() {
         .setCondition(Condition.LESS_THAN_OR_EQUAL_TO)
         .setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT));
 
-    Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
+    Filter filter =
+        QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
 
     // Aggregate on latest stat value
     AggregationSpec latestStatAggregationSpec =
@@ -345,7 +349,8 @@ public void testGetAggregatedStatsLatestStatForTwoDays() {
         .setCondition(Condition.LESS_THAN_OR_EQUAL_TO)
         .setValue(String.valueOf(_startTime + 47 * TIME_INCREMENT));
 
-    Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
+    Filter filter =
+        QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
 
     // Aggregate on latest stat value
     AggregationSpec latestStatAggregationSpec =
@@ -384,7 +389,8 @@ public void testGetAggregatedStatsLatestStatForFirst10HoursOfDay1() {
         .setCondition(Condition.LESS_THAN_OR_EQUAL_TO)
         .setValue(String.valueOf(_startTime + 9 * TIME_INCREMENT));
 
-    Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
+    Filter filter =
+        QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
 
     // Aggregate on latest stat value
     AggregationSpec latestStatAggregationSpec =
@@ -422,7 +428,8 @@ public void testGetAggregatedStatsLatestStatForCol1Day1() {
     Criterion hasCol1 =
         new Criterion().setField("componentProfiles.key").setCondition(Condition.EQUAL).setValue("col1");
 
-    Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, hasCol1, startTimeCriterion, endTimeCriterion));
+    Filter filter = QueryUtils.getFilterFromCriteria(
+        ImmutableList.of(hasUrnCriterion, hasCol1, startTimeCriterion, endTimeCriterion));
 
     // Aggregate on latest stat value
     AggregationSpec latestStatAggregationSpec =
@@ -463,7 +470,8 @@ public void testGetAggregatedStatsLatestStatForAllColumnsDay1() {
         .setCondition(Condition.LESS_THAN_OR_EQUAL_TO)
         .setValue(String.valueOf(lastEntryTimeStamp));
 
-    Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
+    Filter filter =
+        QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
 
     // Aggregate on latest stat value
     AggregationSpec latestStatAggregationSpec =
@@ -508,7 +516,8 @@ public void testGetAggregatedStatsSumStatForFirst10HoursOfDay1() {
         .setCondition(Condition.LESS_THAN_OR_EQUAL_TO)
         .setValue(String.valueOf(_startTime + 9 * TIME_INCREMENT));
 
-    Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
+    Filter filter =
+        QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
 
     // Aggregate the sum of stat value
     AggregationSpec sumAggregationSpec =
@@ -548,7 +557,8 @@ public void testGetAggregatedStatsSumStatForCol2Day1() {
     Criterion hasCol2 =
         new Criterion().setField("componentProfiles.key").setCondition(Condition.EQUAL).setValue("col2");
 
-    Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, hasCol2, startTimeCriterion, endTimeCriterion));
+    Filter filter = QueryUtils.getFilterFromCriteria(
+        ImmutableList.of(hasUrnCriterion, hasCol2, startTimeCriterion, endTimeCriterion));
 
     // Aggregate the sum of stat value
     AggregationSpec sumStatAggregationSpec =
@@ -591,7 +601,8 @@ public void testGetAggregatedStatsCardinalityAggStrStatDay1() {
         .setCondition(Condition.LESS_THAN_OR_EQUAL_TO)
         .setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT));
 
-    Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
+    Filter filter =
+        QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
 
     // Aggregate on latest stat value
     AggregationSpec cardinalityStatAggregationSpec =
@@ -626,7 +637,8 @@ public void testGetAggregatedStatsSumStatsCollectionDay1() {
         .setCondition(Condition.LESS_THAN_OR_EQUAL_TO)
         .setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT));
 
-    Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
+    Filter filter =
+        QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
 
     // Aggregate on latest stat value
     AggregationSpec cardinalityStatAggregationSpec =
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataAuditEventsProcessor.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataAuditEventsProcessor.java
index 9e1dcdad8c60e..26b4e37067643 100644
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataAuditEventsProcessor.java
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataAuditEventsProcessor.java
@@ -8,6 +8,7 @@
 import com.linkedin.gms.factory.common.GraphServiceFactory;
 import com.linkedin.gms.factory.common.SystemMetadataServiceFactory;
 import com.linkedin.gms.factory.search.EntitySearchServiceFactory;
+import com.linkedin.gms.factory.search.SearchDocumentTransformerFactory;
 import com.linkedin.metadata.EventUtils;
 import com.linkedin.metadata.dao.utils.RecordUtils;
 import com.linkedin.metadata.extractor.AspectExtractor;
@@ -57,22 +58,25 @@
 @Slf4j
 @Component
 @Conditional(MetadataChangeLogProcessorCondition.class)
-@Import({GraphServiceFactory.class, EntitySearchServiceFactory.class, SystemMetadataServiceFactory.class})
+@Import({GraphServiceFactory.class, EntitySearchServiceFactory.class, SystemMetadataServiceFactory.class,
+    SearchDocumentTransformerFactory.class})
 @EnableKafka
 public class MetadataAuditEventsProcessor {
 
   private final GraphService _graphService;
   private final EntitySearchService _entitySearchService;
   private final SystemMetadataService _systemMetadataService;
+  private final SearchDocumentTransformer _searchDocumentTransformer;
 
   private final Histogram kafkaLagStats = MetricUtils.get().histogram(MetricRegistry.name(this.getClass(), "kafkaLag"));
 
   @Autowired
   public MetadataAuditEventsProcessor(GraphService graphService, EntitySearchService entitySearchService,
-      SystemMetadataService systemMetadataService) {
+      SystemMetadataService systemMetadataService, SearchDocumentTransformer searchDocumentTransformer) {
     _graphService = graphService;
     _entitySearchService = entitySearchService;
     _systemMetadataService = systemMetadataService;
+    _searchDocumentTransformer = searchDocumentTransformer;
 
     _graphService.configure();
     _entitySearchService.configure();
@@ -176,7 +180,7 @@ private void updateSearchService(final RecordTemplate snapshot, final EntitySpec
     Optional<String> searchDocument;
 
     try {
-      searchDocument = SearchDocumentTransformer.transformSnapshot(snapshot, entitySpec, false);
+      searchDocument = _searchDocumentTransformer.transformSnapshot(snapshot, entitySpec, false);
     } catch (Exception e) {
       log.error("Error in getting documents from snapshot: {} for snapshot {}", e, snapshot);
       return;
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java
index bff5bdcfdf838..bf5de456008de 100644
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java
@@ -11,6 +11,7 @@
 import com.linkedin.gms.factory.common.SystemMetadataServiceFactory;
 import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory;
 import com.linkedin.gms.factory.search.EntitySearchServiceFactory;
+import com.linkedin.gms.factory.search.SearchDocumentTransformerFactory;
 import com.linkedin.gms.factory.timeseries.TimeseriesAspectServiceFactory;
 import com.linkedin.metadata.EventUtils;
 import com.linkedin.metadata.extractor.FieldExtractor;
@@ -55,14 +56,15 @@
 import org.springframework.kafka.annotation.KafkaListener;
 import org.springframework.stereotype.Component;
 
-import static com.linkedin.metadata.search.utils.QueryUtils.*;
+import static com.linkedin.metadata.search.utils.QueryUtils.createRelationshipFilter;
+import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter;
 
 
 @Slf4j
 @Component
 @Conditional(MetadataChangeLogProcessorCondition.class)
 @Import({GraphServiceFactory.class, EntitySearchServiceFactory.class, TimeseriesAspectServiceFactory.class,
-    EntityRegistryFactory.class, SystemMetadataServiceFactory.class})
+    EntityRegistryFactory.class, SystemMetadataServiceFactory.class, SearchDocumentTransformerFactory.class})
 @EnableKafka
 public class MetadataChangeLogProcessor {
 
@@ -71,17 +73,20 @@ public class MetadataChangeLogProcessor {
   private final TimeseriesAspectService _timeseriesAspectService;
   private final SystemMetadataService _systemMetadataService;
   private final EntityRegistry _entityRegistry;
+  private final SearchDocumentTransformer _searchDocumentTransformer;
 
   private final Histogram kafkaLagStats = MetricUtils.get().histogram(MetricRegistry.name(this.getClass(), "kafkaLag"));
 
   @Autowired
   public MetadataChangeLogProcessor(GraphService graphService, EntitySearchService entitySearchService,
-      TimeseriesAspectService timeseriesAspectService, SystemMetadataService systemMetadataService, EntityRegistry entityRegistry) {
+      TimeseriesAspectService timeseriesAspectService, SystemMetadataService systemMetadataService,
+      EntityRegistry entityRegistry, SearchDocumentTransformer searchDocumentTransformer) {
     _graphService = graphService;
     _entitySearchService = entitySearchService;
     _timeseriesAspectService = timeseriesAspectService;
     _systemMetadataService = systemMetadataService;
     _entityRegistry = entityRegistry;
+    _searchDocumentTransformer = searchDocumentTransformer;
 
     _timeseriesAspectService.configure();
   }
@@ -162,7 +167,8 @@ public void consume(final ConsumerRecord<String, GenericRecord> consumerRecord)
     }
   }
 
-  private Pair<List<Edge>, Set<String>> getEdgesAndRelationshipTypesFromAspect(Urn urn, AspectSpec aspectSpec, RecordTemplate aspect) {
+  private Pair<List<Edge>, Set<String>> getEdgesAndRelationshipTypesFromAspect(Urn urn, AspectSpec aspectSpec,
+      RecordTemplate aspect) {
     final Set<String> relationshipTypesBeingAdded = new HashSet<>();
     final List<Edge> edgesToAdd = new ArrayList<>();
 
@@ -209,7 +215,7 @@ private void updateGraphService(Urn urn, AspectSpec aspectSpec, RecordTemplate a
   private void updateSearchService(String entityName, Urn urn, AspectSpec aspectSpec, RecordTemplate aspect) {
     Optional<String> searchDocument;
     try {
-      searchDocument = SearchDocumentTransformer.transformAspect(urn, aspect, aspectSpec, false);
+      searchDocument = _searchDocumentTransformer.transformAspect(urn, aspect, aspectSpec, false);
     } catch (Exception e) {
       log.error("Error in getting documents from aspect: {} for aspect {}", e, aspectSpec.getName());
       return;
@@ -270,11 +276,13 @@ private void deleteGraphData(Urn urn, AspectSpec aspectSpec, RecordTemplate aspe
     final Set<String> relationshipTypesBeingAdded = edgeAndRelationTypes.getSecond();
     if (relationshipTypesBeingAdded.size() > 0) {
       _graphService.removeEdgesFromNode(urn, new ArrayList<>(relationshipTypesBeingAdded),
-          createRelationshipFilter(new Filter().setOr(new ConjunctiveCriterionArray()), RelationshipDirection.OUTGOING));
+          createRelationshipFilter(new Filter().setOr(new ConjunctiveCriterionArray()),
+              RelationshipDirection.OUTGOING));
     }
   }
 
-  private void deleteSearchData(Urn urn, String entityName, AspectSpec aspectSpec, RecordTemplate aspect, Boolean isKeyAspect) {
+  private void deleteSearchData(Urn urn, String entityName, AspectSpec aspectSpec, RecordTemplate aspect,
+      Boolean isKeyAspect) {
     String docId;
     try {
       docId = URLEncoder.encode(urn.toString(), "UTF-8");
@@ -290,7 +298,7 @@ private void deleteSearchData(Urn urn, String entityName, AspectSpec aspectSpec,
 
     Optional<String> searchDocument;
     try {
-      searchDocument = SearchDocumentTransformer.transformAspect(urn, aspect, aspectSpec, true);
+      searchDocument = _searchDocumentTransformer.transformAspect(urn, aspect, aspectSpec, true);
     } catch (Exception e) {
       log.error("Error in getting documents from aspect: {} for aspect {}", e, aspectSpec.getName());
       return;
diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/GlossaryTermAssociation.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/GlossaryTermAssociation.pdl
index bcd859f8b5898..e10507b52534e 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/common/GlossaryTermAssociation.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/common/GlossaryTermAssociation.pdl
@@ -9,7 +9,7 @@ record GlossaryTermAssociation {
   */
   @Searchable = {
     "fieldName": "glossaryTerms",
-    "fieldType": "URN_PARTIAL",
+    "fieldType": "URN",
     "addToFilters": true,
     "filterNameOverride": "Glossary Term"
   }
diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/TagAssociation.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/TagAssociation.pdl
index 165879eba3998..b04eb0811ec98 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/common/TagAssociation.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/common/TagAssociation.pdl
@@ -10,7 +10,7 @@ record TagAssociation {
   */
   @Searchable = {
     "fieldName": "tags",
-    "fieldType": "URN_PARTIAL",
+    "fieldType": "URN",
     "hasValuesFieldName": "hasTags",
     "addToFilters": true,
     "filterNameOverride": "Tag"
diff --git a/metadata-models/src/main/pegasus/com/linkedin/schema/EditableSchemaFieldInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/schema/EditableSchemaFieldInfo.pdl
index e870fd1ba9cc0..9594281280e21 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/schema/EditableSchemaFieldInfo.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/schema/EditableSchemaFieldInfo.pdl
@@ -28,7 +28,7 @@ record EditableSchemaFieldInfo {
   @Searchable = {
     "/tags/*/tag": {
       "fieldName": "editedFieldTags",
-      "fieldType": "URN_PARTIAL",
+      "fieldType": "URN",
       "boostScore": 0.5
     }
   }
@@ -40,7 +40,7 @@ record EditableSchemaFieldInfo {
    @Searchable = {
     "/terms/*/urn": {
       "fieldName": "editedFieldGlossaryTerms",
-      "fieldType": "URN_PARTIAL",
+      "fieldType": "URN",
       "boostScore": 0.5
     }
   }
diff --git a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl
index ce8edb560a72f..34197152ac5d8 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl
@@ -14,7 +14,7 @@ record SchemaField {
    */
   @Searchable = {
     "fieldName": "fieldPaths",
-    "fieldType": "TEXT_PARTIAL"
+    "fieldType": "TEXT"
   }
   fieldPath: SchemaFieldPath
 
@@ -59,7 +59,7 @@ record SchemaField {
   @Searchable = {
     "/tags/*/tag": {
       "fieldName": "fieldTags",
-      "fieldType": "URN_PARTIAL",
+      "fieldType": "URN",
       "boostScore": 0.5
     }
   }
@@ -71,7 +71,7 @@ record SchemaField {
    @Searchable = {
     "/terms/*/urn": {
       "fieldName": "fieldGlossaryTerms",
-      "fieldType": "URN_PARTIAL",
+      "fieldType": "URN",
       "boostScore": 0.5
     }
   }
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java
index 4362786e1f0b7..96ccf86387046 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java
@@ -1,15 +1,13 @@
 package com.linkedin.gms.factory.common;
 
+import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory;
 import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
 import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO;
 import com.linkedin.metadata.graph.elastic.ESGraphWriteDAO;
 import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService;
-import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import javax.annotation.Nonnull;
-import org.elasticsearch.client.RestHighLevelClient;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Qualifier;
-import org.springframework.beans.factory.annotation.Value;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Configuration;
 import org.springframework.context.annotation.Import;
@@ -18,41 +16,29 @@
 
+/**
+ * Spring configuration that builds the {@code elasticSearchGraphService} bean from the shared
+ * {@code baseElasticSearchComponents} bean.
+ */
 @Configuration
 @PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
-@Import({RestHighLevelClientFactory.class, IndexConventionFactory.class})
+@Import({BaseElasticSearchComponentsFactory.class})
 public class ElasticSearchGraphServiceFactory {
   @Autowired
-  @Qualifier("elasticSearchRestHighLevelClient")
-  private RestHighLevelClient searchClient;
-
-  @Autowired
-  @Qualifier(IndexConventionFactory.INDEX_CONVENTION_BEAN)
-  private IndexConvention indexConvention;
-
-  @Value("${elasticsearch.bulkProcessor.requestsLimit}")
-  private Integer bulkRequestsLimit;
-
-  @Value("${elasticsearch.bulkProcessor.flushPeriod}")
-  private Integer bulkFlushPeriod;
-
-  @Value("${elasticsearch.bulkProcessor.numRetries}")
-  private Integer numRetries;
-
-  @Value("${elasticsearch.bulkProcessor.retryInterval}")
-  private Long retryInterval;
+  @Qualifier("baseElasticSearchComponents")
+  private BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components;
 
+  /**
+   * Creates the Elasticsearch-backed graph service.
+   *
+   * @return the service wired with write and query DAOs built from the shared components
+   */
   @Bean(name = "elasticSearchGraphService")
   @Nonnull
   protected ElasticSearchGraphService getInstance() {
-    return new ElasticSearchGraphService(
-        searchClient,
-        indexConvention,
-        new ESGraphWriteDAO(
-            searchClient,
-            indexConvention,
-            bulkRequestsLimit,
-            bulkFlushPeriod,
-            numRetries,
-            retryInterval),
-        new ESGraphQueryDAO(searchClient, indexConvention));
+    final ESGraphWriteDAO writeDAO = new ESGraphWriteDAO(components.getSearchClient(),
+        components.getIndexConvention(), components.getBulkProcessor());
+    final ESGraphQueryDAO queryDAO =
+        new ESGraphQueryDAO(components.getSearchClient(), components.getIndexConvention());
+    return new ElasticSearchGraphService(components.getSearchClient(), components.getIndexConvention(), writeDAO,
+        queryDAO, components.getIndexBuilder());
   }
 }
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java
index 38d3b782b8558..25afaef5e8eb7 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java
@@ -1,14 +1,12 @@
 package com.linkedin.gms.factory.common;
 
+import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory;
 import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
 import com.linkedin.metadata.systemmetadata.ESSystemMetadataDAO;
 import com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService;
-import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import javax.annotation.Nonnull;
-import org.elasticsearch.client.RestHighLevelClient;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Qualifier;
-import org.springframework.beans.factory.annotation.Value;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Configuration;
 import org.springframework.context.annotation.Import;
@@ -17,41 +15,27 @@
 
+/**
+ * Spring configuration that builds the {@code elasticSearchSystemMetadataService} bean from the shared
+ * {@code baseElasticSearchComponents} bean.
+ */
 @Configuration
 @PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
-@Import({RestHighLevelClientFactory.class, IndexConventionFactory.class})
+@Import({BaseElasticSearchComponentsFactory.class})
 public class ElasticSearchSystemMetadataServiceFactory {
   @Autowired
-  @Qualifier("elasticSearchRestHighLevelClient")
-  private RestHighLevelClient searchClient;
-
-  @Autowired
-  @Qualifier(IndexConventionFactory.INDEX_CONVENTION_BEAN)
-  private IndexConvention indexConvention;
-
-  @Value("${elasticsearch.bulkProcessor.requestsLimit}")
-  private Integer bulkRequestsLimit;
-
-  @Value("${elasticsearch.bulkProcessor.flushPeriod}")
-  private Integer bulkFlushPeriod;
-
-  @Value("${elasticsearch.bulkProcessor.numRetries}")
-  private Integer numRetries;
-
-  @Value("${elasticsearch.bulkProcessor.retryInterval}")
-  private Long retryInterval;
+  @Qualifier("baseElasticSearchComponents")
+  private BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components;
 
+  /**
+   * Creates the Elasticsearch-backed system metadata service.
+   *
+   * @return the service wired with a DAO and index builder taken from the shared components
+   */
   @Bean(name = "elasticSearchSystemMetadataService")
   @Nonnull
   protected ElasticSearchSystemMetadataService getInstance() {
-    return new ElasticSearchSystemMetadataService(
-        searchClient,
-        indexConvention,
-        new ESSystemMetadataDAO(
-            searchClient,
-            indexConvention,
-            bulkRequestsLimit,
-            bulkFlushPeriod,
-            numRetries,
-            retryInterval)
-    );
+    final ESSystemMetadataDAO systemMetadataDAO = new ESSystemMetadataDAO(components.getSearchClient(),
+        components.getIndexConvention(), components.getBulkProcessor());
+    return new ElasticSearchSystemMetadataService(components.getSearchClient(), components.getIndexConvention(),
+        systemMetadataDAO, components.getIndexBuilder());
   }
 }
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java
index 9efb9323d00fc..a2816830f33ce 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java
@@ -19,7 +19,7 @@
 public class IndexConventionFactory {
   public static final String INDEX_CONVENTION_BEAN = "searchIndexConvention";
 
-  @Value("${elasticsearch.indexPrefix:}")
+  @Value("${elasticsearch.index.prefix:}")
   private String indexPrefix;
 
   @Bean(name = INDEX_CONVENTION_BEAN)
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/BaseElasticSearchComponentsFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/BaseElasticSearchComponentsFactory.java
new file mode 100644
index 0000000000000..d083c8bec4e43
--- /dev/null
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/BaseElasticSearchComponentsFactory.java
@@ -0,0 +1,71 @@
+package com.linkedin.gms.factory.search;
+
+import com.linkedin.gms.factory.common.IndexConventionFactory;
+import com.linkedin.gms.factory.common.RestHighLevelClientFactory;
+import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder;
+import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
+import javax.annotation.Nonnull;
+import lombok.Value;
+import org.elasticsearch.action.bulk.BulkProcessor;
+import org.elasticsearch.client.RestHighLevelClient;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Import;
+import org.springframework.context.annotation.PropertySource;
+
+
+/**
+ * Factory for the components required by any service that uses Elasticsearch.
+ *
+ * <p>Collects the REST client, index convention, bulk processor and index builder beans and exposes them as a
+ * single {@link BaseElasticSearchComponents} bean named {@code baseElasticSearchComponents}.
+ *
+ * <p>NOTE(review): the configuration class that provides the {@code elasticSearchIndexBuilder} bean is not
+ * referenced here; confirm it is imported or component-scanned wherever this factory is used.
+ */
+@Configuration
+@Import({RestHighLevelClientFactory.class, IndexConventionFactory.class, ElasticSearchBulkProcessorFactory.class})
+@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
+public class BaseElasticSearchComponentsFactory {
+  /**
+   * Immutable holder for the shared Elasticsearch components; Lombok's {@code @Value} generates the
+   * all-args constructor and the getters.
+   */
+  @Value
+  public static class BaseElasticSearchComponents {
+    RestHighLevelClient searchClient;
+    IndexConvention indexConvention;
+    BulkProcessor bulkProcessor;
+    ESIndexBuilder indexBuilder;
+  }
+
+  @Autowired
+  @Qualifier("elasticSearchRestHighLevelClient")
+  private RestHighLevelClient searchClient;
+
+  @Autowired
+  @Qualifier(IndexConventionFactory.INDEX_CONVENTION_BEAN)
+  private IndexConvention indexConvention;
+
+  @Autowired
+  @Qualifier("elasticSearchBulkProcessor")
+  private BulkProcessor bulkProcessor;
+
+  @Autowired
+  @Qualifier("elasticSearchIndexBuilder")
+  private ESIndexBuilder indexBuilder;
+
+  /**
+   * Bundles the injected components into one bean.
+   *
+   * @return a holder wrapping the injected client, index convention, bulk processor and index builder
+   */
+  @Bean(name = "baseElasticSearchComponents")
+  @Nonnull
+  protected BaseElasticSearchComponents getInstance() {
+    return new BaseElasticSearchComponents(searchClient, indexConvention, bulkProcessor, indexBuilder);
+  }
+}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java
new file mode 100644
index 0000000000000..0a7877acce8cf
--- /dev/null
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java
@@ -0,0 +1,52 @@
+package com.linkedin.gms.factory.search;
+
+import com.linkedin.gms.factory.common.RestHighLevelClientFactory;
+import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
+import com.linkedin.metadata.search.elasticsearch.update.BulkListener;
+import javax.annotation.Nonnull;
+import org.elasticsearch.action.bulk.BackoffPolicy;
+import org.elasticsearch.action.bulk.BulkProcessor;
+import org.elasticsearch.client.RequestOptions;
+import org.elasticsearch.client.RestHighLevelClient;
+import org.elasticsearch.common.unit.TimeValue;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Import;
+import org.springframework.context.annotation.PropertySource;
+
+
+@Configuration
+@Import({RestHighLevelClientFactory.class})
+@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
+public class ElasticSearchBulkProcessorFactory {
+  @Autowired
+  @Qualifier("elasticSearchRestHighLevelClient")
+  private RestHighLevelClient searchClient;
+
+  @Value("${elasticsearch.bulkProcessor.requestsLimit}")
+  private Integer bulkRequestsLimit;
+
+  @Value("${elasticsearch.bulkProcessor.flushPeriod}")
+  private Integer bulkFlushPeriod;
+
+  @Value("${elasticsearch.bulkProcessor.numRetries}")
+  private Integer numRetries;
+
+  @Value("${elasticsearch.bulkProcessor.retryInterval}")
+  private Long retryInterval;
+
+  @Bean(name = "elasticSearchBulkProcessor")
+  @Nonnull
+  protected BulkProcessor getInstance() {
+    return BulkProcessor.builder((request, bulkListener) -> {
+      searchClient.bulkAsync(request, RequestOptions.DEFAULT, bulkListener);
+    }, BulkListener.getInstance())
+        .setBulkActions(bulkRequestsLimit)
+        .setFlushInterval(TimeValue.timeValueSeconds(bulkFlushPeriod))
+        .setBackoffPolicy(BackoffPolicy.constantBackoff(TimeValue.timeValueSeconds(retryInterval), numRetries))
+        .build();
+  }
+}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java
new file mode 100644
index 0000000000000..46562cb54e338
--- /dev/null
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java
@@ -0,0 +1,36 @@
+package com.linkedin.gms.factory.search;
+
+import com.linkedin.gms.factory.common.RestHighLevelClientFactory;
+import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder;
+import javax.annotation.Nonnull;
+import org.elasticsearch.client.RestHighLevelClient;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Import;
+import org.springframework.context.annotation.PropertySource;
+
+
+@Configuration
+@Import({RestHighLevelClientFactory.class})
+@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
+public class ElasticSearchIndexBuilderFactory {
+  @Autowired
+  @Qualifier("elasticSearchRestHighLevelClient")
+  private RestHighLevelClient searchClient;
+
+  @Value("${elasticsearch.index.numShards}")
+  private Integer numShards;
+
+  @Value("${elasticsearch.index.numReplicas}")
+  private Integer numReplicas;
+
+  @Bean(name = "elasticSearchIndexBuilder")
+  @Nonnull
+  protected ESIndexBuilder getInstance() {
+    return new ESIndexBuilder(searchClient, numShards, numReplicas);
+  }
+}
\ No newline at end of file
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java
index 40c7717a3d0a4..551085fd7e363 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java
@@ -1,22 +1,17 @@
 package com.linkedin.gms.factory.search;
 
-import com.linkedin.gms.factory.common.IndexConventionFactory;
-import com.linkedin.gms.factory.common.RestHighLevelClientFactory;
 import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory;
 import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import com.linkedin.metadata.search.elasticsearch.ElasticSearchService;
-import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilders;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
 import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder;
 import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO;
 import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO;
 import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO;
-import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import javax.annotation.Nonnull;
-import org.elasticsearch.client.RestHighLevelClient;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Qualifier;
-import org.springframework.beans.factory.annotation.Value;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Configuration;
 import org.springframework.context.annotation.Import;
@@ -25,16 +20,11 @@
 
 @Configuration
 @PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
-@Import({RestHighLevelClientFactory.class, IndexConventionFactory.class, EntityRegistryFactory.class,
-    SettingsBuilderFactory.class})
+@Import({EntityRegistryFactory.class, SettingsBuilderFactory.class})
 public class ElasticSearchServiceFactory {
   @Autowired
-  @Qualifier("elasticSearchRestHighLevelClient")
-  private RestHighLevelClient searchClient;
-
-  @Autowired
-  @Qualifier(IndexConventionFactory.INDEX_CONVENTION_BEAN)
-  private IndexConvention indexConvention;
+  @Qualifier("baseElasticSearchComponents")
+  private BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components;
 
   @Autowired
   @Qualifier("entityRegistry")
@@ -44,25 +34,16 @@ public class ElasticSearchServiceFactory {
   @Qualifier("settingsBuilder")
   private SettingsBuilder settingsBuilder;
 
-  @Value("${elasticsearch.bulkProcessor.requestsLimit}")
-  private Integer bulkRequestsLimit;
-
-  @Value("${elasticsearch.bulkProcessor.flushPeriod}")
-  private Integer bulkFlushPeriod;
-
-  @Value("${elasticsearch.bulkProcessor.numRetries}")
-  private Integer numRetries;
-
-  @Value("${elasticsearch.bulkProcessor.retryInterval}")
-  private Long retryInterval;
-
   @Bean(name = "elasticSearchService")
   @Nonnull
   protected ElasticSearchService getInstance() {
-    ESSearchDAO esSearchDAO = new ESSearchDAO(entityRegistry, searchClient, indexConvention);
-    return new ElasticSearchService(new ESIndexBuilders(entityRegistry, searchClient, indexConvention, settingsBuilder),
-        esSearchDAO, new ESBrowseDAO(entityRegistry, searchClient, indexConvention),
-        new ESWriteDAO(entityRegistry, searchClient, indexConvention, bulkRequestsLimit, bulkFlushPeriod, numRetries,
-            retryInterval));
+    ESSearchDAO esSearchDAO =
+        new ESSearchDAO(entityRegistry, components.getSearchClient(), components.getIndexConvention());
+    return new ElasticSearchService(
+        new EntityIndexBuilders(components.getIndexBuilder(), entityRegistry, components.getIndexConvention(),
+            settingsBuilder), esSearchDAO,
+        new ESBrowseDAO(entityRegistry, components.getSearchClient(), components.getIndexConvention()),
+        new ESWriteDAO(entityRegistry, components.getSearchClient(), components.getIndexConvention(),
+            components.getBulkProcessor()));
   }
 }
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchDocumentTransformerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchDocumentTransformerFactory.java
new file mode 100644
index 0000000000000..f4ba72b4aadce
--- /dev/null
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchDocumentTransformerFactory.java
@@ -0,0 +1,21 @@
+package com.linkedin.gms.factory.search;
+
+import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
+import com.linkedin.metadata.search.transformer.SearchDocumentTransformer;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.PropertySource;
+
+
+@Configuration
+@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
+public class SearchDocumentTransformerFactory {
+  @Value("${elasticsearch.index.maxArrayLength}")
+  private int maxArrayLength;
+
+  @Bean("searchDocumentTransformer")
+  protected SearchDocumentTransformer getInstance() {
+    return new SearchDocumentTransformer(maxArrayLength);
+  }
+}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java
index 2f7ddc4af3bae..06d9cf951025e 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java
@@ -1,18 +1,14 @@
 package com.linkedin.gms.factory.timeseries;
 
-import com.linkedin.gms.factory.common.IndexConventionFactory;
-import com.linkedin.gms.factory.common.RestHighLevelClientFactory;
 import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory;
+import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory;
 import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import com.linkedin.metadata.timeseries.elastic.ElasticSearchTimeseriesAspectService;
 import com.linkedin.metadata.timeseries.elastic.indexbuilder.TimeseriesAspectIndexBuilders;
-import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import javax.annotation.Nonnull;
-import org.elasticsearch.client.RestHighLevelClient;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Qualifier;
-import org.springframework.beans.factory.annotation.Value;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Configuration;
 import org.springframework.context.annotation.Import;
@@ -21,37 +17,21 @@
 
 @Configuration
 @PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
-@Import({RestHighLevelClientFactory.class, IndexConventionFactory.class, EntityRegistryFactory.class})
+@Import({BaseElasticSearchComponentsFactory.class, EntityRegistryFactory.class})
 public class ElasticSearchTimeseriesAspectServiceFactory {
   @Autowired
-  @Qualifier("elasticSearchRestHighLevelClient")
-  private RestHighLevelClient searchClient;
-
-  @Autowired
-  @Qualifier(IndexConventionFactory.INDEX_CONVENTION_BEAN)
-  private IndexConvention indexConvention;
+  @Qualifier("baseElasticSearchComponents")
+  private BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components;
 
   @Autowired
   @Qualifier("entityRegistry")
   private EntityRegistry entityRegistry;
 
-  @Value("${elasticsearch.bulkProcessor.requestsLimit}")
-  private Integer bulkRequestsLimit;
-
-  @Value("${elasticsearch.bulkProcessor.flushPeriod}")
-  private Integer bulkFlushPeriod;
-
-  @Value("${elasticsearch.bulkProcessor.numRetries}")
-  private Integer numRetries;
-
-  @Value("${elasticsearch.bulkProcessor.retryInterval}")
-  private Long retryInterval;
-
   @Bean(name = "elasticSearchTimeseriesAspectService")
   @Nonnull
   protected ElasticSearchTimeseriesAspectService getInstance() {
-    return new ElasticSearchTimeseriesAspectService(searchClient, indexConvention,
-        new TimeseriesAspectIndexBuilders(entityRegistry, searchClient, indexConvention), entityRegistry,
-        bulkRequestsLimit, bulkFlushPeriod, numRetries, retryInterval);
+    return new ElasticSearchTimeseriesAspectService(components.getSearchClient(), components.getIndexConvention(),
+        new TimeseriesAspectIndexBuilders(components.getIndexBuilder(), entityRegistry,
+            components.getIndexConvention()), entityRegistry, components.getBulkProcessor());
   }
 }
\ No newline at end of file
diff --git a/metadata-service/factories/src/main/resources/application.yml b/metadata-service/factories/src/main/resources/application.yml
index e41654fff276f..9aa08b31a47c5 100644
--- a/metadata-service/factories/src/main/resources/application.yml
+++ b/metadata-service/factories/src/main/resources/application.yml
@@ -83,7 +83,11 @@ elasticsearch:
     flushPeriod: ${ES_BULK_FLUSH_PERIOD:1}
     numRetries: ${ES_BULK_NUM_RETRIES:3}
     retryInterval: ${ES_BULK_RETRY_INTERVAL:1}
-  indexPrefix: ${INDEX_PREFIX:}
+  index:
+    prefix: ${INDEX_PREFIX:}
+    numShards: ${ELASTICSEARCH_NUM_SHARDS_PER_INDEX:1}
+    numReplicas: ${ELASTICSEARCH_NUM_REPLICAS_PER_INDEX:1}
+    maxArrayLength: ${SEARCH_DOCUMENT_MAX_ARRAY_LENGTH:1000}
 
 # TODO: Kafka topic convention
 kafka:
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java
index d4457c6c06ad0..e294fe4ca3692 100644
--- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java
@@ -32,6 +32,7 @@
 import com.linkedin.mxe.MetadataChangeProposal;
 import com.linkedin.mxe.SystemMetadata;
 import com.linkedin.r2.RemoteInvocationException;
+import io.opentelemetry.extension.annotations.WithSpan;
 import java.time.Clock;
 import java.util.List;
 import java.util.Map;
@@ -183,6 +184,7 @@ public void batchUpdate(@Nonnull final Set<Entity> entities, @Nonnull final Auth
      * @throws RemoteInvocationException
      */
     @Nonnull
+    @WithSpan
     public SearchResult search(
         @Nonnull String entity,
         @Nonnull String input,

From a16c432a1bbdc30f607f06e07d4b0f1cb339a911 Mon Sep 17 00:00:00 2001
From: Swaroop Jagadish <67564030+swaroopjagadish@users.noreply.github.com>
Date: Sun, 5 Dec 2021 12:22:17 -0800
Subject: [PATCH 6/6] =?UTF-8?q?feat(metadata-model):=20adding=20metadata?=
 =?UTF-8?q?=20model=20doc=20generation=20and=20upload=E2=80=A6=20(#3667)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                                    |    3 +
 docs/modeling/metadata-model.md               |   11 +
 .../models/registry/config/Entity.java        |    1 +
 metadata-ingestion/build.gradle               |   13 +-
 metadata-ingestion/scripts/modeldocgen.py     |  582 +++++
 metadata-ingestion/scripts/modeldocgen.sh     |   31 +
 metadata-ingestion/scripts/modeldocupload.sh  |    4 +
 metadata-ingestion/setup.py                   |    1 +
 .../ingestion/extractor/schema_util.py        |   23 +-
 .../integration/hive/hive_mces_golden.json    |  140 +-
 .../trino/trino_hive_mces_golden.json         | 2189 +++++++++--------
 .../integration/trino/trino_mces_golden.json  |  990 ++++----
 .../com/linkedin/schema/SchemaField.pdl       |    5 +-
 .../src/main/resources/entity-registry.yml    |    3 +
 14 files changed, 2395 insertions(+), 1601 deletions(-)
 create mode 100644 metadata-ingestion/scripts/modeldocgen.py
 create mode 100755 metadata-ingestion/scripts/modeldocgen.sh
 create mode 100755 metadata-ingestion/scripts/modeldocupload.sh

diff --git a/.gitignore b/.gitignore
index 1b691ad0aaca4..631630d64c7fa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,3 +40,6 @@ MANIFEST
 **/.DS_Store
 
 .vscode
+
+# Metadata Ingestion Generated
+metadata-ingestion/generated/**
diff --git a/docs/modeling/metadata-model.md b/docs/modeling/metadata-model.md
index 585b2378552e3..34649ef99d80a 100644
--- a/docs/modeling/metadata-model.md
+++ b/docs/modeling/metadata-model.md
@@ -39,6 +39,17 @@ For example, here are helpful links to the most popular entities in DataHub's me
 * Feature Table (a.k.a. MLFeatureTable): [Profile](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,MlFeatureTable,PROD)/Schema?is_lineage_mode=false) [Documentation](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,MlFeatureTable,PROD)/Documentation?is_lineage_mode=false)
 * For the full list of entities in the metadata model, browse them [here](https://demo.datahubproject.io/browse/dataset/prod/datahub/entities)
 
+### Generating documentation for the Metadata Model
+
+The metadata model documentation can be generated and uploaded into a running DataHub instance using the command below.
+
+```console
+./gradlew :metadata-ingestion:modelDocUpload
+```
+
+**_NOTE_**: This will upload the model documentation to the DataHub instance whose address is given by the environment variable `$DATAHUB_HOST` (http://localhost:8080 by default).
+
+It will also generate a few files under `metadata-ingestion/generated/docs` such as a dot file called `metadata_graph.dot` that you can use to visualize the relationships among the entities.
 
 ## Querying the Metadata Graph 
 
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entity.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entity.java
index 0bd1582fb6349..075098c80d79b 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entity.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entity.java
@@ -12,6 +12,7 @@
 @AllArgsConstructor
 public class Entity {
   String name;
+  String doc;
   String keyAspect;
   List<String> aspects;
 }
diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle
index 454a29c71248f..f809e89b00066 100644
--- a/metadata-ingestion/build.gradle
+++ b/metadata-ingestion/build.gradle
@@ -42,6 +42,18 @@ task installDev(type: Exec, dependsOn: [install]) {
   commandLine 'bash', '-x', '-c',
     "${venv_name}/bin/pip install -e .[dev] && touch ${venv_name}/.build_install_dev_sentinel"
 }
+
+task modelDocGen(type: Exec, dependsOn: [codegen, installDev]) {
+    inputs.files(project.fileTree(dir: "../metadata-events/mxe-schemas/src/", include: "**/*.avsc"))
+    outputs.dir('generated/docs')
+    commandLine 'bash', '-c', "source ${venv_name}/bin/activate && ./scripts/modeldocgen.sh"
+}
+
+task modelDocUpload(type: Exec, dependsOn: [modelDocGen]) {
+    commandLine 'bash', '-c', "source ${venv_name}/bin/activate && ./scripts/modeldocupload.sh"
+}
+
+
 task lint(type: Exec, dependsOn: installDev) {
   /*
     The find/sed combo below is a temporary work-around for the following mypy issue with airflow 2.2.0:
@@ -79,7 +91,6 @@ task installDevTest(type: Exec, dependsOn: [installDev]) {
 
 def testFile = hasProperty('testFile') ? testFile : 'unknown'
 task testSingle(dependsOn: [installDevTest]) {
-  println "$testFile"
   doLast {
   if (testFile != 'unknown')  {
   exec {
diff --git a/metadata-ingestion/scripts/modeldocgen.py b/metadata-ingestion/scripts/modeldocgen.py
new file mode 100644
index 0000000000000..3426ca5375976
--- /dev/null
+++ b/metadata-ingestion/scripts/modeldocgen.py
@@ -0,0 +1,582 @@
+import json
+import logging
+import unittest.mock
+from dataclasses import Field, dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
+import avro.schema
+import click
+
+from datahub.emitter.mce_builder import make_data_platform_urn, make_dataset_urn
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.emitter.rest_emitter import DatahubRestEmitter
+from datahub.ingestion.api.common import PipelineContext, RecordEnvelope
+from datahub.ingestion.api.sink import NoopWriteCallback
+from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields
+from datahub.ingestion.sink.file import FileSink, FileSinkConfig
+from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
+from datahub.metadata.schema_classes import (
+    BrowsePathsClass,
+    ChangeTypeClass,
+    DatasetPropertiesClass,
+    DatasetSnapshotClass,
+    ForeignKeyConstraintClass,
+    GlobalTagsClass,
+    MetadataChangeEventClass,
+    OtherSchemaClass,
+    SchemaFieldDataTypeClass,
+    SchemaMetadataClass,
+    StringTypeClass,
+    SubTypesClass,
+    SystemMetadataClass,
+    TagAssociationClass,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def capitalize_first(something: str) -> str:
+    return something[0:1].upper() + something[1:]
+
+
+@dataclass
+class EntityDefinition:
+    name: str
+    keyAspect: str
+    aspects: List[str] = field(default_factory=list)
+    aspect_map: Optional[Dict[str, Any]] = None
+    relationship_map: Optional[Dict[str, str]] = None
+    doc: Optional[str] = None
+    # schema: Optional[avro.schema.Schema] = None
+    # logical_schema: Optional[avro.schema.Schema] = None
+
+    # @validator("name")
+    # def lower_everything(cls, v: str) -> str:
+    #    return v.lower()
+
+    @property
+    def display_name(self):
+        return capitalize_first(self.name)
+
+
+@dataclass
+class AspectDefinition:
+    name: str
+    EntityUrns: Optional[List[str]] = None
+    schema: Optional[avro.schema.Schema] = None
+    type: Optional[str] = None
+
+
+entity_registry: Dict[str, EntityDefinition] = {}
+
+
+def get_aspects_from_snapshot(
+    snapshot_schema: avro.schema.RecordSchema,
+) -> Dict[str, AspectDefinition]:
+    union_schema: avro.schema.UnionSchema = snapshot_schema.fields[1].type.items
+    aspect_map = {}
+    for aspect_schema in union_schema.schemas:
+        if "Aspect" in aspect_schema.props:
+            aspectDef = AspectDefinition(
+                schema=aspect_schema,
+                name=aspect_schema.props["Aspect"].get("name"),
+            )
+            aspect_map[aspectDef.name] = aspectDef
+
+    return aspect_map
+
+
+aspect_registry: Dict[str, AspectDefinition] = {}
+
+
+# Patch add_name method to NOT complain about duplicate names
+def add_name(self, name_attr, space_attr, new_schema):
+    to_add = avro.schema.Name(name_attr, space_attr, self.default_namespace)
+
+    if self.names:
+        self.names[to_add.fullname] = new_schema
+    return to_add
+
+
+def load_schema_file(schema_file: str) -> None:
+    """Parse an Avro schema file and register the aspect or entities it declares."""
+    with open(schema_file) as f:
+        raw_schema_text = f.read()
+
+    avro_schema = avro.schema.parse(raw_schema_text)
+
+    if (
+        isinstance(avro_schema, avro.schema.RecordSchema)
+        and "Aspect" in avro_schema.other_props
+    ):
+        # probably an aspect schema
+        record_schema: avro.schema.RecordSchema = avro_schema
+        aspect_def = record_schema.get_prop("Aspect")
+        try:
+            aspect_definition = AspectDefinition(**aspect_def)
+        except Exception as e:
+            # fail with context instead of dropping into an interactive debugger
+            raise ValueError(
+                f"Invalid Aspect definition {aspect_def!r} in {schema_file}"
+            ) from e
+        aspect_definition.schema = record_schema
+        aspect_registry[aspect_definition.name] = aspect_definition
+    elif getattr(avro_schema, "name", None) == "MetadataChangeEvent":
+        # probably an MCE schema
+        field: Field = avro_schema.fields[1]
+        assert isinstance(field.type, avro.schema.UnionSchema)
+        for member_schema in field.type.schemas:
+            if "Entity" in member_schema.props:
+                entity_def = member_schema.props["Entity"]
+                entity_name = entity_def["name"]
+                entity_definition = entity_registry.get(
+                    entity_name, EntityDefinition(**entity_def)
+                )
+                entity_definition.aspect_map = get_aspects_from_snapshot(member_schema)
+                all_aspects = [a for a in entity_definition.aspect_map.keys()]
+                # in terms of order, we prefer the aspects from snapshot over the aspects from the config registry
+                # so we flip the aspect list here
+                for aspect_name in entity_definition.aspects:
+                    if aspect_name not in all_aspects:
+                        all_aspects.append(aspect_name)
+                entity_definition.aspects = all_aspects
+                entity_registry[entity_name] = entity_definition
+    else:
+        print(f"Ignoring schema {schema_file}")
+
+
+@dataclass
+class Relationship:
+    name: str
+    src: str
+    dst: str
+    doc: Optional[str] = None
+    id: Optional[str] = None
+
+
+@dataclass
+class RelationshipAdjacency:
+    self_loop: List[Relationship] = field(default_factory=list)
+    incoming: List[Relationship] = field(default_factory=list)
+    outgoing: List[Relationship] = field(default_factory=list)
+
+
+@dataclass
+class RelationshipGraph:
+    map: Dict[str, RelationshipAdjacency] = field(default_factory=dict)
+
+    def add_edge(
+        self, src: str, dst: str, label: str, reason: str, edge_id: Optional[str] = None
+    ) -> None:
+        relnship = Relationship(
+            label, src, dst, reason, id=edge_id or f"{src}:{label}:{dst}:{reason}"
+        )
+        # a self-loop is recorded once; any other edge is recorded on both endpoints
+        if src == dst:
+            adjacency = self.map.get(src, RelationshipAdjacency())
+            for reln in adjacency.self_loop:
+                if relnship.id == reln.id:
+                    logger.info(f"Skipping adding edge since ids match {reln.id}")
+                    return
+            adjacency.self_loop.append(relnship)
+            self.map[src] = adjacency
+        else:
+            adjacency = self.map.get(src, RelationshipAdjacency())
+            for reln in adjacency.outgoing:
+                if relnship.id == reln.id:
+                    logger.info(f"Skipping adding edge since ids match {reln.id}")
+                    return
+
+            adjacency.outgoing.append(relnship)
+            self.map[src] = adjacency
+
+            adjacency = self.map.get(dst, RelationshipAdjacency())
+            for reln in adjacency.incoming:
+                if relnship.id == reln.id:
+                    logger.info(f"Skipping adding edge since ids match {reln.id}")
+                    return
+
+            adjacency.incoming.append(relnship)
+            self.map[dst] = adjacency
+
+    def get_adjacency(self, node: str) -> RelationshipAdjacency:
+        return self.map.get(node, RelationshipAdjacency())
+
+
+def make_relnship_docs(relationships: List[Relationship], direction: str) -> str:
+    doc = ""
+    map: Dict[str, List[Relationship]] = {}
+    for relnship in relationships:
+        map[relnship.name] = map.get(relnship.name, [])
+        map[relnship.name].append(relnship)
+    for rel_name, relnships in map.items():
+        doc += f"\n- {rel_name}\n"
+        for relnship in relnships:
+            doc += f"\n   - {relnship.dst if direction == 'outgoing' else relnship.src}{relnship.doc or ''}"
+    return doc
+
+
+def make_entity_docs(entity_display_name: str, graph: RelationshipGraph) -> str:
+    """Render the markdown documentation for one entity of the metadata model."""
+    # undo capitalize_first to recover the entity_registry key from the display name
+    entity_name = entity_display_name[0:1].lower() + entity_display_name[1:]
+    entity_def: Optional[EntityDefinition] = entity_registry.get(entity_name, None)
+    if entity_def:
+        # fall back to a generic sentence when the entity definition carries no doc
+        doc = entity_def.doc or f"This is the {entity_def.display_name} entity."
+        # create relationships section
+        relationships_section = "\n## Relationships\n"
+        adjacency = graph.get_adjacency(entity_def.display_name)
+        if adjacency.self_loop:
+            relationships_section += "\n### Self\nThese are the relationships to itself, stored in this entity's aspects"
+        for relnship in adjacency.self_loop:
+            relationships_section += f"\n- {relnship.name} ({relnship.doc[1:] if relnship.doc else ''})"
+
+        if adjacency.outgoing:
+            relationships_section += "\n### Outgoing\nThese are the relationships stored in this entity's aspects"
+            relationships_section += make_relnship_docs(
+                adjacency.outgoing, direction="outgoing"
+            )
+
+        if adjacency.incoming:
+            relationships_section += "\n### Incoming\nThese are the relationships stored in other entity's aspects"
+            relationships_section += make_relnship_docs(
+                adjacency.incoming, direction="incoming"
+            )
+
+        # create global metadata graph
+        global_graph_url = "https://github.com/linkedin/datahub/raw/master/docs/imgs/datahub-metadata-model.png"
+        global_graph_section = (
+            f"\n## [Global Metadata Model]({global_graph_url})"
+            + f"\n![Global Graph]({global_graph_url})"
+        )
+        return doc + relationships_section + global_graph_section
+    else:
+        raise Exception(f"Failed to find information for entity: {entity_name}")
+
+
+def generate_stitched_record(relnships_graph: RelationshipGraph) -> List[Any]:
+    """Turn every registered entity into a "dataset" and return its events.
+
+    For each entity in the ``entity_registry`` mapping the schemas of its
+    aspects (from ``aspect_registry``) are stitched into one Avro record, turned
+    into schema fields and tagged according to each field's ``jsonProps``.
+    Relationship annotations become foreign keys and ``relnships_graph`` edges.
+    The result holds, per dataset, a snapshot MCE followed by a ``subTypes`` MCP.
+    """
+    def strip_types(field_path: str) -> str:
+        """Remove ``[type=...].`` tokens and a leading ``[version=2.0].``."""
+        import re
+
+        final_path = field_path
+        final_path = re.sub(r"(\[type=[a-zA-Z]+\]\.)", "", final_path)
+        final_path = re.sub(r"^\[version=2.0\]\.", "", final_path)
+        return final_path
+
+    datasets: List[DatasetSnapshotClass] = []
+
+    for entity_name, entity_def in entity_registry.items():
+        entity_display_name = entity_def.display_name
+        entity_fields = []
+        for aspect_name in entity_def.aspects:
+            if aspect_name not in aspect_registry:
+                print(f"Did not find aspect name: {aspect_name} in aspect_registry")
+                continue
+            # all aspects should have a schema
+            aspect_schema = aspect_registry[aspect_name].schema
+            assert aspect_schema
+            entity_fields.append(
+                {
+                    "type": aspect_schema.to_json(),
+                    "name": aspect_name,
+                }
+            )
+
+        if entity_fields:
+            names = avro.schema.Names()
+            field_objects = []
+            for f in entity_fields:
+                field = avro.schema.Field(
+                    type_=f["type"],
+                    name=f["name"],
+                    has_default=False,
+                )
+                field_objects.append(field)
+
+            # Names.add_name is replaced by our add_name just for this construction.
+            with unittest.mock.patch("avro.schema.Names.add_name", add_name):
+                entity_avro_schema = avro.schema.RecordSchema(
+                    name=entity_name,
+                    namespace="datahub.metadata.model",
+                    names=names,
+                    fields=[],
+                )
+                entity_avro_schema.set_prop("fields", field_objects)
+            rawSchema = json.dumps(entity_avro_schema.to_json())
+            # always add the URN which is the primary key
+            urn_field = SchemaField(
+                fieldPath="urn",
+                type=SchemaFieldDataTypeClass(type=StringTypeClass()),
+                nativeDataType="string",
+                nullable=False,
+                isPartOfKey=True,
+                description=f"The primary identifier for the {entity_name} entity. See the {entity_def.keyAspect} field to understand the structure of this urn.",
+            )
+            schema_fields: List[SchemaField] = [urn_field] + avro_schema_to_mce_fields(
+                rawSchema
+            )
+            foreign_keys: List[ForeignKeyConstraintClass] = []
+            source_dataset_urn = make_dataset_urn(
+                platform=make_data_platform_urn("datahub"),
+                name=f"{entity_display_name}",
+            )
+            for f_field in schema_fields:
+                if f_field.jsonProps:
+                    json_dict = json.loads(f_field.jsonProps)
+                    if "Aspect" in json_dict:
+                        aspect_info = json_dict["Aspect"]
+                        f_field.globalTags = f_field.globalTags or GlobalTagsClass(
+                            tags=[]
+                        )
+                        f_field.globalTags.tags.append(
+                            TagAssociationClass(tag="urn:li:tag:Aspect")
+                        )
+                        # if this is the key aspect, also add primary-key
+                        if entity_def.keyAspect == aspect_info.get("name"):
+                            f_field.isPartOfKey = True
+
+                        if "timeseries" == aspect_info.get("type", ""):
+                            # globalTags was created above for the Aspect tag
+                            f_field.globalTags.tags.append(
+                                TagAssociationClass(tag="urn:li:tag:Temporal")
+                            )
+                    if "Searchable" in json_dict:
+                        f_field.globalTags = f_field.globalTags or GlobalTagsClass(
+                            tags=[]
+                        )
+                        f_field.globalTags.tags.append(
+                            TagAssociationClass(tag="urn:li:tag:Searchable")
+                        )
+                    if "Relationship" in json_dict:
+                        relationship_info = json_dict["Relationship"]
+                        # detect if we have relationship specified at leaf level or thru path specs
+                        if "entityTypes" not in relationship_info:
+                            # path spec
+                            assert (
+                                len(relationship_info.keys()) == 1
+                            ), "We should never have more than one path spec assigned to a relationship annotation"
+                            final_info = None
+                            for k, v in relationship_info.items():
+                                final_info = v
+                            relationship_info = final_info
+
+                        assert "entityTypes" in relationship_info
+
+                        entity_types: List[str] = relationship_info.get(
+                            "entityTypes", []
+                        )
+                        relnship_name = relationship_info.get("name", None)
+                        for entity_type in entity_types:
+                            destination_entity_name = capitalize_first(entity_type)
+
+                            foreign_dataset_urn = make_dataset_urn(
+                                platform=make_data_platform_urn("datahub"),
+                                name=destination_entity_name,
+                            )
+                            fkey = ForeignKeyConstraintClass(
+                                name=relnship_name,
+                                foreignDataset=foreign_dataset_urn,
+                                foreignFields=[
+                                    f"urn:li:schemaField:({foreign_dataset_urn}, urn)"
+                                ],
+                                sourceFields=[
+                                    f"urn:li:schemaField:({source_dataset_urn},{f_field.fieldPath})"
+                                ],
+                            )
+                            foreign_keys.append(fkey)
+                            relnships_graph.add_edge(
+                                entity_display_name,
+                                destination_entity_name,
+                                fkey.name,
+                                f" via `{strip_types(f_field.fieldPath)}`",
+                                edge_id=f"{entity_display_name}:{fkey.name}:{destination_entity_name}:{strip_types(f_field.fieldPath)}",
+                            )
+
+            schemaMetadata = SchemaMetadataClass(
+                schemaName=f"{entity_name}",
+                platform=make_data_platform_urn("datahub"),
+                platformSchema=OtherSchemaClass(rawSchema=rawSchema),
+                fields=schema_fields,
+                version=0,
+                hash="",
+                foreignKeys=foreign_keys if foreign_keys else None,
+            )
+
+            dataset = DatasetSnapshotClass(
+                urn=make_dataset_urn(
+                    platform=make_data_platform_urn("datahub"),
+                    name=f"{entity_display_name}",
+                ),
+                aspects=[
+                    schemaMetadata,
+                    GlobalTagsClass(
+                        tags=[TagAssociationClass(tag="urn:li:tag:Entity")]
+                    ),
+                    BrowsePathsClass([f"/prod/datahub/entities/{entity_display_name}"]),
+                ],
+            )
+            datasets.append(dataset)
+
+    events: List[Union[MetadataChangeEventClass, MetadataChangeProposalWrapper]] = []
+
+    for d in datasets:
+        entity_name = d.urn.split(":")[-1].split(",")[1]
+        d.aspects.append(
+            DatasetPropertiesClass(
+                description=make_entity_docs(entity_name, relnships_graph)
+            )
+        )
+
+        mce = MetadataChangeEventClass(
+            proposedSnapshot=d,
+            systemMetadata=SystemMetadataClass(runId="test-metamodel"),
+        )
+        events.append(mce)
+
+        mcp = MetadataChangeProposalWrapper(
+            entityType="dataset",
+            changeType=ChangeTypeClass.UPSERT,
+            entityUrn=d.urn,
+            aspectName="subTypes",
+            aspect=SubTypesClass(typeNames=["entity"]),
+        )
+        events.append(mcp)
+    return events
+
+
+from datahub.configuration.common import ConfigModel
+
+
+class EntityRegistry(ConfigModel):  # parsed form of a registry YAML file (see load_registry_file)
+    entities: List[EntityDefinition]
+
+
+def load_registry_file(registry_file: str) -> Dict[str, EntityDefinition]:
+    """Parse a registry YAML file and record its entities in ``entity_registry``."""
+    import yaml
+
+    with open(registry_file, "r") as handle:
+        parsed = EntityRegistry.parse_obj(yaml.safe_load(handle))
+    for definition in parsed.entities:
+        entity_registry[definition.name] = definition
+    return entity_registry
+
+
+@click.command()
+@click.argument("schema_files", type=click.Path(exists=True), nargs=-1, required=True)
+@click.option("--server", type=str, required=False)
+@click.option("--file", type=str, required=False)
+@click.option("--dot", type=str, required=False)
+@click.option("--png", type=str, required=False)
+def generate(
+    schema_files: List[str],
+    server: Optional[str],
+    file: Optional[str],
+    dot: Optional[str],
+    png: Optional[str],
+) -> None:
+    """Load registry/schema files, build the metamodel events and export them.
+
+    ``*.yml``/``*.yaml`` inputs are loaded as entity registries, all others as
+    schema files. Events are written to ``--file`` (plus a ``pipeline.yml``
+    recipe beside it) and/or emitted to ``--server``; the relationship graph is
+    rendered to ``--dot`` and/or ``--png``. Every output option is optional.
+    """
+    logger.info(f"server = {server}")
+    logger.info(f"file = {file}")
+    logger.info(f"dot = {dot}")
+    logger.info(f"png = {png}")
+
+    for schema_file in schema_files:
+        if schema_file.endswith((".yml", ".yaml")):
+            # registry file
+            load_registry_file(schema_file)
+        else:
+            # schema file
+            load_schema_file(schema_file)
+
+    relationship_graph = RelationshipGraph()
+    events = generate_stitched_record(relationship_graph)
+
+    if file:
+        logger.info(f"Will write events to {file}")
+        Path(file).parent.mkdir(parents=True, exist_ok=True)
+        fileSink = FileSink(
+            PipelineContext(run_id="generated-metaModel"),
+            FileSinkConfig(filename=file),
+        )
+        for e in events:
+            fileSink.write_record_async(
+                RecordEnvelope(e, metadata={}), write_callback=NoopWriteCallback()
+            )
+        fileSink.close()
+        pipeline_config = {
+            "source": {
+                "type": "file",
+                "config": {"filename": file},
+            },
+            "sink": {
+                "type": "datahub-rest",
+                "config": {
+                    "server": "${DATAHUB_SERVER:-http://localhost:8080}",
+                    "token": "${DATAHUB_TOKEN:-}",
+                },
+            },
+            "run_id": "modeldoc-generated",
+        }
+        pipeline_file = Path(file).parent.absolute() / "pipeline.yml"
+        # The recipe is dumped as JSON, which YAML parsers also accept.
+        with open(pipeline_file, "w") as f:
+            json.dump(pipeline_config, f, indent=2)
+            logger.info(f"Wrote pipeline to {pipeline_file}")
+
+    if server:
+        logger.info(f"Will send events to {server}")
+        # https endpoints are valid too, so accept both schemes
+        assert server.startswith(("http://", "https://")), (
+            "server address must start with http:// or https://"
+        )
+        emitter = DatahubRestEmitter(gms_server=server)
+        emitter.test_connection()
+        for e in events:
+            emitter.emit(e)
+
+    if dot or png:
+        # --png does not depend on --dot: the graph is built when either is requested
+        import pydot
+
+        graph = pydot.Dot("my_graph", graph_type="graph")
+        for node, adjacency in relationship_graph.map.items():
+            graph.add_node(pydot.Node(node, label=node, shape="box"))
+            edges = [*(adjacency.self_loop or []), *(adjacency.outgoing or [])]
+            for relnship in edges:
+                graph.add_edge(
+                    pydot.Edge(src=relnship.src, dst=relnship.dst, label=relnship.name)
+                )
+        if dot:
+            logger.info(f"Will write dot file to {dot}")
+            Path(dot).parent.mkdir(parents=True, exist_ok=True)
+            graph.write_raw(dot)
+        if png:
+            logger.info(f"Will write png file to {png}")
+            Path(png).parent.mkdir(parents=True, exist_ok=True)
+            try:
+                graph.write_png(png)
+            except Exception:
+                logger.error("Failed to create png file. Do you have graphviz installed?")
+                raise
+
+if __name__ == "__main__":
+    logger.setLevel("INFO")  # so generate()'s logger.info() messages pass the level check
+    generate()  # click command: its arguments come from the command line
diff --git a/metadata-ingestion/scripts/modeldocgen.sh b/metadata-ingestion/scripts/modeldocgen.sh
new file mode 100755
index 0000000000000..c55be8bbda0bf
--- /dev/null
+++ b/metadata-ingestion/scripts/modeldocgen.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+set -euo pipefail
+
+OUTDIR=./generated/docs
+
+# Note: this assumes that datahub has already been built with `./gradlew build`.
+DATAHUB_ROOT=..
+REGISTRY_ROOT="$DATAHUB_ROOT/metadata-models/src/main/resources"
+SCHEMAS_ROOT="$DATAHUB_ROOT/metadata-models/src/mainGeneratedAvroSchema/avro/com/linkedin"
+# An array keeps every path as a single argument, even if it contains whitespace.
+FILES=("$REGISTRY_ROOT/entity-registry.yml" "$SCHEMAS_ROOT/mxe/MetadataChangeEvent.avsc")
+
+# Since we depend on jq, check if jq is installed
+if ! command -v jq > /dev/null; then
+   echo "jq is not installed. Please install jq and rerun (https://stedolan.github.io/jq/)"
+   exit 1
+fi
+
+# Add all other files that are aspects but not included in the above.
+# Process substitution keeps the loop in this shell, so FILES can be extended
+# directly and no scratch file in /tmp is needed.
+while IFS= read -r file
+do
+    if jq -e '.Aspect' "$file" > /dev/null
+    then
+        FILES+=("$file")
+    fi
+done < <(find "$SCHEMAS_ROOT" -name "*.avsc" | sort)
+
+rm -rf "$OUTDIR"
+python scripts/modeldocgen.py "${FILES[@]}" --dot generated/docs/metadata_graph.dot --file generated/docs/metadata_model_mces.json "$@"
diff --git a/metadata-ingestion/scripts/modeldocupload.sh b/metadata-ingestion/scripts/modeldocupload.sh
new file mode 100755
index 0000000000000..e04cf82c25fb4
--- /dev/null
+++ b/metadata-ingestion/scripts/modeldocupload.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -euo pipefail
+# Ingest using the pipeline.yml recipe that modeldocgen.py writes next to its --file output.
+datahub ingest -c generated/docs/pipeline.yml
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 998900bd77707..00d7de96822e2 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -190,6 +190,7 @@ def get_long_description():
     "jsonpickle",
     "build",
     "twine",
+    "pydot",
     *list(
         dependency
         for plugin in [
diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py
index a32f15a812949..01bd0bf82cab2 100644
--- a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py
+++ b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py
@@ -1,3 +1,4 @@
+import json
 import logging
 from typing import Any, Callable, Dict, Generator, List, Optional, Union
 
@@ -20,6 +21,7 @@
     TimeTypeClass,
     UnionTypeClass,
 )
+from datahub.metadata.schema_classes import GlobalTagsClass, TagAssociationClass
 
 """A helper file for Avro schema -> MCE schema transformations"""
 
@@ -235,6 +237,7 @@ def emit(self) -> Generator[SchemaField, None, None]:
 
                 schema = self._schema
                 actual_schema = self._actual_schema
+
                 if isinstance(schema, avro.schema.Field):
                     # Field's schema is actually it's type.
                     schema = schema.type
@@ -259,8 +262,24 @@ def emit(self) -> Generator[SchemaField, None, None]:
                 native_data_type = actual_schema.props.get(
                     "native_data_type", native_data_type
                 )
+
+                field_path = self._converter._get_cur_field_path()
+                merged_props = {}
+                merged_props.update(self._schema.other_props)
+                merged_props.update(schema.other_props)
+
+                tags = None
+                if "deprecated" in merged_props:  # may come from either props dict
+                    description = (
+                        f"<span style=\"color:red\">DEPRECATED: {merged_props['deprecated']}</span>\n"
+                        + description
+                    )
+                    tags = GlobalTagsClass(
+                        tags=[TagAssociationClass(tag="urn:li:tag:Deprecated")]
+                    )
+
                 field = SchemaField(
-                    fieldPath=self._converter._get_cur_field_path(),
+                    fieldPath=field_path,
                     # Populate it with the simple native type for now.
                     nativeDataType=native_data_type,
                     type=self._converter._get_column_type(
@@ -270,6 +289,8 @@ def emit(self) -> Generator[SchemaField, None, None]:
                     recursive=False,
                     nullable=self._converter._is_nullable(schema),
                     isPartOfKey=self._converter._is_key_schema,
+                    globalTags=tags,
+                    jsonProps=json.dumps(merged_props) if merged_props else None,
                 )
                 yield field
 
diff --git a/metadata-ingestion/tests/integration/hive/hive_mces_golden.json b/metadata-ingestion/tests/integration/hive/hive_mces_golden.json
index 52f83f5941304..e7719534e7b00 100644
--- a/metadata-ingestion/tests/integration/hive/hive_mces_golden.json
+++ b/metadata-ingestion/tests/integration/hive/hive_mces_golden.json
@@ -10,7 +10,7 @@
                         "customProperties": {
                             "Database:": "db1",
                             "Owner:": "root",
-                            "CreateTime:": "Fri Oct 01 05:17:59 UTC 2021",
+                            "CreateTime:": "Sun Dec 05 17:50:27 UTC 2021",
                             "LastAccessTime:": "UNKNOWN",
                             "Retention:": "0",
                             "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore",
@@ -20,7 +20,7 @@
                             "Table Parameters: numRows": "0",
                             "Table Parameters: rawDataSize": "0",
                             "Table Parameters: totalSize": "0",
-                            "Table Parameters: transient_lastDdlTime": "1633065479",
+                            "Table Parameters: transient_lastDdlTime": "1638726627",
                             "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
                             "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
                             "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -75,7 +75,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             },
                             {
                                 "fieldPath": "bar",
@@ -91,7 +92,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             }
                         ],
                         "primaryKeys": null,
@@ -106,6 +108,8 @@
     "systemMetadata": {
         "lastObserved": 1586847600000,
         "runId": "hive-test",
+        "registryName": null,
+        "registryVersion": null,
         "properties": null
     }
 },
@@ -120,7 +124,7 @@
                         "customProperties": {
                             "Database:": "db1",
                             "Owner:": "root",
-                            "CreateTime:": "Fri Oct 01 05:17:59 UTC 2021",
+                            "CreateTime:": "Sun Dec 05 17:50:28 UTC 2021",
                             "LastAccessTime:": "UNKNOWN",
                             "Retention:": "0",
                             "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test",
@@ -130,7 +134,7 @@
                             "Table Parameters: numRows": "1",
                             "Table Parameters: rawDataSize": "32",
                             "Table Parameters: totalSize": "33",
-                            "Table Parameters: transient_lastDdlTime": "1633065482",
+                            "Table Parameters: transient_lastDdlTime": "1638726632",
                             "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
                             "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
                             "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -185,7 +189,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service",
@@ -203,7 +208,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"array<struct<type:string,provider:array<int>>>\"}"
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type",
@@ -219,7 +225,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider",
@@ -237,7 +244,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"array<int>\"}"
                             }
                         ],
                         "primaryKeys": null,
@@ -252,6 +260,8 @@
     "systemMetadata": {
         "lastObserved": 1586847600000,
         "runId": "hive-test",
+        "registryName": null,
+        "registryVersion": null,
         "properties": null
     }
 },
@@ -266,7 +276,7 @@
                         "customProperties": {
                             "Database:": "db1",
                             "Owner:": "root",
-                            "CreateTime:": "Tue Oct 12 10:51:00 UTC 2021",
+                            "CreateTime:": "Sun Dec 05 17:50:33 UTC 2021",
                             "LastAccessTime:": "UNKNOWN",
                             "Retention:": "0",
                             "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test",
@@ -276,7 +286,7 @@
                             "Table Parameters: numRows": "0",
                             "Table Parameters: rawDataSize": "0",
                             "Table Parameters: totalSize": "0",
-                            "Table Parameters: transient_lastDdlTime": "1634035860",
+                            "Table Parameters: transient_lastDdlTime": "1638726633",
                             "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
                             "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
                             "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -331,7 +341,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=map].[type=string].recordid",
@@ -350,7 +361,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"map<int,string>\", \"key_type\": {\"type\": \"int\", \"native_data_type\": \"int\", \"_nullable\": true}, \"key_native_data_type\": \"int\"}"
                             }
                         ],
                         "primaryKeys": null,
@@ -365,6 +377,8 @@
     "systemMetadata": {
         "lastObserved": 1586847600000,
         "runId": "hive-test",
+        "registryName": null,
+        "registryVersion": null,
         "properties": null
     }
 },
@@ -379,7 +393,7 @@
                         "customProperties": {
                             "Database:": "db1",
                             "Owner:": "root",
-                            "CreateTime:": "Mon Oct 11 12:54:49 UTC 2021",
+                            "CreateTime:": "Sun Dec 05 17:50:33 UTC 2021",
                             "LastAccessTime:": "UNKNOWN",
                             "Retention:": "0",
                             "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test",
@@ -389,7 +403,7 @@
                             "Table Parameters: numRows": "0",
                             "Table Parameters: rawDataSize": "0",
                             "Table Parameters: totalSize": "0",
-                            "Table Parameters: transient_lastDdlTime": "1633956889",
+                            "Table Parameters: transient_lastDdlTime": "1638726633",
                             "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
                             "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
                             "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -444,7 +458,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=struct].service",
@@ -460,7 +475,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"struct<type:string,provider:struct<name:varchar(50),id:tinyint>>\"}"
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type",
@@ -476,7 +492,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider",
@@ -492,7 +509,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"struct<name:varchar(50),id:tinyint>\"}"
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name",
@@ -508,7 +526,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"varchar(50)\", \"_nullable\": true}"
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id",
@@ -524,7 +543,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"tinyint\", \"_nullable\": true}"
                             }
                         ],
                         "primaryKeys": null,
@@ -539,6 +559,8 @@
     "systemMetadata": {
         "lastObserved": 1586847600000,
         "runId": "hive-test",
+        "registryName": null,
+        "registryVersion": null,
         "properties": null
     }
 },
@@ -553,7 +575,7 @@
                         "customProperties": {
                             "Database:": "db1",
                             "Owner:": "root",
-                            "CreateTime:": "Fri Oct 01 05:17:56 UTC 2021",
+                            "CreateTime:": "Sun Dec 05 17:50:23 UTC 2021",
                             "LastAccessTime:": "UNKNOWN",
                             "Retention:": "0",
                             "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes",
@@ -562,7 +584,7 @@
                             "Table Parameters: numRows": "0",
                             "Table Parameters: rawDataSize": "0",
                             "Table Parameters: totalSize": "5812",
-                            "Table Parameters: transient_lastDdlTime": "1633065477",
+                            "Table Parameters: transient_lastDdlTime": "1638726625",
                             "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
                             "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
                             "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -617,7 +639,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             },
                             {
                                 "fieldPath": "bar",
@@ -633,7 +656,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             }
                         ],
                         "primaryKeys": null,
@@ -648,6 +672,8 @@
     "systemMetadata": {
         "lastObserved": 1586847600000,
         "runId": "hive-test",
+        "registryName": null,
+        "registryVersion": null,
         "properties": null
     }
 },
@@ -662,7 +688,7 @@
                         "customProperties": {
                             "Database:": "db1",
                             "Owner:": "root",
-                            "CreateTime:": "Fri Oct 01 05:17:59 UTC 2021",
+                            "CreateTime:": "Sun Dec 05 17:50:28 UTC 2021",
                             "LastAccessTime:": "UNKNOWN",
                             "Retention:": "0",
                             "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test",
@@ -672,7 +698,7 @@
                             "Table Parameters: numRows": "0",
                             "Table Parameters: rawDataSize": "0",
                             "Table Parameters: totalSize": "0",
-                            "Table Parameters: transient_lastDdlTime": "1633065479",
+                            "Table Parameters: transient_lastDdlTime": "1638726628",
                             "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
                             "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
                             "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -727,7 +753,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=struct].service",
@@ -743,7 +770,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"struct<type:string,provider:array<int>>\"}"
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type",
@@ -759,7 +787,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider",
@@ -777,7 +806,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"array<int>\"}"
                             }
                         ],
                         "primaryKeys": null,
@@ -792,6 +822,8 @@
     "systemMetadata": {
         "lastObserved": 1586847600000,
         "runId": "hive-test",
+        "registryName": null,
+        "registryVersion": null,
         "properties": null
     }
 },
@@ -806,7 +838,7 @@
                         "customProperties": {
                             "Database:": "db1",
                             "Owner:": "root",
-                            "CreateTime:": "Tue Oct 12 10:29:17 UTC 2021",
+                            "CreateTime:": "Sun Dec 05 17:50:33 UTC 2021",
                             "LastAccessTime:": "UNKNOWN",
                             "Retention:": "0",
                             "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test",
@@ -816,7 +848,7 @@
                             "Table Parameters: numRows": "0",
                             "Table Parameters: rawDataSize": "0",
                             "Table Parameters: totalSize": "0",
-                            "Table Parameters: transient_lastDdlTime": "1634034557",
+                            "Table Parameters: transient_lastDdlTime": "1638726633",
                             "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
                             "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
                             "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -869,11 +901,12 @@
                                         }
                                     }
                                 },
-                                "nativeDataType": "uniontype<<int, double, array<string>, struct<a:int,b:string>>>",
+                                "nativeDataType": "union",
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=union].[type=int].foo",
@@ -891,7 +924,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=union].[type=double].foo",
@@ -909,7 +943,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=union].[type=array].[type=string].foo",
@@ -923,11 +958,12 @@
                                         }
                                     }
                                 },
-                                "nativeDataType": "array<string>",
+                                "nativeDataType": "string",
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=union].[type=struct0].foo",
@@ -941,11 +977,12 @@
                                         }
                                     }
                                 },
-                                "nativeDataType": "struct<a:int,b:string>",
+                                "nativeDataType": "struct0",
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=union].[type=struct0].foo.[type=int].a",
@@ -961,7 +998,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}"
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=union].[type=struct0].foo.[type=string].b",
@@ -977,7 +1015,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=union].[type=struct1].foo",
@@ -991,11 +1030,12 @@
                                         }
                                     }
                                 },
-                                "nativeDataType": "struct<a:int,b:string>",
+                                "nativeDataType": "struct1",
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": null
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=union].[type=struct1].foo.[type=int].c",
@@ -1011,7 +1051,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}"
                             },
                             {
                                 "fieldPath": "[version=2.0].[type=struct].[type=union].[type=struct1].foo.[type=double].d",
@@ -1027,7 +1068,8 @@
                                 "recursive": false,
                                 "globalTags": null,
                                 "glossaryTerms": null,
-                                "isPartOfKey": false
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"double\", \"_nullable\": true}"
                             }
                         ],
                         "primaryKeys": null,
@@ -1042,6 +1084,8 @@
     "systemMetadata": {
         "lastObserved": 1586847600000,
         "runId": "hive-test",
+        "registryName": null,
+        "registryVersion": null,
         "properties": null
     }
 }
diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json
index aeaba2a9da8e9..45f0da917fbe7 100644
--- a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json
+++ b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json
@@ -1,1169 +1,1224 @@
 [
-    {
-        "auditHeader": null,
-        "proposedSnapshot": {
-            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-                "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)",
-                "aspects": [
-                    {
-                        "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                            "customProperties": {
-                                "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}",
-                                "comment": "This table has array of structs",
-                                "another.comment": "This table has no partitions",
-                                "numfiles": "4",
-                                "numrows": "1",
-                                "rawdatasize": "32",
-                                "totalsize": "138",
-                                "transient_lastddltime": "1633434492"
-                            },
-                            "externalUrl": null,
-                            "description": "This table has array of structs",
-                            "uri": null,
-                            "tags": []
-                        }
-                    },
-                    {
-                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                            "schemaName": "hivedb.db1.array_struct_test",
-                            "platform": "urn:li:dataPlatform:trino",
-                            "version": 0,
-                            "created": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "lastModified": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "deleted": null,
-                            "dataset": null,
-                            "cluster": null,
-                            "hash": "",
-                            "platformSchema": {
-                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
-                                    "tableSchema": ""
-                                }
-                            },
-                            "fields": [
-                                {
-                                    "fieldPath": "property_id",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": "id of property",
+{
+    "auditHeader": null,
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+                        "customProperties": {
+                            "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}",
+                            "another.comment": "This table has no partitions",
+                            "comment": "This table has array of structs",
+                            "numfiles": "1",
+                            "numrows": "1",
+                            "rawdatasize": "32",
+                            "totalsize": "33",
+                            "transient_lastddltime": "1638688532"
+                        },
+                        "externalUrl": null,
+                        "description": "This table has array of structs",
+                        "uri": null,
+                        "tags": []
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "hivedb.db1.array_struct_test",
+                        "platform": "urn:li:dataPlatform:trino",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "deleted": null,
+                        "dataset": null,
+                        "cluster": null,
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+                                "tableSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "property_id",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": "id of property",
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": "service types and providers",
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": "service types and providers",
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.ArrayType": {
-                                                "nestedType": null
-                                            }
+                                        "com.linkedin.pegasus2avro.schema.ArrayType": {
+                                            "nestedType": null
                                         }
-                                    },
-                                    "nativeDataType": "ARRAY(ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))]))",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "ARRAY(ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))]))",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ARRAY(ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))]))\"}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.ArrayType": {
-                                                "nestedType": null
-                                            }
+                                        "com.linkedin.pegasus2avro.schema.ArrayType": {
+                                            "nestedType": null
                                         }
-                                    },
-                                    "nativeDataType": "ARRAY(INTEGER())",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
-                                }
-                            ],
-                            "primaryKeys": null,
-                            "foreignKeysSpecs": null,
-                            "foreignKeys": null
-                        }
+                                    }
+                                },
+                                "nativeDataType": "ARRAY(INTEGER())",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ARRAY(INTEGER())\"}"
+                            }
+                        ],
+                        "primaryKeys": null,
+                        "foreignKeysSpecs": null,
+                        "foreignKeys": null
                     }
-                ]
-            }
-        },
-        "proposedDelta": null,
-        "systemMetadata": {
-            "lastObserved": 1632398400000,
-            "runId": "trino-hive-test",
-            "properties": null
+                }
+            ]
         }
     },
-    {
-        "auditHeader": null,
-        "proposedSnapshot": {
-            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-                "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)",
-                "aspects": [
-                    {
-                        "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                            "customProperties": {
-                                "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}",
-                                "numfiles": "0",
-                                "numrows": "0",
-                                "rawdatasize": "0",
-                                "totalsize": "0",
-                                "transient_lastddltime": "1634127353"
-                            },
-                            "externalUrl": null,
-                            "description": null,
-                            "uri": null,
-                            "tags": []
-                        }
-                    },
-                    {
-                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                            "schemaName": "hivedb.db1.map_test",
-                            "platform": "urn:li:dataPlatform:trino",
-                            "version": 0,
-                            "created": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "lastModified": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "deleted": null,
-                            "dataset": null,
-                            "cluster": null,
-                            "hash": "",
-                            "platformSchema": {
-                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
-                                    "tableSchema": ""
-                                }
-                            },
-                            "fields": [
-                                {
-                                    "fieldPath": "keyvalue",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+    "proposedDelta": null,
+    "systemMetadata": {
+        "lastObserved": 1632398400000,
+        "runId": "trino-hive-test",
+        "registryName": null,
+        "registryVersion": null,
+        "properties": null
+    }
+},
+{
+    "auditHeader": null,
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+                        "customProperties": {
+                            "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}",
+                            "numfiles": "0",
+                            "numrows": "0",
+                            "rawdatasize": "0",
+                            "totalsize": "0",
+                            "transient_lastddltime": "1638688536"
+                        },
+                        "externalUrl": null,
+                        "description": null,
+                        "uri": null,
+                        "tags": []
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "hivedb.db1.map_test",
+                        "platform": "urn:li:dataPlatform:trino",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "deleted": null,
+                        "dataset": null,
+                        "cluster": null,
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+                                "tableSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "keyvalue",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=map].[type=string].recordid",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=map].[type=string].recordid",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.MapType": {
-                                                "keyType": null,
-                                                "valueType": null
-                                            }
+                                        "com.linkedin.pegasus2avro.schema.MapType": {
+                                            "keyType": null,
+                                            "valueType": null
                                         }
-                                    },
-                                    "nativeDataType": "MAP(INTEGER(), VARCHAR())",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
-                                }
-                            ],
-                            "primaryKeys": null,
-                            "foreignKeysSpecs": null,
-                            "foreignKeys": null
-                        }
+                                    }
+                                },
+                                "nativeDataType": "MAP(INTEGER(), VARCHAR())",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"MAP(INTEGER(), VARCHAR())\", \"key_type\": {\"type\": \"int\", \"native_data_type\": \"INTEGER()\", \"_nullable\": true}, \"key_native_data_type\": \"INTEGER()\"}"
+                            }
+                        ],
+                        "primaryKeys": null,
+                        "foreignKeysSpecs": null,
+                        "foreignKeys": null
                     }
-                ]
-            }
-        },
-        "proposedDelta": null,
-        "systemMetadata": {
-            "lastObserved": 1632398400000,
-            "runId": "trino-hive-test",
-            "properties": null
+                }
+            ]
         }
     },
-    {
-        "auditHeader": null,
-        "proposedSnapshot": {
-            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-                "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)",
-                "aspects": [
-                    {
-                        "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                            "customProperties": {
-                                "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}",
-                                "numfiles": "0",
-                                "numrows": "0",
-                                "rawdatasize": "0",
-                                "totalsize": "0",
-                                "transient_lastddltime": "1634127353"
-                            },
-                            "externalUrl": null,
-                            "description": null,
-                            "uri": null,
-                            "tags": []
-                        }
-                    },
-                    {
-                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                            "schemaName": "hivedb.db1.nested_struct_test",
-                            "platform": "urn:li:dataPlatform:trino",
-                            "version": 0,
-                            "created": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "lastModified": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "deleted": null,
-                            "dataset": null,
-                            "cluster": null,
-                            "hash": "",
-                            "platformSchema": {
-                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
-                                    "tableSchema": ""
-                                }
-                            },
-                            "fields": [
-                                {
-                                    "fieldPath": "property_id",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+    "proposedDelta": null,
+    "systemMetadata": {
+        "lastObserved": 1632398400000,
+        "runId": "trino-hive-test",
+        "registryName": null,
+        "registryVersion": null,
+        "properties": null
+    }
+},
+{
+    "auditHeader": null,
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+                        "customProperties": {
+                            "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}",
+                            "numfiles": "0",
+                            "numrows": "0",
+                            "rawdatasize": "0",
+                            "totalsize": "0",
+                            "transient_lastddltime": "1638688535"
+                        },
+                        "externalUrl": null,
+                        "description": null,
+                        "uri": null,
+                        "tags": []
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "hivedb.db1.nested_struct_test",
+                        "platform": "urn:li:dataPlatform:trino",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "deleted": null,
+                        "dataset": null,
+                        "cluster": null,
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+                                "tableSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "property_id",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].service",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].service",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.RecordType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "ROW([('type', VARCHAR()), ('provider', ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())]))])",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.RecordType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "ROW([('type', VARCHAR()), ('provider', ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())]))])",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ROW([('type', VARCHAR()), ('provider', ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())]))])\"}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.RecordType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())])",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.RecordType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())])",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())])\"}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR(length=50)",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR(length=50)",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"VARCHAR(length=50)\", \"_nullable\": true}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "SMALLINT()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
-                                }
-                            ],
-                            "primaryKeys": null,
-                            "foreignKeysSpecs": null,
-                            "foreignKeys": null
-                        }
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
+                                },
+                                "nativeDataType": "SMALLINT()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"SMALLINT()\", \"_nullable\": true}"
+                            }
+                        ],
+                        "primaryKeys": null,
+                        "foreignKeysSpecs": null,
+                        "foreignKeys": null
                     }
-                ]
-            }
-        },
-        "proposedDelta": null,
-        "systemMetadata": {
-            "lastObserved": 1632398400000,
-            "runId": "trino-hive-test",
-            "properties": null
+                }
+            ]
         }
     },
-    {
-        "auditHeader": null,
-        "proposedSnapshot": {
-            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-                "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)",
-                "aspects": [
-                    {
-                        "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                            "customProperties": {
-                                "transient_lastddltime": "1633435441"
-                            },
-                            "externalUrl": null,
-                            "description": null,
-                            "uri": null,
-                            "tags": []
-                        }
-                    },
-                    {
-                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                            "schemaName": "hivedb.db1.pokes",
-                            "platform": "urn:li:dataPlatform:trino",
-                            "version": 0,
-                            "created": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "lastModified": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "deleted": null,
-                            "dataset": null,
-                            "cluster": null,
-                            "hash": "",
-                            "platformSchema": {
-                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
-                                    "tableSchema": ""
-                                }
-                            },
-                            "fields": [
-                                {
-                                    "fieldPath": "foo",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+    "proposedDelta": null,
+    "systemMetadata": {
+        "lastObserved": 1632398400000,
+        "runId": "trino-hive-test",
+        "registryName": null,
+        "registryVersion": null,
+        "properties": null
+    }
+},
+{
+    "auditHeader": null,
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+                        "customProperties": {
+                            "transient_lastddltime": "1638688524"
+                        },
+                        "externalUrl": null,
+                        "description": null,
+                        "uri": null,
+                        "tags": []
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "hivedb.db1.pokes",
+                        "platform": "urn:li:dataPlatform:trino",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "deleted": null,
+                        "dataset": null,
+                        "cluster": null,
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+                                "tableSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "foo",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "bar",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "bar",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "baz",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "baz",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
-                                }
-                            ],
-                            "primaryKeys": null,
-                            "foreignKeysSpecs": null,
-                            "foreignKeys": null
-                        }
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
+                                },
+                                "nativeDataType": "VARCHAR()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            }
+                        ],
+                        "primaryKeys": null,
+                        "foreignKeysSpecs": null,
+                        "foreignKeys": null
                     }
-                ]
-            }
-        },
-        "proposedDelta": null,
-        "systemMetadata": {
-            "lastObserved": 1632398400000,
-            "runId": "trino-hive-test",
-            "properties": null
+                }
+            ]
         }
     },
-    {
-        "auditHeader": null,
-        "proposedSnapshot": {
-            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-                "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)",
-                "aspects": [
-                    {
-                        "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                            "customProperties": {
-                                "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}",
-                                "numfiles": "0",
-                                "numrows": "0",
-                                "rawdatasize": "0",
-                                "totalsize": "0",
-                                "transient_lastddltime": "1633434486"
-                            },
-                            "externalUrl": null,
-                            "description": null,
-                            "uri": null,
-                            "tags": []
-                        }
-                    },
-                    {
-                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                            "schemaName": "hivedb.db1.struct_test",
-                            "platform": "urn:li:dataPlatform:trino",
-                            "version": 0,
-                            "created": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "lastModified": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "deleted": null,
-                            "dataset": null,
-                            "cluster": null,
-                            "hash": "",
-                            "platformSchema": {
-                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
-                                    "tableSchema": ""
-                                }
-                            },
-                            "fields": [
-                                {
-                                    "fieldPath": "property_id",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+    "proposedDelta": null,
+    "systemMetadata": {
+        "lastObserved": 1632398400000,
+        "runId": "trino-hive-test",
+        "registryName": null,
+        "registryVersion": null,
+        "properties": null
+    }
+},
+{
+    "auditHeader": null,
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+                        "customProperties": {
+                            "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}",
+                            "numfiles": "0",
+                            "numrows": "0",
+                            "rawdatasize": "0",
+                            "totalsize": "0",
+                            "transient_lastddltime": "1638688528"
+                        },
+                        "externalUrl": null,
+                        "description": null,
+                        "uri": null,
+                        "tags": []
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "hivedb.db1.struct_test",
+                        "platform": "urn:li:dataPlatform:trino",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "deleted": null,
+                        "dataset": null,
+                        "cluster": null,
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+                                "tableSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "property_id",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].service",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].service",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.RecordType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.RecordType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])\"}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.ArrayType": {
-                                                "nestedType": null
-                                            }
+                                        "com.linkedin.pegasus2avro.schema.ArrayType": {
+                                            "nestedType": null
                                         }
-                                    },
-                                    "nativeDataType": "ARRAY(INTEGER())",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
-                                }
-                            ],
-                            "primaryKeys": null,
-                            "foreignKeysSpecs": null,
-                            "foreignKeys": null
-                        }
+                                    }
+                                },
+                                "nativeDataType": "ARRAY(INTEGER())",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ARRAY(INTEGER())\"}"
+                            }
+                        ],
+                        "primaryKeys": null,
+                        "foreignKeysSpecs": null,
+                        "foreignKeys": null
                     }
-                ]
-            }
-        },
-        "proposedDelta": null,
-        "systemMetadata": {
-            "lastObserved": 1632398400000,
-            "runId": "trino-hive-test",
-            "properties": null
+                }
+            ]
         }
     },
-    {
-        "auditHeader": null,
-        "proposedSnapshot": {
-            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-                "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)",
-                "aspects": [
-                    {
-                        "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                            "customProperties": {
-                                "numfiles": "0",
-                                "totalsize": "0",
-                                "transient_lastddltime": "1633434491"
-                            },
-                            "externalUrl": null,
-                            "description": null,
-                            "uri": null,
-                            "tags": []
-                        }
-                    },
-                    {
-                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                            "schemaName": "hivedb.db1.struct_test_view_materialized",
-                            "platform": "urn:li:dataPlatform:trino",
-                            "version": 0,
-                            "created": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "lastModified": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "deleted": null,
-                            "dataset": null,
-                            "cluster": null,
-                            "hash": "",
-                            "platformSchema": {
-                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
-                                    "tableSchema": ""
-                                }
-                            },
-                            "fields": [
-                                {
-                                    "fieldPath": "property_id",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+    "proposedDelta": null,
+    "systemMetadata": {
+        "lastObserved": 1632398400000,
+        "runId": "trino-hive-test",
+        "registryName": null,
+        "registryVersion": null,
+        "properties": null
+    }
+},
+{
+    "auditHeader": null,
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+                        "customProperties": {
+                            "numfiles": "0",
+                            "totalsize": "0",
+                            "transient_lastddltime": "1638688535"
+                        },
+                        "externalUrl": null,
+                        "description": null,
+                        "uri": null,
+                        "tags": []
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "hivedb.db1.struct_test_view_materialized",
+                        "platform": "urn:li:dataPlatform:trino",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "deleted": null,
+                        "dataset": null,
+                        "cluster": null,
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+                                "tableSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "property_id",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].service",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].service",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.RecordType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.RecordType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])\"}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.ArrayType": {
-                                                "nestedType": null
-                                            }
+                                        "com.linkedin.pegasus2avro.schema.ArrayType": {
+                                            "nestedType": null
                                         }
-                                    },
-                                    "nativeDataType": "ARRAY(INTEGER())",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
-                                }
-                            ],
-                            "primaryKeys": null,
-                            "foreignKeysSpecs": null,
-                            "foreignKeys": null
-                        }
+                                    }
+                                },
+                                "nativeDataType": "ARRAY(INTEGER())",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ARRAY(INTEGER())\"}"
+                            }
+                        ],
+                        "primaryKeys": null,
+                        "foreignKeysSpecs": null,
+                        "foreignKeys": null
                     }
-                ]
-            }
-        },
-        "proposedDelta": null,
-        "systemMetadata": {
-            "lastObserved": 1632398400000,
-            "runId": "trino-hive-test",
-            "properties": null
+                }
+            ]
         }
     },
-    {
-        "auditHeader": null,
-        "proposedSnapshot": {
-            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-                "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)",
-                "aspects": [
-                    {
-                        "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                            "customProperties": {
-                                "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}",
-                                "numfiles": "0",
-                                "numrows": "0",
-                                "rawdatasize": "0",
-                                "totalsize": "0",
-                                "transient_lastddltime": "1633434486"
-                            },
-                            "externalUrl": null,
-                            "description": null,
-                            "uri": null,
-                            "tags": []
-                        }
-                    },
-                    {
-                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                            "schemaName": "hivedb.db1._test_table_underscore",
-                            "platform": "urn:li:dataPlatform:trino",
-                            "version": 0,
-                            "created": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "lastModified": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "deleted": null,
-                            "dataset": null,
-                            "cluster": null,
-                            "hash": "",
-                            "platformSchema": {
-                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
-                                    "tableSchema": ""
-                                }
-                            },
-                            "fields": [
-                                {
-                                    "fieldPath": "foo",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+    "proposedDelta": null,
+    "systemMetadata": {
+        "lastObserved": 1632398400000,
+        "runId": "trino-hive-test",
+        "registryName": null,
+        "registryVersion": null,
+        "properties": null
+    }
+},
+{
+    "auditHeader": null,
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+                        "customProperties": {
+                            "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}",
+                            "numfiles": "0",
+                            "numrows": "0",
+                            "rawdatasize": "0",
+                            "totalsize": "0",
+                            "transient_lastddltime": "1638688527"
+                        },
+                        "externalUrl": null,
+                        "description": null,
+                        "uri": null,
+                        "tags": []
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "hivedb.db1._test_table_underscore",
+                        "platform": "urn:li:dataPlatform:trino",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "deleted": null,
+                        "dataset": null,
+                        "cluster": null,
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+                                "tableSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "foo",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "bar",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "bar",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
-                                }
-                            ],
-                            "primaryKeys": null,
-                            "foreignKeysSpecs": null,
-                            "foreignKeys": null
-                        }
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
+                                },
+                                "nativeDataType": "VARCHAR()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            }
+                        ],
+                        "primaryKeys": null,
+                        "foreignKeysSpecs": null,
+                        "foreignKeys": null
                     }
-                ]
-            }
-        },
-        "proposedDelta": null,
-        "systemMetadata": {
-            "lastObserved": 1632398400000,
-            "runId": "trino-hive-test",
-            "properties": null
+                }
+            ]
         }
     },
-    {
-        "auditHeader": null,
-        "proposedSnapshot": {
-            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-                "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)",
-                "aspects": [
-                    {
-                        "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                            "customProperties": {
-                                "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}",
-                                "numfiles": "0",
-                                "numrows": "0",
-                                "rawdatasize": "0",
-                                "totalsize": "0",
-                                "transient_lastddltime": "1634127353"
-                            },
-                            "externalUrl": null,
-                            "description": null,
-                            "uri": null,
-                            "tags": []
-                        }
-                    },
-                    {
-                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                            "schemaName": "hivedb.db1.union_test",
-                            "platform": "urn:li:dataPlatform:trino",
-                            "version": 0,
-                            "created": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "lastModified": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "deleted": null,
-                            "dataset": null,
-                            "cluster": null,
-                            "hash": "",
-                            "platformSchema": {
-                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
-                                    "tableSchema": ""
-                                }
-                            },
-                            "fields": [
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].foo",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+    "proposedDelta": null,
+    "systemMetadata": {
+        "lastObserved": 1632398400000,
+        "runId": "trino-hive-test",
+        "registryName": null,
+        "registryVersion": null,
+        "properties": null
+    }
+},
+{
+    "auditHeader": null,
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+                        "customProperties": {
+                            "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}",
+                            "numfiles": "0",
+                            "numrows": "0",
+                            "rawdatasize": "0",
+                            "totalsize": "0",
+                            "transient_lastddltime": "1638688536"
+                        },
+                        "externalUrl": null,
+                        "description": null,
+                        "uri": null,
+                        "tags": []
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "hivedb.db1.union_test",
+                        "platform": "urn:li:dataPlatform:trino",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "deleted": null,
+                        "dataset": null,
+                        "cluster": null,
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+                                "tableSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].foo",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.RecordType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "ROW([('tag', SMALLINT()), ('field0', INTEGER()), ('field1', DOUBLE()), ('field2', ARRAY(VARCHAR())), ('field3', ROW([('a', INTEGER()), ('b', VARCHAR())]))])",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.RecordType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=int].tag",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "ROW([('tag', SMALLINT()), ('field0', INTEGER()), ('field1', DOUBLE()), ('field2', ARRAY(VARCHAR())), ('field3', ROW([('a', INTEGER()), ('b', VARCHAR())]))])",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ROW([('tag', SMALLINT()), ('field0', INTEGER()), ('field1', DOUBLE()), ('field2', ARRAY(VARCHAR())), ('field3', ROW([('a', INTEGER()), ('b', VARCHAR())]))])\"}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=int].tag",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "SMALLINT()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=int].field0",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "SMALLINT()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"SMALLINT()\", \"_nullable\": true}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=int].field0",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=double].field1",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"INTEGER()\", \"_nullable\": true}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=double].field1",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "DOUBLE()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=array].[type=string].field2",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "DOUBLE()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"DOUBLE()\", \"_nullable\": true}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=array].[type=string].field2",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.ArrayType": {
-                                                "nestedType": null
-                                            }
+                                        "com.linkedin.pegasus2avro.schema.ArrayType": {
+                                            "nestedType": null
                                         }
-                                    },
-                                    "nativeDataType": "ARRAY(VARCHAR())",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=struct].field3",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "ARRAY(VARCHAR())",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ARRAY(VARCHAR())\"}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=struct].field3",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.RecordType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "ROW([('a', INTEGER()), ('b', VARCHAR())])",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.RecordType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=struct].field3.[type=int].a",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "ROW([('a', INTEGER()), ('b', VARCHAR())])",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ROW([('a', INTEGER()), ('b', VARCHAR())])\"}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=struct].field3.[type=int].a",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=struct].field3.[type=string].b",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"INTEGER()\", \"_nullable\": true}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=struct].field3.[type=string].b",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
-                                }
-                            ],
-                            "primaryKeys": null,
-                            "foreignKeysSpecs": null,
-                            "foreignKeys": null
-                        }
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
+                                },
+                                "nativeDataType": "VARCHAR()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}"
+                            }
+                        ],
+                        "primaryKeys": null,
+                        "foreignKeysSpecs": null,
+                        "foreignKeys": null
                     }
-                ]
-            }
-        },
-        "proposedDelta": null,
-        "systemMetadata": {
-            "lastObserved": 1632398400000,
-            "runId": "trino-hive-test",
-            "properties": null
+                }
+            ]
         }
     },
-    {
-        "auditHeader": null,
-        "proposedSnapshot": {
-            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-                "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)",
-                "aspects": [
-                    {
-                        "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                            "customProperties": {
-                                "transient_lastddltime": "1633434492",
-                                "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"",
-                                "is_view": "True"
-                            },
-                            "externalUrl": null,
-                            "description": null,
-                            "uri": null,
-                            "tags": []
-                        }
-                    },
-                    {
-                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                            "schemaName": "hivedb.db1.array_struct_test_view",
-                            "platform": "urn:li:dataPlatform:trino",
-                            "version": 0,
-                            "created": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "lastModified": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "deleted": null,
-                            "dataset": null,
-                            "cluster": null,
-                            "hash": "",
-                            "platformSchema": {
-                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
-                                    "tableSchema": ""
-                                }
-                            },
-                            "fields": [
-                                {
-                                    "fieldPath": "property_id",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+    "proposedDelta": null,
+    "systemMetadata": {
+        "lastObserved": 1632398400000,
+        "runId": "trino-hive-test",
+        "registryName": null,
+        "registryVersion": null,
+        "properties": null
+    }
+},
+{
+    "auditHeader": null,
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+                        "customProperties": {
+                            "transient_lastddltime": "1638688535",
+                            "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"",
+                            "is_view": "True"
+                        },
+                        "externalUrl": null,
+                        "description": null,
+                        "uri": null,
+                        "tags": []
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "hivedb.db1.array_struct_test_view",
+                        "platform": "urn:li:dataPlatform:trino",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "deleted": null,
+                        "dataset": null,
+                        "cluster": null,
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+                                "tableSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "property_id",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.ArrayType": {
-                                                "nestedType": null
-                                            }
+                                        "com.linkedin.pegasus2avro.schema.ArrayType": {
+                                            "nestedType": null
                                         }
-                                    },
-                                    "nativeDataType": "ARRAY(ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))]))",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "ARRAY(ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))]))",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ARRAY(ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))]))\"}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}"
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.ArrayType": {
-                                                "nestedType": null
-                                            }
+                                        "com.linkedin.pegasus2avro.schema.ArrayType": {
+                                            "nestedType": null
                                         }
-                                    },
-                                    "nativeDataType": "ARRAY(INTEGER())",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
-                                }
-                            ],
-                            "primaryKeys": null,
-                            "foreignKeysSpecs": null,
-                            "foreignKeys": null
-                        }
+                                    }
+                                },
+                                "nativeDataType": "ARRAY(INTEGER())",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ARRAY(INTEGER())\"}"
+                            }
+                        ],
+                        "primaryKeys": null,
+                        "foreignKeysSpecs": null,
+                        "foreignKeys": null
                     }
-                ]
-            }
-        },
-        "proposedDelta": null,
-        "systemMetadata": {
-            "lastObserved": 1632398400000,
-            "runId": "trino-hive-test",
-            "properties": null
+                }
+            ]
         }
+    },
+    "proposedDelta": null,
+    "systemMetadata": {
+        "lastObserved": 1632398400000,
+        "runId": "trino-hive-test",
+        "registryName": null,
+        "registryVersion": null,
+        "properties": null
     }
+}
 ]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json
index 9de96147ac5c4..2f5130fb996ce 100644
--- a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json
+++ b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json
@@ -1,532 +1,558 @@
 [
-    {
-        "auditHeader": null,
-        "proposedSnapshot": {
-            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-                "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)",
-                "aspects": [
-                    {
-                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                            "schemaName": "library_catalog.librarydb.book",
-                            "platform": "urn:li:dataPlatform:trino",
-                            "version": 0,
-                            "created": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "lastModified": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "deleted": null,
-                            "dataset": null,
-                            "cluster": null,
-                            "hash": "",
-                            "platformSchema": {
-                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
-                                    "tableSchema": ""
-                                }
-                            },
-                            "fields": [
-                                {
-                                    "fieldPath": "id",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+{
+    "auditHeader": null,
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "library_catalog.librarydb.book",
+                        "platform": "urn:li:dataPlatform:trino",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "deleted": null,
+                        "dataset": null,
+                        "cluster": null,
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+                                "tableSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "id",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "name",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "name",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR(length=50)",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "author",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR(length=50)",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "author",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR(length=50)",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "publisher",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR(length=50)",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "publisher",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR(length=50)",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "tags",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR(length=50)",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "tags",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.RecordType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "JSON()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.RecordType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "[version=2.0].[type=struct].[type=array].[type=int].genre_ids",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "JSON()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "[version=2.0].[type=struct].[type=array].[type=int].genre_ids",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.ArrayType": {
-                                                "nestedType" : null
-                                            }
+                                        "com.linkedin.pegasus2avro.schema.ArrayType": {
+                                            "nestedType": null
                                         }
-                                    },
-                                    "nativeDataType": "ARRAY(INTEGER())",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
-                                }
-                            ],
-                            "primaryKeys": null,
-                            "foreignKeysSpecs": null,
-                            "foreignKeys": null
-                        }
+                                    }
+                                },
+                                "nativeDataType": "ARRAY(INTEGER())",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": "{\"native_data_type\": \"ARRAY(INTEGER())\"}"
+                            }
+                        ],
+                        "primaryKeys": null,
+                        "foreignKeysSpecs": null,
+                        "foreignKeys": null
                     }
-                ]
-            }
-        },
-        "proposedDelta": null,
-        "systemMetadata": {
-            "lastObserved": 1632398400000,
-            "runId": "trino-test",
-            "properties": null
+                }
+            ]
         }
     },
-    {
-        "auditHeader": null,
-        "proposedSnapshot": {
-            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-                "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)",
-                "aspects": [
-                    {
-                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                            "schemaName": "library_catalog.librarydb.issue_history",
-                            "platform": "urn:li:dataPlatform:trino",
-                            "version": 0,
-                            "created": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "lastModified": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "deleted": null,
-                            "dataset": null,
-                            "cluster": null,
-                            "hash": "",
-                            "platformSchema": {
-                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
-                                    "tableSchema": ""
-                                }
-                            },
-                            "fields": [
-                                {
-                                    "fieldPath": "book_id",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+    "proposedDelta": null,
+    "systemMetadata": {
+        "lastObserved": 1632398400000,
+        "runId": "trino-test",
+        "registryName": null,
+        "registryVersion": null,
+        "properties": null
+    }
+},
+{
+    "auditHeader": null,
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "library_catalog.librarydb.issue_history",
+                        "platform": "urn:li:dataPlatform:trino",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "deleted": null,
+                        "dataset": null,
+                        "cluster": null,
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+                                "tableSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "book_id",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "member_id",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "member_id",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "issue_date",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "issue_date",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.DateType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "DATE()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.DateType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "return_date",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "DATE()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "return_date",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.DateType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "DATE()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
-                                }
-                            ],
-                            "primaryKeys": null,
-                            "foreignKeysSpecs": null,
-                            "foreignKeys": null
-                        }
+                                        "com.linkedin.pegasus2avro.schema.DateType": {}
+                                    }
+                                },
+                                "nativeDataType": "DATE()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            }
+                        ],
+                        "primaryKeys": null,
+                        "foreignKeysSpecs": null,
+                        "foreignKeys": null
                     }
-                ]
-            }
-        },
-        "proposedDelta": null,
-        "systemMetadata": {
-            "lastObserved": 1632398400000,
-            "runId": "trino-test",
-            "properties": null
+                }
+            ]
         }
     },
-    {
-        "auditHeader": null,
-        "proposedSnapshot": {
-            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-                "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)",
-                "aspects": [
-                    {
-                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                            "schemaName": "library_catalog.librarydb.member",
-                            "platform": "urn:li:dataPlatform:trino",
-                            "version": 0,
-                            "created": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "lastModified": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "deleted": null,
-                            "dataset": null,
-                            "cluster": null,
-                            "hash": "",
-                            "platformSchema": {
-                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
-                                    "tableSchema": ""
-                                }
-                            },
-                            "fields": [
-                                {
-                                    "fieldPath": "id",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+    "proposedDelta": null,
+    "systemMetadata": {
+        "lastObserved": 1632398400000,
+        "runId": "trino-test",
+        "registryName": null,
+        "registryVersion": null,
+        "properties": null
+    }
+},
+{
+    "auditHeader": null,
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "library_catalog.librarydb.member",
+                        "platform": "urn:li:dataPlatform:trino",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "deleted": null,
+                        "dataset": null,
+                        "cluster": null,
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+                                "tableSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "id",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "name",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "name",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR(length=50)",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
-                                }
-                            ],
-                            "primaryKeys": null,
-                            "foreignKeysSpecs": null,
-                            "foreignKeys": null
-                        }
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
+                                },
+                                "nativeDataType": "VARCHAR(length=50)",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            }
+                        ],
+                        "primaryKeys": null,
+                        "foreignKeysSpecs": null,
+                        "foreignKeys": null
                     }
-                ]
-            }
-        },
-        "proposedDelta": null,
-        "systemMetadata": {
-            "lastObserved": 1632398400000,
-            "runId": "trino-test",
-            "properties": null
+                }
+            ]
         }
     },
-    {
-        "auditHeader": null,
-        "proposedSnapshot": {
-            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-                "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)",
-                "aspects": [
-                    {
-                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                            "schemaName": "library_catalog.librarydb.book_in_circulation",
-                            "platform": "urn:li:dataPlatform:trino",
-                            "version": 0,
-                            "created": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "lastModified": {
-                                "time": 0,
-                                "actor": "urn:li:corpuser:unknown",
-                                "impersonator": null
-                            },
-                            "deleted": null,
-                            "dataset": null,
-                            "cluster": null,
-                            "hash": "",
-                            "platformSchema": {
-                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
-                                    "tableSchema": ""
-                                }
-                            },
-                            "fields": [
-                                {
-                                    "fieldPath": "id",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+    "proposedDelta": null,
+    "systemMetadata": {
+        "lastObserved": 1632398400000,
+        "runId": "trino-test",
+        "registryName": null,
+        "registryVersion": null,
+        "properties": null
+    }
+},
+{
+    "auditHeader": null,
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "library_catalog.librarydb.book_in_circulation",
+                        "platform": "urn:li:dataPlatform:trino",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown",
+                            "impersonator": null
+                        },
+                        "deleted": null,
+                        "dataset": null,
+                        "cluster": null,
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+                                "tableSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "id",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "name",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "name",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR(length=50)",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "author",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR(length=50)",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "author",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR(length=50)",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "publisher",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR(length=50)",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "publisher",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.StringType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "VARCHAR(length=50)",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "member_id",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "VARCHAR(length=50)",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "member_id",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "INTEGER()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
+                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
+                                    }
                                 },
-                                {
-                                    "fieldPath": "issue_date",
-                                    "jsonPath": null,
-                                    "nullable": true,
-                                    "description": null,
+                                "nativeDataType": "INTEGER()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            },
+                            {
+                                "fieldPath": "issue_date",
+                                "jsonPath": null,
+                                "nullable": true,
+                                "description": null,
+                                "type": {
                                     "type": {
-                                        "type": {
-                                            "com.linkedin.pegasus2avro.schema.DateType": {}
-                                        }
-                                    },
-                                    "nativeDataType": "DATE()",
-                                    "recursive": false,
-                                    "globalTags": null,
-                                    "glossaryTerms": null,
-                                    "isPartOfKey": false
-                                }
-                            ],
-                            "primaryKeys": null,
-                            "foreignKeysSpecs": null,
-                            "foreignKeys": null
-                        }
+                                        "com.linkedin.pegasus2avro.schema.DateType": {}
+                                    }
+                                },
+                                "nativeDataType": "DATE()",
+                                "recursive": false,
+                                "globalTags": null,
+                                "glossaryTerms": null,
+                                "isPartOfKey": false,
+                                "jsonProps": null
+                            }
+                        ],
+                        "primaryKeys": null,
+                        "foreignKeysSpecs": null,
+                        "foreignKeys": null
                     }
-                ]
-            }
-        },
-        "proposedDelta": null,
-        "systemMetadata": {
-            "lastObserved": 1632398400000,
-            "runId": "trino-test",
-            "properties": null
+                }
+            ]
         }
     },
-    {
-        "auditHeader": null,
-        "entityType": "dataset",
-        "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)",
-        "entityKeyAspect": null,
-        "changeType": "UPSERT",
-        "aspectName": "datasetProfile",
-        "aspect": {
-            "value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 3, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 1\", \"Book 2\", \"Book 3\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"ABC\", \"PQR\", \"XYZ\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"tags\", \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"genre_ids\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}]}",
-            "contentType": "application/json"
-        },
-        "systemMetadata": null
+    "proposedDelta": null,
+    "systemMetadata": {
+        "lastObserved": 1632398400000,
+        "runId": "trino-test",
+        "registryName": null,
+        "registryVersion": null,
+        "properties": null
+    }
+},
+{
+    "auditHeader": null,
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)",
+    "entityKeyAspect": null,
+    "changeType": "UPSERT",
+    "aspectName": "datasetProfile",
+    "aspect": {
+        "value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 3, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 1\", \"Book 2\", \"Book 3\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"ABC\", \"PQR\", \"XYZ\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"tags\", \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"genre_ids\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}]}",
+        "contentType": "application/json"
     },
-    {
-        "auditHeader": null,
-        "entityType": "dataset",
-        "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)",
-        "entityKeyAspect": null,
-        "changeType": "UPSERT",
-        "aspectName": "datasetProfile",
-        "aspect": {
-            "value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 2, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"book_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 0.5, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\", \"2021-09-27\"]}, {\"fieldPath\": \"return_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 1, \"nullProportion\": 0.5, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}",
-            "contentType": "application/json"
-        },
-        "systemMetadata": null
+    "systemMetadata": null
+},
+{
+    "auditHeader": null,
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)",
+    "entityKeyAspect": null,
+    "changeType": "UPSERT",
+    "aspectName": "datasetProfile",
+    "aspect": {
+        "value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 2, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"book_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 0.5, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\", \"2021-09-27\"]}, {\"fieldPath\": \"return_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 1, \"nullProportion\": 0.5, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}",
+        "contentType": "application/json"
     },
-    {
-        "auditHeader": null,
-        "entityType": "dataset",
-        "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)",
-        "entityKeyAspect": null,
-        "changeType": "UPSERT",
-        "aspectName": "datasetProfile",
-        "aspect": {
-            "value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 2, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Member 1\", \"Member 2\"]}]}",
-            "contentType": "application/json"
-        },
-        "systemMetadata": null
+    "systemMetadata": null
+},
+{
+    "auditHeader": null,
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)",
+    "entityKeyAspect": null,
+    "changeType": "UPSERT",
+    "aspectName": "datasetProfile",
+    "aspect": {
+        "value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 2, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Member 1\", \"Member 2\"]}]}",
+        "contentType": "application/json"
     },
-    {
-        "auditHeader": null,
-        "entityType": "dataset",
-        "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)",
-        "entityKeyAspect": null,
-        "changeType": "UPSERT",
-        "aspectName": "datasetProfile",
-        "aspect": {
-            "value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 1, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 2\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"PQR\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 1, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}",
-            "contentType": "application/json"
-        },
-        "systemMetadata": null
-    }
+    "systemMetadata": null
+},
+{
+    "auditHeader": null,
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)",
+    "entityKeyAspect": null,
+    "changeType": "UPSERT",
+    "aspectName": "datasetProfile",
+    "aspect": {
+        "value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 1, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 2\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"PQR\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 1, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}",
+        "contentType": "application/json"
+    },
+    "systemMetadata": null
+}
 ]
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl
index 34197152ac5d8..98a6fc3cff024 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl
@@ -5,12 +5,12 @@ import com.linkedin.common.GlobalTags
 import com.linkedin.common.GlossaryTerms
 
 /**
- * SchemaField to describe metadata related to dataset schema. Schema normalization rules: http://go/tms-schema
+ * SchemaField to describe metadata related to dataset schema.
  */
 record SchemaField {
 
   /**
-   * Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above.
+   * Flattened name of the field. Field is computed from jsonPath field.
    */
   @Searchable = {
     "fieldName": "fieldPaths",
@@ -21,6 +21,7 @@ record SchemaField {
   /**
    * Flattened name of a field in JSON Path notation.
    */
+  @Deprecated
   jsonPath: optional string
 
   /**
diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml
index 2c2d1114d0cd4..f8c71cf6214b7 100644
--- a/metadata-models/src/main/resources/entity-registry.yml
+++ b/metadata-models/src/main/resources/entity-registry.yml
@@ -1,5 +1,6 @@
 entities:
   - name: dataset
+    doc: Datasets represent logical or physical data assets stored or represented in various data platforms. Tables, Views, Streams are all instances of datasets.
     keyAspect: datasetKey
     aspects:
       - viewProperties
@@ -7,10 +8,12 @@ entities:
       - datasetProfile
       - datasetUsageStatistics
   - name: dataHubPolicy
+    doc: DataHub Policies represent access policies granted to users or groups on metadata operations like edit, view etc.
     keyAspect: dataHubPolicyKey
     aspects:
       - dataHubPolicyInfo
   - name: corpuser
+    doc: CorpUser represents an identity of a person (or an account) in the enterprise.
     keyAspect: corpUserKey
     aspects:
       - corpUserInfo