From edb82ad91fba8a401c56b82bc4c2916a39a6a6dd Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 24 Oct 2023 18:56:14 -0400 Subject: [PATCH 1/6] docs(ingest/bigquery): Add docs for breaking change: match_fully_qualified_names (#9094) --- docs/how/updating-datahub.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 3af3b2bdda215..7d8c25b06255a 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -11,11 +11,17 @@ This file documents any backwards-incompatible changes in DataHub and assists pe by Looker and LookML source connectors. - #8853 - The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. See the docs for more details. - #8853 - Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`. -- #8943 The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. +- #8943 - The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. This is currently enabled by default to preserve compatibility, but will be disabled by default and then removed in the future. If stateful ingestion is enabled, simply setting `include_metastore: false` will perform all required cleanup. Otherwise, we recommend soft deleting all databricks data via the DataHub CLI: `datahub delete --platform databricks --soft` and then reingesting with `include_metastore: false`. +- #9077 - The BigQuery ingestion source by default sets `match_fully_qualified_names: true`. +This means that any `dataset_pattern` or `schema_pattern` specified will be matched on the fully +qualified dataset name, i.e. `.`. If this is not the case, please +update your pattern (e.g. prepend your old dataset pattern with `.*\.` which matches the project part), +or set `match_fully_qualified_names: false` in your recipe. However, note that +setting this to `false` is deprecated and this flag will be removed entirely in a future release. ### Potential Downtime From fe18532b29e35af1cd3007e6affc102042b1af61 Mon Sep 17 00:00:00 2001 From: skrydal Date: Wed, 25 Oct 2023 00:58:56 +0200 Subject: [PATCH 2/6] docs(update): Added info on breaking change for policies (#9093) Co-authored-by: Pedro Silva --- docs/how/updating-datahub.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 7d8c25b06255a..57193ea69f2be 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -16,6 +16,39 @@ This is currently enabled by default to preserve compatibility, but will be disa If stateful ingestion is enabled, simply setting `include_metastore: false` will perform all required cleanup. Otherwise, we recommend soft deleting all databricks data via the DataHub CLI: `datahub delete --platform databricks --soft` and then reingesting with `include_metastore: false`. +- #8846 - Changed enum values in resource filters used by policies. `RESOURCE_TYPE` became `TYPE` and `RESOURCE_URN` became `URN`. +Any existing policies using these filters (i.e. 
defined for particular `urns` or `types` such as `dataset`) need to be upgraded +manually, for example by retrieving their respective `dataHubPolicyInfo` aspect and changing part using filter i.e. +```yaml + "resources": { + "filter": { + "criteria": [ + { + "field": "RESOURCE_TYPE", + "condition": "EQUALS", + "values": [ + "dataset" + ] + } + ] + } +``` +into +```yaml + "resources": { + "filter": { + "criteria": [ + { + "field": "TYPE", + "condition": "EQUALS", + "values": [ + "dataset" + ] + } + ] + } +``` +for example, using `datahub put` command. Policies can be also removed and re-created via UI. - #9077 - The BigQuery ingestion source by default sets `match_fully_qualified_names: true`. This means that any `dataset_pattern` or `schema_pattern` specified will be matched on the fully qualified dataset name, i.e. `.`. If this is not the case, please From ca331f58bd24187f9f0ca317216837178e9f41fa Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Wed, 25 Oct 2023 09:39:57 +0900 Subject: [PATCH 3/6] docs: add luckyorange script to head (#9080) --- docs-website/docusaurus.config.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 68ea1ebffa6c9..259ef970d818e 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -13,6 +13,13 @@ module.exports = { projectName: "datahub", // Usually your repo name. staticDirectories: ["static", "genStatic"], stylesheets: ["https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap"], + scripts: [ + { + src: "https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38", + async: true, + defer: true, + }, + ], noIndex: isSaas, customFields: { isSaas: isSaas, From 9a59c452bf36d750964f6d7f78df84a8c0c5eb66 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Wed, 25 Oct 2023 09:40:28 +0900 Subject: [PATCH 4/6] design: refactor docs navbar (#8975) Co-authored-by: Jeff Merrick --- docs-website/docusaurus.config.js | 61 +++++++++---------- docs-website/src/styles/global.scss | 27 +++++--- .../DocsVersionDropdownNavbarItem.js | 4 ++ .../src/theme/NavbarItem/styles.module.scss | 8 +++ 4 files changed, 59 insertions(+), 41 deletions(-) create mode 100644 docs-website/src/theme/NavbarItem/styles.module.scss diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 259ef970d818e..506e263933394 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -57,44 +57,41 @@ module.exports = { position: "right", }, { - to: "https://demo.datahubproject.io/", - label: "Demo", - position: "right", - }, - { - href: "https://blog.datahubproject.io/", - label: "Blog", - position: "right", - }, - { - href: "https://feature-requests.datahubproject.io/roadmap", - label: "Roadmap", + type: "dropdown", + label: "Resources", position: "right", + items: [ + { + href: "https://demo.datahubproject.io/", + label: "Demo", + }, + { + href: "https://blog.datahubproject.io/", + label: "Blog", + }, + { + href: "https://feature-requests.datahubproject.io/roadmap", + label: "Roadmap", + }, + { + href: "https://slack.datahubproject.io", + label: "Slack", + }, + { + href: "https://github.com/datahub-project/datahub", + label: "GitHub", + }, + { + href: "https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w", + label: "YouTube", + }, + ], }, { type: "docsVersionDropdown", - position: "right", + position: "left", dropdownActiveClassDisabled: true, }, - { - href: 
"https://slack.datahubproject.io", - "aria-label": "Slack", - position: "right", - className: "item__icon item__slack", - }, - { - href: "https://github.com/datahub-project/datahub", - "aria-label": "GitHub", - position: "right", - className: "item__icon item__github", - }, - - { - href: "https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w", - "aria-label": "YouTube", - position: "right", - className: "item__icon item__youtube", - }, ], }, footer: { diff --git a/docs-website/src/styles/global.scss b/docs-website/src/styles/global.scss index 55a54876b41ac..16e3893ed08b7 100644 --- a/docs-website/src/styles/global.scss +++ b/docs-website/src/styles/global.scss @@ -144,20 +144,29 @@ div[class^="announcementBar"] { /** Navbar */ -@media only screen and (max-width: 1050px) { - .navbar__toggle { - display: inherit; - } - .navbar__item { - display: none; - } -} - .navbar { .navbar__logo { height: 3rem; } + + .navbar__link { + align-items: center; + margin: 0 1rem 0; + padding: 0; + border-bottom: 2px solid transparent; + } + + .dropdown > .navbar__link:after { + top: -1px; + border-width: 0.3em 0.3em 0; + margin-left: 0.4em; + } + + .navbar__link--active { + border-bottom-color: var(--ifm-navbar-link-hover-color); + } .navbar__item { + padding: 0.25rem 0; svg[class*="iconExternalLink"] { display: none; } diff --git a/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js index cc04ab23d3cf3..661d64392e67f 100644 --- a/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js +++ b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js @@ -6,6 +6,9 @@ import { translate } from "@docusaurus/Translate"; import { useLocation } from "@docusaurus/router"; import DefaultNavbarItem from "@theme/NavbarItem/DefaultNavbarItem"; import DropdownNavbarItem from "@theme/NavbarItem/DropdownNavbarItem"; + +import styles from "./styles.module.scss"; + const getVersionMainDoc = (version) => version.docs.find((doc) => doc.id === version.mainDocId); export default function DocsVersionDropdownNavbarItem({ mobile, @@ -60,6 +63,7 @@ export default function DocsVersionDropdownNavbarItem({ return ( Date: Tue, 24 Oct 2023 19:59:42 -0700 Subject: [PATCH 5/6] fix(ingest): update athena type mapping (#9061) --- .../src/datahub/ingestion/source/sql/athena.py | 4 +++- .../src/datahub/ingestion/source/sql/sql_common.py | 5 +---- .../src/datahub/ingestion/source/sql/sql_types.py | 5 ++--- .../datahub/utilities/sqlalchemy_type_converter.py | 13 ++++++++++--- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index 06b9ad92677a2..75e8fe1d6f7a6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -37,7 +37,7 @@ gen_database_key, ) from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField -from datahub.metadata.schema_classes import RecordTypeClass +from datahub.metadata.schema_classes import MapTypeClass, RecordTypeClass from datahub.utilities.hive_schema_to_avro import get_avro_schema_for_hive_column from datahub.utilities.sqlalchemy_type_converter import ( MapType, @@ -46,7 +46,9 @@ logger = logging.getLogger(__name__) +assert STRUCT, "required type modules are not available" register_custom_type(STRUCT, RecordTypeClass) +register_custom_type(MapType, MapTypeClass) class 
CustomAthenaRestDialect(AthenaRestDialect): diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index be03858ec3ef9..fad9b9e8018a5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -80,7 +80,6 @@ DatasetLineageTypeClass, DatasetPropertiesClass, GlobalTagsClass, - MapTypeClass, SubTypesClass, TagAssociationClass, UpstreamClass, @@ -90,7 +89,6 @@ from datahub.utilities.lossy_collections import LossyList from datahub.utilities.registries.domain_registry import DomainRegistry from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport -from datahub.utilities.sqlalchemy_type_converter import MapType if TYPE_CHECKING: from datahub.ingestion.source.ge_data_profiler import ( @@ -140,6 +138,7 @@ class SqlWorkUnit(MetadataWorkUnit): _field_type_mapping: Dict[Type[TypeEngine], Type] = { + # Note: to add dialect-specific types to this mapping, use the `register_custom_type` function. types.Integer: NumberTypeClass, types.Numeric: NumberTypeClass, types.Boolean: BooleanTypeClass, @@ -156,8 +155,6 @@ class SqlWorkUnit(MetadataWorkUnit): types.DATETIME: TimeTypeClass, types.TIMESTAMP: TimeTypeClass, types.JSON: RecordTypeClass, - # additional type definitions that are used by the Athena source - MapType: MapTypeClass, # type: ignore # Because the postgresql dialect is used internally by many other dialects, # we add some postgres types here. This is ok to do because the postgresql # dialect is built-in to sqlalchemy. diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py index ae47623188f42..3b4a7e1dc0287 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py @@ -7,7 +7,7 @@ BytesType, DateType, EnumType, - MapType as MapTypeAvro, + MapType, NullType, NumberType, RecordType, @@ -15,7 +15,6 @@ TimeType, UnionType, ) -from datahub.utilities.sqlalchemy_type_converter import MapType # these can be obtained by running `select format_type(oid, null),* from pg_type;` # we've omitted the types without a meaningful DataHub type (e.g. postgres-specific types, index vectors, etc.) @@ -364,7 +363,7 @@ def resolve_vertica_modified_type(type_string: str) -> Any: "time": TimeType, "timestamp": TimeType, "row": RecordType, - "map": MapTypeAvro, + "map": MapType, "array": ArrayType, } diff --git a/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py index 1d5ec5dae3519..5d2fc6872c7bd 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py +++ b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py @@ -4,7 +4,6 @@ from typing import Any, Dict, List, Optional, Type, Union from sqlalchemy import types -from sqlalchemy_bigquery import STRUCT from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField @@ -12,6 +11,12 @@ logger = logging.getLogger(__name__) +try: + # This is used for both BigQuery and Athena. 
+ from sqlalchemy_bigquery import STRUCT +except ImportError: + STRUCT = None + class MapType(types.TupleType): # Wrapper class around SQLalchemy's TupleType to increase compatibility with DataHub @@ -42,7 +47,9 @@ def get_avro_type( ) -> Dict[str, Any]: """Determines the concrete AVRO schema type for a SQLalchemy-typed column""" - if type(column_type) in cls.PRIMITIVE_SQL_ALCHEMY_TYPE_TO_AVRO_TYPE.keys(): + if isinstance( + column_type, tuple(cls.PRIMITIVE_SQL_ALCHEMY_TYPE_TO_AVRO_TYPE.keys()) + ): return { "type": cls.PRIMITIVE_SQL_ALCHEMY_TYPE_TO_AVRO_TYPE[type(column_type)], "native_data_type": str(column_type), @@ -88,7 +95,7 @@ def get_avro_type( "key_type": cls.get_avro_type(column_type=key_type, nullable=nullable), "key_native_data_type": str(key_type), } - if isinstance(column_type, STRUCT): + if STRUCT and isinstance(column_type, STRUCT): fields = [] for field_def in column_type._STRUCT_fields: field_name, field_type = field_def From 2d1584b12fe4a40a077457e618f0937132763586 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 24 Oct 2023 23:08:24 -0400 Subject: [PATCH 6/6] feat(ingest/datahub-source): Allow ingesting aspects from the entitiesV2 API (#9089) --- .../ingestion/source/datahub/config.py | 19 ++++++- .../source/datahub/datahub_api_reader.py | 49 +++++++++++++++++++ .../source/datahub/datahub_source.py | 16 ++++++ 3 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py index 053d136305527..83958dc76754f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py @@ -1,3 +1,4 @@ +import os from typing import Optional from pydantic import Field, root_validator @@ -67,9 +68,25 @@ class DataHubSourceConfig(StatefulIngestionConfigBase): ), ) + pull_from_datahub_api: bool = Field( + default=False, + description="Use the DataHub API to fetch versioned aspects.", + hidden_from_docs=True, + ) + + max_workers: int = Field( + default=5 * (os.cpu_count() or 4), + description="Number of worker threads to use for datahub api ingestion.", + hidden_from_docs=True, + ) + @root_validator def check_ingesting_data(cls, values): - if not values.get("database_connection") and not values.get("kafka_connection"): + if ( + not values.get("database_connection") + and not values.get("kafka_connection") + and not values.get("pull_from_datahub_api") + ): raise ValueError( "Your current config will not ingest any data." " Please specify at least one of `database_connection` or `kafka_connection`, ideally both." 
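Before the new reader appears below, a quick illustration of what the validator change above buys: with `pull_from_datahub_api` enabled, a `DataHubSourceConfig` no longer needs a database or Kafka connection to pass validation. This is a minimal sketch against the fields visible in this diff, assuming the base class adds no other required fields:

```python
from datahub.ingestion.source.datahub.config import DataHubSourceConfig

# Passes the new root validator: pull_from_datahub_api stands in for a
# database_connection or kafka_connection.
api_only = DataHubSourceConfig(pull_from_datahub_api=True, max_workers=8)

# With none of the three options set, validation fails with
# "Your current config will not ingest any data. ..."
# DataHubSourceConfig()
```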
diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py new file mode 100644 index 0000000000000..7ee36736723b2 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py @@ -0,0 +1,49 @@ +import logging +from concurrent import futures +from typing import Dict, Iterable, List + +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.graph.filters import RemovedStatusFilter +from datahub.ingestion.source.datahub.config import DataHubSourceConfig +from datahub.ingestion.source.datahub.report import DataHubSourceReport +from datahub.metadata._schema_classes import _Aspect + +logger = logging.getLogger(__name__) + +# Should work for at least mysql, mariadb, postgres +DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f" + + +class DataHubApiReader: + def __init__( + self, + config: DataHubSourceConfig, + report: DataHubSourceReport, + graph: DataHubGraph, + ): + self.config = config + self.report = report + self.graph = graph + + def get_aspects(self) -> Iterable[MetadataChangeProposalWrapper]: + urns = self.graph.get_urns_by_filter( + status=RemovedStatusFilter.ALL, + batch_size=self.config.database_query_batch_size, + ) + tasks: List[futures.Future[Iterable[MetadataChangeProposalWrapper]]] = [] + with futures.ThreadPoolExecutor( + max_workers=self.config.max_workers + ) as executor: + for urn in urns: + tasks.append(executor.submit(self._get_aspects_for_urn, urn)) + for task in futures.as_completed(tasks): + yield from task.result() + + def _get_aspects_for_urn(self, urn: str) -> Iterable[MetadataChangeProposalWrapper]: + aspects: Dict[str, _Aspect] = self.graph.get_entity_semityped(urn) # type: ignore + for aspect in aspects.values(): + yield MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=aspect, + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py index 2368febe1ff57..a2f43b8cc62cb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py @@ -15,6 +15,7 @@ from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.datahub.config import DataHubSourceConfig +from datahub.ingestion.source.datahub.datahub_api_reader import DataHubApiReader from datahub.ingestion.source.datahub.datahub_database_reader import ( DataHubDatabaseReader, ) @@ -58,6 +59,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: logger.info(f"Ingesting DataHub metadata up until {self.report.stop_time}") state = self.stateful_ingestion_handler.get_last_run_state() + if self.config.pull_from_datahub_api: + yield from self._get_api_workunits() + if self.config.database_connection is not None: yield from self._get_database_workunits( from_createdon=state.database_createdon_datetime @@ -139,6 +143,18 @@ def _get_kafka_workunits( ) self._commit_progress(i) + def _get_api_workunits(self) -> Iterable[MetadataWorkUnit]: + if self.ctx.graph is None: + self.report.report_failure( + "datahub_api", + "Specify datahub_api on your ingestion recipe to ingest from the DataHub API", + ) + return + + reader = DataHubApiReader(self.config, self.report, self.ctx.graph) + for 
mcp in reader.get_aspects(): + yield mcp.as_workunit() + def _commit_progress(self, i: Optional[int] = None) -> None: """Commit progress to stateful storage, if there have been no errors.
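Taken together, patch 6 lets the `datahub` source pull versioned aspects straight from the DataHub API when no database or Kafka connection is configured. A rough standalone sketch of the new code path — the server URL is a placeholder, and it assumes `DataHubSourceReport` and `DataHubSourceConfig` can be constructed with defaults beyond the fields shown above:

```python
from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig
from datahub.ingestion.source.datahub.config import DataHubSourceConfig
from datahub.ingestion.source.datahub.datahub_api_reader import DataHubApiReader
from datahub.ingestion.source.datahub.report import DataHubSourceReport

# Connect to a DataHub instance (placeholder URL) and pull versioned aspects
# back as MetadataChangeProposalWrapper objects, mirroring what
# DataHubSource._get_api_workunits does inside a pipeline run.
graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080"))
config = DataHubSourceConfig(pull_from_datahub_api=True)
reader = DataHubApiReader(config=config, report=DataHubSourceReport(), graph=graph)

for mcp in reader.get_aspects():
    print(mcp.entityUrn, type(mcp.aspect).__name__)
```

In a real recipe the same behavior is driven by setting `pull_from_datahub_api: true` in the source config and pointing `datahub_api` at the server, as the failure message added in `_get_api_workunits` indicates.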