-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feat: CatalogManager refactor, decoupling sql processors, caches, and…
… internal sql backend tables (#220)
- Loading branch information
1 parent
9564b4c
commit e82d37c
Showing
42 changed files
with
1,546 additions
and
940 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -103,7 +103,7 @@ jobs: | |
git config --global user.name "octavia-squidington-iii" | ||
git config --global user.email "[email protected]" | ||
git add . | ||
git commit -m "Auto-fix lint and format issues" | ||
git commit -m "Auto-commit `poetry lock` changes" | ||
- name: Push changes to '(${{ steps.pr-info.outputs.repo }})' | ||
if: steps.git-diff.outputs.changes == 'true' | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Copyright (c) 2024 Airbyte, Inc., all rights reserved. | ||
"""Module for future CDK components. | ||
Components here are planned to move to the CDK. | ||
TODO!: Add GitHub link here before merging. | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
from airbyte._future_cdk.sql_processor import SqlProcessorBase | ||
|
||
|
||
__all__ = [ | ||
"SqlProcessorBase", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
"""Catalog provider implementation. | ||
A catalog provider wraps a configured catalog and configured streams. This class is responsible for | ||
providing information about the catalog and streams. A catalog provider can also be updated with new | ||
streams as they are discovered, providing a thin layer of abstraction over the configured catalog. | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING, Any, final | ||
|
||
from airbyte import exceptions as exc | ||
|
||
|
||
if TYPE_CHECKING: | ||
from airbyte_protocol.models import ( | ||
ConfiguredAirbyteCatalog, | ||
ConfiguredAirbyteStream, | ||
) | ||
|
||
|
||
class CatalogProvider: | ||
"""A catalog provider wraps a configured catalog and configured streams. | ||
This class is responsible for providing information about the catalog and streams. | ||
Note: | ||
- The catalog provider is not responsible for managing the catalog or streams but it may | ||
be updated with new streams as they are discovered. | ||
""" | ||
|
||
def __init__( | ||
self, | ||
configured_catalog: ConfiguredAirbyteCatalog, | ||
) -> None: | ||
"""Initialize the catalog manager with a catalog object reference. | ||
Since the catalog is passed by reference, the catalog manager may be updated with new | ||
streams as they are discovered. | ||
""" | ||
self._catalog: ConfiguredAirbyteCatalog = configured_catalog | ||
|
||
@property | ||
def configured_catalog(self) -> ConfiguredAirbyteCatalog: | ||
return self._catalog | ||
|
||
@property | ||
def stream_names(self) -> list[str]: | ||
return list({stream.stream.name for stream in self.configured_catalog.streams}) | ||
|
||
def get_configured_stream_info( | ||
self, | ||
stream_name: str, | ||
) -> ConfiguredAirbyteStream: | ||
"""Return the column definitions for the given stream.""" | ||
if not self.configured_catalog: | ||
raise exc.PyAirbyteInternalError( | ||
message="Cannot get stream JSON schema without a catalog.", | ||
) | ||
|
||
matching_streams: list[ConfiguredAirbyteStream] = [ | ||
stream | ||
for stream in self.configured_catalog.streams | ||
if stream.stream.name == stream_name | ||
] | ||
if not matching_streams: | ||
raise exc.AirbyteStreamNotFoundError( | ||
stream_name=stream_name, | ||
context={ | ||
"available_streams": [ | ||
stream.stream.name for stream in self.configured_catalog.streams | ||
], | ||
}, | ||
) | ||
|
||
if len(matching_streams) > 1: | ||
raise exc.PyAirbyteInternalError( | ||
message="Multiple streams found with same name.", | ||
context={ | ||
"stream_name": stream_name, | ||
}, | ||
) | ||
|
||
return matching_streams[0] | ||
|
||
@final | ||
def get_stream_json_schema( | ||
self, | ||
stream_name: str, | ||
) -> dict[str, Any]: | ||
"""Return the column definitions for the given stream.""" | ||
return self.get_configured_stream_info(stream_name).stream.json_schema | ||
|
||
def get_stream_properties( | ||
self, | ||
stream_name: str, | ||
) -> dict[str, dict]: | ||
"""Return the names of the top-level properties for the given stream.""" | ||
return self.get_stream_json_schema(stream_name)["properties"] |
Oops, something went wrong.