From 591979c88517a226007b89535f8dfa966eea349e Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Thu, 15 Feb 2024 23:37:57 -0800 Subject: [PATCH 01/18] add pydoc workflows --- .github/workflows/pydoc_preview.yml | 33 +++++++++++++++++++++++++ .github/workflows/pydoc_publish.yml | 38 +++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 .github/workflows/pydoc_preview.yml create mode 100644 .github/workflows/pydoc_publish.yml diff --git a/.github/workflows/pydoc_preview.yml b/.github/workflows/pydoc_preview.yml new file mode 100644 index 00000000..a66ec250 --- /dev/null +++ b/.github/workflows/pydoc_preview.yml @@ -0,0 +1,33 @@ +name: Generate Python Documentation + +on: + push: + branches: + - main + pull_request: {} + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Poetry + uses: Gr1N/setup-poetry@v8 + with: + poetry-version: "1.7.1" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: 'poetry' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pdoc3 + + - name: Generate documentation + run: | + pdoc --html --output-dir docs . diff --git a/.github/workflows/pydoc_publish.yml b/.github/workflows/pydoc_publish.yml new file mode 100644 index 00000000..a7ceafc1 --- /dev/null +++ b/.github/workflows/pydoc_publish.yml @@ -0,0 +1,38 @@ +name: Generate Python Documentation + +on: + push: + branches: + - main + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Poetry + uses: Gr1N/setup-poetry@v8 + with: + poetry-version: "1.7.1" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: 'poetry' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pdoc3 + + - name: Generate documentation + run: | + pdoc --html --output-dir docs . 
+ + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./docs From 2133a8afb71b0b87a2c4e41a3345776f6b543237 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Thu, 15 Feb 2024 23:41:58 -0800 Subject: [PATCH 02/18] fix deps install --- .github/workflows/pydoc_preview.yml | 4 +--- .github/workflows/pydoc_publish.yml | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pydoc_preview.yml b/.github/workflows/pydoc_preview.yml index a66ec250..be182ce4 100644 --- a/.github/workflows/pydoc_preview.yml +++ b/.github/workflows/pydoc_preview.yml @@ -24,9 +24,7 @@ jobs: cache: 'poetry' - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pdoc3 + run: poetry install - name: Generate documentation run: | diff --git a/.github/workflows/pydoc_publish.yml b/.github/workflows/pydoc_publish.yml index a7ceafc1..3923a8bb 100644 --- a/.github/workflows/pydoc_publish.yml +++ b/.github/workflows/pydoc_publish.yml @@ -23,9 +23,7 @@ jobs: cache: 'poetry' - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pdoc3 + run: poetry install - name: Generate documentation run: | From a972b9445888e54f305231ea6352144879e0635d Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 16 Feb 2024 09:58:53 -0800 Subject: [PATCH 03/18] update publish flow --- .github/workflows/pydoc_publish.yml | 38 +++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pydoc_publish.yml b/.github/workflows/pydoc_publish.yml index 3923a8bb..f0537ff8 100644 --- a/.github/workflows/pydoc_publish.yml +++ b/.github/workflows/pydoc_publish.yml @@ -1,13 +1,31 @@ -name: Generate Python Documentation +name: Publish Documentation Site on: push: branches: - main + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + jobs: - build: + deploy: runs-on: ubuntu-latest + environment: + name: "github-pages" + url: ${{ steps.deployment.outputs.page_url }} steps: - name: Checkout code @@ -21,16 +39,22 @@ jobs: with: python-version: '3.10' cache: 'poetry' + - name: Setup Pages + uses: actions/configure-pages@v4 - name: Install dependencies run: poetry install - name: Generate documentation run: | - pdoc --html --output-dir docs . + poetry run generate-docs - - name: Deploy to GitHub Pages - uses: peaceiris/actions-gh-pages@v3 + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ./docs + # Upload entire repository + path: '.' 
+ + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 From 8400476e39f5a351cb08293627f18d8e76c081f9 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 16 Feb 2024 10:05:05 -0800 Subject: [PATCH 04/18] test on all pushes --- .github/workflows/pydoc_publish.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pydoc_publish.yml b/.github/workflows/pydoc_publish.yml index f0537ff8..ffafeecf 100644 --- a/.github/workflows/pydoc_publish.yml +++ b/.github/workflows/pydoc_publish.yml @@ -2,8 +2,8 @@ name: Publish Documentation Site on: push: - branches: - - main + # branches: + # # - main # TODO: uncomment # Allows you to run this workflow manually from the Actions tab workflow_dispatch: From 2d034a1c5541949ef2476f4e49d25932fe84ac90 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 16 Feb 2024 10:13:13 -0800 Subject: [PATCH 05/18] publish from generated path --- .github/workflows/pydoc_publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pydoc_publish.yml b/.github/workflows/pydoc_publish.yml index ffafeecf..88602ae5 100644 --- a/.github/workflows/pydoc_publish.yml +++ b/.github/workflows/pydoc_publish.yml @@ -53,7 +53,7 @@ jobs: uses: actions/upload-pages-artifact@v3 with: # Upload entire repository - path: '.' + path: 'docs/generated' - name: Deploy to GitHub Pages id: deployment From 6e3c70fc07a8ebdcce29c616443468d585b6ada8 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 16 Feb 2024 10:16:10 -0800 Subject: [PATCH 06/18] delete html.jinja2 override template --- docs/frame.html.jinja2 | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 docs/frame.html.jinja2 diff --git a/docs/frame.html.jinja2 b/docs/frame.html.jinja2 deleted file mode 100644 index 379ae376..00000000 --- a/docs/frame.html.jinja2 +++ /dev/null @@ -1,14 +0,0 @@ - -
- {% block module_contents %}{% endblock %}
-
-
-{% filter minify_css %}
-  {% block style %}
-    {# The same CSS files as in pdoc's default template, except for layout.css.
-       You may leave out Bootstrap Reboot, which corrects inconsistences across browsers
-       but may conflict with you website's stylesheet. #}
-
-
-  {% endblock %}
-{% endfilter %}

From ea4cfe4a036a9821688ea82c6ddbf3f8340701b1 Mon Sep 17 00:00:00 2001
From: Aaron Steers
Date: Fri, 16 Feb 2024 10:23:42 -0800
Subject: [PATCH 07/18] delete generated content

---
 docs/generated/airbyte.html          | 889 ------------------------
 docs/generated/airbyte/caches.html   | 992 ---------------------------
 docs/generated/airbyte/datasets.html | 258 -------
 docs/generated/index.html            |   7 -
 4 files changed, 2146 deletions(-)
 delete mode 100644 docs/generated/airbyte.html
 delete mode 100644 docs/generated/airbyte/caches.html
 delete mode 100644 docs/generated/airbyte/datasets.html
 delete mode 100644 docs/generated/index.html

diff --git a/docs/generated/airbyte.html b/docs/generated/airbyte.html
deleted file mode 100644
index 5ad45f6e..00000000
--- a/docs/generated/airbyte.html
+++ /dev/null
@@ -1,889 +0,0 @@
 [... 889 deleted lines: pdoc-generated HTML reference for the `airbyte` module (CachedDataset, DuckDBCache, DuckDBCacheConfig, ReadResult, SecretSource, Source, and the get_available_connectors, get_source, get_default_cache, get_secret, and new_local_cache functions) ...]
\ No newline at end of file

diff --git a/docs/generated/airbyte/caches.html b/docs/generated/airbyte/caches.html
deleted file mode 100644
index 3d6c0be9..00000000
--- a/docs/generated/airbyte/caches.html
+++ /dev/null
@@ -1,992 +0,0 @@
 [... 992 deleted lines: pdoc-generated HTML reference for `airbyte.caches` (DuckDBCache, DuckDBCacheConfig, PostgresCache, PostgresCacheConfig, SQLCacheBase, SnowflakeCacheConfig, SnowflakeSQLCache) ...]
\ No newline at end of file

diff --git a/docs/generated/airbyte/datasets.html b/docs/generated/airbyte/datasets.html
deleted file mode 100644
index 4b3fb249..00000000
--- a/docs/generated/airbyte/datasets.html
+++ /dev/null
@@ -1,258 +0,0 @@
 [... 258 deleted lines: pdoc-generated HTML reference for `airbyte.datasets` (CachedDataset, DatasetBase, DatasetMap, LazyDataset, SQLDataset) ...]
- - - - \ No newline at end of file diff --git a/docs/generated/index.html b/docs/generated/index.html deleted file mode 100644 index 6893f472..00000000 --- a/docs/generated/index.html +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - From f800e81dc888e1bc718def531a3b18f4e2ceead8 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 16 Feb 2024 10:26:11 -0800 Subject: [PATCH 08/18] update docs script --- docs.py | 31 ----------------- docs/.gitignore | 1 + docs/generate.py | 44 ++++++++++++++++++++++++ pyproject.toml | 2 +- tests/docs_tests/test_docs_checked_in.py | 4 +-- 5 files changed, 48 insertions(+), 34 deletions(-) delete mode 100644 docs.py create mode 100644 docs/.gitignore create mode 100644 docs/generate.py diff --git a/docs.py b/docs.py deleted file mode 100644 index 63947eca..00000000 --- a/docs.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -from __future__ import annotations - -import os -import pathlib -import shutil - -import pdoc - - -def run() -> None: - """Generate docs for all public modules in airbyte and save them to docs/generated. - - Public modules are: - * The main airbyte module - * All directory modules in airbyte that don't start with an underscore. - """ - public_modules = ["airbyte"] - - # recursively delete the docs/generated folder if it exists - if pathlib.Path("docs/generated").exists(): - shutil.rmtree("docs/generated") - - # All folders in `airbyte` that don't start with "_" are treated as public modules. - for d in os.listdir("airbyte"): - dir_path = pathlib.Path(f"airbyte/{d}") - if dir_path.is_dir() and not d.startswith("_") and (dir_path / "__init__.py").exists(): - public_modules.append(dir_path) - - pdoc.render.configure(template_directory="docs", show_source=False, search=False) - pdoc.pdoc(*public_modules, output_directory=pathlib.Path("docs/generated")) diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 00000000..86d4c2dd --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1 @@ +generated diff --git a/docs/generate.py b/docs/generate.py new file mode 100644 index 00000000..2cf8d868 --- /dev/null +++ b/docs/generate.py @@ -0,0 +1,44 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +from __future__ import annotations + +import os +import pathlib +import shutil + +import pdoc + +import airbyte as ab + +import typing + +typing.TYPE_CHECKING = True + + +def run() -> None: + """Generate docs for all public modules in airbyte_lib and save them to docs/generated. + + Public modules are: + * The main airbyte_lib module + * All directory modules in airbyte_lib that don't start with an underscore. + """ + public_modules = ["airbyte"] + + # recursively delete the docs/generated folder if it exists + if pathlib.Path("docs/generated").exists(): + shutil.rmtree("docs/generated") + + # All files and folders in `airbyte_lib` that don't start with "_" are treated as public. 
+ for submodule in os.listdir("airbyte"): + submodule_path = pathlib.Path(f"airbyte/{submodule}") + if not submodule.startswith("_"): + public_modules.append(submodule_path) + + pdoc.render.configure( + template_directory="docs", + show_source=False, + search=False, + ) + pdoc.pdoc( + *public_modules, + output_directory=pathlib.Path("docs/generated"), + ) diff --git a/pyproject.toml b/pyproject.toml index f8defa35..1a6fca73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -247,7 +247,7 @@ module = [ ignore_missing_imports = true # No stubs yet (😢) [tool.poetry.scripts] -generate-docs = "docs:run" +generate-docs = "docs.generate:run" airbyte-lib-validate-source = "airbyte.validate:run" [tool.poe.tasks] diff --git a/tests/docs_tests/test_docs_checked_in.py b/tests/docs_tests/test_docs_checked_in.py index 54614c7c..86ecfe7c 100644 --- a/tests/docs_tests/test_docs_checked_in.py +++ b/tests/docs_tests/test_docs_checked_in.py @@ -2,7 +2,7 @@ import os -import docs +import docs.generate as generate def test_docs_checked_in(): @@ -13,7 +13,7 @@ def test_docs_checked_in(): It will fail if there are any differences. """ - docs.run() + generate.run() # compare the generated docs with the checked in docs diff = os.system("git diff --exit-code docs/generated") From 386a892e22c7353e76826747534f3e142c2031aa Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 16 Feb 2024 10:28:35 -0800 Subject: [PATCH 09/18] add contributing page --- CONTRIBUTING.md | 35 +++++++++++++++++++++++++++++++++++ README.md | 45 +++++++++++++-------------------------------- 2 files changed, 48 insertions(+), 32 deletions(-) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..6c0c4638 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,35 @@ +# Contributing to AirbyteLib + +Learn how you can become a contributor to AirbyteLib. + +## Development + +- Make sure [Poetry is installed](https://python-poetry.org/docs/#). +- Run `poetry install` +- For examples, check out the `examples` folder. They can be run via `poetry run python examples/` +- Unit tests and type checks can be run via `poetry run pytest` + +## Documentation + +Regular documentation lives in the `/docs` folder. Based on the doc strings of public methods, we generate API documentation using [pdoc](https://pdoc.dev). + +To generate the documentation, run: + +```console +poetry run generate-docs +``` + +The `generate-docs` CLI command is mapped to the `run()` function of `docs.py` in the root `airbyte-lib` directory. + +Documentation pages will be generated in the `docs/generated` folder. The `test_docs.py` test in pytest will automatically update generated content. This updates must be manually committed before docs tests will pass. + +## Release + +- In your PR: + - Bump the version in `pyproject.toml` + - Add a changelog entry to the table below +- Once the PR is merged, go to Github and trigger the `Publish AirbyteLib Manually` workflow. This will publish the new version to PyPI. + +## Versioning + +Versioning follows [Semantic Versioning](https://semver.org/). For new features, bump the minor version. For bug fixes, bump the patch version. For pre-releases, append `dev.N` to the version. For example, `0.1.0dev.1` is the first pre-release of the `0.1.0` version. 
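With the `[tool.poetry.scripts]` mapping above, `poetry run generate-docs` resolves to `docs.generate:run`. The sketch below is a minimal reading of that flow, assuming pdoc is installed and the repository root is the working directory; the trailing freshness check mirrors `test_docs_checked_in.py` and is illustrative rather than part of the committed script.

```python
# Minimal sketch of what `poetry run generate-docs` invokes, per the
# [tool.poetry.scripts] entry: generate-docs = "docs.generate:run".
import os
import pathlib

import pdoc

pdoc.render.configure(
    template_directory="docs",  # Local template overrides, when present.
    show_source=False,
    search=False,
)
# Render the top-level package's API docs into docs/generated/.
pdoc.pdoc("airbyte", output_directory=pathlib.Path("docs/generated"))

# Illustrative freshness check, mirroring tests/docs_tests/test_docs_checked_in.py:
# a nonzero exit status means the regenerated docs differ from what is checked in.
assert os.system("git diff --exit-code docs/generated") == 0
```

Pairing generation with the `git diff --exit-code` check is what lets CI fail fast whenever `docs/generated` drifts from the source docstrings.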
diff --git a/README.md b/README.md index b5092b97..9f934d73 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,6 @@ -# PyAirbyte +# airbyte-lib -PyAirbyte is a library that allows to run Airbyte syncs embedded into any Python application, without the need to run Airbyte server. - -## Development - -- Make sure [Poetry is installed](https://python-poetry.org/docs/#). -- Run `poetry install` -- For examples, check out the `examples` folder. They can be run via `poetry run python examples/` -- Unit tests and type checks can be run via `poetry run pytest` - -## Release - -- In your PR: - - Bump the version in `pyproject.toml` - - Add a changelog entry to the table below -- Once the PR is merged, go to Github and trigger the `Publish AirbyteLib Manually` workflow. This will publish the new version to PyPI. +airbyte-lib is a library that allows to run Airbyte syncs embedded into any Python application, without the need to run Airbyte server. ## Secrets Management @@ -25,12 +11,12 @@ AirbyteLib can auto-import secrets from the following sources: 3. [Google Colab secrets](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75). 4. Manual entry via [`getpass`](https://docs.python.org/3.9/library/getpass.html). -_Note: Additional secret store options may be supported in the future. [More info here.](https://github.com/airbytehq/PyAirbyte-private-beta/discussions/5)_ +_Note: Additional secret store options may be supported in the future. [More info here.](https://github.com/airbytehq/airbyte-lib-private-beta/discussions/5)_ ### Retrieving Secrets ```python -from airbyte import get_secret, SecretSource +from airbyte_lib import get_secret, SecretSource source = get_connection("source-github") source.set_config( @@ -44,22 +30,13 @@ The `get_secret()` function accepts an optional `source` argument of enum type ` By default, AirbyteLib will prompt the user for any requested secrets that are not provided via other secret managers. You can disable this prompt by passing `prompt=False` to `get_secret()`. -### Versioning - -Versioning follows [Semantic Versioning](https://semver.org/). For new features, bump the minor version. For bug fixes, bump the patch version. For pre-releases, append `dev.N` to the version. For example, `0.1.0dev.1` is the first pre-release of the `0.1.0` version. - -## Documentation - -Regular documentation lives in the `/docs` folder. Based on the doc strings of public methods, we generate API documentation using [pdoc](https://pdoc.dev). To generate the documentation, run `poetry run generate-docs`. The documentation will be generated in the `docs/generate` folder. This needs to be done manually when changing the public interface of the library. - -A unit test validates the documentation is up to date. - ## Connector compatibility -To make a connector compatible with PyAirbyte, the following requirements must be met: -* The connector must be a Python package, with a `pyproject.toml` or a `setup.py` file. -* In the package, there must be a `run.py` file that contains a `run` method. This method should read arguments from the command line, and run the connector with them, outputting messages to stdout. -* The `pyproject.toml` or `setup.py` file must specify a command line entry point for the `run` method called `source-`. This is usually done by adding a `console_scripts` section to the `pyproject.toml` file, or a `entry_points` section to the `setup.py` file. 
For example: +To make a connector compatible with airbyte-lib, the following requirements must be met: + +- The connector must be a Python package, with a `pyproject.toml` or a `setup.py` file. +- In the package, there must be a `run.py` file that contains a `run` method. This method should read arguments from the command line, and run the connector with them, outputting messages to stdout. +- The `pyproject.toml` or `setup.py` file must specify a command line entry point for the `run` method called `source-`. This is usually done by adding a `console_scripts` section to the `pyproject.toml` file, or a `entry_points` section to the `setup.py` file. For example: ```toml [tool.poetry.scripts] @@ -101,6 +78,10 @@ The script will install the python package in the provided directory, and run th For a more lightweight check, the `--validate-install-only` flag can be used. This will only check that the connector can be installed and returns a spec, no sample config required. +## Contributing + +To learn how you can contribute to AirbyteLib, please see our [AirbyteLib Contributors Guide](./CONTRIBUTING.md). + ## Changelog | Version | PR | Description | From 67b0bdcb9aa41a5f835d43a72c028a861c48707c Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 16 Feb 2024 10:32:50 -0800 Subject: [PATCH 10/18] update main module init --- airbyte/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/airbyte/__init__.py b/airbyte/__init__.py index 9862696e..684cd5c7 100644 --- a/airbyte/__init__.py +++ b/airbyte/__init__.py @@ -1,4 +1,8 @@ -"""AirbyteLib brings Airbyte ELT to every Python developer.""" +"""PyAirbyte brings Airbyte ELT to every Python developer. + +.. include:: ../README.md + +""" from __future__ import annotations from airbyte._factories.cache_factories import get_default_cache, new_local_cache @@ -24,3 +28,5 @@ "SecretSource", "Source", ] + +__docformat__ = "google" From 6d2e5b908d4de41539f0c3985c6fbe576731aec3 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 16 Feb 2024 10:33:04 -0800 Subject: [PATCH 11/18] renames "PyAirbyte" --- CONTRIBUTING.md | 6 +++--- README.md | 8 ++++---- airbyte/exceptions.py | 8 ++++---- airbyte/secrets.py | 2 +- airbyte/strategies.py | 4 ++-- examples/run_faker.py | 2 +- examples/run_github.py | 2 +- examples/run_pokeapi.py | 2 +- 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6c0c4638..1222b322 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ -# Contributing to AirbyteLib +# Contributing to PyAirbyte -Learn how you can become a contributor to AirbyteLib. +Learn how you can become a contributor to PyAirbyte. ## Development @@ -28,7 +28,7 @@ Documentation pages will be generated in the `docs/generated` folder. The `test_ - In your PR: - Bump the version in `pyproject.toml` - Add a changelog entry to the table below -- Once the PR is merged, go to Github and trigger the `Publish AirbyteLib Manually` workflow. This will publish the new version to PyPI. +- Once the PR is merged, go to Github and trigger the `Publish PyAirbyte Manually` workflow. This will publish the new version to PyPI. ## Versioning diff --git a/README.md b/README.md index 9f934d73..9073c9af 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ airbyte-lib is a library that allows to run Airbyte syncs embedded into any Pyth ## Secrets Management -AirbyteLib can auto-import secrets from the following sources: +PyAirbyte can auto-import secrets from the following sources: 1. 
Environment variables. 2. Variables defined in a local `.env` ("Dotenv") file. @@ -26,9 +26,9 @@ source.set_config( ) ``` -The `get_secret()` function accepts an optional `source` argument of enum type `SecretSource`. If omitted or set to `SecretSource.ANY`, AirbyteLib will search all available secrets sources. If `source` is set to a specific source, then only that source will be checked. If a list of `SecretSource` entries is passed, then the sources will be checked using the provided ordering. +The `get_secret()` function accepts an optional `source` argument of enum type `SecretSource`. If omitted or set to `SecretSource.ANY`, PyAirbyte will search all available secrets sources. If `source` is set to a specific source, then only that source will be checked. If a list of `SecretSource` entries is passed, then the sources will be checked using the provided ordering. -By default, AirbyteLib will prompt the user for any requested secrets that are not provided via other secret managers. You can disable this prompt by passing `prompt=False` to `get_secret()`. +By default, PyAirbyte will prompt the user for any requested secrets that are not provided via other secret managers. You can disable this prompt by passing `prompt=False` to `get_secret()`. ## Connector compatibility @@ -80,7 +80,7 @@ For a more lightweight check, the `--validate-install-only` flag can be used. Th ## Contributing -To learn how you can contribute to AirbyteLib, please see our [AirbyteLib Contributors Guide](./CONTRIBUTING.md). +To learn how you can contribute to PyAirbyte, please see our [PyAirbyte Contributors Guide](./CONTRIBUTING.md). ## Changelog diff --git a/airbyte/exceptions.py b/airbyte/exceptions.py index 5ccf2018..1b96266b 100644 --- a/airbyte/exceptions.py +++ b/airbyte/exceptions.py @@ -106,7 +106,7 @@ def __repr__(self) -> str: return f"{class_name}({properties_str})" -# AirbyteLib Internal Errors (these are probably bugs) +# PyAirbyte Internal Errors (these are probably bugs) @dataclass @@ -117,12 +117,12 @@ class AirbyteLibInternalError(AirbyteError): help_url = NEW_ISSUE_URL -# AirbyteLib Input Errors (replaces ValueError for user input) +# PyAirbyte Input Errors (replaces ValueError for user input) @dataclass class AirbyteLibInputError(AirbyteError, ValueError): - """The input provided to AirbyteLib did not match expected validation rules. + """The input provided to PyAirbyte did not match expected validation rules. This inherits from ValueError so that it can be used as a drop-in replacement for ValueError in the Airbyte Lib API. @@ -146,7 +146,7 @@ class AirbyteLibNoStreamsSelectedError(AirbyteLibInputError): available_streams: list[str] | None = None -# AirbyteLib Cache Errors +# PyAirbyte Cache Errors class AirbyteLibCacheError(AirbyteError): diff --git a/airbyte/secrets.py b/airbyte/secrets.py index 3ca476d4..5cd6d1b6 100644 --- a/airbyte/secrets.py +++ b/airbyte/secrets.py @@ -1,5 +1,5 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. -"""Secrets management for AirbyteLib.""" +"""Secrets management for PyAirbyte.""" from __future__ import annotations import contextlib diff --git a/airbyte/strategies.py b/airbyte/strategies.py index 4d0b75a0..371de543 100644 --- a/airbyte/strategies.py +++ b/airbyte/strategies.py @@ -1,13 +1,13 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
-"""Read and write strategies for AirbyteLib.""" +"""Read and write strategies for PyAirbyte.""" from __future__ import annotations from enum import Enum class WriteStrategy(str, Enum): - """Read strategies for AirbyteLib.""" + """Read strategies for PyAirbyte.""" MERGE = "merge" """Merge new records with existing records. diff --git a/examples/run_faker.py b/examples/run_faker.py index 0a3a5026..690b2051 100644 --- a/examples/run_faker.py +++ b/examples/run_faker.py @@ -1,5 +1,5 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. -"""A simple test of AirbyteLib, using the Faker source connector. +"""A simple test of PyAirbyte, using the Faker source connector. Usage (from PyAirbyte root directory): > poetry run python ./examples/run_faker.py diff --git a/examples/run_github.py b/examples/run_github.py index 352426aa..2df891f4 100644 --- a/examples/run_github.py +++ b/examples/run_github.py @@ -1,5 +1,5 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. -"""A simple test of AirbyteLib, using the Faker source connector. +"""A simple test of PyAirbyte, using the Faker source connector. Usage (from PyAirbyte root directory): > poetry run python ./examples/run_github.py diff --git a/examples/run_pokeapi.py b/examples/run_pokeapi.py index 4aee87c9..c1fb6ff6 100644 --- a/examples/run_pokeapi.py +++ b/examples/run_pokeapi.py @@ -1,5 +1,5 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. -"""A simple test of AirbyteLib, using the PokeAPI source connector. +"""A simple test of PyAirbyte, using the PokeAPI source connector. Usage (from PyAirbyte root directory): > poetry run python ./examples/run_pokeapi.py From b461aa76694510f5ed21683a1b6b282fc48f71ad Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 16 Feb 2024 10:36:37 -0800 Subject: [PATCH 12/18] more renames --- CONTRIBUTING.md | 2 +- README.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1222b322..e2894d1d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,7 +19,7 @@ To generate the documentation, run: poetry run generate-docs ``` -The `generate-docs` CLI command is mapped to the `run()` function of `docs.py` in the root `airbyte-lib` directory. +The `generate-docs` CLI command is mapped to the `run()` function of `docs/generate.py`. Documentation pages will be generated in the `docs/generated` folder. The `test_docs.py` test in pytest will automatically update generated content. This updates must be manually committed before docs tests will pass. diff --git a/README.md b/README.md index 9073c9af..bf3a0ec4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# airbyte-lib +# PyAirbyte -airbyte-lib is a library that allows to run Airbyte syncs embedded into any Python application, without the need to run Airbyte server. +PyAirbyte is a library that allows to run Airbyte syncs embedded into any Python application, without requiring connectivity to a hosted Airbyte instance. ## Secrets Management @@ -32,7 +32,7 @@ By default, PyAirbyte will prompt the user for any requested secrets that are no ## Connector compatibility -To make a connector compatible with airbyte-lib, the following requirements must be met: +To make a connector compatible with PyAirbyte, the following requirements must be met: - The connector must be a Python package, with a `pyproject.toml` or a `setup.py` file. - In the package, there must be a `run.py` file that contains a `run` method. 
This method should read arguments from the command line, and run the connector with them, outputting messages to stdout. From b9572afbdef66780377d2b0ba2fbc126e07ed427 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 16 Feb 2024 10:46:18 -0800 Subject: [PATCH 13/18] fix test docs generation --- .github/workflows/pydoc_preview.yml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pydoc_preview.yml b/.github/workflows/pydoc_preview.yml index be182ce4..79e18a75 100644 --- a/.github/workflows/pydoc_preview.yml +++ b/.github/workflows/pydoc_preview.yml @@ -1,4 +1,4 @@ -name: Generate Python Documentation +name: Test Docs Generation on: push: @@ -6,8 +6,9 @@ on: - main pull_request: {} + jobs: - build: + deploy: runs-on: ubuntu-latest steps: @@ -28,4 +29,10 @@ jobs: - name: Generate documentation run: | - pdoc --html --output-dir docs . + poetry run generate-docs + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + # Upload entire repository + path: 'docs/generated' From 3d2a5326f026c990ef495a8bf89cbd4c8668686d Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 16 Feb 2024 10:47:48 -0800 Subject: [PATCH 14/18] rename jobs --- .github/workflows/pydoc_preview.yml | 2 +- .github/workflows/pydoc_publish.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pydoc_preview.yml b/.github/workflows/pydoc_preview.yml index 79e18a75..cb757a31 100644 --- a/.github/workflows/pydoc_preview.yml +++ b/.github/workflows/pydoc_preview.yml @@ -8,7 +8,7 @@ on: jobs: - deploy: + preview_docs: runs-on: ubuntu-latest steps: diff --git a/.github/workflows/pydoc_publish.yml b/.github/workflows/pydoc_publish.yml index 88602ae5..2c232b33 100644 --- a/.github/workflows/pydoc_publish.yml +++ b/.github/workflows/pydoc_publish.yml @@ -21,7 +21,7 @@ concurrency: cancel-in-progress: false jobs: - deploy: + publish_docs: runs-on: ubuntu-latest environment: name: "github-pages" From 79eb7e1d1325df303ce189c61483e60ed6669d26 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 16 Feb 2024 16:39:24 -0800 Subject: [PATCH 15/18] remove viztracer --- poetry.lock | 62 +------------------ pyproject.toml | 1 - .../integration_tests/test_snowflake_cache.py | 2 - .../test_source_faker_integration.py | 1 - 4 files changed, 1 insertion(+), 65 deletions(-) diff --git a/poetry.lock b/poetry.lock index 9819e186..c0427111 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1136,17 +1136,6 @@ files = [ {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] -[[package]] -name = "objprint" -version = "0.2.3" -description = "A library that can print Python objects in human readable format" -optional = false -python-versions = ">=3.6" -files = [ - {file = "objprint-0.2.3-py3-none-any.whl", hash = "sha256:1721e6f97bae5c5b86c2716a0d45a9dd2c9a4cd9f52cfc8a0dfbe801805554cb"}, - {file = "objprint-0.2.3.tar.gz", hash = "sha256:73d0ad5a7c3151fce634c8892e5c2a050ccae3b1a353bf1316f08b7854da863b"}, -] - [[package]] name = "orjson" version = "3.9.14" @@ -2528,55 +2517,6 @@ brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotl secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] -[[package]] -name = "viztracer" -version = "0.16.2" -description = "A debugging and profiling tool that can trace and visualize python code execution" -optional = false 
-python-versions = ">=3.8" -files = [ - {file = "viztracer-0.16.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:bdc62e90a2957e4119632e98f8b77d0ff1ab4db7029dd2e265bb3748e0fc0e05"}, - {file = "viztracer-0.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:789ac930e1c9621f04d275ee3ebb75a5d6109bcd4634796a77934608c60424d0"}, - {file = "viztracer-0.16.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee504771e3182045996a966d94d95d71693e59717b2643199162ec754a6e2400"}, - {file = "viztracer-0.16.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef9ecf4110d379245f17429d2a10391f3612f60b5618d0d61a30c110e9df2313"}, - {file = "viztracer-0.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57c2574cc15b688eb0ce4e24a2c30f06c1df3bbe1dd16a1d18676e411e785f96"}, - {file = "viztracer-0.16.2-cp310-cp310-win32.whl", hash = "sha256:9fe652834f5073bf99debc25d8ba6084690fa2f26420621ca38a09efcae71b2f"}, - {file = "viztracer-0.16.2-cp310-cp310-win_amd64.whl", hash = "sha256:d59f57e3e46e116ce77e144f419739d1d8d976a903c51a822ba4ef167e5b37d4"}, - {file = "viztracer-0.16.2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:b0bd434c43b7f87f76ddd21cf7371d910edb74b131aaff670a8fcc9f28251e67"}, - {file = "viztracer-0.16.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1bbbb9c80b08db692993c67e7b10d7b06db3eedc6c38f0d93a40ea31de82076e"}, - {file = "viztracer-0.16.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1e7842e437d81fb47ef8266b2dde76bf755c95305014eeec8346b2fce9711c0"}, - {file = "viztracer-0.16.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bddfe6a6f2a66f363fcca79a694986b0602ba0dc3dede57dc182cdd6d0823585"}, - {file = "viztracer-0.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc4a2639e6f18200b73a70f3e7dca4cbb3ba08e3807023fd526f44ebf2185d1e"}, - {file = "viztracer-0.16.2-cp311-cp311-win32.whl", hash = "sha256:371496734ebb3eafd6a6e033dbf04960618089e021dc7eded95179a8f3700c40"}, - {file = "viztracer-0.16.2-cp311-cp311-win_amd64.whl", hash = "sha256:d9c7670e7fb077fe48c92036766a6772e10a3caf41455d6244b8b1c8d48bbd87"}, - {file = "viztracer-0.16.2-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:2fd8b5aa8143b5be4d696e53e8ac5027c20187c178396839f39f8aa610d5873d"}, - {file = "viztracer-0.16.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f3a8ddc4990154f2d400b09deefc9236d963a733d458b2825bd590ced7e7bf89"}, - {file = "viztracer-0.16.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcf8b14dc8dd1567bca3f8cb13e31665a3cbf2ee95552de0afe9179e3a7bde22"}, - {file = "viztracer-0.16.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:309cf5d545222adb2581ae6aeb48d3d03d7241d335142408d87c49f1d0793f85"}, - {file = "viztracer-0.16.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee749a2a3f4ed662d35eb9378ff0648907aa6321befa16ad1d8bec6034b4d260"}, - {file = "viztracer-0.16.2-cp312-cp312-win32.whl", hash = "sha256:a082dab37b6b8cea43438b80a11a6e859f1b45522b8684a2fb9af03539d83803"}, - {file = "viztracer-0.16.2-cp312-cp312-win_amd64.whl", hash = "sha256:03cd21181fe9a630ac5fb9ff1ee83fb7a67814e51e130f0ed83300e163fbac23"}, - {file = "viztracer-0.16.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:e920d383abae1b9314f2a60dd94e04c83998bfe759556af49d3c422d1d64d11e"}, - {file = "viztracer-0.16.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:bb9941b198fed8ba5b3f9d8105e59d37ab15f7f00b9a576686b1073990806d12"}, - {file = "viztracer-0.16.2-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1b7030aa6f934ff02882dfd48eca5a9442951b8be24c1dc5dc99fabbfb1997c"}, - {file = "viztracer-0.16.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:258087076c06d065d2786dc8a0f1f017d655d3753a8fe6836640c005c66a0c43"}, - {file = "viztracer-0.16.2-cp38-cp38-win32.whl", hash = "sha256:f0fd53e2fec972f9332677e6d11332ba789fcccf59060d7b9f309041602dc712"}, - {file = "viztracer-0.16.2-cp38-cp38-win_amd64.whl", hash = "sha256:ab067398029a50cc784d5456c5e8bef339b4bffaa1c3f0f9384a26b57c0efdaa"}, - {file = "viztracer-0.16.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:45879cf54ad9116245e2a6115660307f98ae3aa98a77347f2b336a904f260370"}, - {file = "viztracer-0.16.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abc61cfc36b33a301b950554d9e9027a506d580ebf1e764aa6656af0acfa3354"}, - {file = "viztracer-0.16.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:419f738bba8204e7ddb422faff3a40576896d030bbbf4fb79ace006147ca60e7"}, - {file = "viztracer-0.16.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c594022093bf9eee57ad2b9656f836dca2ed9c0b8e4d94a9d13a6cbc531386fe"}, - {file = "viztracer-0.16.2-cp39-cp39-win32.whl", hash = "sha256:4f98da282e87013a93917c2ae080ba52845e98ed5280faecdc42ee0c7fb74a4a"}, - {file = "viztracer-0.16.2-cp39-cp39-win_amd64.whl", hash = "sha256:64b97120374a572d2320fb795473c051c92d39dfc99fb74754e61e4c212e7617"}, - {file = "viztracer-0.16.2.tar.gz", hash = "sha256:8dff5637a7b42ffdbc1ed3768ce43979e71b09893ff370bc3c3ede54afed93ee"}, -] - -[package.dependencies] -objprint = ">0.1.3" - -[package.extras] -full = ["orjson"] - [[package]] name = "wcmatch" version = "8.4" @@ -2673,4 +2613,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "a61f9755ed5b078d77d06d6930ddbc394a88e480e41ad872547fb542aeb7ec0c" +content-hash = "4f25af1faecf0bdc79f799f2d452f0005c06c92cc6fc063f0760444eb910774d" diff --git a/pyproject.toml b/pyproject.toml index 1a6fca73..ac166d4c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,6 @@ google-cloud-secret-manager = "^2.17.0" types-requests = "2.31.0.4" freezegun = "^1.4.0" airbyte-source-faker = "^6.0.0" -viztracer = "^0.16.2" tomli = "^2.0" [build-system] diff --git a/tests/integration_tests/test_snowflake_cache.py b/tests/integration_tests/test_snowflake_cache.py index c76926f6..9781a49f 100644 --- a/tests/integration_tests/test_snowflake_cache.py +++ b/tests/integration_tests/test_snowflake_cache.py @@ -13,8 +13,6 @@ from pathlib import Path import pytest -import ulid -import viztracer from airbyte_cdk.models import ConfiguredAirbyteCatalog diff --git a/tests/integration_tests/test_source_faker_integration.py b/tests/integration_tests/test_source_faker_integration.py index 95dabad6..c62c7d1a 100644 --- a/tests/integration_tests/test_source_faker_integration.py +++ b/tests/integration_tests/test_source_faker_integration.py @@ -14,7 +14,6 @@ import pytest import ulid -import viztracer from airbyte_cdk.models import ConfiguredAirbyteCatalog From dbce8643b5c3f46db161a1c03e519bbf2fe6d2d3 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 16 Feb 2024 17:00:38 -0800 Subject: [PATCH 16/18] fix import errors --- tests/integration_tests/test_snowflake_cache.py | 4 ---- tests/integration_tests/test_source_faker_integration.py | 4 ++-- 2 files changed, 2 
insertions(+), 6 deletions(-)

diff --git a/tests/integration_tests/test_snowflake_cache.py b/tests/integration_tests/test_snowflake_cache.py
index 9781a49f..e92b89b3 100644
--- a/tests/integration_tests/test_snowflake_cache.py
+++ b/tests/integration_tests/test_snowflake_cache.py
@@ -9,13 +9,9 @@ from collections.abc import Generator
 import os
 import sys
-import shutil
-from pathlib import Path
 
 import pytest
 
-from airbyte_cdk.models import ConfiguredAirbyteCatalog
-
 import airbyte as ab
 from airbyte import caches
 
diff --git a/tests/integration_tests/test_source_faker_integration.py b/tests/integration_tests/test_source_faker_integration.py
index c62c7d1a..ab9f5239 100644
--- a/tests/integration_tests/test_source_faker_integration.py
+++ b/tests/integration_tests/test_source_faker_integration.py
@@ -11,11 +11,11 @@
 import sys
 import shutil
 from pathlib import Path
+import typing
 
 import pytest
 import ulid
 
-from airbyte_cdk.models import ConfiguredAirbyteCatalog
 
 import airbyte as ab
 from airbyte import caches
@@ -130,7 +130,7 @@ def test_faker_pks(
 ) -> None:
     """Test that primary keys are set as expected on the configured catalog."""
 
-    catalog: ConfiguredAirbyteCatalog = source_faker_seed_a.configured_catalog
+    catalog = source_faker_seed_a.configured_catalog
     assert catalog.streams[0].primary_key
     assert catalog.streams[1].primary_key

From b3caa17a0dbe4104f0e06733beb7da63e5dcf0fe Mon Sep 17 00:00:00 2001
From: Aaron Steers
Date: Fri, 16 Feb 2024 17:19:44 -0800
Subject: [PATCH 17/18] remove type checking override

---
 docs/generate.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/docs/generate.py b/docs/generate.py
index 2cf8d868..92672d79 100644
--- a/docs/generate.py
+++ b/docs/generate.py
@@ -9,10 +9,6 @@
 
 import airbyte as ab
 
-import typing
-
-typing.TYPE_CHECKING = True
-
 
 def run() -> None:
     """Generate docs for all public modules in airbyte_lib and save them to docs/generated.

From 8202b92f615d839055198a663c5648c4728b189d Mon Sep 17 00:00:00 2001
From: Aaron Steers
Date: Fri, 16 Feb 2024 17:39:39 -0800
Subject: [PATCH 18/18] only run publish on `main`

---
 .github/workflows/pydoc_publish.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pydoc_publish.yml b/.github/workflows/pydoc_publish.yml
index 2c232b33..9ca256e4 100644
--- a/.github/workflows/pydoc_publish.yml
+++ b/.github/workflows/pydoc_publish.yml
@@ -2,8 +2,8 @@ name: Publish Documentation Site
 
 on:
   push:
-    # branches:
-    # # - main # TODO: uncomment
+    branches:
+      - main
 
   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
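
A note on [PATCH 17/18]: the deleted `typing.TYPE_CHECKING = True` line is a doc-generation trick. It forces imports that are normally visible only to type checkers to resolve at runtime, so the generated docs can link annotated types. The cost is that every `if TYPE_CHECKING:` block in every module imported afterwards executes for real, and a dependency that is missing, or installed as type stubs only, aborts the whole doc build. The sketch below is illustrative only and not taken from this series; `nonexistent_dep` is a deliberately hypothetical module name.

    import typing

    # Skipped at runtime, exactly as a type checker intends; the guarded
    # import is never executed here:
    if typing.TYPE_CHECKING:
        from nonexistent_dep import SomeType

    # The runtime override that [PATCH 17/18] removes:
    typing.TYPE_CHECKING = True

    try:
        # Guarded imports evaluated after the override now run for real,
        # so a missing dependency raises at doc-build time:
        if typing.TYPE_CHECKING:
            from nonexistent_dep import SomeType  # noqa: F401
    except ImportError as exc:
        print(f"doc generation would crash here: {exc}")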
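
For reference, the `poetry run generate-docs` command these workflows call resolves to the `run()` entry point in docs/generate.py, whose signature appears in the context lines above. Below is a rough sketch of what such an entry point can look like, assuming the modern `pdoc` package rather than the older `pdoc3`; the actual implementation in this series may discover modules differently.

    from pathlib import Path

    import pdoc


    def run() -> None:
        # Render HTML docs into docs/generated, the same directory the
        # preview workflow uploads as its Pages artifact.
        pdoc.pdoc("airbyte", output_directory=Path("docs/generated"))


    if __name__ == "__main__":
        run()

Exposing this as `generate-docs` would also require a `[tool.poetry.scripts]` entry along the lines of `generate-docs = "docs.generate:run"`; that mapping is an assumption here, not something shown in these diffs.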