From 7861c04b47e1c02b8eb00641d14f050777efceaa Mon Sep 17 00:00:00 2001
From: "Aaron (\"AJ\") Steers"
Date: Fri, 16 Feb 2024 17:13:46 -0800
Subject: [PATCH 1/6] add LICENSE

---
 LICENSE.md | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 LICENSE.md

diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 00000000..2986bc13
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,38 @@
+# ELv2
+
+Elastic License 2.0 (ELv2)
+
+**Acceptance** By using the software, you agree to all of the terms and conditions below.
+
+**Copyright License** The licensor grants you a non-exclusive, royalty-free, worldwide, non-sublicensable, non-transferable license to use, copy, distribute, make available, and prepare derivative works of the software, in each case subject to the limitations and conditions below.
+
+**Limitations** You may not provide the software to third parties as a hosted or managed service, where the service provides users with access to any substantial set of the features or functionality of the software.
+
+You may not move, change, disable, or circumvent the license key functionality in the software, and you may not remove or obscure any functionality in the software that is protected by the license key.
+
+You may not alter, remove, or obscure any licensing, copyright, or other notices of the licensor in the software. Any use of the licensor's trademarks is subject to applicable law.
+
+**Patents** The licensor grants you a license, under any patent claims the licensor can license, or becomes able to license, to make, have made, use, sell, offer for sale, import and have imported the software, in each case subject to the limitations and conditions in this license. This license does not cover any patent claims that you cause to be infringed by modifications or additions to the software. If you or your company make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
+
+**Notices** You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms.
+
+If you modify the software, you must include in any modified copies of the software prominent notices stating that you have modified the software.
+
+**No Other Rights** These terms do not imply any licenses other than those expressly granted in these terms.
+
+**Termination** If you use the software in violation of these terms, such use is not licensed, and your licenses will automatically terminate. If the licensor provides you with a notice of your violation, and you cease all violation of this license no later than 30 days after you receive that notice, your licenses will be reinstated retroactively. However, if you violate these terms after such reinstatement, any additional violation of these terms will cause your licenses to terminate automatically and permanently.
+
+**No Liability** As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.
+
+**Definitions** The _licensor_ is the entity offering these terms, and the _software_ is the software the licensor makes available under these terms, including any portion of it.
+
+_you_ refers to the individual or entity agreeing to these terms.
+
+_your company_ is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. _control_ means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect.
+
+_your licenses_ are all the licenses granted to you for the software under these terms.
+
+_use_ means anything you do with the software requiring one of your licenses.
+
+_trademark_ means trademarks, service marks, and similar rights.

From 633e1b4df82060d708eecd43144158ad44119187 Mon Sep 17 00:00:00 2001
From: "Aaron (\"AJ\") Steers"
Date: Fri, 16 Feb 2024 17:47:28 -0800
Subject: [PATCH 2/6] Add documentation publish and preview workflows (#42)

---
 .github/workflows/pydoc_preview.yml           |  38 +
 .github/workflows/pydoc_publish.yml           |  60 ++
 CONTRIBUTING.md                               |  35 +
 README.md                                     |  47 +-
 airbyte/__init__.py                           |   8 +-
 airbyte/exceptions.py                         |   8 +-
 airbyte/secrets.py                            |   2 +-
 airbyte/strategies.py                         |   4 +-
 docs.py                                       |  31 -
 docs/.gitignore                               |   1 +
 docs/frame.html.jinja2                        |  14 -
 docs/generate.py                              |  40 +
 docs/generated/airbyte.html                   | 889 ----------------
 docs/generated/airbyte/caches.html            | 992 ------------------
 docs/generated/airbyte/datasets.html          | 258 -----
 docs/generated/index.html                     |   7 -
 examples/run_faker.py                         |   2 +-
 examples/run_github.py                        |   2 +-
 examples/run_pokeapi.py                       |   2 +-
 poetry.lock                                   |  62 +-
 pyproject.toml                                |   3 +-
 tests/docs_tests/test_docs_checked_in.py      |   4 +-
 .../integration_tests/test_snowflake_cache.py |   6 -
 .../test_source_faker_integration.py          |   5 +-
 24 files changed, 211 insertions(+), 2309 deletions(-)
 create mode 100644 .github/workflows/pydoc_preview.yml
 create mode 100644 .github/workflows/pydoc_publish.yml
 create mode 100644 CONTRIBUTING.md
 delete mode 100644 docs.py
 create mode 100644 docs/.gitignore
 delete mode 100644 docs/frame.html.jinja2
 create mode 100644 docs/generate.py
 delete mode 100644 docs/generated/airbyte.html
 delete mode 100644 docs/generated/airbyte/caches.html
 delete mode 100644 docs/generated/airbyte/datasets.html
 delete mode 100644 docs/generated/index.html

diff --git a/.github/workflows/pydoc_preview.yml b/.github/workflows/pydoc_preview.yml
new file mode 100644
index 00000000..cb757a31
--- /dev/null
+++ b/.github/workflows/pydoc_preview.yml
@@ -0,0 +1,38 @@
+name: Test Docs Generation
+
+on:
+  push:
+    branches:
+      - main
+  pull_request: {}
+
+
+jobs:
+  preview_docs:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Set up Poetry
+        uses: Gr1N/setup-poetry@v8
+        with:
+          poetry-version: "1.7.1"
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'poetry'
+
+      - name: Install dependencies
+        run: poetry install
+
+      - name: Generate documentation
+        run: |
+          poetry run generate-docs
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          # Upload the generated documentation
+          path: 'docs/generated'
diff --git a/.github/workflows/pydoc_publish.yml b/.github/workflows/pydoc_publish.yml
new file mode 100644
index 00000000..9ca256e4
--- /dev/null
+++ b/.github/workflows/pydoc_publish.yml
@@ -0,0 +1,60 @@
+name: Publish Documentation Site
+
+on:
+  push:
+    branches:
+      - main
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
+# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  publish_docs:
+    runs-on: ubuntu-latest
+    environment:
+      name: "github-pages"
+      url: ${{ steps.deployment.outputs.page_url }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Set up Poetry
+        uses: Gr1N/setup-poetry@v8
+        with:
+          poetry-version: "1.7.1"
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'poetry'
+      - name: Setup Pages
+        uses: actions/configure-pages@v4
+
+      - name: Install dependencies
+        run: poetry install
+
+      - name: Generate documentation
+        run: |
+          poetry run generate-docs
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          # Upload the generated documentation
+          path: 'docs/generated'
+
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..e2894d1d
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,35 @@
+# Contributing to PyAirbyte
+
+Learn how you can become a contributor to PyAirbyte.
+
+## Development
+
+- Make sure [Poetry is installed](https://python-poetry.org/docs/#).
+- Run `poetry install`
+- For examples, check out the `examples` folder. They can be run via `poetry run python examples/<example file>`
+- Unit tests and type checks can be run via `poetry run pytest`
+
+## Documentation
+
+Regular documentation lives in the `/docs` folder. Based on the doc strings of public methods, we generate API documentation using [pdoc](https://pdoc.dev).
+
+To generate the documentation, run:
+
+```console
+poetry run generate-docs
+```
+
+The `generate-docs` CLI command is mapped to the `run()` function of `docs/generate.py`.
+
+Documentation pages will be generated in the `docs/generated` folder. The `test_docs.py` test in pytest will automatically update generated content. These updates must be committed manually before docs tests will pass.
+
+## Release
+
+- In your PR:
+  - Bump the version in `pyproject.toml`
+  - Add a changelog entry to the table below
+- Once the PR is merged, go to GitHub and trigger the `Publish PyAirbyte Manually` workflow. This will publish the new version to PyPI.
+
+## Versioning
+
+Versioning follows [Semantic Versioning](https://semver.org/). For new features, bump the minor version. For bug fixes, bump the patch version. For pre-releases, append `dev.N` to the version. For example, `0.1.0dev.1` is the first pre-release of the `0.1.0` version.
diff --git a/README.md b/README.md
index b5092b97..bf3a0ec4 100644
--- a/README.md
+++ b/README.md
@@ -1,36 +1,22 @@
 # PyAirbyte
 
-PyAirbyte is a library that allows to run Airbyte syncs embedded into any Python application, without the need to run Airbyte server.
-
-## Development
-
-- Make sure [Poetry is installed](https://python-poetry.org/docs/#).
-- Run `poetry install`
-- For examples, check out the `examples` folder. They can be run via `poetry run python examples/<example file>`
-- Unit tests and type checks can be run via `poetry run pytest`
-
-## Release
-
-- In your PR:
-  - Bump the version in `pyproject.toml`
-  - Add a changelog entry to the table below
-- Once the PR is merged, go to Github and trigger the `Publish AirbyteLib Manually` workflow. This will publish the new version to PyPI.
+PyAirbyte is a library that allows you to run Airbyte syncs embedded into any Python application, without requiring connectivity to a hosted Airbyte instance.
 
 ## Secrets Management
 
-AirbyteLib can auto-import secrets from the following sources:
+PyAirbyte can auto-import secrets from the following sources:
 
 1. Environment variables.
 2. Variables defined in a local `.env` ("Dotenv") file.
 3. [Google Colab secrets](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75).
 4. Manual entry via [`getpass`](https://docs.python.org/3.9/library/getpass.html).
 
-_Note: Additional secret store options may be supported in the future. [More info here.](https://github.com/airbytehq/airbyte-lib-private-beta/discussions/5)_
+_Note: Additional secret store options may be supported in the future. [More info here.](https://github.com/airbytehq/PyAirbyte-private-beta/discussions/5)_
 
 ### Retrieving Secrets
 
 ```python
-from airbyte_lib import get_secret, SecretSource
+from airbyte import get_secret, SecretSource
 
 source = get_connection("source-github")
 source.set_config(
@@ -40,26 +26,17 @@ source.set_config(
 )
 ```
 
-The `get_secret()` function accepts an optional `source` argument of enum type `SecretSource`. If omitted or set to `SecretSource.ANY`, AirbyteLib will search all available secrets sources. If `source` is set to a specific source, then only that source will be checked. If a list of `SecretSource` entries is passed, then the sources will be checked using the provided ordering.
-
-By default, AirbyteLib will prompt the user for any requested secrets that are not provided via other secret managers. You can disable this prompt by passing `prompt=False` to `get_secret()`.
-
-### Versioning
+The `get_secret()` function accepts an optional `source` argument of enum type `SecretSource`. If omitted or set to `SecretSource.ANY`, PyAirbyte will search all available secrets sources. If `source` is set to a specific source, then only that source will be checked. If a list of `SecretSource` entries is passed, then the sources will be checked using the provided ordering.
-Versioning follows [Semantic Versioning](https://semver.org/). For new features, bump the minor version. For bug fixes, bump the patch version. For pre-releases, append `dev.N` to the version. For example, `0.1.0dev.1` is the first pre-release of the `0.1.0` version.
-
-## Documentation
-
-Regular documentation lives in the `/docs` folder. Based on the doc strings of public methods, we generate API documentation using [pdoc](https://pdoc.dev). To generate the documentation, run `poetry run generate-docs`. The documentation will be generated in the `docs/generate` folder. This needs to be done manually when changing the public interface of the library.
-
-A unit test validates the documentation is up to date.
+By default, PyAirbyte will prompt the user for any requested secrets that are not provided via other secret managers. You can disable this prompt by passing `prompt=False` to `get_secret()`.
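To illustrate the lookup order described above, here is a minimal sketch (it assumes the `airbyte` package from this branch; the secret name is a hypothetical example):

```python
# A sketch of the get_secret() behavior described above, assuming the
# `airbyte` package from this branch. The secret name is hypothetical.
from airbyte import SecretSource, get_secret

# Check only the environment and a local .env file, in that order,
# without falling back to an interactive prompt.
token = get_secret(
    "GITHUB_PERSONAL_ACCESS_TOKEN",
    source=[SecretSource.ENV, SecretSource.DOTENV],
    prompt=False,
)

# With only a name, all available sources are searched, and the user
# is prompted for the value if it is not found elsewhere.
token = get_secret("GITHUB_PERSONAL_ACCESS_TOKEN")
```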
 
 ## Connector compatibility
 
 To make a connector compatible with PyAirbyte, the following requirements must be met:
-* The connector must be a Python package, with a `pyproject.toml` or a `setup.py` file.
-* In the package, there must be a `run.py` file that contains a `run` method. This method should read arguments from the command line, and run the connector with them, outputting messages to stdout.
-* The `pyproject.toml` or `setup.py` file must specify a command line entry point for the `run` method called `source-<connector name>`. This is usually done by adding a `console_scripts` section to the `pyproject.toml` file, or a `entry_points` section to the `setup.py` file. For example:
+
+- The connector must be a Python package, with a `pyproject.toml` or a `setup.py` file.
+- In the package, there must be a `run.py` file that contains a `run` method. This method should read arguments from the command line, and run the connector with them, outputting messages to stdout.
+- The `pyproject.toml` or `setup.py` file must specify a command line entry point for the `run` method called `source-<connector name>`. This is usually done by adding a `console_scripts` section to the `pyproject.toml` file, or an `entry_points` section to the `setup.py` file. For example:
 
 ```toml
 [tool.poetry.scripts]
 source-faker = "source_faker.run:run"
 ```
@@ -101,6 +78,10 @@ The script will install the python package in the provided directory, and run th
 
 For a more lightweight check, the `--validate-install-only` flag can be used. This will only check that the connector can be installed and returns a spec, no sample config required.
 
+## Contributing
+
+To learn how you can contribute to PyAirbyte, please see our [PyAirbyte Contributors Guide](./CONTRIBUTING.md).
+
 ## Changelog
 
 | Version | PR | Description |
diff --git a/airbyte/__init__.py b/airbyte/__init__.py
index 9862696e..684cd5c7 100644
--- a/airbyte/__init__.py
+++ b/airbyte/__init__.py
@@ -1,4 +1,8 @@
-"""AirbyteLib brings Airbyte ELT to every Python developer."""
+"""PyAirbyte brings Airbyte ELT to every Python developer.
+
+.. include:: ../README.md
+
+"""
 from __future__ import annotations
 
 from airbyte._factories.cache_factories import get_default_cache, new_local_cache
@@ -24,3 +28,5 @@
     "SecretSource",
     "Source",
 ]
+
+__docformat__ = "google"
diff --git a/airbyte/exceptions.py b/airbyte/exceptions.py
index 5ccf2018..1b96266b 100644
--- a/airbyte/exceptions.py
+++ b/airbyte/exceptions.py
@@ -106,7 +106,7 @@ def __repr__(self) -> str:
         return f"{class_name}({properties_str})"
 
 
-# AirbyteLib Internal Errors (these are probably bugs)
+# PyAirbyte Internal Errors (these are probably bugs)
 
 
 @dataclass
@@ -117,12 +117,12 @@ class AirbyteLibInternalError(AirbyteError):
     help_url = NEW_ISSUE_URL
 
 
-# AirbyteLib Input Errors (replaces ValueError for user input)
+# PyAirbyte Input Errors (replaces ValueError for user input)
 
 
 @dataclass
 class AirbyteLibInputError(AirbyteError, ValueError):
-    """The input provided to AirbyteLib did not match expected validation rules.
+    """The input provided to PyAirbyte did not match expected validation rules.
 
     This inherits from ValueError so that it can be used as a drop-in replacement for
     ValueError in the Airbyte Lib API.
@@ -146,7 +146,7 @@ class AirbyteLibNoStreamsSelectedError(AirbyteLibInputError):
     available_streams: list[str] | None = None
 
 
-# AirbyteLib Cache Errors
+# PyAirbyte Cache Errors
 
 
 class AirbyteLibCacheError(AirbyteError):
diff --git a/airbyte/secrets.py b/airbyte/secrets.py
index 3ca476d4..5cd6d1b6 100644
--- a/airbyte/secrets.py
+++ b/airbyte/secrets.py
@@ -1,5 +1,5 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-"""Secrets management for AirbyteLib."""
+"""Secrets management for PyAirbyte."""
 from __future__ import annotations
 
 import contextlib
diff --git a/airbyte/strategies.py b/airbyte/strategies.py
index 4d0b75a0..371de543 100644
--- a/airbyte/strategies.py
+++ b/airbyte/strategies.py
@@ -1,13 +1,13 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-"""Read and write strategies for AirbyteLib."""
+"""Read and write strategies for PyAirbyte."""
 from __future__ import annotations
 
 from enum import Enum
 
 
 class WriteStrategy(str, Enum):
-    """Read strategies for AirbyteLib."""
+    """Write strategies for PyAirbyte."""
 
     MERGE = "merge"
     """Merge new records with existing records.
diff --git a/docs.py b/docs.py
deleted file mode 100644
index 63947eca..00000000
--- a/docs.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-from __future__ import annotations
-
-import os
-import pathlib
-import shutil
-
-import pdoc
-
-
-def run() -> None:
-    """Generate docs for all public modules in airbyte and save them to docs/generated.
-
-    Public modules are:
-    * The main airbyte module
-    * All directory modules in airbyte that don't start with an underscore.
-    """
-    public_modules = ["airbyte"]
-
-    # recursively delete the docs/generated folder if it exists
-    if pathlib.Path("docs/generated").exists():
-        shutil.rmtree("docs/generated")
-
-    # All folders in `airbyte` that don't start with "_" are treated as public modules.
-    for d in os.listdir("airbyte"):
-        dir_path = pathlib.Path(f"airbyte/{d}")
-        if dir_path.is_dir() and not d.startswith("_") and (dir_path / "__init__.py").exists():
-            public_modules.append(dir_path)
-
-    pdoc.render.configure(template_directory="docs", show_source=False, search=False)
-    pdoc.pdoc(*public_modules, output_directory=pathlib.Path("docs/generated"))
diff --git a/docs/.gitignore b/docs/.gitignore
new file mode 100644
index 00000000..86d4c2dd
--- /dev/null
+++ b/docs/.gitignore
@@ -0,0 +1 @@
+generated
diff --git a/docs/frame.html.jinja2 b/docs/frame.html.jinja2
deleted file mode 100644
index 379ae376..00000000
--- a/docs/frame.html.jinja2
+++ /dev/null
@@ -1,14 +0,0 @@
-[... 14 deleted lines: pdoc HTML frame template ...]
\ No newline at end of file
diff --git a/docs/generate.py b/docs/generate.py
new file mode 100644
index 00000000..92672d79
--- /dev/null
+++ b/docs/generate.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+from __future__ import annotations
+
+import os
+import pathlib
+import shutil
+
+import pdoc
+
+import airbyte as ab
+
+
+def run() -> None:
+    """Generate docs for all public modules in airbyte and save them to docs/generated.
+
+    Public modules are:
+    * The main airbyte module
+    * All directory modules in airbyte that don't start with an underscore.
+    """
+    public_modules = ["airbyte"]
+
+    # recursively delete the docs/generated folder if it exists
+    if pathlib.Path("docs/generated").exists():
+        shutil.rmtree("docs/generated")
+
+    # All files and folders in `airbyte` that don't start with "_" are treated as public.
+    for submodule in os.listdir("airbyte"):
+        submodule_path = pathlib.Path(f"airbyte/{submodule}")
+        if not submodule.startswith("_"):
+            public_modules.append(submodule_path)
+
+    pdoc.render.configure(
+        template_directory="docs",
+        show_source=False,
+        search=False,
+    )
+    pdoc.pdoc(
+        *public_modules,
+        output_directory=pathlib.Path("docs/generated"),
+    )
diff --git a/docs/generated/airbyte.html b/docs/generated/airbyte.html
deleted file mode 100644
index 5ad45f6e..00000000
--- a/docs/generated/airbyte.html
+++ /dev/null
@@ -1,889 +0,0 @@
-[... 889 deleted lines: pdoc-generated HTML API docs for the airbyte module ...]
\ No newline at end of file
diff --git a/docs/generated/airbyte/caches.html b/docs/generated/airbyte/caches.html
deleted file mode 100644
index 3d6c0be9..00000000
--- a/docs/generated/airbyte/caches.html
+++ /dev/null
@@ -1,992 +0,0 @@
-[... 992 deleted lines: pdoc-generated HTML API docs for airbyte.caches ...]
\ No newline at end of file
diff --git a/docs/generated/airbyte/datasets.html b/docs/generated/airbyte/datasets.html
deleted file mode 100644
index 4b3fb249..00000000
--- a/docs/generated/airbyte/datasets.html
+++ /dev/null
@@ -1,258 +0,0 @@
-[... 258 deleted lines: pdoc-generated HTML API docs for airbyte.datasets ...]
\ No newline at end of file
diff --git a/docs/generated/index.html b/docs/generated/index.html
deleted file mode 100644
index 6893f472..00000000
--- a/docs/generated/index.html
+++ /dev/null
@@ -1,7 +0,0 @@
-[... 7 deleted lines: generated docs index page ...]
diff --git a/examples/run_faker.py b/examples/run_faker.py
index 0a3a5026..690b2051 100644
--- a/examples/run_faker.py
+++ b/examples/run_faker.py
@@ -1,5 +1,5 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-"""A simple test of AirbyteLib, using the Faker source connector.
+"""A simple test of PyAirbyte, using the Faker source connector.
 
 Usage (from PyAirbyte root directory):
 > poetry run python ./examples/run_faker.py
diff --git a/examples/run_github.py b/examples/run_github.py
index 352426aa..2df891f4 100644
--- a/examples/run_github.py
+++ b/examples/run_github.py
@@ -1,5 +1,5 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-"""A simple test of AirbyteLib, using the Faker source connector.
+"""A simple test of PyAirbyte, using the GitHub source connector.
 
 Usage (from PyAirbyte root directory):
 > poetry run python ./examples/run_github.py
diff --git a/examples/run_pokeapi.py b/examples/run_pokeapi.py
index 4aee87c9..c1fb6ff6 100644
--- a/examples/run_pokeapi.py
+++ b/examples/run_pokeapi.py
@@ -1,5 +1,5 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-"""A simple test of AirbyteLib, using the PokeAPI source connector.
+"""A simple test of PyAirbyte, using the PokeAPI source connector.
 
 Usage (from PyAirbyte root directory):
 > poetry run python ./examples/run_pokeapi.py
diff --git a/poetry.lock b/poetry.lock
index 9819e186..c0427111 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1136,17 +1136,6 @@ files = [
     {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
 ]
 
-[[package]]
-name = "objprint"
-version = "0.2.3"
-description = "A library that can print Python objects in human readable format"
-optional = false
-python-versions = ">=3.6"
-files = [
-    {file = "objprint-0.2.3-py3-none-any.whl", hash = "sha256:1721e6f97bae5c5b86c2716a0d45a9dd2c9a4cd9f52cfc8a0dfbe801805554cb"},
-    {file = "objprint-0.2.3.tar.gz", hash = "sha256:73d0ad5a7c3151fce634c8892e5c2a050ccae3b1a353bf1316f08b7854da863b"},
-]
-
 [[package]]
 name = "orjson"
 version = "3.9.14"
@@ -2528,55 +2517,6 @@ brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotl
 secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"]
 socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
 
-[[package]]
-name = "viztracer"
-version = "0.16.2"
-description = "A debugging and profiling tool that can trace and visualize python code execution"
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "viztracer-0.16.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:bdc62e90a2957e4119632e98f8b77d0ff1ab4db7029dd2e265bb3748e0fc0e05"},
-    {file = "viztracer-0.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:789ac930e1c9621f04d275ee3ebb75a5d6109bcd4634796a77934608c60424d0"},
-    {file = "viztracer-0.16.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee504771e3182045996a966d94d95d71693e59717b2643199162ec754a6e2400"},
-    {file = "viztracer-0.16.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef9ecf4110d379245f17429d2a10391f3612f60b5618d0d61a30c110e9df2313"},
-    {file = "viztracer-0.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57c2574cc15b688eb0ce4e24a2c30f06c1df3bbe1dd16a1d18676e411e785f96"},
-    {file = "viztracer-0.16.2-cp310-cp310-win32.whl", hash = "sha256:9fe652834f5073bf99debc25d8ba6084690fa2f26420621ca38a09efcae71b2f"},
-    {file = "viztracer-0.16.2-cp310-cp310-win_amd64.whl", hash = "sha256:d59f57e3e46e116ce77e144f419739d1d8d976a903c51a822ba4ef167e5b37d4"},
-    {file = "viztracer-0.16.2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:b0bd434c43b7f87f76ddd21cf7371d910edb74b131aaff670a8fcc9f28251e67"},
-    {file = "viztracer-0.16.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1bbbb9c80b08db692993c67e7b10d7b06db3eedc6c38f0d93a40ea31de82076e"},
-    {file = "viztracer-0.16.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1e7842e437d81fb47ef8266b2dde76bf755c95305014eeec8346b2fce9711c0"},
-    {file = "viztracer-0.16.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bddfe6a6f2a66f363fcca79a694986b0602ba0dc3dede57dc182cdd6d0823585"},
-    {file = "viztracer-0.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc4a2639e6f18200b73a70f3e7dca4cbb3ba08e3807023fd526f44ebf2185d1e"},
-    {file = "viztracer-0.16.2-cp311-cp311-win32.whl", hash = "sha256:371496734ebb3eafd6a6e033dbf04960618089e021dc7eded95179a8f3700c40"},
-    {file = "viztracer-0.16.2-cp311-cp311-win_amd64.whl", hash = "sha256:d9c7670e7fb077fe48c92036766a6772e10a3caf41455d6244b8b1c8d48bbd87"},
-    {file = "viztracer-0.16.2-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:2fd8b5aa8143b5be4d696e53e8ac5027c20187c178396839f39f8aa610d5873d"},
-    {file = "viztracer-0.16.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f3a8ddc4990154f2d400b09deefc9236d963a733d458b2825bd590ced7e7bf89"},
-    {file = "viztracer-0.16.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcf8b14dc8dd1567bca3f8cb13e31665a3cbf2ee95552de0afe9179e3a7bde22"},
-    {file = "viztracer-0.16.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:309cf5d545222adb2581ae6aeb48d3d03d7241d335142408d87c49f1d0793f85"},
-    {file = "viztracer-0.16.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee749a2a3f4ed662d35eb9378ff0648907aa6321befa16ad1d8bec6034b4d260"},
-    {file = "viztracer-0.16.2-cp312-cp312-win32.whl", hash = "sha256:a082dab37b6b8cea43438b80a11a6e859f1b45522b8684a2fb9af03539d83803"},
-    {file = "viztracer-0.16.2-cp312-cp312-win_amd64.whl", hash = "sha256:03cd21181fe9a630ac5fb9ff1ee83fb7a67814e51e130f0ed83300e163fbac23"},
-    {file = "viztracer-0.16.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:e920d383abae1b9314f2a60dd94e04c83998bfe759556af49d3c422d1d64d11e"},
-    {file = "viztracer-0.16.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb9941b198fed8ba5b3f9d8105e59d37ab15f7f00b9a576686b1073990806d12"},
-    {file = "viztracer-0.16.2-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1b7030aa6f934ff02882dfd48eca5a9442951b8be24c1dc5dc99fabbfb1997c"},
-    {file = "viztracer-0.16.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:258087076c06d065d2786dc8a0f1f017d655d3753a8fe6836640c005c66a0c43"},
-    {file = "viztracer-0.16.2-cp38-cp38-win32.whl", hash = "sha256:f0fd53e2fec972f9332677e6d11332ba789fcccf59060d7b9f309041602dc712"},
-    {file = "viztracer-0.16.2-cp38-cp38-win_amd64.whl", hash = "sha256:ab067398029a50cc784d5456c5e8bef339b4bffaa1c3f0f9384a26b57c0efdaa"},
-    {file = "viztracer-0.16.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:45879cf54ad9116245e2a6115660307f98ae3aa98a77347f2b336a904f260370"},
-    {file = "viztracer-0.16.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abc61cfc36b33a301b950554d9e9027a506d580ebf1e764aa6656af0acfa3354"},
-    {file = "viztracer-0.16.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:419f738bba8204e7ddb422faff3a40576896d030bbbf4fb79ace006147ca60e7"},
-    {file = "viztracer-0.16.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c594022093bf9eee57ad2b9656f836dca2ed9c0b8e4d94a9d13a6cbc531386fe"},
-    {file = "viztracer-0.16.2-cp39-cp39-win32.whl", hash = "sha256:4f98da282e87013a93917c2ae080ba52845e98ed5280faecdc42ee0c7fb74a4a"},
-    {file = "viztracer-0.16.2-cp39-cp39-win_amd64.whl", hash = "sha256:64b97120374a572d2320fb795473c051c92d39dfc99fb74754e61e4c212e7617"},
-    {file = "viztracer-0.16.2.tar.gz", hash = "sha256:8dff5637a7b42ffdbc1ed3768ce43979e71b09893ff370bc3c3ede54afed93ee"},
-]
-
-[package.dependencies]
-objprint = ">0.1.3"
-
-[package.extras]
-full = ["orjson"]
-
 [[package]]
 name = "wcmatch"
 version = "8.4"
@@ -2673,4 +2613,4 @@ files = [
 
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "a61f9755ed5b078d77d06d6930ddbc394a88e480e41ad872547fb542aeb7ec0c"
+content-hash = "4f25af1faecf0bdc79f799f2d452f0005c06c92cc6fc063f0760444eb910774d"
diff --git a/pyproject.toml b/pyproject.toml
index f8defa35..ac166d4c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,6 @@ google-cloud-secret-manager = "^2.17.0"
 types-requests = "2.31.0.4"
 freezegun = "^1.4.0"
 airbyte-source-faker = "^6.0.0"
-viztracer = "^0.16.2"
 tomli = "^2.0"
 
 [build-system]
@@ -247,7 +246,7 @@ module = [
 ignore_missing_imports = true  # No stubs yet (😢)
 
 [tool.poetry.scripts]
-generate-docs = "docs:run"
+generate-docs = "docs.generate:run"
 airbyte-lib-validate-source = "airbyte.validate:run"
 
 [tool.poe.tasks]
diff --git a/tests/docs_tests/test_docs_checked_in.py b/tests/docs_tests/test_docs_checked_in.py
index 54614c7c..86ecfe7c 100644
--- a/tests/docs_tests/test_docs_checked_in.py
+++ b/tests/docs_tests/test_docs_checked_in.py
@@ -2,7 +2,7 @@
 
 import os
 
-import docs
+import docs.generate as generate
 
 
 def test_docs_checked_in():
@@ -13,7 +13,7 @@
     It will fail if there are any differences.
""" - docs.run() + generate.run() # compare the generated docs with the checked in docs diff = os.system("git diff --exit-code docs/generated") diff --git a/tests/integration_tests/test_snowflake_cache.py b/tests/integration_tests/test_snowflake_cache.py index c76926f6..e92b89b3 100644 --- a/tests/integration_tests/test_snowflake_cache.py +++ b/tests/integration_tests/test_snowflake_cache.py @@ -9,14 +9,8 @@ from collections.abc import Generator import os import sys -import shutil -from pathlib import Path import pytest -import ulid -import viztracer - -from airbyte_cdk.models import ConfiguredAirbyteCatalog import airbyte as ab from airbyte import caches diff --git a/tests/integration_tests/test_source_faker_integration.py b/tests/integration_tests/test_source_faker_integration.py index 95dabad6..ab9f5239 100644 --- a/tests/integration_tests/test_source_faker_integration.py +++ b/tests/integration_tests/test_source_faker_integration.py @@ -11,12 +11,11 @@ import sys import shutil from pathlib import Path +import typing import pytest import ulid -import viztracer -from airbyte_cdk.models import ConfiguredAirbyteCatalog import airbyte as ab from airbyte import caches @@ -131,7 +130,7 @@ def test_faker_pks( ) -> None: """Test that the append strategy works as expected.""" - catalog: ConfiguredAirbyteCatalog = source_faker_seed_a.configured_catalog + catalog = source_faker_seed_a.configured_catalog assert catalog.streams[0].primary_key assert catalog.streams[1].primary_key From a2de7cc3da058c0ab68b4c0ddb3a5af003d7c2ba Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Fri, 16 Feb 2024 20:39:58 -0800 Subject: [PATCH 3/6] CI: Add auto-release draft and semantic PR titles check (#45) --- .github/release.drafter.yml | 41 ++++++++++++++++++++++ .github/workflows/release_drafter.yml | 24 +++++++++++++ .github/workflows/semantic_pr_check.yml | 45 +++++++++++++++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 .github/release.drafter.yml create mode 100644 .github/workflows/release_drafter.yml create mode 100644 .github/workflows/semantic_pr_check.yml diff --git a/.github/release.drafter.yml b/.github/release.drafter.yml new file mode 100644 index 00000000..c8e68fc4 --- /dev/null +++ b/.github/release.drafter.yml @@ -0,0 +1,41 @@ +name-template: 'v$RESOLVED_VERSION' +tag-template: 'v$RESOLVED_VERSION' +categories: + - title: '๐Ÿš€ Features' + labels: + - 'feature' + - 'enhancement' + - title: '๐Ÿ› Bug Fixes' + labels: + - 'fix' + - 'bugfix' + - 'bug' + - title: '๐Ÿงฐ Maintenance' + label: 'chore' +change-template: '- $TITLE @$AUTHOR (#$NUMBER)' +change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. 
+
+version-resolver:
+  major:
+    labels:
+      - 'major'
+  minor:
+    labels:
+      - 'minor'
+  patch:
+    labels:
+      - 'patch'
+  default: patch
+template: |
+  ## Changes
+
+  $CHANGES
+autolabeler:
+  - label: 'chore'
+    title:
+      - '/chore\:/i'
+  - label: 'bug'
+    title:
+      - '/fix\:/i'
+  - label: 'enhancement'
+    title:
+      - '/feature/i'
diff --git a/.github/workflows/release_drafter.yml b/.github/workflows/release_drafter.yml
new file mode 100644
index 00000000..feadbd4a
--- /dev/null
+++ b/.github/workflows/release_drafter.yml
@@ -0,0 +1,24 @@
+name: Release Drafter
+
+on:
+  push:
+    branches:
+      - main
+
+permissions:
+  contents: read
+
+jobs:
+  update_release_draft:
+    permissions:
+      contents: write
+      pull-requests: read
+    runs-on: ubuntu-latest
+    steps:
+      # Drafts the next Release notes as Pull Requests are merged into "main"
+      - uses: release-drafter/release-drafter@v5
+        with:
+          config-name: release-drafter.yml
+          disable-autolabeler: true
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/semantic_pr_check.yml b/.github/workflows/semantic_pr_check.yml
new file mode 100644
index 00000000..161973b8
--- /dev/null
+++ b/.github/workflows/semantic_pr_check.yml
@@ -0,0 +1,45 @@
+name: "Verify Semantic PR Title"
+
+on:
+  pull_request_target:
+    types:
+      - opened
+      - edited
+      - synchronize
+
+permissions:
+  pull-requests: read
+
+jobs:
+  validate_pr_title:
+    name: Validate PR title
+    runs-on: ubuntu-latest
+    steps:
+      - uses: amannn/action-semantic-pull-request@v5
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          # Configure which types are allowed (newline-delimited).
+          # See: https://github.com/commitizen/conventional-commit-types/blob/master/index.json
+          types: |
+            fix
+            feat
+            docs
+            ci
+            chore
+            build
+            test
+
+          # # We don't use scopes as of now
+          # scopes: |
+          #   core
+          #   ui
+          #   JIRA-\d+
+
+          # Require capitalization for the first letter of the subject.
+          subjectPattern: ^[A-Z].*$
+          # The variables `subject` and `title` can be used within the message.
+          subjectPatternError: |
+            The subject "{subject}" found in the pull request title "{title}"
+            didn't match the configured pattern. Please ensure that the subject
+            starts with an uppercase character.

From 001a3b4aa58f5a4d347f7b72d804fb2fd0b9378e Mon Sep 17 00:00:00 2001
From: Aaron Steers
Date: Fri, 16 Feb 2024 20:42:46 -0800
Subject: [PATCH 4/6] ci: fix casing of semantic pr types

---
 .github/workflows/semantic_pr_check.yml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/semantic_pr_check.yml b/.github/workflows/semantic_pr_check.yml
index 161973b8..7e311577 100644
--- a/.github/workflows/semantic_pr_check.yml
+++ b/.github/workflows/semantic_pr_check.yml
@@ -22,13 +22,13 @@ jobs:
           # Configure which types are allowed (newline-delimited).
           # See: https://github.com/commitizen/conventional-commit-types/blob/master/index.json
           types: |
-            fix
-            feat
-            docs
-            ci
-            chore
-            build
-            test
+            Fix
+            Feat
+            Docs
+            CI
+            Chore
+            Build
+            Test
 
           # # We don't use scopes as of now
           # scopes: |

From 529ed70a51226e29be2014799f25c07392c343d8 Mon Sep 17 00:00:00 2001
From: Aaron Steers
Date: Fri, 16 Feb 2024 20:45:05 -0800
Subject: [PATCH 5/6] ci: fix release drafter config filename

---
 .github/{release.drafter.yml => release-drafter.yml} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename .github/{release.drafter.yml => release-drafter.yml} (100%)

diff --git a/.github/release.drafter.yml b/.github/release-drafter.yml
similarity index 100%
rename from .github/release.drafter.yml
rename to .github/release-drafter.yml

From 65b86dd9c7e5a9da3da4c8ba5dfd297635fabba2 Mon Sep 17 00:00:00 2001
From: "Aaron (\"AJ\") Steers"
Date: Fri, 16 Feb 2024 20:46:51 -0800
Subject: [PATCH 6/6] CI: Add PyPi publish workflow (#44)

---
 .github/workflows/pypi_publish.yml          | 43 +++++++++++++++++++++
 CONTRIBUTING.md                             | 15 +++++--
 README.md                                   |  2 +-
 pyproject.toml                              | 12 ++++--
 tests/docs_tests/test_validate_changelog.py | 23 -----------
 5 files changed, 64 insertions(+), 31 deletions(-)
 create mode 100644 .github/workflows/pypi_publish.yml
 delete mode 100644 tests/docs_tests/test_validate_changelog.py

diff --git a/.github/workflows/pypi_publish.yml b/.github/workflows/pypi_publish.yml
new file mode 100644
index 00000000..a7734361
--- /dev/null
+++ b/.github/workflows/pypi_publish.yml
@@ -0,0 +1,43 @@
+name: Publish to PyPi
+
+on:
+  push:
+
+  workflow_dispatch:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - uses: hynek/build-and-inspect-python-package@v2
+
+  publish:
+    name: Publish to PyPI
+    runs-on: ubuntu-latest
+    needs: [build]
+    permissions:
+      id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
+      contents: write # Needed to upload artifacts to the release
+    environment:
+      name: PyPi
+      url: https://pypi.org/p/airbyte
+    if: startsWith(github.ref, 'refs/tags/')
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: Packages
+          path: dist
+      - name: Upload wheel to release
+        uses: svenstaro/upload-release-action@v2
+        with:
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          file: dist/*.whl
+          tag: ${{ github.ref }}
+          overwrite: true
+          file_glob: true
+
+      - name: Publish
+        uses: pypa/gh-action-pypi-publish@v1.8.11
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e2894d1d..a85e8c56 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -25,10 +25,17 @@ Documentation pages will be generated in the `docs/generated` folder. The `test_
 
 ## Release
 
-- In your PR:
-  - Bump the version in `pyproject.toml`
-  - Add a changelog entry to the table below
-- Once the PR is merged, go to Github and trigger the `Publish PyAirbyte Manually` workflow. This will publish the new version to PyPI.
+Releases are published automatically to PyPI in response to a "published" event on a GitHub release tag.
+
+To publish to PyPI, simply [create a GitHub Release](https://github.com/airbytehq/PyAirbyte/releases/new) with the correct version. Once you publish the release on GitHub, it will automatically trigger a PyPI publish workflow in GitHub Actions.
+
+> **Warning**
+>
+> Be careful: "Cmd+Enter" will not 'save' but will instead 'publish'. (If you want to save a draft, use the mouse. 😅)
+
+> **Note**
+>
+> There is no version to bump. Version is calculated during build and publish, using the [poetry-dynamic-versioning](https://github.com/mtkennerly/poetry-dynamic-versioning) plugin.
 
 ## Versioning
 
diff --git a/README.md b/README.md
index bf3a0ec4..2b1b27a4 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # PyAirbyte
 
-PyAirbyte is a library that allows to run Airbyte syncs embedded into any Python application, without requiring connectivity to a hosted Airbyte instance.
+PyAirbyte brings the power of Airbyte to every Python developer.
 
 ## Secrets Management
 
diff --git a/pyproject.toml b/pyproject.toml
index ac166d4c..4781bc4c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,11 +1,17 @@
 [tool.poetry]
 name = "airbyte"
 description = "PyAirbyte"
-version = "0.1.0"
 authors = ["Airbyte "]
 readme = "README.md"
 packages = [{include = "airbyte"}]
 
+# This project uses dynamic versioning
+# https://github.com/mtkennerly/poetry-dynamic-versioning
+version = "0.0.0"
+
+[tool.poetry-dynamic-versioning]
+enable = true
+
 [tool.poetry.dependencies]
 python = "^3.9"
 
@@ -51,8 +57,8 @@ airbyte-source-faker = "^6.0.0"
 tomli = "^2.0"
 
 [build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
+requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"]
+build-backend = "poetry_dynamic_versioning.backend"
 
 [tool.pytest.ini_options]
 markers = [
diff --git a/tests/docs_tests/test_validate_changelog.py b/tests/docs_tests/test_validate_changelog.py
deleted file mode 100644
index 7481d014..00000000
--- a/tests/docs_tests/test_validate_changelog.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-
-import tomli
-
-
-def test_validate_changelog():
-    """
-    Publishing a version involves bumping the version in pyproject.toml and adding a changelog entry.
-    This test ensures that the changelog entry is present.
-    """
-
-    # get the version from pyproject.toml
-    with open("pyproject.toml") as f:
-        contents = tomli.loads(f.read())
-    version = contents["tool"]["poetry"]["version"]
-
-    # get the changelog
-    with open("README.md") as f:
-        readme = f.read()
-    changelog = readme.split("## Changelog")[-1]
-
-    # check that the changelog contains the version
-    assert version in changelog, f"Version {version} is missing from the changelog in README.md. Please add it."
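
Patch 6/6 replaces manual version bumps with tag-driven releases: poetry-dynamic-versioning derives the package version from the latest git tag at build time, and the `pypi_publish.yml` workflow publishes the built artifacts when a release tag is pushed. As a rough illustration of the mechanics (not part of the patches above), the sketch below previews the version the plugin would stamp onto a build; it assumes the `dunamai` package, the library poetry-dynamic-versioning is built on, is installed, and that the checkout has at least one git tag.

```python
# Minimal sketch: preview the dynamically calculated version for the current
# checkout. Assumes `pip install dunamai` and a git repo with at least one tag;
# the exact serialized format depends on your tags and plugin configuration.
from dunamai import Version

version = Version.from_git()  # Inspects the latest tag and the commit distance from it.
print(version.serialize())    # PEP 440 string, e.g. "0.1.2" on a tagged commit,
                              # or a dev/post-style version between tags.
```

Running `poetry build` on a tagged commit should then produce a wheel whose version matches the tag, which is what the publish workflow relies on when it uploads `dist/*.whl` to the release.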