From 57e26f6cf19100250083c27c5c98fbe7dbd8926d Mon Sep 17 00:00:00 2001 From: siddhant Date: Thu, 16 Feb 2023 10:18:56 +0530 Subject: [PATCH 01/50] kobo toolbox --- .../source-kobotoolbox/.dockerignore | 6 ++ .../connectors/source-kobotoolbox/Dockerfile | 38 +++++++++ .../connectors/source-kobotoolbox/README.md | 82 +++++++++++++++++++ .../connectors/source-kobotoolbox/__init__.py | 3 + .../acceptance-test-config.yml | 39 +++++++++ .../acceptance-test-docker.sh | 16 ++++ .../source-kobotoolbox/build.gradle | 9 ++ .../integration_tests/__init__.py | 3 + .../integration_tests/abnormal_state.json | 5 ++ .../integration_tests/acceptance.py | 16 ++++ .../integration_tests/configured_catalog.json | 13 +++ .../integration_tests/invalid_config.json | 3 + .../integration_tests/sample_config.json | 3 + .../integration_tests/sample_state.json | 5 ++ .../connectors/source-kobotoolbox/main.py | 13 +++ .../source-kobotoolbox/requirements.txt | 2 + .../connectors/source-kobotoolbox/setup.py | 29 +++++++ .../source_kobotoolbox/__init__.py | 8 ++ .../source_kobotoolbox/manifest.yaml | 69 ++++++++++++++++ .../source_kobotoolbox/schemas/TODO.md | 16 ++++ .../source_kobotoolbox/schemas/customers.json | 16 ++++ .../source_kobotoolbox/schemas/employees.json | 19 +++++ .../source_kobotoolbox/source.py | 18 ++++ 23 files changed, 431 insertions(+) create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/.dockerignore create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/Dockerfile create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/README.md create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/__init__.py create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-config.yml create mode 100755 airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/build.gradle create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/integration_tests/abnormal_state.json create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_state.json create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/main.py create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/requirements.txt create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/setup.py create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/__init__.py create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/manifest.yaml create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/TODO.md create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/customers.json create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/employees.json create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py diff --git a/airbyte-integrations/connectors/source-kobotoolbox/.dockerignore b/airbyte-integrations/connectors/source-kobotoolbox/.dockerignore new file mode 100644 index 000000000000..39a284db1c82 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/.dockerignore @@ -0,0 +1,6 @@ +* +!Dockerfile +!main.py +!source_kobotoolbox +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile b/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile new file mode 100644 index 000000000000..99df17a18777 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.11-alpine3.15 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_kobotoolbox ./source_kobotoolbox + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-kobotoolbox diff --git a/airbyte-integrations/connectors/source-kobotoolbox/README.md b/airbyte-integrations/connectors/source-kobotoolbox/README.md new file mode 100644 index 000000000000..a237dd556090 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/README.md @@ -0,0 +1,82 @@ +# Kobotoolbox Source + +This is the repository for the Kobotoolbox configuration based source connector. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/sources/kobotoolbox). + +## Local development + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-kobotoolbox:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/sources/kobotoolbox) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_kobotoolbox/spec.yaml` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source kobotoolbox test creds` +and place them into `secrets/config.json`. + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-kobotoolbox:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-kobotoolbox:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-kobotoolbox:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-kobotoolbox:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-kobotoolbox:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-kobotoolbox:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing + +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. + +To run your integration tests with Docker, run: +``` +./acceptance-test-docker.sh +``` + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-kobotoolbox:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-kobotoolbox:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-kobotoolbox/__init__.py b/airbyte-integrations/connectors/source-kobotoolbox/__init__.py new file mode 100644 index 000000000000..c941b3045795 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-config.yml b/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-config.yml new file mode 100644 index 000000000000..436a654d36aa --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-config.yml @@ -0,0 +1,39 @@ +# See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-kobotoolbox:dev +acceptance_tests: + spec: + tests: + - spec_path: "source_kobotoolbox/spec.yaml" + connection: + tests: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + tests: + - config_path: "secrets/config.json" + basic_read: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] +# TODO uncomment this block to specify that the tests should assert the connector outputs the records provided in the input file a file +# expect_records: +# path: "integration_tests/expected_records.jsonl" +# extra_fields: no +# exact_order: no +# extra_records: yes + incremental: + bypass_reason: "This connector does not implement incremental sync" +# TODO uncomment this block this block if your connector implements incremental sync: +# tests: +# - config_path: "secrets/config.json" +# configured_catalog_path: "integration_tests/configured_catalog.json" +# future_state: +# future_state_path: "integration_tests/abnormal_state.json" + full_refresh: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-docker.sh new file mode 100755 index 000000000000..a8d6ac4bb608 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2-) + +# Pull latest acctest image +docker pull airbyte/connector-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/connector-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-kobotoolbox/build.gradle b/airbyte-integrations/connectors/source-kobotoolbox/build.gradle new file mode 100644 index 000000000000..61c38b469487 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/build.gradle @@ -0,0 +1,9 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-connector-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_kobotoolbox' +} diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/__init__.py b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/__init__.py new file mode 100644 index 000000000000..c941b3045795 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/abnormal_state.json new file mode 100644 index 000000000000..52b0f2c2118f --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/abnormal_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "todo-abnormal-value" + } +} diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/acceptance.py new file mode 100644 index 000000000000..9e6409236281 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/acceptance.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("connector_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. otherwise remove the TODO comments + yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json new file mode 100644 index 000000000000..c65f3845a757 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json @@ -0,0 +1,13 @@ +{ + "streams": [ + { + "stream": { + "name": "forms", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/invalid_config.json new file mode 100644 index 000000000000..f3732995784f --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/invalid_config.json @@ -0,0 +1,3 @@ +{ + "todo-wrong-field": "this should be an incomplete config file, used in standard tests" +} diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_config.json new file mode 100644 index 000000000000..ecc4913b84c7 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_config.json @@ -0,0 +1,3 @@ +{ + "fix-me": "TODO" +} diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_state.json new file mode 100644 index 000000000000..3587e579822d --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "value" + } +} diff --git a/airbyte-integrations/connectors/source-kobotoolbox/main.py b/airbyte-integrations/connectors/source-kobotoolbox/main.py new file mode 100644 index 000000000000..a8d216dc7d6d --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_kobotoolbox import SourceKobotoolbox + +if __name__ == "__main__": + source = SourceKobotoolbox() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/requirements.txt b/airbyte-integrations/connectors/source-kobotoolbox/requirements.txt new file mode 100644 index 000000000000..cc57334ef619 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/connector-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-kobotoolbox/setup.py b/airbyte-integrations/connectors/source-kobotoolbox/setup.py new file mode 100644 index 000000000000..ccbdbde4bbc2 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.1", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.2", + "pytest-mock~=3.6.1", + "connector-acceptance-test", +] + +setup( + name="source_kobotoolbox", + description="Source implementation for Kobotoolbox.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/__init__.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/__init__.py new file mode 100644 index 000000000000..e689e3311aa1 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from .source import SourceKobotoolbox + +__all__ = ["SourceKobotoolbox"] diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/manifest.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/manifest.yaml new file mode 100644 index 000000000000..48c96a7e2f4e --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/manifest.yaml @@ -0,0 +1,69 @@ +version: "0.1.0" + +definitions: + selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_pointer: [] + requester: + type: HttpRequester + url_base: "https://kf.kobotoolbox.org/api/v2/assets" + http_method: "GET" + authenticator: + type: BasicHttpAuthenticator + username: "{{ config['username'] }}" + password: "{{ config['password'] }}" + retriever: + type: SimpleRetriever + record_selector: + $ref: "*ref(definitions.selector)" + paginator: + type: NoPagination + requester: + $ref: "*ref(definitions.requester)" + base_stream: + type: DeclarativeStream + retriever: + $ref: "*ref(definitions.retriever)" + forms_stream: + $ref: "*ref(definitions.base_stream)" + $options: + name: "forms" + primary_key: "_id" + path: "/{{ config['uid'] }}/data.json" + +streams: + - "*ref(definitions.forms_stream)" + +check: + stream_names: + - "forms" + +spec: + documentation_url: https://docs.airbyte.com/integrations/sources/kobotoolbox + connection_specification: + title: Kobotoolbox Spec + type: object + required: + - username + - password + - uid + additionalProperties: true + properties: + username: + type: string + title: Username + description: Username to authenticate into the KoboToolbox server + order: 1 + password: + type: string + title: Password + description: Password to authenticate into the KoboToolbox server + airbyte_secret: true + order: 2 + uid: + type: string + title: UID + description: UID to pull data from + order: 3 \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/TODO.md b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/TODO.md new file mode 100644 index 000000000000..ddd6e61aa500 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/TODO.md @@ -0,0 +1,16 @@ +# TODO: Define your stream schemas +Your connector must describe the schema of each stream it can output using [JSONSchema](https://json-schema.org). + +You can describe the schema of your streams using one `.json` file per stream. + +## Static schemas +From the `kobotoolbox.yaml` configuration file, you read the `.json` files in the `schemas/` directory. You can refer to a schema in your configuration file using the `schema_loader` component's `file_path` field. For example: +``` +schema_loader: + type: JsonSchema + file_path: "./source_kobotoolbox/schemas/customers.json" +``` +Every stream specified in the configuration file should have a corresponding `.json` schema file. + +Delete this file once you're done. Or don't. Up to you :) + diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/customers.json b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/customers.json new file mode 100644 index 000000000000..9a4b13485836 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/customers.json @@ -0,0 +1,16 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "signup_date": { + "type": ["null", "string"], + "format": "date-time" + } + } +} diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/employees.json b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/employees.json new file mode 100644 index 000000000000..2fa01a0fa1ff --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/employees.json @@ -0,0 +1,19 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "years_of_service": { + "type": ["null", "integer"] + }, + "start_date": { + "type": ["null", "string"], + "format": "date-time" + } + } +} diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py new file mode 100644 index 000000000000..a1a920c8fb2b --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -0,0 +1,18 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource + +""" +This file provides the necessary constructs to interpret a provided declarative YAML configuration file into +source connector. + +WARNING: Do not modify this file. +""" + + +# Declarative Source +class SourceKobotoolbox(YamlDeclarativeSource): + def __init__(self): + super().__init__(**{"path_to_yaml": "manifest.yaml"}) From 03d7d0a29e07f3947537008ab340c5ae93ba25c4 Mon Sep 17 00:00:00 2001 From: siddhant Date: Thu, 23 Feb 2023 15:55:47 +0530 Subject: [PATCH 02/50] connector working here --- .../connectors/source-kobotoolbox/README.md | 63 ++++++++++-- .../source-kobotoolbox/build.gradle | 2 +- .../integration_tests/acceptance.py | 2 +- .../integration_tests/configured_catalog.json | 6 +- .../source-kobotoolbox/requirements.txt | 1 + .../sample_files/configured_catalog.json | 19 ++++ .../connectors/source-kobotoolbox/setup.py | 5 +- .../source_kobotoolbox/manifest.yaml | 69 ------------- .../source_kobotoolbox/schemas/TODO.md | 16 ---- .../source_kobotoolbox/schemas/customers.json | 16 ---- .../source_kobotoolbox/schemas/employees.json | 19 ---- .../schemas/kobo_tool_stream.json | 96 +++++++++++++++++++ .../source_kobotoolbox/source.py | 93 ++++++++++++++++-- .../source_kobotoolbox/spec.yaml | 26 +++++ .../{__init__.py => unit_tests/unit_test.py} | 4 + 15 files changed, 292 insertions(+), 145 deletions(-) create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/sample_files/configured_catalog.json delete mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/manifest.yaml delete mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/TODO.md delete mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/customers.json delete mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/employees.json create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/kobo_tool_stream.json create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml rename airbyte-integrations/connectors/source-kobotoolbox/{__init__.py => unit_tests/unit_test.py} (57%) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/README.md b/airbyte-integrations/connectors/source-kobotoolbox/README.md index a237dd556090..60d9a6a32da3 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/README.md +++ b/airbyte-integrations/connectors/source-kobotoolbox/README.md @@ -1,14 +1,36 @@ # Kobotoolbox Source -This is the repository for the Kobotoolbox configuration based source connector. +This is the repository for the Kobotoolbox source connector, written in Python. For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/sources/kobotoolbox). ## Local development -#### Building via Gradle -You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.9.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` -To build using Gradle, from the Airbyte repository root, run: +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +From the Airbyte repository root, run: ``` ./gradlew :airbyte-integrations:connectors:source-kobotoolbox:build ``` @@ -16,12 +38,20 @@ To build using Gradle, from the Airbyte repository root, run: #### Create credentials **If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/sources/kobotoolbox) to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_kobotoolbox/spec.yaml` file. -Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information. See `integration_tests/sample_config.json` for a sample config file. **If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source kobotoolbox test creds` and place them into `secrets/config.json`. +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + ### Locally running the connector docker image #### Build @@ -46,15 +76,32 @@ docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-kobotoolbox:dev discov docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-kobotoolbox:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json ``` ## Testing + Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` #### Acceptance Tests Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. - -To run your integration tests with Docker, run: +To run your integration tests with acceptance tests, from the connector root, run ``` -./acceptance-test-docker.sh +python -m pytest integration_tests -p integration_tests.acceptance ``` +To run your integration tests with docker ### Using gradle to run tests All commands should be run from airbyte project root. diff --git a/airbyte-integrations/connectors/source-kobotoolbox/build.gradle b/airbyte-integrations/connectors/source-kobotoolbox/build.gradle index 61c38b469487..6145c717e597 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/build.gradle +++ b/airbyte-integrations/connectors/source-kobotoolbox/build.gradle @@ -5,5 +5,5 @@ plugins { } airbytePython { - moduleDirectory 'source_kobotoolbox' + moduleDirectory 'source_kobotoolbox_singer' } diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/acceptance.py index 9e6409236281..43ce950d77ca 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/acceptance.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/acceptance.py @@ -11,6 +11,6 @@ @pytest.fixture(scope="session", autouse=True) def connector_setup(): """This fixture is a placeholder for external resources that acceptance test might require.""" - # TODO: setup test dependencies if needed. otherwise remove the TODO comments + # TODO: setup test dependencies yield # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json index c65f3845a757..b999c2ba3abf 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json @@ -2,9 +2,11 @@ "streams": [ { "stream": { - "name": "forms", + "name": "table_name", "json_schema": {}, - "supported_sync_modes": ["full_refresh"] + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false, + "default_cursor_field": ["column_name"] }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" diff --git a/airbyte-integrations/connectors/source-kobotoolbox/requirements.txt b/airbyte-integrations/connectors/source-kobotoolbox/requirements.txt index cc57334ef619..9ce85523c234 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/requirements.txt +++ b/airbyte-integrations/connectors/source-kobotoolbox/requirements.txt @@ -1,2 +1,3 @@ +# This file is autogenerated -- only edit if you know what you are doing. Use setup.py for declaring dependencies. -e ../../bases/connector-acceptance-test -e . diff --git a/airbyte-integrations/connectors/source-kobotoolbox/sample_files/configured_catalog.json b/airbyte-integrations/connectors/source-kobotoolbox/sample_files/configured_catalog.json new file mode 100644 index 000000000000..6245aaabad89 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/sample_files/configured_catalog.json @@ -0,0 +1,19 @@ +{ + "streams": [ + { + "stream": { + "name": "kobo_tool_stream", + "json_schema": {}, + "supported_sync_modes": [ + "full_refresh" + ], + "source_defined_cursor": false, + "default_cursor_field": [ + "column_name" + ] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-kobotoolbox/setup.py b/airbyte-integrations/connectors/source-kobotoolbox/setup.py index ccbdbde4bbc2..4a20829198f7 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/setup.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/setup.py @@ -6,12 +6,11 @@ from setuptools import find_packages, setup MAIN_REQUIREMENTS = [ - "airbyte-cdk~=0.1", + "airbyte-cdk~=0.2", ] TEST_REQUIREMENTS = [ "pytest~=6.2", - "pytest-mock~=3.6.1", "connector-acceptance-test", ] @@ -22,7 +21,7 @@ author_email="contact@airbyte.io", packages=find_packages(), install_requires=MAIN_REQUIREMENTS, - package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]}, + package_data={"": ["*.json", "*.yaml"]}, extras_require={ "tests": TEST_REQUIREMENTS, }, diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/manifest.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/manifest.yaml deleted file mode 100644 index 48c96a7e2f4e..000000000000 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/manifest.yaml +++ /dev/null @@ -1,69 +0,0 @@ -version: "0.1.0" - -definitions: - selector: - type: RecordSelector - extractor: - type: DpathExtractor - field_pointer: [] - requester: - type: HttpRequester - url_base: "https://kf.kobotoolbox.org/api/v2/assets" - http_method: "GET" - authenticator: - type: BasicHttpAuthenticator - username: "{{ config['username'] }}" - password: "{{ config['password'] }}" - retriever: - type: SimpleRetriever - record_selector: - $ref: "*ref(definitions.selector)" - paginator: - type: NoPagination - requester: - $ref: "*ref(definitions.requester)" - base_stream: - type: DeclarativeStream - retriever: - $ref: "*ref(definitions.retriever)" - forms_stream: - $ref: "*ref(definitions.base_stream)" - $options: - name: "forms" - primary_key: "_id" - path: "/{{ config['uid'] }}/data.json" - -streams: - - "*ref(definitions.forms_stream)" - -check: - stream_names: - - "forms" - -spec: - documentation_url: https://docs.airbyte.com/integrations/sources/kobotoolbox - connection_specification: - title: Kobotoolbox Spec - type: object - required: - - username - - password - - uid - additionalProperties: true - properties: - username: - type: string - title: Username - description: Username to authenticate into the KoboToolbox server - order: 1 - password: - type: string - title: Password - description: Password to authenticate into the KoboToolbox server - airbyte_secret: true - order: 2 - uid: - type: string - title: UID - description: UID to pull data from - order: 3 \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/TODO.md b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/TODO.md deleted file mode 100644 index ddd6e61aa500..000000000000 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/TODO.md +++ /dev/null @@ -1,16 +0,0 @@ -# TODO: Define your stream schemas -Your connector must describe the schema of each stream it can output using [JSONSchema](https://json-schema.org). - -You can describe the schema of your streams using one `.json` file per stream. - -## Static schemas -From the `kobotoolbox.yaml` configuration file, you read the `.json` files in the `schemas/` directory. You can refer to a schema in your configuration file using the `schema_loader` component's `file_path` field. For example: -``` -schema_loader: - type: JsonSchema - file_path: "./source_kobotoolbox/schemas/customers.json" -``` -Every stream specified in the configuration file should have a corresponding `.json` schema file. - -Delete this file once you're done. Or don't. Up to you :) - diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/customers.json b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/customers.json deleted file mode 100644 index 9a4b13485836..000000000000 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/customers.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "id": { - "type": ["null", "string"] - }, - "name": { - "type": ["null", "string"] - }, - "signup_date": { - "type": ["null", "string"], - "format": "date-time" - } - } -} diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/employees.json b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/employees.json deleted file mode 100644 index 2fa01a0fa1ff..000000000000 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/employees.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "id": { - "type": ["null", "string"] - }, - "name": { - "type": ["null", "string"] - }, - "years_of_service": { - "type": ["null", "integer"] - }, - "start_date": { - "type": ["null", "string"], - "format": "date-time" - } - } -} diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/kobo_tool_stream.json b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/kobo_tool_stream.json new file mode 100644 index 000000000000..15559a39aacb --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/kobo_tool_stream.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "_id": { + "type": [ + "string", + "null" + ] + }, + "formhub/uuid": { + "type": [ + "string", + "null" + ] + }, + "starttime": { + "type": [ + "string", + "null" + ] + }, + "__version__": { + "type": [ + "string", + "null" + ] + }, + "meta/instanceID": { + "type": [ + "string", + "null" + ] + }, + "_xform_id_string": { + "type": [ + "string", + "null" + ] + }, + "_uuid": { + "type": [ + "string", + "null" + ] + }, + "_attachments": { + "type": [ + "array", + "null" + ] + }, + "_status": { + "type": [ + "string", + "null" + ] + }, + "_geolocation": { + "type": [ + "array", + "null" + ] + }, + "_tags": { + "type": [ + "array", + "null" + ] + }, + "_notes": { + "type": [ + "array", + "null" + ] + }, + "_validation_status": { + "type": [ + "string", + "null" + ] + }, + "_submission_time": { + "type": [ + "string", + "null" + ] + }, + "_submitted_by": { + "type": [ + "string", + "null" + ] + } + } +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index a1a920c8fb2b..20524f9bb8f9 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -2,17 +2,90 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource -""" -This file provides the necessary constructs to interpret a provided declarative YAML configuration file into -source connector. +import json +import requests +from abc import ABC +from datetime import datetime +from typing import Dict, Generator +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import IncrementalMixin, Stream +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple +from airbyte_cdk.logger import AirbyteLogger +from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator +from airbyte_cdk.sources import Source -WARNING: Do not modify this file. -""" +class KoboToolStream(HttpStream): + primary_key = None -# Declarative Source -class SourceKobotoolbox(YamlDeclarativeSource): - def __init__(self): - super().__init__(**{"path_to_yaml": "manifest.yaml"}) + def __init__(self, config: Mapping[str, Any], **kwargs): + super().__init__() + self.uid = config["uid"] + token = self.get_access_token(config) + self.auth_token = token[0] + + @property + def url_base(self) -> str: + return f"https://kf.kobotoolbox.org/api/v2/assets/{self.uid}/" + + def get_access_token(self, config) -> Tuple[str, any]: + url = f"https://kf.kobotoolbox.org/token/?format=json" + + try: + response = requests.post(url, auth=(config["username"], config["password"])) + response.raise_for_status() + json_response = response.json() + return json_response.get("token", None), None if json_response is not None else None, None + except requests.exceptions.RequestException as e: + return None, e + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + return {} + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + return None + + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: + return "data.json" + + def request_headers( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> Mapping[str, Any]: + return {"Authorization": "Token " + self.auth_token} + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + json_response = response.json() + result = json_response.get('results') + + for a in result: + yield a + +class SourceKobotoolbox(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, any]: + return True, None + + # def generate_streams(self, config: str) -> List[Stream]: + # access_token = self.get_access_token(config) + # print(access_token) + # auth = access_token[0] + # print(f'==================>{auth}') + # url = f"https://kf.kobotoolbox.org/api/v2/assets.json" + # response = requests.get(url, auth=(config["username"], config["password"])) + # json_response = response.json() + # key_list=json_response.get('results') + # key = "uid" + # forms = [a_dict[key] for a_dict in key_list] + # print(f'=======assss=======>>>>{forms}') + + # streams = [] + # for form_id in forms: + # stream = KoboToolStream(config=config, form_id=form_id, auth=auth) + # stream.append(stream) + # return streams + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + return [KoboToolStream(config=config)] \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml new file mode 100644 index 000000000000..acd2f35bddd2 --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -0,0 +1,26 @@ +documentationUrl: https://docs.airbyte.com/integrations/sources/kobotoolbox +connectionSpecification: + $schema: http://json-schema.org/draft-07/schema# + title: Kobotoolbox Spec + type: object + required: + - username + - password + - uid + properties: + username: + type: string + title: Username + description: Username to authenticate into the KoboToolBox server + order: 1 + password: + type: string + title: Password + description: Password to authenticate into the KoboToolBox server + airbyte_secret: true + order: 2 + uid: + type: string + title: "UID" + description: Passw + order: 3 \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-kobotoolbox/__init__.py b/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/unit_test.py similarity index 57% rename from airbyte-integrations/connectors/source-kobotoolbox/__init__.py rename to airbyte-integrations/connectors/source-kobotoolbox/unit_tests/unit_test.py index c941b3045795..219ae0142c72 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/__init__.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/unit_test.py @@ -1,3 +1,7 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # + + +def test_example_method(): + assert True From 1c15b7b709cb3fa75975e75f6122d94518350b12 Mon Sep 17 00:00:00 2001 From: siddhant Date: Fri, 24 Feb 2023 09:35:40 +0530 Subject: [PATCH 03/50] normalisation is done here --- .../connectors/source-kobotoolbox/Dockerfile | 11 +- .../connectors/source-kobotoolbox/setup.py | 2 +- .../source_kobotoolbox/helpers.py | 114 ++++++++++++++++++ .../schemas/kobo_tool_stream.json | 96 --------------- .../source_kobotoolbox/source.py | 61 ++++++---- .../source_kobotoolbox/spec.yaml | 8 +- 6 files changed, 157 insertions(+), 135 deletions(-) create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py delete mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/kobo_tool_stream.json diff --git a/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile b/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile index 99df17a18777..88aad6a5fb05 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile +++ b/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile @@ -1,14 +1,14 @@ -FROM python:3.9.11-alpine3.15 as base +FROM python:3.10.8-slim-bullseye as base # build and load all requirements FROM base as builder WORKDIR /airbyte/integration_code # upgrade pip to the latest version -RUN apk --no-cache upgrade \ +RUN apt-get update && apt-get install -y && rm -rf /var/lib/apt/lists/* \ + # apk --no-cache upgrade \ && pip install --upgrade pip \ - && apk --no-cache add tzdata build-base - + && python3 -m pip install --upgrade setuptools COPY setup.py ./ # install necessary packages to a temporary folder @@ -24,9 +24,6 @@ COPY --from=builder /install /usr/local COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime RUN echo "Etc/UTC" > /etc/timezone -# bash is installed for more convenient debugging. -RUN apk --no-cache add bash - # copy payload code only COPY main.py ./ COPY source_kobotoolbox ./source_kobotoolbox diff --git a/airbyte-integrations/connectors/source-kobotoolbox/setup.py b/airbyte-integrations/connectors/source-kobotoolbox/setup.py index 4a20829198f7..3a92c39a7567 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/setup.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/setup.py @@ -6,7 +6,7 @@ from setuptools import find_packages, setup MAIN_REQUIREMENTS = [ - "airbyte-cdk~=0.2", + "airbyte-cdk~=0.2" ] TEST_REQUIREMENTS = [ diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py new file mode 100644 index 000000000000..c01db08b2c1e --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py @@ -0,0 +1,114 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +import base64 +from datetime import datetime + +import requests +from bigquery_schema_generator.generate_schema import SchemaGenerator +from gbqschema_converter.gbqschema_to_jsonschema import json_representation as converter +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry + + +class Helpers(object): + @staticmethod + def get_json_schema(): + json_schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "_id": { + "type": [ + "string", + "null" + ] + }, + "formhub/uuid": { + "type": [ + "string", + "null" + ] + }, + "starttime": { + "type": [ + "string", + "null" + ] + }, + "__version__": { + "type": [ + "string", + "null" + ] + }, + "meta/instanceID": { + "type": [ + "string", + "null" + ] + }, + "_xform_id_string": { + "type": [ + "string", + "null" + ] + }, + "_uuid": { + "type": [ + "string", + "null" + ] + }, + "_attachments": { + "type": [ + "array", + "null" + ] + }, + "_status": { + "type": [ + "string", + "null" + ] + }, + "_geolocation": { + "type": [ + "array", + "null" + ] + }, + "_tags": { + "type": [ + "array", + "null" + ] + }, + "_notes": { + "type": [ + "array", + "null" + ] + }, + "_validation_status": { + "type": [ + "string", + "null" + ] + }, + "_submission_time": { + "type": [ + "string", + "null" + ] + }, + "_submitted_by": { + "type": [ + "string", + "null" + ] + } + } + } + return json_schema diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/kobo_tool_stream.json b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/kobo_tool_stream.json deleted file mode 100644 index 15559a39aacb..000000000000 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/schemas/kobo_tool_stream.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "_id": { - "type": [ - "string", - "null" - ] - }, - "formhub/uuid": { - "type": [ - "string", - "null" - ] - }, - "starttime": { - "type": [ - "string", - "null" - ] - }, - "__version__": { - "type": [ - "string", - "null" - ] - }, - "meta/instanceID": { - "type": [ - "string", - "null" - ] - }, - "_xform_id_string": { - "type": [ - "string", - "null" - ] - }, - "_uuid": { - "type": [ - "string", - "null" - ] - }, - "_attachments": { - "type": [ - "array", - "null" - ] - }, - "_status": { - "type": [ - "string", - "null" - ] - }, - "_geolocation": { - "type": [ - "array", - "null" - ] - }, - "_tags": { - "type": [ - "array", - "null" - ] - }, - "_notes": { - "type": [ - "array", - "null" - ] - }, - "_validation_status": { - "type": [ - "string", - "null" - ] - }, - "_submission_time": { - "type": [ - "string", - "null" - ] - }, - "_submitted_by": { - "type": [ - "string", - "null" - ] - } - } -} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 20524f9bb8f9..2f2fe867ccd5 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -15,26 +15,36 @@ from airbyte_cdk.sources.streams.http import HttpStream from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator from airbyte_cdk.sources import Source +from .helpers import Helpers class KoboToolStream(HttpStream): primary_key = None - def __init__(self, config: Mapping[str, Any], **kwargs): + def __init__(self, config: Mapping[str, Any], form_id, schema, **kwargs): super().__init__() - self.uid = config["uid"] + self.form_id = form_id token = self.get_access_token(config) self.auth_token = token[0] + self.schema = schema @property def url_base(self) -> str: - return f"https://kf.kobotoolbox.org/api/v2/assets/{self.uid}/" + return f"https://kf.kobotoolbox.org/api/v2/assets/{self.form_id}/" + + @property + def name(self) -> str: + return self.form_id + + def get_json_schema(self): + return self.schema def get_access_token(self, config) -> Tuple[str, any]: url = f"https://kf.kobotoolbox.org/token/?format=json" try: - response = requests.post(url, auth=(config["username"], config["password"])) + response = requests.post(url, auth=( + config["username"], config["password"])) response.raise_for_status() json_response = response.json() return json_response.get("token", None), None if json_response is not None else None, None @@ -48,7 +58,7 @@ def request_params( def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: return None - + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: return "data.json" @@ -64,28 +74,31 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp for a in result: yield a + class SourceKobotoolbox(AbstractSource): def check_connection(self, logger, config) -> Tuple[bool, any]: return True, None - # def generate_streams(self, config: str) -> List[Stream]: - # access_token = self.get_access_token(config) - # print(access_token) - # auth = access_token[0] - # print(f'==================>{auth}') - # url = f"https://kf.kobotoolbox.org/api/v2/assets.json" - # response = requests.get(url, auth=(config["username"], config["password"])) - # json_response = response.json() - # key_list=json_response.get('results') - # key = "uid" - # forms = [a_dict[key] for a_dict in key_list] - # print(f'=======assss=======>>>>{forms}') - - # streams = [] - # for form_id in forms: - # stream = KoboToolStream(config=config, form_id=form_id, auth=auth) - # stream.append(stream) - # return streams + def base_schema(self): + return Helpers.get_json_schema() + + def generate_streams(self, config: str) -> List[Stream]: + url = f"https://kf.kobotoolbox.org/api/v2/assets.json" + response = requests.get(url, auth=( + config["username"], config["password"])) + json_response = response.json() + key_list = json_response.get('results') + key = "uid" + forms = [a_dict[key] for a_dict in key_list] + streams = [] + + streams = [] + for form_id in forms: + stream = KoboToolStream( + config=config, form_id=form_id, schema=self.base_schema()) + streams.append(stream) + return streams def streams(self, config: Mapping[str, Any]) -> List[Stream]: - return [KoboToolStream(config=config)] \ No newline at end of file + streams = self.generate_streams(config=config) + return streams diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index acd2f35bddd2..918254dd1e84 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -6,7 +6,6 @@ connectionSpecification: required: - username - password - - uid properties: username: type: string @@ -18,9 +17,4 @@ connectionSpecification: title: Password description: Password to authenticate into the KoboToolBox server airbyte_secret: true - order: 2 - uid: - type: string - title: "UID" - description: Passw - order: 3 \ No newline at end of file + order: 2 \ No newline at end of file From f7bb4a1eae02176724e7edfb638df2407ab8b42b Mon Sep 17 00:00:00 2001 From: siddhant Date: Fri, 3 Mar 2023 16:14:28 +0530 Subject: [PATCH 04/50] refactor --- .../source-kobotoolbox/source_kobotoolbox/helpers.py | 9 --------- docs/integrations/sources/kobotoolbox.md | 0 2 files changed, 9 deletions(-) create mode 100644 docs/integrations/sources/kobotoolbox.md diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py index c01db08b2c1e..3f2b6da30b3d 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py @@ -2,15 +2,6 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -import base64 -from datetime import datetime - -import requests -from bigquery_schema_generator.generate_schema import SchemaGenerator -from gbqschema_converter.gbqschema_to_jsonschema import json_representation as converter -from requests.adapters import HTTPAdapter -from requests.packages.urllib3.util.retry import Retry - class Helpers(object): @staticmethod diff --git a/docs/integrations/sources/kobotoolbox.md b/docs/integrations/sources/kobotoolbox.md new file mode 100644 index 000000000000..e69de29bb2d1 From ed9aa211469ed9b0eeacdbd2ae0abfb5b6909d4f Mon Sep 17 00:00:00 2001 From: Ishan Date: Thu, 9 Mar 2023 16:48:00 +0530 Subject: [PATCH 05/50] kobo tool box added pagination in data read --- .../source_kobotoolbox/source.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 2f2fe867ccd5..04035c8562bc 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -5,6 +5,7 @@ import json import requests +from urllib.parse import parse_qs, urlparse from abc import ABC from datetime import datetime from typing import Dict, Generator @@ -20,6 +21,7 @@ class KoboToolStream(HttpStream): primary_key = None + PAGINATION_LIMIT = 30000 def __init__(self, config: Mapping[str, Any], form_id, schema, **kwargs): super().__init__() @@ -54,10 +56,21 @@ def get_access_token(self, config) -> Tuple[str, any]: def request_params( self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None ) -> MutableMapping[str, Any]: - return {} + + params = {"start": 0, "limit": self.PAGINATION_LIMIT} + if next_page_token: + params.update(next_page_token) + + return params def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: - return None + json_response: Mapping[str, str] = response.json() + next = json_response.get('next') + params = None + if next is not None: + parsed_url = urlparse(next) + params = dict(parse_qs(parsed_url.query)) + return params def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: return "data.json" From 8d8dc50252e386d51f02d945fcfeb8a37ecdcf8c Mon Sep 17 00:00:00 2001 From: Ishan Date: Thu, 9 Mar 2023 17:03:56 +0530 Subject: [PATCH 06/50] cleanup --- .../source-kobotoolbox/source_kobotoolbox/source.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 04035c8562bc..f97c64d26556 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -65,10 +65,10 @@ def request_params( def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: json_response: Mapping[str, str] = response.json() - next = json_response.get('next') + next_url = json_response.get('next') params = None - if next is not None: - parsed_url = urlparse(next) + if next_url is not None: + parsed_url = urlparse(next_url) params = dict(parse_qs(parsed_url.query)) return params From 9e627f8fd60ee81bb613d8673186b997ba786bba Mon Sep 17 00:00:00 2001 From: Ishan Date: Fri, 10 Mar 2023 18:57:23 +0530 Subject: [PATCH 07/50] updated stream name with the form name --- .../source_kobotoolbox/source.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index f97c64d26556..c305ddcd975f 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -4,6 +4,7 @@ import json +import re import requests from urllib.parse import parse_qs, urlparse from abc import ABC @@ -23,12 +24,13 @@ class KoboToolStream(HttpStream): primary_key = None PAGINATION_LIMIT = 30000 - def __init__(self, config: Mapping[str, Any], form_id, schema, **kwargs): + def __init__(self, config: Mapping[str, Any], form_id, schema, name, **kwargs): super().__init__() self.form_id = form_id token = self.get_access_token(config) self.auth_token = token[0] self.schema = schema + self.stream_name = name @property def url_base(self) -> str: @@ -36,7 +38,11 @@ def url_base(self) -> str: @property def name(self) -> str: - return self.form_id + # Return the english substring as stream name. If not found return form uid + regex = re.compile('[^a-zA-Z ]') + s = regex.sub('', self.stream_name) + s = s.strip() + return s if len(s) > 0 else self.form_id def get_json_schema(self): return self.schema @@ -101,15 +107,12 @@ def generate_streams(self, config: str) -> List[Stream]: config["username"], config["password"])) json_response = response.json() key_list = json_response.get('results') - key = "uid" - forms = [a_dict[key] for a_dict in key_list] streams = [] - - streams = [] - for form_id in forms: + for form_dict in key_list: stream = KoboToolStream( - config=config, form_id=form_id, schema=self.base_schema()) + config=config, form_id=form_dict['uid'], schema=self.base_schema(), name=form_dict['name']) streams.append(stream) + return streams def streams(self, config: Mapping[str, Any]) -> List[Stream]: From 2626ce4def06781a69311ba6583fe4cb58f927df Mon Sep 17 00:00:00 2001 From: Ishan Date: Wed, 15 Mar 2023 16:17:22 +0530 Subject: [PATCH 08/50] unit test cases, acceptance test cases, cleanup and refactoring of code --- .../acceptance-test-config.yml | 70 ++++++------- .../integration_tests/configured_catalog.json | 24 +++-- .../integration_tests/invalid_config.json | 3 +- .../integration_tests/sample_config.json | 3 - .../integration_tests/sample_state.json | 5 - .../connectors/source-kobotoolbox/setup.py | 1 + .../source_kobotoolbox/helpers.py | 97 +------------------ .../source_kobotoolbox/source.py | 95 ++++++++++++------ .../unit_tests/test_source.py | 31 ++++++ .../unit_tests/test_stream.py | 86 ++++++++++++++++ .../unit_tests/unit_test.py | 7 -- 11 files changed, 234 insertions(+), 188 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_config.json delete mode 100644 airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_state.json create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_source.py create mode 100644 airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_stream.py delete mode 100644 airbyte-integrations/connectors/source-kobotoolbox/unit_tests/unit_test.py diff --git a/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-config.yml b/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-config.yml index 436a654d36aa..ced77ef70af9 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-config.yml @@ -2,38 +2,38 @@ # for more information about how to configure these tests connector_image: airbyte/source-kobotoolbox:dev acceptance_tests: - spec: - tests: - - spec_path: "source_kobotoolbox/spec.yaml" - connection: - tests: - - config_path: "secrets/config.json" - status: "succeed" - - config_path: "integration_tests/invalid_config.json" - status: "failed" - discovery: - tests: - - config_path: "secrets/config.json" - basic_read: - tests: - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog.json" - empty_streams: [] -# TODO uncomment this block to specify that the tests should assert the connector outputs the records provided in the input file a file -# expect_records: -# path: "integration_tests/expected_records.jsonl" -# extra_fields: no -# exact_order: no -# extra_records: yes - incremental: - bypass_reason: "This connector does not implement incremental sync" -# TODO uncomment this block this block if your connector implements incremental sync: -# tests: -# - config_path: "secrets/config.json" -# configured_catalog_path: "integration_tests/configured_catalog.json" -# future_state: -# future_state_path: "integration_tests/abnormal_state.json" - full_refresh: - tests: - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog.json" + spec: + tests: + - spec_path: "source_kobotoolbox/spec.yaml" + connection: + tests: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + tests: + - config_path: "secrets/config.json" + basic_read: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + # TODO uncomment this block to specify that the tests should assert the connector outputs the records provided in the input file a file + # expect_records: + # path: "integration_tests/expected_records.jsonl" + # extra_fields: no + # exact_order: no + # extra_records: yes + incremental: + bypass_reason: "This connector does not implement incremental sync" + # TODO uncomment this block this block if your connector implements incremental sync: + # tests: + # - config_path: "secrets/config.json" + # configured_catalog_path: "integration_tests/configured_catalog.json" + # future_state: + # future_state_path: "integration_tests/abnormal_state.json" + full_refresh: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json index b999c2ba3abf..85116054db69 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json @@ -1,15 +1,13 @@ { - "streams": [ - { - "stream": { - "name": "table_name", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false, - "default_cursor_field": ["column_name"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] + "streams": [ + { + "stream": { + "name": "Enrollment", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] } diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/invalid_config.json index f3732995784f..ce8bb8d0c83b 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/invalid_config.json +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/invalid_config.json @@ -1,3 +1,4 @@ { - "todo-wrong-field": "this should be an incomplete config file, used in standard tests" + "username": "thisisarandomusername", + "password": "thisisarandompassword" } diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_config.json deleted file mode 100644 index ecc4913b84c7..000000000000 --- a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_config.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "fix-me": "TODO" -} diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_state.json deleted file mode 100644 index 3587e579822d..000000000000 --- a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/sample_state.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "todo-stream-name": { - "todo-field-name": "value" - } -} diff --git a/airbyte-integrations/connectors/source-kobotoolbox/setup.py b/airbyte-integrations/connectors/source-kobotoolbox/setup.py index 3a92c39a7567..4cf8821a5415 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/setup.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/setup.py @@ -12,6 +12,7 @@ TEST_REQUIREMENTS = [ "pytest~=6.2", "connector-acceptance-test", + "requests_mock" ] setup( diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py index 3f2b6da30b3d..eccaf0ce7de1 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py @@ -6,100 +6,5 @@ class Helpers(object): @staticmethod def get_json_schema(): - json_schema = { - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "_id": { - "type": [ - "string", - "null" - ] - }, - "formhub/uuid": { - "type": [ - "string", - "null" - ] - }, - "starttime": { - "type": [ - "string", - "null" - ] - }, - "__version__": { - "type": [ - "string", - "null" - ] - }, - "meta/instanceID": { - "type": [ - "string", - "null" - ] - }, - "_xform_id_string": { - "type": [ - "string", - "null" - ] - }, - "_uuid": { - "type": [ - "string", - "null" - ] - }, - "_attachments": { - "type": [ - "array", - "null" - ] - }, - "_status": { - "type": [ - "string", - "null" - ] - }, - "_geolocation": { - "type": [ - "array", - "null" - ] - }, - "_tags": { - "type": [ - "array", - "null" - ] - }, - "_notes": { - "type": [ - "array", - "null" - ] - }, - "_validation_status": { - "type": [ - "string", - "null" - ] - }, - "_submission_time": { - "type": [ - "string", - "null" - ] - }, - "_submitted_by": { - "type": [ - "string", - "null" - ] - } - } - } + json_schema = {} return json_schema diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index c305ddcd975f..a0ec5124e403 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -6,7 +6,7 @@ import json import re import requests -from urllib.parse import parse_qs, urlparse +from urllib.parse import parse_qsl, urlparse from abc import ABC from datetime import datetime from typing import Dict, Generator @@ -22,19 +22,19 @@ class KoboToolStream(HttpStream): primary_key = None - PAGINATION_LIMIT = 30000 - def __init__(self, config: Mapping[str, Any], form_id, schema, name, **kwargs): + def __init__(self, config: Mapping[str, Any], form_id, schema, name, api_url, pagination_limit, auth_token, **kwargs): super().__init__() self.form_id = form_id - token = self.get_access_token(config) - self.auth_token = token[0] + self.auth_token = auth_token self.schema = schema self.stream_name = name + self.API_URL = api_url + self.PAGINATION_LIMIT = pagination_limit @property def url_base(self) -> str: - return f"https://kf.kobotoolbox.org/api/v2/assets/{self.form_id}/" + return f"{self.API_URL}/assets/{self.form_id}/" @property def name(self) -> str: @@ -47,18 +47,6 @@ def name(self) -> str: def get_json_schema(self): return self.schema - def get_access_token(self, config) -> Tuple[str, any]: - url = f"https://kf.kobotoolbox.org/token/?format=json" - - try: - response = requests.post(url, auth=( - config["username"], config["password"])) - response.raise_for_status() - json_response = response.json() - return json_response.get("token", None), None if json_response is not None else None, None - except requests.exceptions.RequestException as e: - return None, e - def request_params( self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None ) -> MutableMapping[str, Any]: @@ -75,7 +63,7 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, params = None if next_url is not None: parsed_url = urlparse(next_url) - params = dict(parse_qs(parsed_url.query)) + params = dict(parse_qsl(parsed_url.query)) return params def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: @@ -93,28 +81,79 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp for a in result: yield a - class SourceKobotoolbox(AbstractSource): + API_URL = "https://kf.kobotoolbox.org/api/v2" + TOKEN_URL = "https://kf.kobotoolbox.org/token/?format=json" + PAGINATION_LIMIT = 30000 + + @classmethod + def _check_credentials(cls, config: Mapping[str, Any]) -> Tuple[bool, Any]: + # check if the credentials are provided correctly, because for now these value are not required in spec + if not config.get("username"): + return False, "username in credentials is not provided" + + if not config.get("password"): + return False, "password in credentials is not provided" + + return True, None + + def get_access_token(self, config) -> Tuple[str, any]: + url = self.TOKEN_URL + + try: + response = requests.post(url, auth=( + config["username"], config["password"])) + response.raise_for_status() + json_response = response.json() + return (json_response.get("token", None), None) if json_response is not None else (None, None) + except requests.exceptions.RequestException as e: + return None, e + def check_connection(self, logger, config) -> Tuple[bool, any]: + is_valid_credentials, msg = self._check_credentials(config) + if not is_valid_credentials: + return is_valid_credentials, msg + + url = f"{self.API_URL}/assets.json" + response = requests.get(url, auth=( + config["username"], config["password"])) + + try: + response.raise_for_status() + except requests.exceptions.HTTPError as err: + return False, 'Something went wrong. Please check your credentials' + return True, None def base_schema(self): return Helpers.get_json_schema() - def generate_streams(self, config: str) -> List[Stream]: - url = f"https://kf.kobotoolbox.org/api/v2/assets.json" + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + + # Fetch all assets(forms) + url = f"{self.API_URL}/assets.json" response = requests.get(url, auth=( config["username"], config["password"])) json_response = response.json() key_list = json_response.get('results') + + # Generate a auth token for all streams + auth_token, msg = self.get_access_token(config) + if auth_token is None: + return [] + + # Generate array of stream objects streams = [] for form_dict in key_list: stream = KoboToolStream( - config=config, form_id=form_dict['uid'], schema=self.base_schema(), name=form_dict['name']) + config=config, + form_id=form_dict['uid'], + schema=self.base_schema(), + name=form_dict['name'], + api_url=self.API_URL, + pagination_limit=self.PAGINATION_LIMIT, + auth_token=auth_token + ) streams.append(stream) - return streams - - def streams(self, config: Mapping[str, Any]) -> List[Stream]: - streams = self.generate_streams(config=config) - return streams + return streams \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_source.py b/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_source.py new file mode 100644 index 000000000000..221e84ff217d --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_source.py @@ -0,0 +1,31 @@ +import pytest +from source_kobotoolbox.source import SourceKobotoolbox + + +@pytest.mark.parametrize('config, err_msg', [ + ( + {"password": "some_password"}, + "username in credentials is not provided" + ), + ( + {"username": "username"}, + "password in credentials is not provided" + ), + ( + {"username": "username", "password": "some_password"}, + 'Something went wrong. Please check your credentials' + ) +]) +def test_check_connection(config, err_msg): + response = SourceKobotoolbox().check_connection(logger=None, config=config) + assert response == (False, err_msg) + + +@pytest.mark.parametrize('config', [ + ( + {"username": "username", "password": "some_password"} + ) +]) +def test_streams(config): + response = SourceKobotoolbox().streams(config) + assert response == [] \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_stream.py new file mode 100644 index 000000000000..2e2d1eb9b85c --- /dev/null +++ b/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_stream.py @@ -0,0 +1,86 @@ +import pytest +from unittest.mock import Mock +import requests +from source_kobotoolbox.source import KoboToolStream + +API_URL = "https://kf.kobotoolbox.org/api/v2" +PAGINATION_LIMIT = 30000 + +stream_config = { + "config": {"username": "username", "password": "my_password"}, + "form_id": "my_form_id", + "schema": {}, + "name": "my_form", + "api_url": API_URL, + "pagination_limit": PAGINATION_LIMIT, + "auth_token": "my_token_123" +} + +@pytest.mark.parametrize('config', [(stream_config)]) +def test_stream_base_url(config): + stream = KoboToolStream(**config) + assert stream.url_base == f"{config['api_url']}/assets/{config['form_id']}/" + +@pytest.mark.parametrize('config', [(stream_config)]) +def test_json_schema(config): + stream = KoboToolStream(**config) + assert stream.get_json_schema() == {} + +@pytest.mark.parametrize('config, next_page_token', [(stream_config, None)]) +def test_request_params(config, next_page_token): + stream = KoboToolStream(**config) + assert stream.request_params({}, None, next_page_token) == {'start': 0, 'limit': config['pagination_limit']} + +@pytest.mark.parametrize('config, total_records, params, next_page_token', [ + ( + stream_config, + 50000, + {'start': 100, 'limit': 100}, + {'start': '200', 'limit': '100'} + ), + ( + stream_config, + 1729, + {'start': 1700, 'limit': 100}, + None + ) +]) +def test_next_page_token(config, params, next_page_token, total_records): + stream = KoboToolStream(**config) + response = Mock(spec=requests.Response) + + def fetch_next_page(params, total_records=total_records): + prev = None + next1 = None + if params['limit'] + params['start'] < total_records: + next1 = {'limit': params['limit'], 'start': params['limit'] + params['start']} + + if params['start'] > 0: + prev = params + + return (prev, next1) + + def fetch_request(response, params, url, total_records=total_records): + + (prev, next1) = fetch_next_page(params) + + if prev is not None: + prev = f"{url}?limit={prev['limit']}&start={prev['start']}" + + if next1 is not None: + next1 = f"{url}?limit={next1['limit']}&start={next1['start']}" + + response.json.return_value = { + "count": total_records, + "next": next1, + "previous": prev, + "results": [] + } + + return response + + url = stream.url_base + stream.path() + + response = fetch_request(response, params, url) + + assert next_page_token == stream.next_page_token(response) \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/unit_test.py deleted file mode 100644 index 219ae0142c72..000000000000 --- a/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/unit_test.py +++ /dev/null @@ -1,7 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -def test_example_method(): - assert True From f40965d3daac5f37cbf1557a5664a967059c2b9d Mon Sep 17 00:00:00 2001 From: Ishan Date: Thu, 16 Mar 2023 12:50:48 +0530 Subject: [PATCH 09/50] added svg logo, documentation .md file and added the connector in seed --- .../src/main/resources/icons/kobotoolbox.svg | 146 ++++++++++++++++++ .../resources/seed/source_definitions.yaml | 8 + docs/integrations/sources/kobotoolbox.md | 51 ++++++ 3 files changed, 205 insertions(+) create mode 100644 airbyte-config/init/src/main/resources/icons/kobotoolbox.svg diff --git a/airbyte-config/init/src/main/resources/icons/kobotoolbox.svg b/airbyte-config/init/src/main/resources/icons/kobotoolbox.svg new file mode 100644 index 000000000000..a1074199554d --- /dev/null +++ b/airbyte-config/init/src/main/resources/icons/kobotoolbox.svg @@ -0,0 +1,146 @@ + + + + + + + + \ No newline at end of file diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 7709769c0f49..e7cc07e57500 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -922,6 +922,14 @@ icon: klaviyo.svg sourceType: api releaseStage: generally_available +- name: Kobotoolbox + sourceDefinitionId: d2648a70-30ff-49d6-af45-d0b11fe06c47 + dockerRepository: airbyte/source-kobotoolbox + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.com/integrations/sources/kobotoolbox + icon: kobotoolbox.svg + sourceType: api + releaseStage: alpha - name: Kyriba sourceDefinitionId: 547dc08e-ab51-421d-953b-8f3745201a8c dockerRepository: airbyte/source-kyriba diff --git a/docs/integrations/sources/kobotoolbox.md b/docs/integrations/sources/kobotoolbox.md index e69de29bb2d1..ff70729c2dc4 100644 --- a/docs/integrations/sources/kobotoolbox.md +++ b/docs/integrations/sources/kobotoolbox.md @@ -0,0 +1,51 @@ +# Kobotoolbox + +This page contains the setup guide and reference information for the Kobotoolbox source connector. + +## Prerequisites + +**For Airbyte Open Source:** + +- Username of Kobotoolbox account +- Password of Kobotoolbox account + +## Setup guide + +### Step 1: Set up Kobotoolbox account and forms + +1. Signup on [Kobotoolbox](https://www.kobotoolbox.org/sign-up/) to create an account. +2. Create and deploy your custom form by following instructions given [here](https://support.kobotoolbox.org/new_form.html) + +### Step 2: Set up the Kobotoolbox connector in Airbyte + +**For Airbyte Open Source:** + +1. Go to local Airbyte page. +2. In the left navigation bar, click **Sources**. In the top-right corner, click **+ New Source**. +3. On the source setup page, select **Kobotoolbox** from the Source type dropdown and enter a name for this connector. +4. Enter the **username** and **password** of your kobotoolbox account +5. Enter the Start Date +6. Click **Set up source**. + +## Supported sync modes + +​ +The Kobotoolbox source connector supports the following[ sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): +​ + +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) + +## Supported Streams + +The Kobotoolbox connector supports **forms** as the streams. + +## Connector-specific features + +​The Kobotoolbox box uses the [api v2](https://kf.kobotoolbox.org/api/v2) + +## Changelog + +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :--------------- | +| 0.1.0 | 2023-03-16 | [22024](https://github.com/airbytehq/airbyte/pull/22024) | Initial Realease | From e9074463d5be107c305fd91859140c8acae61845 Mon Sep 17 00:00:00 2001 From: Ishan Date: Thu, 16 Mar 2023 13:38:59 +0530 Subject: [PATCH 10/50] minor changes --- .../source_kobotoolbox/spec.yaml | 37 ++++++++++--------- docs/integrations/sources/kobotoolbox.md | 6 +-- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index 918254dd1e84..d8ce0d2ea7b9 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -1,20 +1,21 @@ documentationUrl: https://docs.airbyte.com/integrations/sources/kobotoolbox +supportsNormalization: false connectionSpecification: - $schema: http://json-schema.org/draft-07/schema# - title: Kobotoolbox Spec - type: object - required: - - username - - password - properties: - username: - type: string - title: Username - description: Username to authenticate into the KoboToolBox server - order: 1 - password: - type: string - title: Password - description: Password to authenticate into the KoboToolBox server - airbyte_secret: true - order: 2 \ No newline at end of file + $schema: http://json-schema.org/draft-07/schema# + title: Kobotoolbox Spec + type: object + required: + - username + - password + properties: + username: + type: string + title: Username + description: Username to authenticate into the KoboToolBox server + order: 1 + password: + type: string + title: Password + description: Password to authenticate into the KoboToolBox server + airbyte_secret: true + order: 2 diff --git a/docs/integrations/sources/kobotoolbox.md b/docs/integrations/sources/kobotoolbox.md index ff70729c2dc4..ca466e3abd91 100644 --- a/docs/integrations/sources/kobotoolbox.md +++ b/docs/integrations/sources/kobotoolbox.md @@ -46,6 +46,6 @@ The Kobotoolbox connector supports **forms** as the streams. ## Changelog -| Version | Date | Pull Request | Subject | -| :------ | :--------- | :------------------------------------------------------- | :--------------- | -| 0.1.0 | 2023-03-16 | [22024](https://github.com/airbytehq/airbyte/pull/22024) | Initial Realease | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :-------------- | +| 0.1.0 | 2023-03-16 | [22024](https://github.com/airbytehq/airbyte/pull/22024) | Initial Release | From ceb312f6657e1eb488b79443ec70166c837f6ba3 Mon Sep 17 00:00:00 2001 From: Ishan Date: Thu, 16 Mar 2023 15:44:47 +0530 Subject: [PATCH 11/50] updated the PR number in docs --- docs/integrations/sources/kobotoolbox.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/sources/kobotoolbox.md b/docs/integrations/sources/kobotoolbox.md index ca466e3abd91..5ef75527deef 100644 --- a/docs/integrations/sources/kobotoolbox.md +++ b/docs/integrations/sources/kobotoolbox.md @@ -48,4 +48,4 @@ The Kobotoolbox connector supports **forms** as the streams. | Version | Date | Pull Request | Subject | | :------ | :--------- | :------------------------------------------------------- | :-------------- | -| 0.1.0 | 2023-03-16 | [22024](https://github.com/airbytehq/airbyte/pull/22024) | Initial Release | +| 0.1.0 | 2023-03-16 | [24138](https://github.com/airbytehq/airbyte/pull/22024) | Initial Release | From 62cb19be1c8428724dc54f6b2feade92d39faff7 Mon Sep 17 00:00:00 2001 From: Ishan Date: Fri, 17 Mar 2023 15:33:18 +0530 Subject: [PATCH 12/50] incremental sync --- .../source_kobotoolbox/source.py | 35 +++++++++++++++++-- .../source_kobotoolbox/spec.yaml | 6 ++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index a0ec5124e403..218643eb648f 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -20,8 +20,10 @@ from .helpers import Helpers -class KoboToolStream(HttpStream): - primary_key = None +class KoboToolStream(HttpStream, IncrementalMixin): + primary_key = "_id" + cursor_field = "_submission_time" + submission_date_format = "%Y-%m-%dT%H:%M:%S" def __init__(self, config: Mapping[str, Any], form_id, schema, name, api_url, pagination_limit, auth_token, **kwargs): super().__init__() @@ -31,6 +33,8 @@ def __init__(self, config: Mapping[str, Any], form_id, schema, name, api_url, pa self.stream_name = name self.API_URL = api_url self.PAGINATION_LIMIT = pagination_limit + self._cursor_value = None + self.start_time = datetime.strptime(config['start_time'], '%Y-%m-%d') @property def url_base(self) -> str: @@ -43,6 +47,19 @@ def name(self) -> str: s = regex.sub('', self.stream_name) s = s.strip() return s if len(s) > 0 else self.form_id + + # State will be a dict : {'_submission_time': '2023-03-03'} + + @property + def state(self) -> Mapping[str, Any]: + if self._cursor_value: + return {self.cursor_field: self._cursor_value.strftime('%Y-%m-%d')} + else: + return {self.cursor_field: self.start_time.strftime('%Y-%m-%d')} + + @state.setter + def state(self, value: Mapping[str, Any]): + self._cursor_value = datetime.strptime(value[self.cursor_field], self.submission_date_format) def get_json_schema(self): return self.schema @@ -51,7 +68,12 @@ def request_params( self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None ) -> MutableMapping[str, Any]: - params = {"start": 0, "limit": self.PAGINATION_LIMIT} + params = { + "start": 0, + "limit": self.PAGINATION_LIMIT, + "query": json.dumps({self.cursor_field: {"$gt": self.state[self.cursor_field]}}) + } + if next_page_token: params.update(next_page_token) @@ -81,6 +103,13 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp for a in result: yield a + def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]: + for record in super().read_records(*args, **kwargs): + self._cursor_value = record[self.cursor_field] + # if self._cursor_value: + # self._cursor_value = max(self._cursor_value, latest_record_date) + yield record + class SourceKobotoolbox(AbstractSource): API_URL = "https://kf.kobotoolbox.org/api/v2" TOKEN_URL = "https://kf.kobotoolbox.org/token/?format=json" diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index d8ce0d2ea7b9..23b79d73c0dd 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -19,3 +19,9 @@ connectionSpecification: description: Password to authenticate into the KoboToolBox server airbyte_secret: true order: 2 + start_time: + type: string + title: Start Time + description: Start Time for Kobotoolbox + default: "2023-03-15" + order: 3 From 72c3204dddd79c7d1f34460d8cc813a7bb0021f7 Mon Sep 17 00:00:00 2001 From: Ishan Date: Mon, 20 Mar 2023 17:19:12 +0530 Subject: [PATCH 13/50] incremental sync in kobo connector --- .../integration_tests/configured_catalog.json | 2 +- .../sample_files/configured_catalog.json | 17 +++++++---------- .../source_kobotoolbox/helpers.py | 19 ++++++++++++++++++- .../source_kobotoolbox/source.py | 16 +++++++++------- .../source_kobotoolbox/spec.yaml | 5 +++-- 5 files changed, 38 insertions(+), 21 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json index 85116054db69..0fb5ff2e9817 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json @@ -2,7 +2,7 @@ "streams": [ { "stream": { - "name": "Enrollment", + "name": "Daily Issue Form", "json_schema": {}, "supported_sync_modes": ["full_refresh"] }, diff --git a/airbyte-integrations/connectors/source-kobotoolbox/sample_files/configured_catalog.json b/airbyte-integrations/connectors/source-kobotoolbox/sample_files/configured_catalog.json index 6245aaabad89..881eba93c7bb 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/sample_files/configured_catalog.json +++ b/airbyte-integrations/connectors/source-kobotoolbox/sample_files/configured_catalog.json @@ -4,16 +4,13 @@ "stream": { "name": "kobo_tool_stream", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh" - ], - "source_defined_cursor": false, - "default_cursor_field": [ - "column_name" - ] + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["_submission_time"] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "cursor_field": ["_submission_time"], + "destination_sync_mode": "append" } ] -} \ No newline at end of file +} diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py index eccaf0ce7de1..01928e4b96d4 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py @@ -6,5 +6,22 @@ class Helpers(object): @staticmethod def get_json_schema(): - json_schema = {} + json_schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "_id": { + "type": [ + "string", + "null" + ] + }, + "_submission_time": { + "type": [ + "string", + "null" + ] + }, + }, + } return json_schema diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 218643eb648f..7d19e7584407 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -20,10 +20,12 @@ from .helpers import Helpers + class KoboToolStream(HttpStream, IncrementalMixin): primary_key = "_id" cursor_field = "_submission_time" submission_date_format = "%Y-%m-%dT%H:%M:%S" + start_date_format = "%Y-%m-%d" def __init__(self, config: Mapping[str, Any], form_id, schema, name, api_url, pagination_limit, auth_token, **kwargs): super().__init__() @@ -34,7 +36,7 @@ def __init__(self, config: Mapping[str, Any], form_id, schema, name, api_url, pa self.API_URL = api_url self.PAGINATION_LIMIT = pagination_limit self._cursor_value = None - self.start_time = datetime.strptime(config['start_time'], '%Y-%m-%d') + self.start_time = config['start_time'] @property def url_base(self) -> str: @@ -53,13 +55,13 @@ def name(self) -> str: @property def state(self) -> Mapping[str, Any]: if self._cursor_value: - return {self.cursor_field: self._cursor_value.strftime('%Y-%m-%d')} + return {self.cursor_field: self._cursor_value} else: - return {self.cursor_field: self.start_time.strftime('%Y-%m-%d')} + return {self.cursor_field: self.start_time} @state.setter def state(self, value: Mapping[str, Any]): - self._cursor_value = datetime.strptime(value[self.cursor_field], self.submission_date_format) + self._cursor_value = value[self.cursor_field] def get_json_schema(self): return self.schema @@ -71,8 +73,10 @@ def request_params( params = { "start": 0, "limit": self.PAGINATION_LIMIT, - "query": json.dumps({self.cursor_field: {"$gt": self.state[self.cursor_field]}}) + "sort": json.dumps({self.cursor_field: 1}) } + submission_time = datetime.strptime(self.state[self.cursor_field], self.submission_date_format) + params["query"] = json.dumps({self.cursor_field: {"$gt": submission_time.strftime(self.start_date_format)}}) if next_page_token: params.update(next_page_token) @@ -106,8 +110,6 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]: for record in super().read_records(*args, **kwargs): self._cursor_value = record[self.cursor_field] - # if self._cursor_value: - # self._cursor_value = max(self._cursor_value, latest_record_date) yield record class SourceKobotoolbox(AbstractSource): diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index 23b79d73c0dd..18a7dbaa99bd 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -22,6 +22,7 @@ connectionSpecification: start_time: type: string title: Start Time - description: Start Time for Kobotoolbox - default: "2023-03-15" + description: Any data before this date will not be replicated. + default: "2023-03-15T00:00:00" order: 3 + pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}$ From 20bf345b3195f62506e863aa5ea4c97eecf3c80a Mon Sep 17 00:00:00 2001 From: siddhant Date: Tue, 21 Mar 2023 13:17:23 +0530 Subject: [PATCH 14/50] fixed issue --- .../source-kobotoolbox/source_kobotoolbox/helpers.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py index eccaf0ce7de1..706c2fa0d17a 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py @@ -6,5 +6,9 @@ class Helpers(object): @staticmethod def get_json_schema(): - json_schema = {} + json_schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": {"_id": {"type": "string"}, "indexed_on": {"type": "string", "format": "date-time"}}, + } return json_schema From fe9f8d69268aeb1e6063f3e7f383bc4fab1b82d3 Mon Sep 17 00:00:00 2001 From: Ishan Date: Wed, 22 Mar 2023 10:24:35 +0530 Subject: [PATCH 15/50] yaml file 2 space indentation and removing supportsNomarlization option --- .../source_kobotoolbox/spec.yaml | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index d8ce0d2ea7b9..2b1fd9b4518e 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -1,21 +1,21 @@ documentationUrl: https://docs.airbyte.com/integrations/sources/kobotoolbox -supportsNormalization: false connectionSpecification: - $schema: http://json-schema.org/draft-07/schema# - title: Kobotoolbox Spec - type: object - required: - - username - - password + $schema: http://json-schema.org/draft-07/schema# + title: Kobotoolbox Spec + type: object + required: + - username + - password properties: - username: - type: string - title: Username - description: Username to authenticate into the KoboToolBox server - order: 1 + username: + type: string + title: Username + description: Username to authenticate into the KoboToolBox server + order: 1 password: - type: string - title: Password - description: Password to authenticate into the KoboToolBox server - airbyte_secret: true - order: 2 + type: string + title: Password + description: Password to authenticate into the KoboToolBox server + airbyte_secret: true + order: 2 + \ No newline at end of file From 04bdea0e0675c4b7394189dba6e7ae78a2d409e0 Mon Sep 17 00:00:00 2001 From: Ishan Date: Wed, 22 Mar 2023 10:25:43 +0530 Subject: [PATCH 16/50] added additionalProperties in spec.yaml file --- .../connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index 2b1fd9b4518e..9142c6d4634a 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -3,6 +3,7 @@ connectionSpecification: $schema: http://json-schema.org/draft-07/schema# title: Kobotoolbox Spec type: object + additionalProperties: true required: - username - password From 93b48b86a7cf5c8d08d171ebf967e718b1836ca5 Mon Sep 17 00:00:00 2001 From: Ishan Date: Wed, 22 Mar 2023 11:28:46 +0530 Subject: [PATCH 17/50] helpers file not needed - base schema function removed --- .../source_kobotoolbox/helpers.py | 27 ------------------- .../source_kobotoolbox/source.py | 26 ++++++++++++++---- 2 files changed, 21 insertions(+), 32 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py deleted file mode 100644 index 01928e4b96d4..000000000000 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/helpers.py +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -class Helpers(object): - @staticmethod - def get_json_schema(): - json_schema = { - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "_id": { - "type": [ - "string", - "null" - ] - }, - "_submission_time": { - "type": [ - "string", - "null" - ] - }, - }, - } - return json_schema diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 7d19e7584407..fd0d6c7e7f39 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -17,9 +17,28 @@ from airbyte_cdk.sources.streams.http import HttpStream from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator from airbyte_cdk.sources import Source -from .helpers import Helpers +stream_json_schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": True, + "properties": { + "_id": { + "type": [ + "string", + "null" + ] + }, + "_submission_time": { + "type": [ + "string", + "null" + ] + }, + } +} + class KoboToolStream(HttpStream, IncrementalMixin): primary_key = "_id" @@ -156,9 +175,6 @@ def check_connection(self, logger, config) -> Tuple[bool, any]: return True, None - def base_schema(self): - return Helpers.get_json_schema() - def streams(self, config: Mapping[str, Any]) -> List[Stream]: # Fetch all assets(forms) @@ -179,7 +195,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: stream = KoboToolStream( config=config, form_id=form_dict['uid'], - schema=self.base_schema(), + schema=stream_json_schema, name=form_dict['name'], api_url=self.API_URL, pagination_limit=self.PAGINATION_LIMIT, From 70eae8dd410fc4048d517a81b2b140ea220d61c8 Mon Sep 17 00:00:00 2001 From: Ishan Date: Wed, 22 Mar 2023 16:47:31 +0530 Subject: [PATCH 18/50] incremental sync test cases and cleanup, formatting --- .../acceptance-test-config.yml | 12 +-- .../integration_tests/abnormal_state.json | 6 +- .../integration_tests/configured_catalog.json | 9 +- .../connectors/source-kobotoolbox/setup.py | 10 +- .../source_kobotoolbox/source.py | 95 ++++++++----------- .../source_kobotoolbox/spec.yaml | 50 +++++----- .../unit_tests/test_source.py | 6 +- .../unit_tests/test_stream.py | 23 +++-- 8 files changed, 101 insertions(+), 110 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-config.yml b/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-config.yml index ced77ef70af9..c583f1f8522c 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-kobotoolbox/acceptance-test-config.yml @@ -26,13 +26,11 @@ acceptance_tests: # exact_order: no # extra_records: yes incremental: - bypass_reason: "This connector does not implement incremental sync" - # TODO uncomment this block this block if your connector implements incremental sync: - # tests: - # - config_path: "secrets/config.json" - # configured_catalog_path: "integration_tests/configured_catalog.json" - # future_state: - # future_state_path: "integration_tests/abnormal_state.json" + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + future_state: + future_state_path: "integration_tests/abnormal_state.json" full_refresh: tests: - config_path: "secrets/config.json" diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/abnormal_state.json index 52b0f2c2118f..d552c6d01d3e 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/abnormal_state.json @@ -1,5 +1,5 @@ { - "todo-stream-name": { - "todo-field-name": "todo-abnormal-value" - } + "Daily Issue Form": { + "_submission_time": "2024-01-01T00:00:00" + } } diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json index 0fb5ff2e9817..b4e6420d5a0e 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json @@ -4,10 +4,13 @@ "stream": { "name": "Daily Issue Form", "json_schema": {}, - "supported_sync_modes": ["full_refresh"] + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["_submission_time"] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "cursor_field": ["_submission_time"], + "destination_sync_mode": "append_dedup" } ] } diff --git a/airbyte-integrations/connectors/source-kobotoolbox/setup.py b/airbyte-integrations/connectors/source-kobotoolbox/setup.py index 4cf8821a5415..7a577f54d4df 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/setup.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/setup.py @@ -5,15 +5,9 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = [ - "airbyte-cdk~=0.2" -] +MAIN_REQUIREMENTS = ["airbyte-cdk~=0.2"] -TEST_REQUIREMENTS = [ - "pytest~=6.2", - "connector-acceptance-test", - "requests_mock" -] +TEST_REQUIREMENTS = ["pytest~=6.2", "connector-acceptance-test", "requests_mock"] setup( name="source_kobotoolbox", diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index fd0d6c7e7f39..ef7548906287 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -5,19 +5,13 @@ import json import re -import requests +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple from urllib.parse import parse_qsl, urlparse -from abc import ABC -from datetime import datetime -from typing import Dict, Generator + +import requests from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources.streams import IncrementalMixin, Stream -from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple -from airbyte_cdk.logger import AirbyteLogger from airbyte_cdk.sources.streams.http import HttpStream -from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator -from airbyte_cdk.sources import Source - stream_json_schema = { "$schema": "http://json-schema.org/draft-07/schema#", @@ -26,17 +20,12 @@ "properties": { "_id": { "type": [ - "string", - "null" - ] - }, - "_submission_time": { - "type": [ - "string", - "null" + "number", + "null", ] }, - } + "_submission_time": {"type": ["string", "null"]}, + }, } @@ -55,7 +44,7 @@ def __init__(self, config: Mapping[str, Any], form_id, schema, name, api_url, pa self.API_URL = api_url self.PAGINATION_LIMIT = pagination_limit self._cursor_value = None - self.start_time = config['start_time'] + self.start_time = config["start_time"] @property def url_base(self) -> str: @@ -64,23 +53,23 @@ def url_base(self) -> str: @property def name(self) -> str: # Return the english substring as stream name. If not found return form uid - regex = re.compile('[^a-zA-Z ]') - s = regex.sub('', self.stream_name) + regex = re.compile("[^a-zA-Z ]") + s = regex.sub("", self.stream_name) s = s.strip() - return s if len(s) > 0 else self.form_id - + return s if len(s) > 0 else self.form_id + # State will be a dict : {'_submission_time': '2023-03-03'} - + @property def state(self) -> Mapping[str, Any]: if self._cursor_value: return {self.cursor_field: self._cursor_value} else: return {self.cursor_field: self.start_time} - + @state.setter def state(self, value: Mapping[str, Any]): - self._cursor_value = value[self.cursor_field] + self._cursor_value = value[self.cursor_field] def get_json_schema(self): return self.schema @@ -89,13 +78,9 @@ def request_params( self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None ) -> MutableMapping[str, Any]: - params = { - "start": 0, - "limit": self.PAGINATION_LIMIT, - "sort": json.dumps({self.cursor_field: 1}) - } - submission_time = datetime.strptime(self.state[self.cursor_field], self.submission_date_format) - params["query"] = json.dumps({self.cursor_field: {"$gt": submission_time.strftime(self.start_date_format)}}) + params = {"start": 0, "limit": self.PAGINATION_LIMIT, "sort": json.dumps({self.cursor_field: 1})} + # submission_time = datetime.strptime(self.state[self.cursor_field], self.submission_date_format) + params["query"] = json.dumps({self.cursor_field: {"$gt": self.state[self.cursor_field]}}) if next_page_token: params.update(next_page_token) @@ -104,7 +89,7 @@ def request_params( def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: json_response: Mapping[str, str] = response.json() - next_url = json_response.get('next') + next_url = json_response.get("next") params = None if next_url is not None: parsed_url = urlparse(next_url) @@ -121,16 +106,17 @@ def request_headers( def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: json_response = response.json() - result = json_response.get('results') + result = json_response.get("results") - for a in result: - yield a + for record in result: + yield record def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]: for record in super().read_records(*args, **kwargs): self._cursor_value = record[self.cursor_field] yield record + class SourceKobotoolbox(AbstractSource): API_URL = "https://kf.kobotoolbox.org/api/v2" TOKEN_URL = "https://kf.kobotoolbox.org/token/?format=json" @@ -144,15 +130,14 @@ def _check_credentials(cls, config: Mapping[str, Any]) -> Tuple[bool, Any]: if not config.get("password"): return False, "password in credentials is not provided" - + return True, None - + def get_access_token(self, config) -> Tuple[str, any]: url = self.TOKEN_URL try: - response = requests.post(url, auth=( - config["username"], config["password"])) + response = requests.post(url, auth=(config["username"], config["password"])) response.raise_for_status() json_response = response.json() return (json_response.get("token", None), None) if json_response is not None else (None, None) @@ -163,26 +148,24 @@ def check_connection(self, logger, config) -> Tuple[bool, any]: is_valid_credentials, msg = self._check_credentials(config) if not is_valid_credentials: return is_valid_credentials, msg - + url = f"{self.API_URL}/assets.json" - response = requests.get(url, auth=( - config["username"], config["password"])) + response = requests.get(url, auth=(config["username"], config["password"])) try: response.raise_for_status() - except requests.exceptions.HTTPError as err: - return False, 'Something went wrong. Please check your credentials' + except requests.exceptions.HTTPError: + return False, "Something went wrong. Please check your credentials" return True, None def streams(self, config: Mapping[str, Any]) -> List[Stream]: # Fetch all assets(forms) - url = f"{self.API_URL}/assets.json" - response = requests.get(url, auth=( - config["username"], config["password"])) + url = f"{self.API_URL}/assets.json" + response = requests.get(url, auth=(config["username"], config["password"])) json_response = response.json() - key_list = json_response.get('results') + key_list = json_response.get("results") # Generate a auth token for all streams auth_token, msg = self.get_access_token(config) @@ -193,14 +176,14 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: streams = [] for form_dict in key_list: stream = KoboToolStream( - config=config, - form_id=form_dict['uid'], - schema=stream_json_schema, - name=form_dict['name'], + config=config, + form_id=form_dict["uid"], + schema=stream_json_schema, + name=form_dict["name"], api_url=self.API_URL, pagination_limit=self.PAGINATION_LIMIT, - auth_token=auth_token + auth_token=auth_token, ) streams.append(stream) - return streams \ No newline at end of file + return streams diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index c8f5011b82a1..67a9e159585d 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -1,28 +1,28 @@ documentationUrl: https://docs.airbyte.com/integrations/sources/kobotoolbox connectionSpecification: - $schema: http://json-schema.org/draft-07/schema# - title: Kobotoolbox Spec - type: object - additionalProperties: true - required: - - username - - password + $schema: http://json-schema.org/draft-07/schema# + title: Kobotoolbox Spec + type: object + additionalProperties: true + required: + - username + - password properties: - username: - type: string - title: Username - description: Username to authenticate into the KoboToolBox server - order: 1 - password: - type: string - title: Password - description: Password to authenticate into the KoboToolBox server - airbyte_secret: true - order: 2 - start_time: - type: string - title: Start Time - description: Any data before this date will not be replicated. - default: "2023-03-15T00:00:00" - order: 3 - pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}$ + username: + type: string + title: Username + description: Username to authenticate into the KoboToolBox server + order: 1 + password: + type: string + title: Password + description: Password to authenticate into the KoboToolBox server + airbyte_secret: true + order: 2 + start_time: + type: string + title: Start Time + description: Any data before this date will not be replicated. + default: "2023-03-15T00:00:00" + order: 3 + pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}$ diff --git a/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_source.py b/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_source.py index 221e84ff217d..6227aa07be94 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_source.py @@ -1,3 +1,7 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + import pytest from source_kobotoolbox.source import SourceKobotoolbox @@ -28,4 +32,4 @@ def test_check_connection(config, err_msg): ]) def test_streams(config): response = SourceKobotoolbox().streams(config) - assert response == [] \ No newline at end of file + assert response == [] diff --git a/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_stream.py index 2e2d1eb9b85c..66efa26ff88e 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_stream.py @@ -1,5 +1,10 @@ -import pytest +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + from unittest.mock import Mock + +import pytest import requests from source_kobotoolbox.source import KoboToolStream @@ -16,30 +21,34 @@ "auth_token": "my_token_123" } + @pytest.mark.parametrize('config', [(stream_config)]) def test_stream_base_url(config): stream = KoboToolStream(**config) assert stream.url_base == f"{config['api_url']}/assets/{config['form_id']}/" + @pytest.mark.parametrize('config', [(stream_config)]) def test_json_schema(config): stream = KoboToolStream(**config) assert stream.get_json_schema() == {} + @pytest.mark.parametrize('config, next_page_token', [(stream_config, None)]) def test_request_params(config, next_page_token): stream = KoboToolStream(**config) assert stream.request_params({}, None, next_page_token) == {'start': 0, 'limit': config['pagination_limit']} + @pytest.mark.parametrize('config, total_records, params, next_page_token', [ ( - stream_config, + stream_config, 50000, {'start': 100, 'limit': 100}, {'start': '200', 'limit': '100'} ), ( - stream_config, + stream_config, 1729, {'start': 1700, 'limit': 100}, None @@ -59,10 +68,10 @@ def fetch_next_page(params, total_records=total_records): prev = params return (prev, next1) - + def fetch_request(response, params, url, total_records=total_records): - (prev, next1) = fetch_next_page(params) + (prev, next1) = fetch_next_page(params) if prev is not None: prev = f"{url}?limit={prev['limit']}&start={prev['start']}" @@ -78,9 +87,9 @@ def fetch_request(response, params, url, total_records=total_records): } return response - + url = stream.url_base + stream.path() response = fetch_request(response, params, url) - assert next_page_token == stream.next_page_token(response) \ No newline at end of file + assert next_page_token == stream.next_page_token(response) From 4caf31b1db1295ccef85fa0ab5cba46afb70a130 Mon Sep 17 00:00:00 2001 From: Ishan Date: Wed, 22 Mar 2023 17:10:14 +0530 Subject: [PATCH 19/50] doc changes - added incremental sync modes and normalization mode not supported --- docs/integrations/sources/kobotoolbox.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/integrations/sources/kobotoolbox.md b/docs/integrations/sources/kobotoolbox.md index 5ef75527deef..8e4f6cf90004 100644 --- a/docs/integrations/sources/kobotoolbox.md +++ b/docs/integrations/sources/kobotoolbox.md @@ -35,11 +35,17 @@ The Kobotoolbox source connector supports the following[ sync modes](https://doc - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- (Recommended)[ Incremental Sync - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) ## Supported Streams The Kobotoolbox connector supports **forms** as the streams. +## Note + +Normalization mode of data is not supported. + ## Connector-specific features ​The Kobotoolbox box uses the [api v2](https://kf.kobotoolbox.org/api/v2) @@ -48,4 +54,4 @@ The Kobotoolbox connector supports **forms** as the streams. | Version | Date | Pull Request | Subject | | :------ | :--------- | :------------------------------------------------------- | :-------------- | -| 0.1.0 | 2023-03-16 | [24138](https://github.com/airbytehq/airbyte/pull/22024) | Initial Release | +| 0.1.0 | 2023-03-16 | [24138](https://github.com/airbytehq/airbyte/pull/24138) | Initial Release | From 3e1a08ef71a15bda13030d25f8a4ffdc56f59c88 Mon Sep 17 00:00:00 2001 From: Ishan Date: Wed, 29 Mar 2023 12:21:36 +0530 Subject: [PATCH 20/50] using greater than equal while querying api --- .../connectors/source-kobotoolbox/source_kobotoolbox/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index ef7548906287..695166083417 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -80,7 +80,7 @@ def request_params( params = {"start": 0, "limit": self.PAGINATION_LIMIT, "sort": json.dumps({self.cursor_field: 1})} # submission_time = datetime.strptime(self.state[self.cursor_field], self.submission_date_format) - params["query"] = json.dumps({self.cursor_field: {"$gt": self.state[self.cursor_field]}}) + params["query"] = json.dumps({self.cursor_field: {"$gte": self.state[self.cursor_field]}}) if next_page_token: params.update(next_page_token) From 26951dfa2d00fafda957f8f1ab90ce1c3148525e Mon Sep 17 00:00:00 2001 From: Ishan Date: Thu, 13 Apr 2023 14:32:28 +0530 Subject: [PATCH 21/50] updated cursor field to endtime - update docs, test cases and stream class --- .../integration_tests/abnormal_state.json | 2 +- .../integration_tests/configured_catalog.json | 4 ++-- .../source_kobotoolbox/source.py | 12 +++++------- .../source_kobotoolbox/spec.yaml | 4 ++-- .../source-kobotoolbox/unit_tests/test_stream.py | 14 +++++++++++--- docs/integrations/sources/kobotoolbox.md | 3 ++- 6 files changed, 23 insertions(+), 16 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/abnormal_state.json index d552c6d01d3e..7d8191917b77 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/abnormal_state.json @@ -1,5 +1,5 @@ { "Daily Issue Form": { - "_submission_time": "2024-01-01T00:00:00" + "endtime": "2024-01-01T00:00:00" } } diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json index b4e6420d5a0e..4122ab266b7d 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json @@ -6,10 +6,10 @@ "json_schema": {}, "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": ["_submission_time"] + "default_cursor_field": ["endtime"] }, "sync_mode": "incremental", - "cursor_field": ["_submission_time"], + "cursor_field": ["endtime"], "destination_sync_mode": "append_dedup" } ] diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 695166083417..28bacd8e2c4a 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -24,16 +24,15 @@ "null", ] }, - "_submission_time": {"type": ["string", "null"]}, + "endtime": {"type": ["string", "null"]} }, } - class KoboToolStream(HttpStream, IncrementalMixin): primary_key = "_id" - cursor_field = "_submission_time" - submission_date_format = "%Y-%m-%dT%H:%M:%S" - start_date_format = "%Y-%m-%d" + cursor_field = "endtime" + # submission_date_format = "%Y-%m-%dT%H:%M:%S" + # end_time_format = "%Y-%m-%dT%H:%M:%S.%.3f%z" def __init__(self, config: Mapping[str, Any], form_id, schema, name, api_url, pagination_limit, auth_token, **kwargs): super().__init__() @@ -58,7 +57,7 @@ def name(self) -> str: s = s.strip() return s if len(s) > 0 else self.form_id - # State will be a dict : {'_submission_time': '2023-03-03'} + # State will be a dict : {'endtime': '2023-03-15T00:00:00.000+05:30'} @property def state(self) -> Mapping[str, Any]: @@ -79,7 +78,6 @@ def request_params( ) -> MutableMapping[str, Any]: params = {"start": 0, "limit": self.PAGINATION_LIMIT, "sort": json.dumps({self.cursor_field: 1})} - # submission_time = datetime.strptime(self.state[self.cursor_field], self.submission_date_format) params["query"] = json.dumps({self.cursor_field: {"$gte": self.state[self.cursor_field]}}) if next_page_token: diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index 67a9e159585d..50ec9d6db20d 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -23,6 +23,6 @@ connectionSpecification: type: string title: Start Time description: Any data before this date will not be replicated. - default: "2023-03-15T00:00:00" + default: "2023-03-15T00:00:00.000+05:30" order: 3 - pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}$ + pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{3}[+][0][5]:[3][0]$ diff --git a/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_stream.py index 66efa26ff88e..ce55843404cf 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/unit_tests/test_stream.py @@ -4,15 +4,16 @@ from unittest.mock import Mock -import pytest +import json import requests +import pytest from source_kobotoolbox.source import KoboToolStream API_URL = "https://kf.kobotoolbox.org/api/v2" PAGINATION_LIMIT = 30000 stream_config = { - "config": {"username": "username", "password": "my_password"}, + "config": {"username": "username", "password": "my_password", "start_time": "2023-03-15T00:00:00.000+05:30"}, "form_id": "my_form_id", "schema": {}, "name": "my_form", @@ -21,6 +22,8 @@ "auth_token": "my_token_123" } +CURSOR = 'endtime' + @pytest.mark.parametrize('config', [(stream_config)]) def test_stream_base_url(config): @@ -37,7 +40,12 @@ def test_json_schema(config): @pytest.mark.parametrize('config, next_page_token', [(stream_config, None)]) def test_request_params(config, next_page_token): stream = KoboToolStream(**config) - assert stream.request_params({}, None, next_page_token) == {'start': 0, 'limit': config['pagination_limit']} + assert stream.request_params({}, None, next_page_token) == { + 'start': 0, + 'limit': config['pagination_limit'], + "sort": json.dumps({CURSOR: 1}), + "query": json.dumps({CURSOR: {"$gte": config['config']['start_time']}}) + } @pytest.mark.parametrize('config, total_records, params, next_page_token', [ diff --git a/docs/integrations/sources/kobotoolbox.md b/docs/integrations/sources/kobotoolbox.md index 8e4f6cf90004..bd2e558ced8a 100644 --- a/docs/integrations/sources/kobotoolbox.md +++ b/docs/integrations/sources/kobotoolbox.md @@ -44,7 +44,8 @@ The Kobotoolbox connector supports **forms** as the streams. ## Note -Normalization mode of data is not supported. +- Normalization mode of data is not supported. +- For incremental sync please make sure your that you have selected MetaData "start time" and "end time" in kobo form settings. ## Connector-specific features From 32a2a896855cb09ee811e462b897edab50b3cb9c Mon Sep 17 00:00:00 2001 From: Ishan Date: Wed, 3 May 2023 15:09:18 +0530 Subject: [PATCH 22/50] changing cursor field to _submission_time --- .../source-kobotoolbox/source_kobotoolbox/source.py | 2 +- .../source-kobotoolbox/source_kobotoolbox/spec.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 28bacd8e2c4a..a399f4e9218f 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -30,7 +30,7 @@ class KoboToolStream(HttpStream, IncrementalMixin): primary_key = "_id" - cursor_field = "endtime" + cursor_field = "_submission_time" # submission_date_format = "%Y-%m-%dT%H:%M:%S" # end_time_format = "%Y-%m-%dT%H:%M:%S.%.3f%z" diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index 50ec9d6db20d..67a9e159585d 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -23,6 +23,6 @@ connectionSpecification: type: string title: Start Time description: Any data before this date will not be replicated. - default: "2023-03-15T00:00:00.000+05:30" + default: "2023-03-15T00:00:00" order: 3 - pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{3}[+][0][5]:[3][0]$ + pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}$ From 3cad24a642a155e9c364bf3c4c9857a0d8984276 Mon Sep 17 00:00:00 2001 From: Ishan Date: Mon, 10 Jul 2023 19:17:13 +0530 Subject: [PATCH 23/50] added a base url for the kobo connector --- .../source_kobotoolbox/source.py | 21 +++---- .../source_kobotoolbox/spec.yaml | 61 +++++++++++-------- 2 files changed, 45 insertions(+), 37 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index a399f4e9218f..7a457e7df0cf 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -34,20 +34,20 @@ class KoboToolStream(HttpStream, IncrementalMixin): # submission_date_format = "%Y-%m-%dT%H:%M:%S" # end_time_format = "%Y-%m-%dT%H:%M:%S.%.3f%z" - def __init__(self, config: Mapping[str, Any], form_id, schema, name, api_url, pagination_limit, auth_token, **kwargs): + def __init__(self, config: Mapping[str, Any], form_id, schema, name, pagination_limit, auth_token, **kwargs): super().__init__() self.form_id = form_id self.auth_token = auth_token self.schema = schema self.stream_name = name - self.API_URL = api_url + self.base_url = config['base_url'] self.PAGINATION_LIMIT = pagination_limit self._cursor_value = None self.start_time = config["start_time"] @property def url_base(self) -> str: - return f"{self.API_URL}/assets/{self.form_id}/" + return f"{self.base_url}/api/v2/assets/{self.form_id}/" @property def name(self) -> str: @@ -116,8 +116,8 @@ def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]: class SourceKobotoolbox(AbstractSource): - API_URL = "https://kf.kobotoolbox.org/api/v2" - TOKEN_URL = "https://kf.kobotoolbox.org/token/?format=json" + # API_URL = "https://kf.kobotoolbox.org/api/v2" + # TOKEN_URL = "https://kf.kobotoolbox.org/token/?format=json" PAGINATION_LIMIT = 30000 @classmethod @@ -130,12 +130,12 @@ def _check_credentials(cls, config: Mapping[str, Any]) -> Tuple[bool, Any]: return False, "password in credentials is not provided" return True, None - + def get_access_token(self, config) -> Tuple[str, any]: - url = self.TOKEN_URL + token_url = f"{config['base_url']}/token/?format=json" try: - response = requests.post(url, auth=(config["username"], config["password"])) + response = requests.post(token_url, auth=(config["username"], config["password"])) response.raise_for_status() json_response = response.json() return (json_response.get("token", None), None) if json_response is not None else (None, None) @@ -147,7 +147,7 @@ def check_connection(self, logger, config) -> Tuple[bool, any]: if not is_valid_credentials: return is_valid_credentials, msg - url = f"{self.API_URL}/assets.json" + url = f"{config['base_url']}/api/v2/assets.json" response = requests.get(url, auth=(config["username"], config["password"])) try: @@ -160,7 +160,7 @@ def check_connection(self, logger, config) -> Tuple[bool, any]: def streams(self, config: Mapping[str, Any]) -> List[Stream]: # Fetch all assets(forms) - url = f"{self.API_URL}/assets.json" + url = f"{config['base_url']}/api/v2/assets.json" response = requests.get(url, auth=(config["username"], config["password"])) json_response = response.json() key_list = json_response.get("results") @@ -178,7 +178,6 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: form_id=form_dict["uid"], schema=stream_json_schema, name=form_dict["name"], - api_url=self.API_URL, pagination_limit=self.PAGINATION_LIMIT, auth_token=auth_token, ) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index 67a9e159585d..b5b02099a73d 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -1,28 +1,37 @@ documentationUrl: https://docs.airbyte.com/integrations/sources/kobotoolbox connectionSpecification: - $schema: http://json-schema.org/draft-07/schema# - title: Kobotoolbox Spec - type: object - additionalProperties: true - required: - - username - - password - properties: - username: - type: string - title: Username - description: Username to authenticate into the KoboToolBox server - order: 1 - password: - type: string - title: Password - description: Password to authenticate into the KoboToolBox server - airbyte_secret: true - order: 2 - start_time: - type: string - title: Start Time - description: Any data before this date will not be replicated. - default: "2023-03-15T00:00:00" - order: 3 - pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}$ + $schema: http://json-schema.org/draft-07/schema# + title: Kobotoolbox Spec + type: object + additionalProperties: true + required: + - username + - password + - base_url + properties: + username: + type: string + title: Username + description: Username to authenticate into the KoboToolBox server + order: 1 + password: + type: string + title: Password + description: Password to authenticate into the KoboToolBox server + airbyte_secret: true + order: 2 + base_url: + type: string + title: Base Url + description: Base url for the kobo server + enum: + - https://kf.kobotoolbox.org + - https://kobo.humanitarianresponse.info + order: 3 + start_time: + type: string + title: Start Time + description: Any data before this date will not be replicated. + default: "2023-03-15T00:00:00" + order: 4 + pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}$ From 436a9bb1454a99c122f5dc0d9b2cc7a6cd108f85 Mon Sep 17 00:00:00 2001 From: Ishan Date: Wed, 12 Jul 2023 16:21:02 +0530 Subject: [PATCH 24/50] added the feature of excluding fields while syncing with kobo connector --- .../integration_tests/configured_catalog.json | 14 ++++++---- .../source_kobotoolbox/source.py | 28 +++++++++++-------- .../source_kobotoolbox/spec.yaml | 5 ++++ 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json index 4122ab266b7d..03016b67466f 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-kobotoolbox/integration_tests/configured_catalog.json @@ -2,14 +2,18 @@ "streams": [ { "stream": { - "name": "Daily Issue Form", - "json_schema": {}, + "name": "Employability Skill Assessment Registration", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": {} + }, "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["endtime"] + "source_defined_cursor": false, + "default_cursor_field": ["_submission_time"] }, "sync_mode": "incremental", - "cursor_field": ["endtime"], + "cursor_field": ["_submission_time"], "destination_sync_mode": "append_dedup" } ] diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 7a457e7df0cf..49567a6a528f 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -44,6 +44,7 @@ def __init__(self, config: Mapping[str, Any], form_id, schema, name, pagination_ self.PAGINATION_LIMIT = pagination_limit self._cursor_value = None self.start_time = config["start_time"] + self.exclude_fields = config['exclude_fields'] if 'exclude_fields' in config else [] @property def url_base(self) -> str: @@ -55,7 +56,8 @@ def name(self) -> str: regex = re.compile("[^a-zA-Z ]") s = regex.sub("", self.stream_name) s = s.strip() - return s if len(s) > 0 else self.form_id + # return s if len(s) > 0 else self.form_id + return self.form_id # State will be a dict : {'endtime': '2023-03-15T00:00:00.000+05:30'} @@ -107,6 +109,9 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp result = json_response.get("results") for record in result: + for to_remove_field in self.exclude_fields: + if to_remove_field in record: + record.pop(to_remove_field) yield record def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]: @@ -173,14 +178,15 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: # Generate array of stream objects streams = [] for form_dict in key_list: - stream = KoboToolStream( - config=config, - form_id=form_dict["uid"], - schema=stream_json_schema, - name=form_dict["name"], - pagination_limit=self.PAGINATION_LIMIT, - auth_token=auth_token, - ) - streams.append(stream) - + if form_dict['has_deployment']: + stream = KoboToolStream( + config=config, + form_id=form_dict["uid"], + schema=stream_json_schema, + name=form_dict["name"], + pagination_limit=self.PAGINATION_LIMIT, + auth_token=auth_token, + ) + streams.append(stream) + return streams diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index b5b02099a73d..8701feb8ba3d 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -35,3 +35,8 @@ connectionSpecification: default: "2023-03-15T00:00:00" order: 4 pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}$ + exclude_fields: + type: array + title: Exclude Fields + description: Column names that you dont want to sync + order: 5 From 353c7f970979f91b04afce671da6578e562e3698 Mon Sep 17 00:00:00 2001 From: Ishan Date: Thu, 13 Jul 2023 10:07:21 +0530 Subject: [PATCH 25/50] change stream name --- .../connectors/source-kobotoolbox/source_kobotoolbox/source.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 49567a6a528f..b0b03aff4a07 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -56,8 +56,7 @@ def name(self) -> str: regex = re.compile("[^a-zA-Z ]") s = regex.sub("", self.stream_name) s = s.strip() - # return s if len(s) > 0 else self.form_id - return self.form_id + return s if len(s) > 0 else self.form_id # State will be a dict : {'endtime': '2023-03-15T00:00:00.000+05:30'} From b4ce6a8027a07fa57acf91dc08094e8dc7ff7ed7 Mon Sep 17 00:00:00 2001 From: Ishan Date: Fri, 8 Sep 2023 12:36:16 +0530 Subject: [PATCH 26/50] added new base url for lahi --- .../connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index 8701feb8ba3d..b2878a1f5271 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -27,6 +27,7 @@ connectionSpecification: enum: - https://kf.kobotoolbox.org - https://kobo.humanitarianresponse.info + - https://eu.kobotoolbox.org order: 3 start_time: type: string From ab0eff755294f5733cc8302ca98d75d29bb690c8 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Thu, 28 Sep 2023 07:16:56 +0530 Subject: [PATCH 27/50] return each record inside {data} --- .../source_kobotoolbox/source.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index b0b03aff4a07..276b3a44477a 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -24,10 +24,11 @@ "null", ] }, - "endtime": {"type": ["string", "null"]} + "_submission_time": {"type": ["string", "null"]}, }, } + class KoboToolStream(HttpStream, IncrementalMixin): primary_key = "_id" cursor_field = "_submission_time" @@ -40,11 +41,11 @@ def __init__(self, config: Mapping[str, Any], form_id, schema, name, pagination_ self.auth_token = auth_token self.schema = schema self.stream_name = name - self.base_url = config['base_url'] + self.base_url = config["base_url"] self.PAGINATION_LIMIT = pagination_limit self._cursor_value = None self.start_time = config["start_time"] - self.exclude_fields = config['exclude_fields'] if 'exclude_fields' in config else [] + self.exclude_fields = config["exclude_fields"] if "exclude_fields" in config else [] @property def url_base(self) -> str: @@ -58,7 +59,7 @@ def name(self) -> str: s = s.strip() return s if len(s) > 0 else self.form_id - # State will be a dict : {'endtime': '2023-03-15T00:00:00.000+05:30'} + # State will be a dict : {'_submission_time': '2023-03-15T00:00:00.000+05:30'} @property def state(self) -> Mapping[str, Any]: @@ -77,7 +78,6 @@ def get_json_schema(self): def request_params( self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None ) -> MutableMapping[str, Any]: - params = {"start": 0, "limit": self.PAGINATION_LIMIT, "sort": json.dumps({self.cursor_field: 1})} params["query"] = json.dumps({self.cursor_field: {"$gte": self.state[self.cursor_field]}}) @@ -111,7 +111,9 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp for to_remove_field in self.exclude_fields: if to_remove_field in record: record.pop(to_remove_field) - yield record + retval = {"_id": record["_id"], "data": record} + retval[self.cursor_field] = record[self.cursor_field] + yield retval def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]: for record in super().read_records(*args, **kwargs): @@ -134,7 +136,7 @@ def _check_credentials(cls, config: Mapping[str, Any]) -> Tuple[bool, Any]: return False, "password in credentials is not provided" return True, None - + def get_access_token(self, config) -> Tuple[str, any]: token_url = f"{config['base_url']}/token/?format=json" @@ -162,7 +164,6 @@ def check_connection(self, logger, config) -> Tuple[bool, any]: return True, None def streams(self, config: Mapping[str, Any]) -> List[Stream]: - # Fetch all assets(forms) url = f"{config['base_url']}/api/v2/assets.json" response = requests.get(url, auth=(config["username"], config["password"])) @@ -177,7 +178,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: # Generate array of stream objects streams = [] for form_dict in key_list: - if form_dict['has_deployment']: + if form_dict["has_deployment"]: stream = KoboToolStream( config=config, form_id=form_dict["uid"], @@ -187,5 +188,5 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: auth_token=auth_token, ) streams.append(stream) - + return streams From b2dd317058c545c62fcce0be5b6920a54c94ef68 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Thu, 28 Sep 2023 11:00:26 +0530 Subject: [PATCH 28/50] upgrade version tag --- airbyte-integrations/connectors/source-kobotoolbox/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile b/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile index 88aad6a5fb05..55d96c315d18 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile +++ b/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile @@ -31,5 +31,5 @@ COPY source_kobotoolbox ./source_kobotoolbox ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.2.0 LABEL io.airbyte.name=airbyte/source-kobotoolbox From 95667cba342678134204bac967e4a1cab146b36a Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Fri, 29 Sep 2023 08:17:52 +0530 Subject: [PATCH 29/50] renamed image --- airbyte-integrations/connectors/source-kobotoolbox/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile b/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile index 55d96c315d18..4a255fc9d125 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile +++ b/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile @@ -31,5 +31,5 @@ COPY source_kobotoolbox ./source_kobotoolbox ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.0 -LABEL io.airbyte.name=airbyte/source-kobotoolbox +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=tech4dev/source-kobotoolbox From 65ae3cff7082e55fe23f77eb3baa842445f3ac89 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Fri, 29 Sep 2023 08:18:05 +0530 Subject: [PATCH 30/50] put data into the stream schema --- .../connectors/source-kobotoolbox/source_kobotoolbox/source.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 276b3a44477a..096a67181e59 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -24,6 +24,9 @@ "null", ] }, + "data": { + "type": "object", + }, "_submission_time": {"type": ["string", "null"]}, }, } From fa6911b9648ff2c3f042823cf1b82ab7aa8f8f22 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Sat, 30 Sep 2023 21:16:16 +0530 Subject: [PATCH 31/50] formatting only --- .../source_kobotoolbox/source.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index b0b03aff4a07..c14a7bb85e68 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -24,10 +24,11 @@ "null", ] }, - "endtime": {"type": ["string", "null"]} + "endtime": {"type": ["string", "null"]}, }, } + class KoboToolStream(HttpStream, IncrementalMixin): primary_key = "_id" cursor_field = "_submission_time" @@ -40,11 +41,11 @@ def __init__(self, config: Mapping[str, Any], form_id, schema, name, pagination_ self.auth_token = auth_token self.schema = schema self.stream_name = name - self.base_url = config['base_url'] + self.base_url = config["base_url"] self.PAGINATION_LIMIT = pagination_limit self._cursor_value = None self.start_time = config["start_time"] - self.exclude_fields = config['exclude_fields'] if 'exclude_fields' in config else [] + self.exclude_fields = config["exclude_fields"] if "exclude_fields" in config else [] @property def url_base(self) -> str: @@ -77,7 +78,6 @@ def get_json_schema(self): def request_params( self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None ) -> MutableMapping[str, Any]: - params = {"start": 0, "limit": self.PAGINATION_LIMIT, "sort": json.dumps({self.cursor_field: 1})} params["query"] = json.dumps({self.cursor_field: {"$gte": self.state[self.cursor_field]}}) @@ -134,7 +134,7 @@ def _check_credentials(cls, config: Mapping[str, Any]) -> Tuple[bool, Any]: return False, "password in credentials is not provided" return True, None - + def get_access_token(self, config) -> Tuple[str, any]: token_url = f"{config['base_url']}/token/?format=json" @@ -162,7 +162,6 @@ def check_connection(self, logger, config) -> Tuple[bool, any]: return True, None def streams(self, config: Mapping[str, Any]) -> List[Stream]: - # Fetch all assets(forms) url = f"{config['base_url']}/api/v2/assets.json" response = requests.get(url, auth=(config["username"], config["password"])) @@ -177,7 +176,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: # Generate array of stream objects streams = [] for form_dict in key_list: - if form_dict['has_deployment']: + if form_dict["has_deployment"]: stream = KoboToolStream( config=config, form_id=form_dict["uid"], @@ -187,5 +186,5 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: auth_token=auth_token, ) streams.append(stream) - + return streams From cc1c3b67d51e856a12163050744c3034f790c177 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Thu, 30 Nov 2023 10:51:24 +0530 Subject: [PATCH 32/50] KoboStreamEndTime and KoboStreamSubmissionTime classes --- .../source_kobotoolbox/source.py | 190 +++++++++++++----- .../source_kobotoolbox/spec.yaml | 13 +- 2 files changed, 147 insertions(+), 56 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 096a67181e59..629d753a5537 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -6,7 +6,9 @@ import json import re from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple +from abc import ABC, abstractmethod from urllib.parse import parse_qsl, urlparse +from datetime import datetime, timedelta import requests from airbyte_cdk.sources import AbstractSource @@ -27,62 +29,78 @@ "data": { "type": "object", }, + "endtime": {"type": ["string", "null"]}, "_submission_time": {"type": ["string", "null"]}, }, } -class KoboToolStream(HttpStream, IncrementalMixin): +# pylint:disable=too-many-instance-attributes +class KoboToolStream(HttpStream, IncrementalMixin, ABC): + """Each Kobo form is a stream""" + primary_key = "_id" - cursor_field = "_submission_time" + # cursor_field = "_submission_time" # submission_date_format = "%Y-%m-%dT%H:%M:%S" # end_time_format = "%Y-%m-%dT%H:%M:%S.%.3f%z" - def __init__(self, config: Mapping[str, Any], form_id, schema, name, pagination_limit, auth_token, **kwargs): + def __init__( + self, + config: Mapping[str, Any], + form_id: str, + schema: dict, + name: str, + pagination_limit: int, + auth_token: str, + **kwargs, + ): + """constructor""" super().__init__() self.form_id = form_id self.auth_token = auth_token self.schema = schema self.stream_name = name self.base_url = config["base_url"] + # pylint:disable=invalid-name self.PAGINATION_LIMIT = pagination_limit self._cursor_value = None self.start_time = config["start_time"] + self.max_days_to_close = config.get("max_days_to_close", 30) self.exclude_fields = config["exclude_fields"] if "exclude_fields" in config else [] @property def url_base(self) -> str: + """base url for all http requests for kobo forms""" return f"{self.base_url}/api/v2/assets/{self.form_id}/" @property def name(self) -> str: - # Return the english substring as stream name. If not found return form uid + """Return the english substring as stream name. If not found return form uid""" regex = re.compile("[^a-zA-Z ]") s = regex.sub("", self.stream_name) s = s.strip() return s if len(s) > 0 else self.form_id - # State will be a dict : {'_submission_time': '2023-03-15T00:00:00.000+05:30'} - - @property - def state(self) -> Mapping[str, Any]: - if self._cursor_value: - return {self.cursor_field: self._cursor_value} - else: - return {self.cursor_field: self.start_time} - - @state.setter - def state(self, value: Mapping[str, Any]): - self._cursor_value = value[self.cursor_field] - def get_json_schema(self): + """airbyte needs this function""" return self.schema + @abstractmethod + def mk_query(self): + """abstract method""" + def request_params( - self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + self, + stream_state: Mapping[str, Any], # pylint:disable=unused-argument + stream_slice: Mapping[str, any] = None, # pylint:disable=unused-argument + next_page_token: Mapping[str, Any] = None, ) -> MutableMapping[str, Any]: + """build the query request params""" params = {"start": 0, "limit": self.PAGINATION_LIMIT, "sort": json.dumps({self.cursor_field: 1})} - params["query"] = json.dumps({self.cursor_field: {"$gte": self.state[self.cursor_field]}}) + + query = self.mk_query() + + params["query"] = json.dumps(query) if next_page_token: params.update(next_page_token) @@ -90,6 +108,7 @@ def request_params( return params def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + """pagination""" json_response: Mapping[str, str] = response.json() next_url = json_response.get("next") params = None @@ -98,15 +117,21 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, params = dict(parse_qsl(parsed_url.query)) return params - def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: # pylint:disable=unused-argument + """airbyte needs this function""" return "data.json" def request_headers( - self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + self, + stream_state: Mapping[str, Any], # pylint:disable=unused-argument + stream_slice: Mapping[str, Any] = None, # pylint:disable=unused-argument + next_page_token: Mapping[str, Any] = None, # pylint:disable=unused-argument ) -> Mapping[str, Any]: + """build the request headers""" return {"Authorization": "Token " + self.auth_token} def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + """parse the response and yield the records""" json_response = response.json() result = json_response.get("results") @@ -115,49 +140,93 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp if to_remove_field in record: record.pop(to_remove_field) retval = {"_id": record["_id"], "data": record} - retval[self.cursor_field] = record[self.cursor_field] + retval["_submission_time"] = record["_submission_time"] + retval["endtime"] = record.get("endtime") yield retval def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]: + """read the records from the stream""" for record in super().read_records(*args, **kwargs): self._cursor_value = record[self.cursor_field] yield record +class KoboStreamSubmissionTime(KoboToolStream): + """KoboStreamSubmissionTime""" + + cursor_field = "_submission_time" + + @property + def state(self) -> Mapping[str, Any]: + """State will be a dict : {'_submission_time': '2023-03-15T00:00:00.000+05:30'}""" + if self._cursor_value: + return {"_submission_time": self._cursor_value} + + return {"_submission_time": self.start_time} + + @state.setter + def state(self, value: Mapping[str, Any]): + """setter for state""" + self._cursor_value = value["_submission_time"] + + def mk_query(self): + """query using _submittion_time""" + return {"_submission_time": {"$gte": self.state["_submission_time"]}} + + +class KoboStreamEndTime(KoboToolStream): + """KoboStreamEndTime""" + + cursor_field = "endtime" + + @property + def state(self) -> Mapping[str, Any]: + """State will be a dict : {'endtime': '2023-03-15T00:00:00.000+05:30'}""" + if self._cursor_value: + return {"endtime": self._cursor_value} + + return {"endtime": self.start_time} + + @state.setter + def state(self, value: Mapping[str, Any]): + """setter for state""" + self._cursor_value = value["endtime"] + + def mk_query(self): + """query using endtime""" + start_sub_time = datetime.fromisoformat(self.state["endtime"]) + start_sub_time -= timedelta(days=self.max_days_to_close) + return {"_submission_time": {"$gte": start_sub_time.isoformat()}, "endtime": {"$gte": self.state["endtime"]}} + + class SourceKobotoolbox(AbstractSource): + """One instance per sync""" + # API_URL = "https://kf.kobotoolbox.org/api/v2" # TOKEN_URL = "https://kf.kobotoolbox.org/token/?format=json" PAGINATION_LIMIT = 30000 - @classmethod - def _check_credentials(cls, config: Mapping[str, Any]) -> Tuple[bool, Any]: - # check if the credentials are provided correctly, because for now these value are not required in spec - if not config.get("username"): - return False, "username in credentials is not provided" - - if not config.get("password"): - return False, "password in credentials is not provided" - - return True, None - def get_access_token(self, config) -> Tuple[str, any]: + """get the access token for the given credentials""" token_url = f"{config['base_url']}/token/?format=json" - + auth = (config["username"], config["password"]) try: - response = requests.post(token_url, auth=(config["username"], config["password"])) + response = requests.post(token_url, auth=auth, timeout=30) response.raise_for_status() - json_response = response.json() - return (json_response.get("token", None), None) if json_response is not None else (None, None) - except requests.exceptions.RequestException as e: - return None, e + except requests.exceptions.RequestException: + return None, "error" + + json_response = response.json() + if json_response is not None: + return json_response.get("token"), None - def check_connection(self, logger, config) -> Tuple[bool, any]: - is_valid_credentials, msg = self._check_credentials(config) - if not is_valid_credentials: - return is_valid_credentials, msg + return None, "error" + def check_connection(self, logger, config) -> Tuple[bool, any]: # pylint:disable=unused-argument + """check the connection with the credentials provided""" url = f"{config['base_url']}/api/v2/assets.json" - response = requests.get(url, auth=(config["username"], config["password"])) + auth = (config["username"], config["password"]) + response = requests.get(url, auth=auth, timeout=30) try: response.raise_for_status() @@ -167,14 +236,15 @@ def check_connection(self, logger, config) -> Tuple[bool, any]: return True, None def streams(self, config: Mapping[str, Any]) -> List[Stream]: - # Fetch all assets(forms) + """Fetch all assets(forms)""" url = f"{config['base_url']}/api/v2/assets.json" - response = requests.get(url, auth=(config["username"], config["password"])) + auth = (config["username"], config["password"]) + response = requests.get(url, auth=auth, timeout=30) json_response = response.json() key_list = json_response.get("results") # Generate a auth token for all streams - auth_token, msg = self.get_access_token(config) + auth_token, msg = self.get_access_token(config) # pylint:disable=unused-variable if auth_token is None: return [] @@ -182,14 +252,24 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: streams = [] for form_dict in key_list: if form_dict["has_deployment"]: - stream = KoboToolStream( - config=config, - form_id=form_dict["uid"], - schema=stream_json_schema, - name=form_dict["name"], - pagination_limit=self.PAGINATION_LIMIT, - auth_token=auth_token, - ) + if form_dict["name"] in config["forms_using_endtime"]: + stream = KoboStreamEndTime( + config=config, + form_id=form_dict["uid"], + schema=stream_json_schema, + name=form_dict["name"], + pagination_limit=self.PAGINATION_LIMIT, + auth_token=auth_token, + ) + else: + stream = KoboStreamSubmissionTime( + config=config, + form_id=form_dict["uid"], + schema=stream_json_schema, + name=form_dict["name"], + pagination_limit=self.PAGINATION_LIMIT, + auth_token=auth_token, + ) streams.append(stream) return streams diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index b2878a1f5271..1c742d0f7e64 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -39,5 +39,16 @@ connectionSpecification: exclude_fields: type: array title: Exclude Fields - description: Column names that you dont want to sync + description: Column names not to sync order: 5 + forms_using_endtime: + type: array + title: Forms Using Endtime + description: List of forms that use endtime instead of submission time + order: 6 + max_days_to_close: + type: integer + title: Max Days To Close + description: The maximum number of days between a form's submission date and end date, for those forms listed above + default: 30 + order: 7 \ No newline at end of file From 25e3d5a43c9fac4900653cd62f17076d5b565fe5 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Sat, 2 Dec 2023 08:49:49 +0530 Subject: [PATCH 33/50] refactored --- .../source_kobotoolbox/source.py | 61 ++++++------------- 1 file changed, 20 insertions(+), 41 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 629d753a5537..85e993e4db0b 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -40,9 +40,6 @@ class KoboToolStream(HttpStream, IncrementalMixin, ABC): """Each Kobo form is a stream""" primary_key = "_id" - # cursor_field = "_submission_time" - # submission_date_format = "%Y-%m-%dT%H:%M:%S" - # end_time_format = "%Y-%m-%dT%H:%M:%S.%.3f%z" def __init__( self, @@ -85,9 +82,27 @@ def get_json_schema(self): """airbyte needs this function""" return self.schema - @abstractmethod + @property + def state(self) -> Mapping[str, Any]: + """State will be a dict : {cursor_field: '2023-03-15T00:00:00.000+05:30'}""" + if self._cursor_value: + return {self.cursor_field: self._cursor_value} + + return {self.cursor_field: self.start_time} + + @state.setter + def state(self, value: Mapping[str, Any]): + """setter for state""" + self._cursor_value = value[self.cursor_field] + def mk_query(self): - """abstract method""" + """query using endtime""" + if self.cursor_field == "_submission_time": + return {self.cursor_field: {"$gte": self.state[self.cursor_field]}} + else: + start_sub_time = datetime.fromisoformat(self.state[self.cursor_field]) + start_sub_time -= timedelta(days=self.max_days_to_close) + return {"_submission_time": {"$gte": start_sub_time.isoformat()}, self.cursor_field: {"$gte": self.state[self.cursor_field]}} def request_params( self, @@ -156,48 +171,12 @@ class KoboStreamSubmissionTime(KoboToolStream): cursor_field = "_submission_time" - @property - def state(self) -> Mapping[str, Any]: - """State will be a dict : {'_submission_time': '2023-03-15T00:00:00.000+05:30'}""" - if self._cursor_value: - return {"_submission_time": self._cursor_value} - - return {"_submission_time": self.start_time} - - @state.setter - def state(self, value: Mapping[str, Any]): - """setter for state""" - self._cursor_value = value["_submission_time"] - - def mk_query(self): - """query using _submittion_time""" - return {"_submission_time": {"$gte": self.state["_submission_time"]}} - class KoboStreamEndTime(KoboToolStream): """KoboStreamEndTime""" cursor_field = "endtime" - @property - def state(self) -> Mapping[str, Any]: - """State will be a dict : {'endtime': '2023-03-15T00:00:00.000+05:30'}""" - if self._cursor_value: - return {"endtime": self._cursor_value} - - return {"endtime": self.start_time} - - @state.setter - def state(self, value: Mapping[str, Any]): - """setter for state""" - self._cursor_value = value["endtime"] - - def mk_query(self): - """query using endtime""" - start_sub_time = datetime.fromisoformat(self.state["endtime"]) - start_sub_time -= timedelta(days=self.max_days_to_close) - return {"_submission_time": {"$gte": start_sub_time.isoformat()}, "endtime": {"$gte": self.state["endtime"]}} - class SourceKobotoolbox(AbstractSource): """One instance per sync""" From 62f51393a7eaf6b177599deab52c6d0412b2bc39 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Mon, 4 Dec 2023 08:15:48 +0530 Subject: [PATCH 34/50] remove unused import --- .../connectors/source-kobotoolbox/source_kobotoolbox/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 85e993e4db0b..abc7e9f26fb6 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -6,7 +6,7 @@ import json import re from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple -from abc import ABC, abstractmethod +from abc import ABC from urllib.parse import parse_qsl, urlparse from datetime import datetime, timedelta From 39170a04fdb799237c427e559436548144cba73b Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Mon, 4 Dec 2023 15:59:44 +0530 Subject: [PATCH 35/50] ensure that "forms_using_endtime" is in the config --- .../connectors/source-kobotoolbox/source_kobotoolbox/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index abc7e9f26fb6..bfaf4c5b6d31 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -231,7 +231,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: streams = [] for form_dict in key_list: if form_dict["has_deployment"]: - if form_dict["name"] in config["forms_using_endtime"]: + if "forms_using_endtime" in config and form_dict["name"] in config["forms_using_endtime"]: stream = KoboStreamEndTime( config=config, form_id=form_dict["uid"], From e35a651cf735720602e6c7df98b6ce014f378a0b Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Mon, 4 Dec 2023 18:32:00 +0530 Subject: [PATCH 36/50] tz calculations don't start the sub_time window earlier than start time! --- .../source_kobotoolbox/source.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index bfaf4c5b6d31..a32d0b5746f8 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -102,6 +102,16 @@ def mk_query(self): else: start_sub_time = datetime.fromisoformat(self.state[self.cursor_field]) start_sub_time -= timedelta(days=self.max_days_to_close) + from datetime import timezone + + tzaware_start_time = datetime.fromisoformat(self.start_time) + if tzaware_start_time.tzinfo is None: + # interpret as utc + tzaware_start_time = tzaware_start_time.replace(tzinfo=timezone.utc) + else: + # convert to utc if necessary + tzaware_start_time = tzaware_start_time.astimezone(timezone.utc) + start_sub_time = max(start_sub_time, tzaware_start_time) return {"_submission_time": {"$gte": start_sub_time.isoformat()}, self.cursor_field: {"$gte": self.state[self.cursor_field]}} def request_params( @@ -157,6 +167,12 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp retval = {"_id": record["_id"], "data": record} retval["_submission_time"] = record["_submission_time"] retval["endtime"] = record.get("endtime") + if retval["endtime"]: + from datetime import timezone + + # endtime is in utc + endtime = datetime.fromisoformat(retval["endtime"]).astimezone(timezone.utc) + retval["endtime"] = endtime.isoformat() yield retval def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]: From c703b8871f0eb7bc561c8600910e83a07cf32fe8 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Mon, 4 Dec 2023 18:37:19 +0530 Subject: [PATCH 37/50] helper to set tzinfo --- .../source_kobotoolbox/source.py | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index a32d0b5746f8..692e4b6bcb98 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -95,6 +95,17 @@ def state(self, value: Mapping[str, Any]): """setter for state""" self._cursor_value = value[self.cursor_field] + def mk_tzaware_utc(self, dt): + """ + add a utc-tzinfo object to the dt if it doesn't have tzinfo + if it has a tzinfo, convert to utc + """ + from datetime import timezone + + if dt.tzinfo is None: + return dt.replace(tzinfo=timezone.utc) + return dt.astimezone(timezone.utc) + def mk_query(self): """query using endtime""" if self.cursor_field == "_submission_time": @@ -102,15 +113,8 @@ def mk_query(self): else: start_sub_time = datetime.fromisoformat(self.state[self.cursor_field]) start_sub_time -= timedelta(days=self.max_days_to_close) - from datetime import timezone - - tzaware_start_time = datetime.fromisoformat(self.start_time) - if tzaware_start_time.tzinfo is None: - # interpret as utc - tzaware_start_time = tzaware_start_time.replace(tzinfo=timezone.utc) - else: - # convert to utc if necessary - tzaware_start_time = tzaware_start_time.astimezone(timezone.utc) + start_sub_time = self.mk_tzaware_utc(start_sub_time) + tzaware_start_time = self.mk_tzaware_utc(datetime.fromisoformat(self.start_time)) start_sub_time = max(start_sub_time, tzaware_start_time) return {"_submission_time": {"$gte": start_sub_time.isoformat()}, self.cursor_field: {"$gte": self.state[self.cursor_field]}} @@ -168,10 +172,8 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp retval["_submission_time"] = record["_submission_time"] retval["endtime"] = record.get("endtime") if retval["endtime"]: - from datetime import timezone - # endtime is in utc - endtime = datetime.fromisoformat(retval["endtime"]).astimezone(timezone.utc) + endtime = self.mk_tzaware_utc(datetime.fromisoformat(retval["endtime"])) retval["endtime"] = endtime.isoformat() yield retval From ee5d3883e8ed89c4f243f9be2f52f3981b967e4b Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Wed, 6 Dec 2023 12:29:01 +0530 Subject: [PATCH 38/50] use "end" as a cursor --- .../source_kobotoolbox/source.py | 15 +++++++++++++++ .../source_kobotoolbox/spec.yaml | 5 +++++ 2 files changed, 20 insertions(+) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 692e4b6bcb98..bee066981cd4 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -196,6 +196,12 @@ class KoboStreamEndTime(KoboToolStream): cursor_field = "endtime" +class KoboStreamEnd(KoboToolStream): + """KoboStreamEnd""" + + cursor_field = "end" + + class SourceKobotoolbox(AbstractSource): """One instance per sync""" @@ -258,6 +264,15 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: pagination_limit=self.PAGINATION_LIMIT, auth_token=auth_token, ) + elif "forms_using_end" in config and form_dict["name"] in config["forms_using_end"]: + stream = KoboStreamEnd( + config=config, + form_id=form_dict["uid"], + schema=stream_json_schema, + name=form_dict["name"], + pagination_limit=self.PAGINATION_LIMIT, + auth_token=auth_token, + ) else: stream = KoboStreamSubmissionTime( config=config, diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml index 1c742d0f7e64..a0404d2e3cf8 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/spec.yaml @@ -46,6 +46,11 @@ connectionSpecification: title: Forms Using Endtime description: List of forms that use endtime instead of submission time order: 6 + forms_using_end: + type: array + title: Forms Using End + description: List of forms that use end instead of submission time + order: 6 max_days_to_close: type: integer title: Max Days To Close From 575bb721d154b619c9ad1d456f89073fb8dc5068 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Wed, 6 Dec 2023 13:23:43 +0530 Subject: [PATCH 39/50] added "end" to the json schema --- .../connectors/source-kobotoolbox/source_kobotoolbox/source.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index bee066981cd4..ffddd97ea237 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -30,6 +30,7 @@ "type": "object", }, "endtime": {"type": ["string", "null"]}, + "end": {"type": ["string", "null"]}, "_submission_time": {"type": ["string", "null"]}, }, } From a613ac896221e0f89fe236d467095acb033d99ac Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Wed, 6 Dec 2023 16:28:34 +0530 Subject: [PATCH 40/50] put end into the record --- .../connectors/source-kobotoolbox/source_kobotoolbox/source.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index ffddd97ea237..4d590bc988b7 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -172,6 +172,7 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp retval = {"_id": record["_id"], "data": record} retval["_submission_time"] = record["_submission_time"] retval["endtime"] = record.get("endtime") + retval["end"] = record.get("end") if retval["endtime"]: # endtime is in utc endtime = self.mk_tzaware_utc(datetime.fromisoformat(retval["endtime"])) From 216e56926bbe85a948a811cc61c6aa663ae3c345 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Thu, 7 Dec 2023 16:28:48 +0530 Subject: [PATCH 41/50] bump version number to 0.2.0 --- airbyte-integrations/connectors/source-kobotoolbox/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile b/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile index 4a255fc9d125..3178d789c975 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile +++ b/airbyte-integrations/connectors/source-kobotoolbox/Dockerfile @@ -31,5 +31,5 @@ COPY source_kobotoolbox ./source_kobotoolbox ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.2.0 LABEL io.airbyte.name=tech4dev/source-kobotoolbox From 46bb88b7faaddce377ae3a79855de420e37ec40d Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Thu, 30 May 2024 22:35:13 +0530 Subject: [PATCH 42/50] check if state contains a cursor --- .../connectors/source-kobotoolbox/source_kobotoolbox/source.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index 4d590bc988b7..a8242f4ebdd2 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -94,7 +94,8 @@ def state(self) -> Mapping[str, Any]: @state.setter def state(self, value: Mapping[str, Any]): """setter for state""" - self._cursor_value = value[self.cursor_field] + if self.cursor_field in value: + self._cursor_value = value[self.cursor_field] def mk_tzaware_utc(self, dt): """ From e51017b39f0c89c1b7c32f6ab243a62e18b816c4 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Sun, 15 Sep 2024 05:44:23 +0530 Subject: [PATCH 43/50] don't update the cursor in read_records unless the sync mode is incremental (and other formatting changes) --- .../source_kobotoolbox/source.py | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py index a8242f4ebdd2..5d377a5342f0 100644 --- a/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py +++ b/airbyte-integrations/connectors/source-kobotoolbox/source_kobotoolbox/source.py @@ -8,12 +8,13 @@ from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple from abc import ABC from urllib.parse import parse_qsl, urlparse -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone import requests from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources.streams import IncrementalMixin, Stream from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.models import SyncMode stream_json_schema = { "$schema": "http://json-schema.org/draft-07/schema#", @@ -86,10 +87,14 @@ def get_json_schema(self): @property def state(self) -> Mapping[str, Any]: """State will be a dict : {cursor_field: '2023-03-15T00:00:00.000+05:30'}""" + retval = {} + if self._cursor_value: - return {self.cursor_field: self._cursor_value} + retval[self.cursor_field] = self._cursor_value + else: + retval[self.cursor_field] = self.start_time - return {self.cursor_field: self.start_time} + return retval @state.setter def state(self, value: Mapping[str, Any]): @@ -97,28 +102,30 @@ def state(self, value: Mapping[str, Any]): if self.cursor_field in value: self._cursor_value = value[self.cursor_field] - def mk_tzaware_utc(self, dt): + def mk_tzaware_utc(self, dt: datetime): """ add a utc-tzinfo object to the dt if it doesn't have tzinfo if it has a tzinfo, convert to utc """ - from datetime import timezone - if dt.tzinfo is None: return dt.replace(tzinfo=timezone.utc) return dt.astimezone(timezone.utc) def mk_query(self): """query using endtime""" + retval = {} if self.cursor_field == "_submission_time": - return {self.cursor_field: {"$gte": self.state[self.cursor_field]}} + retval[self.cursor_field] = {"$gte": self.state[self.cursor_field]} + else: start_sub_time = datetime.fromisoformat(self.state[self.cursor_field]) start_sub_time -= timedelta(days=self.max_days_to_close) start_sub_time = self.mk_tzaware_utc(start_sub_time) tzaware_start_time = self.mk_tzaware_utc(datetime.fromisoformat(self.start_time)) start_sub_time = max(start_sub_time, tzaware_start_time) - return {"_submission_time": {"$gte": start_sub_time.isoformat()}, self.cursor_field: {"$gte": self.state[self.cursor_field]}} + retval[self.cursor_field] = {"$gte": self.state[self.cursor_field]} + retval["_submission_time"] = {"$gte": start_sub_time.isoformat()} + return retval def request_params( self, @@ -127,7 +134,9 @@ def request_params( next_page_token: Mapping[str, Any] = None, ) -> MutableMapping[str, Any]: """build the query request params""" - params = {"start": 0, "limit": self.PAGINATION_LIMIT, "sort": json.dumps({self.cursor_field: 1})} + sort_params = {} + sort_params[self.cursor_field] = 1 + params = {"start": 0, "limit": self.PAGINATION_LIMIT, "sort": json.dumps(sort_params)} query = self.mk_query() @@ -180,11 +189,19 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp retval["endtime"] = endtime.isoformat() yield retval - def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]: + def read_records( + self, + sync_mode: SyncMode, + cursor_field: List[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, + **kwargs, + ) -> Iterable[Mapping[str, Any]]: """read the records from the stream""" - for record in super().read_records(*args, **kwargs): - self._cursor_value = record[self.cursor_field] + for record in super().read_records(sync_mode, cursor_field, stream_slice, stream_state, **kwargs): yield record + if sync_mode == SyncMode.incremental: + self._cursor_value = max(record[self.cursor_field], self._cursor_value) if self._cursor_value else record[self.cursor_field] class KoboStreamSubmissionTime(KoboToolStream): From ac948e8ab7eceedafad08e7a2f4c6c080ec25784 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Sat, 17 Aug 2024 19:21:01 +0530 Subject: [PATCH 44/50] check for cursor in state in case state is empty also SurveyStream needs to define its form_id property before calling super.init --- .../connectors/source-surveycto/source_surveycto/source.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-surveycto/source_surveycto/source.py b/airbyte-integrations/connectors/source-surveycto/source_surveycto/source.py index 67d27eec9900..2a78eb9b6569 100644 --- a/airbyte-integrations/connectors/source-surveycto/source_surveycto/source.py +++ b/airbyte-integrations/connectors/source-surveycto/source_surveycto/source.py @@ -34,10 +34,12 @@ }, } + class SurveyStream(HttpStream, ABC): transformer: TypeTransformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization) def __init__(self, config: Mapping[str, Any], form_id, schema, **kwargs): + self.form_id = None super().__init__() self.config = config @@ -76,7 +78,8 @@ def state(self) -> Mapping[str, Any]: @state.setter def state(self, value: Mapping[str, Any]): - self._cursor_value = value[self.cursor_field] + if self.cursor_field in value: + self._cursor_value = value[self.cursor_field] @property def name(self) -> str: From b7d5c1368cb7a325de754bb5811b7e69642b7d44 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Tue, 27 Aug 2024 17:21:16 +0530 Subject: [PATCH 45/50] step through tenant expenses month-wise --- .../source_mgramseva/source.py | 88 ++++++++++++++----- 1 file changed, 67 insertions(+), 21 deletions(-) diff --git a/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py index 6b2f4ebbfdd8..4a3d8d78361a 100644 --- a/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py +++ b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py @@ -8,7 +8,7 @@ import base64 import hashlib -from datetime import datetime +from datetime import datetime, timedelta from logging import Logger from dateutil.relativedelta import relativedelta import requests @@ -19,6 +19,8 @@ from airbyte_cdk.sources.streams.http import HttpStream from airbyte_cdk.sources.streams.core import StreamData +pytz.IST = pytz.timezone("Asia/Kolkata") + # Basic full refresh stream class MgramsevaStream(HttpStream, ABC): @@ -99,6 +101,7 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp """ :return an iterable containing each record in the response """ + self.logger.info(response.json()) return map(lambda x: {"data": x, "id": x["id"]}, response.json()[self.response_key]) @@ -106,16 +109,58 @@ class MgramsevaDemands(MgramsevaStream): """object for consumer demands""" def __init__( - self, headers: dict, request_info: dict, user_request: dict, tenantid: str, start_date: datetime, end_date: datetime, **kwargs + self, headers: dict, request_info: dict, user_request: dict, tenantid: str, fromdate: datetime, todate: datetime, **kwargs ): - """specify endpoint for demands and call super""" - params = { - "tenantId": tenantid, - "businessService": "WS", - "periodFrom": int(1000 * start_date.timestamp()), - "periodTo": int(1000 * end_date.timestamp()), - } - super().__init__("billing-service/demand/_search", headers, request_info, user_request, params, "Demands", **kwargs) + """ctor""" + self.headers = headers + self.request_info = request_info + self.user_request = user_request + self.tenantid = tenantid + self.fromdate = fromdate + self.todate = todate + + def read_records( + self, + sync_mode: SyncMode, + cursor_field: Optional[List[str]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + stream_state: Optional[Mapping[str, Any]] = None, + ) -> Iterable[StreamData]: + """override""" + + # ==================================================================================== + # params = { + # "tenantId": self.tenantid, + # "businessService": "WS", + # "periodFrom": int(1000 * self.fromdate.timestamp()), + # "periodTo": int(1000 * self.todate.timestamp()), + # } + # stream = MgramsevaStream("billing-service/demand/_search", self.headers, self.request_info, self.user_request, params, "Demands") + # yield from stream.read_records(sync_mode, cursor_field, stream_slice, stream_state) + # ==================================================================================== + + month_start = self.fromdate.replace(day=1) + + while month_start < self.todate: + + next_month_start = month_start + relativedelta(months=1) + if next_month_start > self.todate: + next_month_start = self.todate + + params = { + "tenantId": self.tenantid, + "businessService": "WS", + "periodFrom": int(1000 * month_start.timestamp()), + "periodTo": int(1000 * next_month_start.timestamp()), + } + self.logger.info(params) + + stream = MgramsevaStream( + "billing-service/demand/_search", self.headers, self.request_info, self.user_request, params, "Demands" + ) + yield from stream.read_records(sync_mode, cursor_field, stream_slice, stream_state) + + month_start = next_month_start class MgramsevaBills(MgramsevaStream): @@ -160,18 +205,18 @@ def __init__( user_request: dict, tenantid: str, month_start: datetime, - next_month_start: datetime, + month_end: datetime, response_key: str, **kwargs, ): """call super""" self.tenantid = tenantid self.month_start = month_start - self.next_month_start = next_month_start + self.month_end = month_end params = { "tenantId": self.tenantid, "fromDate": int(month_start.timestamp() * 1000), - "toDate": int(next_month_start.timestamp() * 1000), + "toDate": int(month_end.timestamp() * 1000), } super().__init__(endpoint, headers, request_info, user_request, params, response_key, **kwargs) @@ -182,7 +227,7 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp expenses = response.json()[self.response_key] expenses["tenantId"] = self.tenantid expenses["fromDate"] = self.month_start.strftime("%Y-%m-%d") - expenses["toDate"] = self.next_month_start.strftime("%Y-%m-%d") + expenses["toDate"] = self.month_end.strftime("%Y-%m-%d") combined_string = f"{self.tenantid}{expenses['fromDate']}{expenses['toDate']}" id_hash = hashlib.sha256(combined_string.encode()) return [{"data": expenses, "id": id_hash.hexdigest()}] @@ -219,7 +264,7 @@ def read_records( while month_start < self.todate: - next_month_start = month_start + relativedelta(months=1) + next_month_start = month_start + relativedelta(months=1) - timedelta(milliseconds=1) stream = MgramsevaTenantExpense( "echallan-services/eChallan/v1/_expenseDashboard", @@ -332,22 +377,23 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: # tenant_expenses_from = datetime.strptime(config.get("tenant_expenses_from", "2022-01-01"), "%Y-%m-%d") # tenant_expenses_to = datetime.strptime(config.get("tenant_expenses_to", "2022-01-01"), "%Y-%m-%d") - start_date = datetime.strptime(config.get("start_date", "2022-01-01"), "%Y-%m-%d").replace(tzinfo=pytz.UTC) - end_date = datetime.today().replace(tzinfo=pytz.UTC) + start_date = datetime.strptime(config.get("start_date", "2022-01-01"), "%Y-%m-%d") + start_date = pytz.IST.localize(start_date).astimezone(pytz.utc) + end_date = datetime.today() + end_date = pytz.IST.localize(end_date).astimezone(pytz.utc) for tenantid in self.config["tenantids"]: # Generate streams for each object type streams = [ MgramsevaPayments(self.headers, self.request_info, self.user_request, tenantid), MgramsevaTenantExpenses(self.headers, self.request_info, self.user_request, tenantid, start_date, end_date), + MgramsevaDemands(self.headers, self.request_info, self.user_request, tenantid, start_date, end_date), ] - demand_stream = MgramsevaDemands(self.headers, self.request_info, self.user_request, tenantid, start_date, end_date) - streams.append(demand_stream) - # and now we need bills for each consumer consumer_codes = set() - for demand in demand_stream.read_records(SyncMode.full_refresh): + tmp_demand_stream = MgramsevaDemands(self.headers, self.request_info, self.user_request, tenantid, start_date, end_date) + for demand in tmp_demand_stream.read_records(SyncMode.full_refresh): consumer_codes.add(demand["data"]["consumerCode"]) streams.append(MgramsevaBills(self.headers, self.request_info, self.user_request, tenantid, list(consumer_codes))) From 2482a95b860419537f173907c3d7466eb7e6b41b Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Tue, 27 Aug 2024 22:58:47 +0530 Subject: [PATCH 46/50] put the demandDate into the demands --- .../source_mgramseva/source.py | 57 ++++++++++++------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py index 4a3d8d78361a..7d83b2d6ca34 100644 --- a/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py +++ b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py @@ -22,6 +22,11 @@ pytz.IST = pytz.timezone("Asia/Kolkata") +def convert_to_date(x: int) -> datetime: + """convert a timestamp to a date""" + return datetime.fromtimestamp(x / 1000, pytz.UTC).astimezone(pytz.IST) + + # Basic full refresh stream class MgramsevaStream(HttpStream, ABC): """Base for all objects""" @@ -105,6 +110,30 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp return map(lambda x: {"data": x, "id": x["id"]}, response.json()[self.response_key]) +class MgramsevaDemand(MgramsevaStream): + """object for a single demand""" + + def __init__( + self, + endpoint: str, + headers: dict, + request_info: dict, + user_request: dict, + params: dict, + response_key: str, + **kwargs, + ): + """call super""" + super().__init__(endpoint, headers, request_info, user_request, params, response_key, **kwargs) + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + """include the bill date""" + demands = response.json()[self.response_key] + for demand in demands: + demand["demandDate"] = convert_to_date(demand["taxPeriodTo"]).strftime("%Y-%m-%d") + return map(lambda x: {"data": x, "id": x["id"]}, demands) + + class MgramsevaDemands(MgramsevaStream): """object for consumer demands""" @@ -128,34 +157,20 @@ def read_records( ) -> Iterable[StreamData]: """override""" - # ==================================================================================== - # params = { - # "tenantId": self.tenantid, - # "businessService": "WS", - # "periodFrom": int(1000 * self.fromdate.timestamp()), - # "periodTo": int(1000 * self.todate.timestamp()), - # } - # stream = MgramsevaStream("billing-service/demand/_search", self.headers, self.request_info, self.user_request, params, "Demands") - # yield from stream.read_records(sync_mode, cursor_field, stream_slice, stream_state) - # ==================================================================================== - - month_start = self.fromdate.replace(day=1) + month_start = self.fromdate while month_start < self.todate: next_month_start = month_start + relativedelta(months=1) - if next_month_start > self.todate: - next_month_start = self.todate params = { "tenantId": self.tenantid, "businessService": "WS", "periodFrom": int(1000 * month_start.timestamp()), - "periodTo": int(1000 * next_month_start.timestamp()), + "periodTo": int(1000 * (next_month_start - timedelta(milliseconds=1)).timestamp()), } - self.logger.info(params) - stream = MgramsevaStream( + stream = MgramsevaDemand( "billing-service/demand/_search", self.headers, self.request_info, self.user_request, params, "Demands" ) yield from stream.read_records(sync_mode, cursor_field, stream_slice, stream_state) @@ -378,7 +393,9 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: # tenant_expenses_to = datetime.strptime(config.get("tenant_expenses_to", "2022-01-01"), "%Y-%m-%d") start_date = datetime.strptime(config.get("start_date", "2022-01-01"), "%Y-%m-%d") + start_date_month_start = start_date.replace(day=1) start_date = pytz.IST.localize(start_date).astimezone(pytz.utc) + start_date_month_start = pytz.IST.localize(start_date_month_start).astimezone(pytz.utc) end_date = datetime.today() end_date = pytz.IST.localize(end_date).astimezone(pytz.utc) @@ -387,12 +404,14 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: streams = [ MgramsevaPayments(self.headers, self.request_info, self.user_request, tenantid), MgramsevaTenantExpenses(self.headers, self.request_info, self.user_request, tenantid, start_date, end_date), - MgramsevaDemands(self.headers, self.request_info, self.user_request, tenantid, start_date, end_date), + MgramsevaDemands(self.headers, self.request_info, self.user_request, tenantid, start_date_month_start, end_date), ] # and now we need bills for each consumer consumer_codes = set() - tmp_demand_stream = MgramsevaDemands(self.headers, self.request_info, self.user_request, tenantid, start_date, end_date) + tmp_demand_stream = MgramsevaDemands( + self.headers, self.request_info, self.user_request, tenantid, start_date_month_start, end_date + ) for demand in tmp_demand_stream.read_records(SyncMode.full_refresh): consumer_codes.add(demand["data"]["consumerCode"]) From 16de8144e9b1bdbf8b6934880c9e51ad6d3fb5f0 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Wed, 28 Aug 2024 20:22:32 +0530 Subject: [PATCH 47/50] rolled back the attempt at fetching demands month-wise --- .../source_mgramseva/source.py | 78 ++++--------------- 1 file changed, 13 insertions(+), 65 deletions(-) diff --git a/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py index 7d83b2d6ca34..5e5637bb6b81 100644 --- a/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py +++ b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py @@ -110,74 +110,26 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp return map(lambda x: {"data": x, "id": x["id"]}, response.json()[self.response_key]) -class MgramsevaDemand(MgramsevaStream): - """object for a single demand""" +class MgramsevaDemands(MgramsevaStream): + """object for consumer demands""" - def __init__( - self, - endpoint: str, - headers: dict, - request_info: dict, - user_request: dict, - params: dict, - response_key: str, - **kwargs, - ): - """call super""" - super().__init__(endpoint, headers, request_info, user_request, params, response_key, **kwargs) + def __init__(self, headers: dict, request_info: dict, user_request: dict, tenantid: str, **kwargs): + """ctor""" + params = { + "tenantId": tenantid, + "businessService": "WS", + } + super().__init__("billing-service/demand/_search", headers, request_info, user_request, params, "Demands") def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: """include the bill date""" demands = response.json()[self.response_key] for demand in demands: - demand["demandDate"] = convert_to_date(demand["taxPeriodTo"]).strftime("%Y-%m-%d") + demand["demandFromDate"] = convert_to_date(demand["taxPeriodFrom"]).strftime("%Y-%m-%d") + demand["demandToDate"] = convert_to_date(demand["taxPeriodTo"]).strftime("%Y-%m-%d") return map(lambda x: {"data": x, "id": x["id"]}, demands) -class MgramsevaDemands(MgramsevaStream): - """object for consumer demands""" - - def __init__( - self, headers: dict, request_info: dict, user_request: dict, tenantid: str, fromdate: datetime, todate: datetime, **kwargs - ): - """ctor""" - self.headers = headers - self.request_info = request_info - self.user_request = user_request - self.tenantid = tenantid - self.fromdate = fromdate - self.todate = todate - - def read_records( - self, - sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[StreamData]: - """override""" - - month_start = self.fromdate - - while month_start < self.todate: - - next_month_start = month_start + relativedelta(months=1) - - params = { - "tenantId": self.tenantid, - "businessService": "WS", - "periodFrom": int(1000 * month_start.timestamp()), - "periodTo": int(1000 * (next_month_start - timedelta(milliseconds=1)).timestamp()), - } - - stream = MgramsevaDemand( - "billing-service/demand/_search", self.headers, self.request_info, self.user_request, params, "Demands" - ) - yield from stream.read_records(sync_mode, cursor_field, stream_slice, stream_state) - - month_start = next_month_start - - class MgramsevaBills(MgramsevaStream): """object for consumer bills""" @@ -393,9 +345,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: # tenant_expenses_to = datetime.strptime(config.get("tenant_expenses_to", "2022-01-01"), "%Y-%m-%d") start_date = datetime.strptime(config.get("start_date", "2022-01-01"), "%Y-%m-%d") - start_date_month_start = start_date.replace(day=1) start_date = pytz.IST.localize(start_date).astimezone(pytz.utc) - start_date_month_start = pytz.IST.localize(start_date_month_start).astimezone(pytz.utc) end_date = datetime.today() end_date = pytz.IST.localize(end_date).astimezone(pytz.utc) @@ -404,14 +354,12 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: streams = [ MgramsevaPayments(self.headers, self.request_info, self.user_request, tenantid), MgramsevaTenantExpenses(self.headers, self.request_info, self.user_request, tenantid, start_date, end_date), - MgramsevaDemands(self.headers, self.request_info, self.user_request, tenantid, start_date_month_start, end_date), + MgramsevaDemands(self.headers, self.request_info, self.user_request, tenantid), ] # and now we need bills for each consumer consumer_codes = set() - tmp_demand_stream = MgramsevaDemands( - self.headers, self.request_info, self.user_request, tenantid, start_date_month_start, end_date - ) + tmp_demand_stream = MgramsevaDemands(self.headers, self.request_info, self.user_request, tenantid) for demand in tmp_demand_stream.read_records(SyncMode.full_refresh): consumer_codes.add(demand["data"]["consumerCode"]) From cf902e273818c8becdb10afccf0120e237ca1f78 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Mon, 2 Sep 2024 23:43:37 +0530 Subject: [PATCH 48/50] iterate over all tenants --- .../connectors/source-mgramseva/source_mgramseva/source.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py index 5e5637bb6b81..6f102cae1af0 100644 --- a/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py +++ b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py @@ -349,9 +349,11 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: end_date = datetime.today() end_date = pytz.IST.localize(end_date).astimezone(pytz.utc) + streams = [] + for tenantid in self.config["tenantids"]: # Generate streams for each object type - streams = [ + streams += [ MgramsevaPayments(self.headers, self.request_info, self.user_request, tenantid), MgramsevaTenantExpenses(self.headers, self.request_info, self.user_request, tenantid, start_date, end_date), MgramsevaDemands(self.headers, self.request_info, self.user_request, tenantid), @@ -365,4 +367,4 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: streams.append(MgramsevaBills(self.headers, self.request_info, self.user_request, tenantid, list(consumer_codes))) - return streams + return streams From 5733e0e5821ceb226a5c4c08060c591ba207c6e4 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Tue, 3 Sep 2024 05:56:43 +0530 Subject: [PATCH 49/50] each stream needs to iterate over the list of tenant ids --- .../source_mgramseva/source.py | 148 +++++++++++------- 1 file changed, 95 insertions(+), 53 deletions(-) diff --git a/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py index 6f102cae1af0..25c9f736ecd7 100644 --- a/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py +++ b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py @@ -106,20 +106,40 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp """ :return an iterable containing each record in the response """ - self.logger.info(response.json()) + # self.logger.info(response.json()) return map(lambda x: {"data": x, "id": x["id"]}, response.json()[self.response_key]) class MgramsevaDemands(MgramsevaStream): """object for consumer demands""" - def __init__(self, headers: dict, request_info: dict, user_request: dict, tenantid: str, **kwargs): + def __init__( + self, headers: dict, request_info: dict, user_request: dict, tenantid_list: list, **kwargs + ): # pylint: disable=super-init-not-called """ctor""" - params = { - "tenantId": tenantid, - "businessService": "WS", - } - super().__init__("billing-service/demand/_search", headers, request_info, user_request, params, "Demands") + self.tenantid_list = tenantid_list + self.headers = headers + self.request_info = request_info + self.user_request = user_request + self.response_key = "Demands" + + def read_records( + self, + sync_mode: SyncMode, + cursor_field: Optional[List[str]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + stream_state: Optional[Mapping[str, Any]] = None, + ) -> Iterable[StreamData]: + """override""" + for tenantid in self.tenantid_list: + params = { + "tenantId": tenantid, + "businessService": "WS", + } + demandstream = MgramsevaStream( + "billing-service/demand/_search", self.headers, self.request_info, self.user_request, params, self.response_key + ) + yield from demandstream.read_records(sync_mode, cursor_field, stream_slice, stream_state) def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: """include the bill date""" @@ -133,16 +153,15 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp class MgramsevaBills(MgramsevaStream): """object for consumer bills""" - def __init__(self, headers: dict, request_info: dict, user_request: dict, tenantid: str, consumer_codes: list, **kwargs): + def __init__( + self, headers: dict, request_info: dict, user_request: dict, tenantid_list: list, consumer_codes: dict, **kwargs + ): # pylint: disable=super-init-not-called """specify endpoint for bills and call super""" self.headers = headers self.request_info = request_info self.user_request = user_request self.consumer_codes = consumer_codes - self.params = { - "tenantId": tenantid, - "businessService": "WS", - } + self.tenantid_list = tenantid_list def read_records( self, @@ -152,13 +171,13 @@ def read_records( stream_state: Optional[Mapping[str, Any]] = None, ) -> Iterable[StreamData]: """override""" - for consumer_code in self.consumer_codes: - params = self.params.copy() - params["consumerCode"] = consumer_code - consumer_code_stream = MgramsevaStream( - "billing-service/bill/v2/_fetchbill", self.headers, self.request_info, self.user_request, params, "Bill" - ) - yield from consumer_code_stream.read_records(sync_mode, cursor_field, stream_slice, stream_state) + for tenantid in self.tenantid_list: + for consumer_code in self.consumer_codes[tenantid]: + params = {"tenantId": tenantid, "businessService": "WS", "consumerCode": consumer_code} + consumer_code_stream = MgramsevaStream( + "billing-service/bill/v2/_fetchbill", self.headers, self.request_info, self.user_request, params, "Bill" + ) + yield from consumer_code_stream.read_records(sync_mode, cursor_field, stream_slice, stream_state) class MgramsevaTenantExpense(MgramsevaStream): @@ -204,8 +223,8 @@ class MgramsevaTenantExpenses(MgramsevaStream): """object for tenant payments""" def __init__( - self, headers: dict, request_info: dict, user_request: dict, tenantid: str, fromdate: datetime, todate: datetime, **kwargs - ): + self, headers: dict, request_info: dict, user_request: dict, tenantid_list: list, fromdate: datetime, todate: datetime, **kwargs + ): # pylint: disable=super-init-not-called """ specify endpoint for demands and call super 1672531200000 = 2023-01-01 00:00 @@ -214,7 +233,7 @@ def __init__( self.headers = headers self.request_info = request_info self.user_request = user_request - self.tenantid = tenantid + self.tenantid_list = tenantid_list self.fromdate = fromdate self.todate = todate @@ -227,34 +246,56 @@ def read_records( ) -> Iterable[StreamData]: """override""" - month_start = self.fromdate.replace(day=1) + for tenantid in self.tenantid_list: - while month_start < self.todate: + month_start = self.fromdate.replace(day=1) - next_month_start = month_start + relativedelta(months=1) - timedelta(milliseconds=1) + while month_start < self.todate: - stream = MgramsevaTenantExpense( - "echallan-services/eChallan/v1/_expenseDashboard", - self.headers, - self.request_info, - self.user_request, - self.tenantid, - month_start, - next_month_start, - "ExpenseDashboard", - ) - yield from stream.read_records(sync_mode, cursor_field, stream_slice, stream_state) + next_month_start = month_start + relativedelta(months=1) - timedelta(milliseconds=1) + + stream = MgramsevaTenantExpense( + "echallan-services/eChallan/v1/_expenseDashboard", + self.headers, + self.request_info, + self.user_request, + tenantid, + month_start, + next_month_start, + "ExpenseDashboard", + ) + yield from stream.read_records(sync_mode, cursor_field, stream_slice, stream_state) - month_start = next_month_start + month_start = next_month_start class MgramsevaPayments(MgramsevaStream): """object for consumer payments""" - def __init__(self, headers: dict, request_info: dict, user_request: dict, tenantid: str, **kwargs): + def __init__( + self, headers: dict, request_info: dict, user_request: dict, tenantid_list: list, **kwargs + ): # pylint: disable=super-init-not-called """specify endpoint for payments and call super""" - params = {"tenantId": tenantid, "businessService": "WS"} - super().__init__("collection-services/payments/WS/_search", headers, request_info, user_request, params, "Payments", **kwargs) + self.headers = headers + self.request_info = request_info + self.user_request = user_request + self.tenantid_list = tenantid_list + + def read_records( + self, + sync_mode: SyncMode, + cursor_field: Optional[List[str]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + stream_state: Optional[Mapping[str, Any]] = None, + ) -> Iterable[StreamData]: + """override""" + + for tenantid in self.tenantid_list: + params = {"tenantId": tenantid, "businessService": "WS"} + paymentstream = MgramsevaStream( + "collection-services/payments/WS/_search", self.headers, self.request_info, self.user_request, params, "Payments" + ) + yield from paymentstream.read_records(sync_mode, cursor_field, stream_slice, stream_state) # Source @@ -349,22 +390,23 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: end_date = datetime.today() end_date = pytz.IST.localize(end_date).astimezone(pytz.utc) - streams = [] + # Generate streams for each object type + streams = [ + MgramsevaPayments(self.headers, self.request_info, self.user_request, self.config["tenantids"]), + MgramsevaTenantExpenses(self.headers, self.request_info, self.user_request, self.config["tenantids"], start_date, end_date), + MgramsevaDemands(self.headers, self.request_info, self.user_request, self.config["tenantids"]), + ] + # and now we need bills for each consumer + tenantid_to_consumer_codes = {} for tenantid in self.config["tenantids"]: - # Generate streams for each object type - streams += [ - MgramsevaPayments(self.headers, self.request_info, self.user_request, tenantid), - MgramsevaTenantExpenses(self.headers, self.request_info, self.user_request, tenantid, start_date, end_date), - MgramsevaDemands(self.headers, self.request_info, self.user_request, tenantid), - ] - - # and now we need bills for each consumer - consumer_codes = set() - tmp_demand_stream = MgramsevaDemands(self.headers, self.request_info, self.user_request, tenantid) + tenantid_to_consumer_codes[tenantid] = set() + tmp_demand_stream = MgramsevaDemands(self.headers, self.request_info, self.user_request, [tenantid]) for demand in tmp_demand_stream.read_records(SyncMode.full_refresh): - consumer_codes.add(demand["data"]["consumerCode"]) + tenantid_to_consumer_codes[tenantid].add(demand["data"]["consumerCode"]) - streams.append(MgramsevaBills(self.headers, self.request_info, self.user_request, tenantid, list(consumer_codes))) + streams.append( + MgramsevaBills(self.headers, self.request_info, self.user_request, self.config["tenantids"], tenantid_to_consumer_codes) + ) return streams From 99f77ef417a6728977318433703fe3ab78a931c9 Mon Sep 17 00:00:00 2001 From: Rohit Chatterjee Date: Wed, 4 Sep 2024 22:53:45 +0530 Subject: [PATCH 50/50] wataer connections stream --- .../schemas/mgramseva_water_connections.json | 12 ++++++++ .../source_mgramseva/source.py | 30 +++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 airbyte-integrations/connectors/source-mgramseva/source_mgramseva/schemas/mgramseva_water_connections.json diff --git a/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/schemas/mgramseva_water_connections.json b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/schemas/mgramseva_water_connections.json new file mode 100644 index 000000000000..a7af717984f2 --- /dev/null +++ b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/schemas/mgramseva_water_connections.json @@ -0,0 +1,12 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "data": { + "type": "object" + } + } +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py index 25c9f736ecd7..e559a9f53d88 100644 --- a/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py +++ b/airbyte-integrations/connectors/source-mgramseva/source_mgramseva/source.py @@ -298,6 +298,35 @@ def read_records( yield from paymentstream.read_records(sync_mode, cursor_field, stream_slice, stream_state) +class MgramsevaWaterConnections(MgramsevaStream): + """object for water connections""" + + def __init__( + self, headers: dict, request_info: dict, user_request: dict, tenantid_list: list, **kwargs + ): # pylint: disable=super-init-not-called + """specify endpoint for water connections and call super""" + self.headers = headers + self.request_info = request_info + self.user_request = user_request + self.tenantid_list = tenantid_list + + def read_records( + self, + sync_mode: SyncMode, + cursor_field: Optional[List[str]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + stream_state: Optional[Mapping[str, Any]] = None, + ) -> Iterable[StreamData]: + """override""" + + for tenantid in self.tenantid_list: + params = {"tenantId": tenantid, "businessService": "WS"} + wcstream = MgramsevaStream( + "ws-services/wc/_search", self.headers, self.request_info, self.user_request, params, "WaterConnection" + ) + yield from wcstream.read_records(sync_mode, cursor_field, stream_slice, stream_state) + + # Source class SourceMgramseva(AbstractSource): """Source for mGramSeva""" @@ -393,6 +422,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: # Generate streams for each object type streams = [ MgramsevaPayments(self.headers, self.request_info, self.user_request, self.config["tenantids"]), + MgramsevaWaterConnections(self.headers, self.request_info, self.user_request, self.config["tenantids"]), MgramsevaTenantExpenses(self.headers, self.request_info, self.user_request, self.config["tenantids"], start_date, end_date), MgramsevaDemands(self.headers, self.request_info, self.user_request, self.config["tenantids"]), ]