From f6d7cdda811735c567a0660341c1564cca1606ea Mon Sep 17 00:00:00 2001 From: akimrx Date: Wed, 8 Nov 2023 10:12:41 +0300 Subject: [PATCH] feat: added support for extending the TrackerIssue model --- Dockerfile | 2 +- README.md | 32 +++++++++++++++++++++++- examples/docker/docker-compose.yml | 1 + examples/extended_model/main.py | 39 ++++++++++++++++++++++++++++++ tracker_exporter/__init__.py | 10 +++++++- tracker_exporter/_meta.py | 2 +- tracker_exporter/config.py | 2 +- tracker_exporter/etl.py | 8 +++--- tracker_exporter/main.py | 4 ++- 9 files changed, 91 insertions(+), 9 deletions(-) create mode 100644 examples/extended_model/main.py diff --git a/Dockerfile b/Dockerfile index b673e88..507a1c2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ FROM python:3.10-slim -LABEL maintainer="a.faskhutdinov@yclients.tech" +LABEL maintainer="akimstrong@yandex.ru" LABEL name="tools/tracker-exporter" ENV DEBIAN_FRONTEND noninteractive diff --git a/README.md b/README.md index 5abeec8..0368520 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,36 @@ This is also the answer to the question why the tool is not asynchronous. Limits The processing speed of one issue depends on how many changes there are in the issue in its history. More changes means longer processing. 
+## Extend exported issue data by your custom fields + +Just declare your `main.py` module, in which you extend the [TrackerIssue](tracker_exporter/models/issue.py#L65) model using multiple inheritance, like this: + +```python + +from tracker_exporter.models.issue import TrackerIssue +from tracker_exporter.utils.helpers import validate_resource +from tracker_exporter import run_etl + + +class CustomIssueFields: + def __init__(self, issue: Issues) -> None: + self.foo_custom_field = validate_resource(issue, "fooCustomField") + self.bar_custom_field = validate_resource(issue, "barCustomField") + + +class ExtendedTrackerIssue(TrackerIssue, CustomIssueFields): + def __init__(self, issue: Issues) -> None: + super().__init__(issue) + CustomIssueFields.__init__(self, issue) + + +run_etl(issue_model=ExtendedTrackerIssue) + +``` + +See the full example [here](examples/extended_model/main.py) + + ## Usage ### Native @@ -286,7 +316,7 @@ See config declaration [here](/tracker_exporter/config.py) |----------|-------------| | `EXPORTER_STATEFUL` | Enable stateful mode. Required `EXPORTER_STATE__*` params. Default is `False` | | `EXPORTER_STATEFUL_INITIAL_RANGE` | Initial search range when unknown last state. Default: `1w` | -| `EXPORTER_CHANGELOG_EXPORT_ENABLED` | Enable export all issues changelog to Clickhouse. Can greatly slow down exports. Default is `True` | +| `EXPORTER_CHANGELOG_EXPORT_ENABLED` | Enable exporting the changelog of all issues to Clickhouse. **Can greatly slow down exports** (x5 - x10). Default is `False` | | `EXPORTER_LOGLEVEL` | ETL log level. Default: `info` | | `EXPORTER_LOG_ETL_STATS` | Enable logging transform stats every N iteration. Default is `True` | | `EXPORTER_LOG_ETL_STATS_EACH_N_ITER` | How many iterations must pass to log stats. 
Default is `100` | diff --git a/examples/docker/docker-compose.yml b/examples/docker/docker-compose.yml index 5908427..e7ac88d 100644 --- a/examples/docker/docker-compose.yml +++ b/examples/docker/docker-compose.yml @@ -22,6 +22,7 @@ services: EXPORTER_TRACKER__CLOUD_ORG_ID: your_org_id EXPORTER_CLICKHOUSE__HOST: clickhouse EXPORTER_CLICKHOUSE__PORT: 8123 + EXPORTER_CHANGELOG_EXPORT_ENABLED: "true" EXPORTER_STATEFUL: "true" EXPORTER_STATE__STORAGE: jsonfile EXPORTER_STATE__JSONFILE_STRATEGY: local diff --git a/examples/extended_model/main.py b/examples/extended_model/main.py new file mode 100644 index 0000000..66940d5 --- /dev/null +++ b/examples/extended_model/main.py @@ -0,0 +1,39 @@ + +from tracker_exporter.models.issue import TrackerIssue +from tracker_exporter.utils.helpers import to_snake_case, validate_resource +from tracker_exporter import configure_sentry, run_etl + +from yandex_tracker_client.collections import Issues + + +class CustomIssueFields: + """ + Additional custom fields for Yandex Tracker issue. + Must be created in the Clickhouse issue table. 
+ """ + + def __init__(self, issue: Issues) -> None: + self.foo_custom_field = to_snake_case(validate_resource(issue, "fooCustomField")) + self.bar_custom_field = validate_resource(issue, "barCustomField") + self.baz = True if "baz" in issue.tags else False + + +class ExtendedTrackerIssue(TrackerIssue, CustomIssueFields): + """Extended Yandex Tracker issue model with custom fields.""" + + def __init__(self, issue: Issues) -> None: + super().__init__(issue) + CustomIssueFields.__init__(self, issue) + + +def main() -> None: + """Entry point.""" + run_etl( + ignore_exceptions=False, + issue_model=ExtendedTrackerIssue + ) + + +if __name__ == "__main__": + configure_sentry() + main() diff --git a/tracker_exporter/__init__.py b/tracker_exporter/__init__.py index ff1c1a3..d39f829 100644 --- a/tracker_exporter/__init__.py +++ b/tracker_exporter/__init__.py @@ -1,4 +1,9 @@ -from tracker_exporter.main import run_etl +from tracker_exporter.main import ( + run_etl, + configure_sentry, + configure_jsonfile_storage, + configure_state_service, +) from tracker_exporter.etl import YandexTrackerETL from tracker_exporter.services.clickhouse import ClickhouseClient from tracker_exporter.services.tracker import YandexTrackerClient @@ -20,4 +25,7 @@ "S3FileStorageStrategy", "LocalFileStorageStrategy", "run_etl", + "configure_sentry", + "configure_jsonfile_storage", + "configure_state_service", ] diff --git a/tracker_exporter/_meta.py b/tracker_exporter/_meta.py index 874fabf..537552a 100644 --- a/tracker_exporter/_meta.py +++ b/tracker_exporter/_meta.py @@ -1,4 +1,4 @@ -version = "1.0.0" +version = "1.0.1" url = "https://github.com/akimrx/yandex-tracker-exporter" download_url = "https://pypi.org/project/tracker-exporter/" appname = "yandex_tracker_exporter" diff --git a/tracker_exporter/config.py b/tracker_exporter/config.py index bcda96b..0dc2a22 100644 --- a/tracker_exporter/config.py +++ b/tracker_exporter/config.py @@ -190,7 +190,7 @@ class Settings(BaseSettings): state: 
StateSettings = StateSettings() stateful: bool = False stateful_initial_range: str = "1w" - changelog_export_enabled: bool = True + changelog_export_enabled: bool = False log_etl_stats: bool = True log_etl_stats_each_n_iter: int = 100 diff --git a/tracker_exporter/etl.py b/tracker_exporter/etl.py index 13b2b62..8bcc447 100644 --- a/tracker_exporter/etl.py +++ b/tracker_exporter/etl.py @@ -32,6 +32,7 @@ def __init__( tracker_client: YandexTrackerClient, clickhouse_client: ClickhouseClient, statekeeper: StateKeeper | None = None, + issue_model: TrackerIssue = TrackerIssue, database: str = config.clickhouse.database, issues_table: str = config.clickhouse.issues_table, metrics_table: str = config.clickhouse.issue_metrics_table, @@ -42,6 +43,7 @@ def __init__( self.tracker = tracker_client self.clickhouse = clickhouse_client self.state = statekeeper + self.issue_model = issue_model self.database = database self.issues_table = issues_table self.metrics_table = metrics_table @@ -111,7 +113,7 @@ def build_query_from_filters() -> str: @monitoring.send_time_metric("issue_transform_time_seconds") def _transform(self, issue: Issues) -> ClickhousePayload: """Transform issue to storage-compatible payload format.""" - _issue = TrackerIssue(issue) + _issue = self.issue_model(issue) changelog = _issue._changelog_events metrics = _issue.metrics() @@ -146,7 +148,7 @@ def _export_and_transform( logger.info("Paginated list received, possible new state will be calculated later") else: pagination = False - possible_new_state = self._get_possible_new_state(TrackerIssue(found_issues[-1])) + possible_new_state = self._get_possible_new_state(self.issue_model(found_issues[-1])) et_start_time = time.time() for i, tracker_issue in enumerate(found_issues): @@ -160,7 +162,7 @@ def _export_and_transform( issue, changelog, issue_metrics = self._transform(tracker_issue).model_dump().values() if pagination and i == len(found_issues): logger.info("Trying to get new state from last iteration") - 
possible_new_state = self._get_possible_new_state(TrackerIssue(tracker_issue)) + possible_new_state = self._get_possible_new_state(self.issue_model(tracker_issue)) issues.append(issue) changelog_events.extend(changelog) if not issue_metrics: diff --git a/tracker_exporter/main.py b/tracker_exporter/main.py index 22498c8..c56e070 100644 --- a/tracker_exporter/main.py +++ b/tracker_exporter/main.py @@ -33,6 +33,7 @@ from tracker_exporter.services.monitoring import sentry_events_filter from tracker_exporter.services.state import StateKeeper, LocalFileStorageStrategy, JsonStateStorage from tracker_exporter.models.base import StateStorageTypes, JsonStorageStrategies +from tracker_exporter.models.issue import TrackerIssue from tracker_exporter.etl import YandexTrackerETL from tracker_exporter.services.tracker import YandexTrackerClient from tracker_exporter.services.clickhouse import ClickhouseClient @@ -103,12 +104,13 @@ def configure_state_service() -> StateKeeper | None: return StateKeeper(storage) -def run_etl(ignore_exceptions: bool = False) -> None: +def run_etl(ignore_exceptions: bool = False, issue_model: TrackerIssue = TrackerIssue) -> None: """Start ETL process.""" etl = YandexTrackerETL( tracker_client=YandexTrackerClient(), clickhouse_client=ClickhouseClient(), statekeeper=configure_state_service(), + issue_model=issue_model, ) etl.run( stateful=config.stateful,