diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..be537c9 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,95 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +# env/ +# !/**/env +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +.venv +venv* + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject + +.idea +tests +.git diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fdd346c --- /dev/null +++ b/.gitignore @@ -0,0 +1,131 @@ +scripts/* + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b549090
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,23 @@
+FROM python:3.7-slim
+LABEL maintainer="a.faskhutdinov@yclients.tech"
+LABEL name="tools/tracker-exporter"
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Configure timezone
+RUN apt-get -qq update && apt-get install -yqq tzdata
+ENV TZ=Europe/Moscow
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
+    dpkg-reconfigure -f noninteractive tzdata
+
+# Install dependencies first to leverage the Docker layer cache
+WORKDIR /opt/exporter
+COPY requirements.txt .
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+# Install exporter
+COPY . /opt/exporter/
+RUN python3 setup.py install
+
+CMD ["tracker-exporter"]
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..9ec9cb3
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Akim Faskhutdinov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..381b468
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,45 @@
+.PHONY: clean clean-build clean-pyc dist help
+.DEFAULT_GOAL := help
+
+help:
+	@echo "clean - remove all artifacts"
+	@echo "clean-build - remove build artifacts"
+	@echo "clean-pyc - remove python artifacts"
+	@echo "install - install the package"
+	@echo "init - initialize the development environment"
+	@echo "dist - build package"
+	@echo "release - upload package to PyPi"
+	@echo "lint - check style with pylint"
+
+clean: clean-build clean-pyc
+
+clean-build:
+	rm -rf build/
+	rm -rf dist/
+	rm -rf .eggs/
+	find . -name '*.egg-info' -exec rm -rf {} +
+	find . -name '*.egg' -exec rm -rf {} +
+	find . -name '.DS_Store' -exec rm -f {} +
+
+clean-pyc:
+	find . -name '*.pyc' -exec rm -f {} +
+	find . -name '*.pyo' -exec rm -f {} +
+	find . -name '*~' -exec rm -f {} +
	find . -name '__pycache__' -exec rm -rf {} +
+
+lint:
+	pylint --max-line-length=120 --rcfile=setup.cfg tracker_exporter
+
+dist:
+	python3 setup.py sdist bdist_wheel
+
+release: clean dist
+	python3 -m twine upload --repository pypi dist/*
+
+install: clean
+	python3 setup.py install
+
+init:
+	pip3 install -r requirements-dev.txt
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..150a0c4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,261 @@
+# Yandex.Tracker Exporter
+
+Exports issue metadata and agile metrics to an OLAP data storage. The metrics are based on the issue changelog.
+
+## Self-hosted arch example
+
+![](/docs/images/agile_metrics.png)
+
+You can install Clickhouse with Clickhouse Proxy via the Ansible role included in this project.
+Edit the inventory file `ansible/inventory/hosts.yml` and run the playbook.
+
+> **Attention:**
+> For the role to work correctly, Docker must be installed on the target server.
+
+Example:
+```bash
+pip3 install -r requirements-dev.txt
+cd ansible
+ansible-playbook -i inventory/hosts.yml playbooks/clickhouse.yml --limit agile
+```
+
+
+## Serverless arch example
+
+![](/docs/images/agile_metrics_cloud.png)
+
+### Create a Managed Clickhouse cluster
+
+> How to: https://cloud.yandex.com/en/docs/managed-clickhouse/operations/cluster-create
+
+* Set a user for the exporter, example: `agile`
+* Set a database name, example: `agile`
+* Enable the `Serverless access` flag
+* For testing, enable public access to the host
+* Enable the `Access from the management console` flag
+* Run the migration or create the tables manually (see the migration block [here](#migration) and the [sql](/migrations/clickhouse/))
+
+### Create Cloud Function
+
+> How to: https://cloud.yandex.com/en/docs/functions/quickstart/create-function/python-function-quickstart
+
+* Use Python >= 3.7
+* Copy/paste the example content from `examples/serverless` ([code](/examples/serverless/))
+* Set the entrypoint: `main.handler` (for the code from the examples)
+* Set the function timeout to `600`, because a run can take a long time if many issues were updated during the collection period
+* Set memory to `512MB` or more
+* Add environment variables (see the variables block [here](#environment-variables-settings))
+  ```bash
+  EXPORTER_TRACKER_TOKEN=XXXXXXXXXXXXXXXX
+  EXPORTER_TRACKER_ORG_ID=123456
+  EXPORTER_CLICKHOUSE_PROTO=https
+  EXPORTER_CLICKHOUSE_CERT=/etc/ssl/certs/ca-certificates.crt
+  EXPORTER_CLICKHOUSE_HTTP_PORT=8443
+  EXPORTER_CLICKHOUSE_HOST=rc1b-xxxxxx.mdb.yandexcloud.net
+  EXPORTER_CLICKHOUSE_DATABASE=agile
+  EXPORTER_CLICKHOUSE_USER=agile
+  EXPORTER_CLICKHOUSE_PASSWORD=xxxx
+  EXPORTER_ENABLE_UPLOAD=true
+  EXPORTER_TRACKER_ISSUES_SEARCH_RANGE=2h
+  ```
+* Release the function
+* Run a test
+* See the logs
+
+![](/docs/images/logs.png)
+
+
+##### Serverless database connection without public access
+If you don't want to enable Clickhouse public access, use a service account with the `serverless.mdbProxies.user` permission and set the environment variables below:
+```bash
+EXPORTER_CLICKHOUSE_HOST=akfd3bhqk3xxxxxxxxxxx.clickhouse-proxy.serverless.yandexcloud.net
+EXPORTER_CLICKHOUSE_SERVERLESS_PROXY_ID=akfd3bhqk3xxxxxxxxxxxxx
+```
+
+> How to create a database connection: https://cloud.yandex.com/en/docs/functions/operations/database-connection
+
+Also, when connecting through a service account, the `EXPORTER_CLICKHOUSE_PASSWORD` variable must contain an IAM token instead of a password. Keep in mind that an IAM token is valid only for a limited time, so you should obtain it inside the function code.
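+
+A minimal sketch of a handler that refreshes the token on each invocation (assuming a service account is attached to the function and that the IAM token is exposed via `context.token`; adjust to how you actually obtain the token):
+
+```python
+import os
+
+
+def handler(event, context):
+    # The exporter reads its settings from the environment at import time,
+    # so the variable must be set before importing the package.
+    # Assumption: `context.token` carries the service account IAM token.
+    os.environ["EXPORTER_CLICKHOUSE_PASSWORD"] = context.token["access_token"]
+
+    from tracker_exporter.main import export_cycle_time
+    export_cycle_time(ignore_exceptions=False)
+    return {"statusCode": 200, "message": "success"}
+```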
+
+### Create Trigger
+
+> How to: https://cloud.yandex.com/en/docs/functions/quickstart/create-trigger/timer-quickstart
+
+* Create a new trigger
+* Choose type `Timer`
+* Set the interval to every hour: `0 * ? * * *`
+* Select your function
+* Create a serverless service account or use an existing one
+* Save the trigger
+
+
+# Visualization
+
+You can use any BI tool for visualization, for example:
+- Yandex DataLens
+- Apache Superset
+- PowerBI
+- Grafana
+
+![](/docs/images/datalens_example.png)
+
+
+# Migration
+
+Based on the [go-migrate](https://github.com/golang-migrate/migrate) tool.
+
+## Download and install go-migrate tool
+
+### macOS
+```shell
+wget https://github.com/golang-migrate/migrate/releases/download/v4.15.2/migrate.darwin-amd64.tar.gz -O migrate.tar.gz
+
+tar xvf migrate.tar.gz
+mv migrate ~/bin
+```
+
+### Linux
+```shell
+wget https://github.com/golang-migrate/migrate/releases/download/v4.15.2/migrate.linux-amd64.tar.gz -O migrate.tar.gz
+
+tar -xvf migrate.tar.gz
+mv migrate /usr/local/bin
+```
+
+## Run migration
+
+Example bash script:
+
+```bash
+#!/usr/bin/env bash
+
+set -Eeuo pipefail
+
+CLICKHOUSE_HOST="localhost"
+CLICKHOUSE_TCP_PORT=9000
+CLICKHOUSE_HTTP_PORT=8123
+CLICKHOUSE_USER="default"
+CLICKHOUSE_PASSWORD="strongpassword"
+
+MIGRATION_SOURCE_PATH="file://${PWD}/../migrations/clickhouse"
+MIGRATION_HISTORY_TABLE="ci_gomigrate_migrations"
+MIGRATION_DATABASE="agile"
+
+MIGRATION_CLICKHOUSE_DSN="clickhouse://${CLICKHOUSE_HOST}:${CLICKHOUSE_TCP_PORT}?username=${CLICKHOUSE_USER}&password=${CLICKHOUSE_PASSWORD}&database=${MIGRATION_DATABASE}&x-multi-statement=true&x-migrations-table=${MIGRATION_HISTORY_TABLE}"
+
+prepare_migration() {
+    echo "CREATE DATABASE IF NOT EXISTS ${MIGRATION_DATABASE}" | \
+        curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}&password=${CLICKHOUSE_PASSWORD}" --data-binary @-
+}
+
+run_migration() {
+    migrate -verbose \
+        -source $MIGRATION_SOURCE_PATH \
+        -database $MIGRATION_CLICKHOUSE_DSN \
+        up
+}
+
+prepare_migration
+run_migration
+```
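+
+After the migration you can verify that the tables were created, e.g. via the Clickhouse HTTP interface (a quick check assuming the defaults from the script above):
+
+```bash
+echo "SHOW TABLES FROM agile" | \
+    curl "http://localhost:8123/?user=default&password=strongpassword" --data-binary @-
+```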
+
+# Environment variables (settings)
+
+| variable | required? | default | description |
+|----------|-----------|---------|-------------|
+| `EXPORTER_LOGLEVEL` | ❌ | `info` | One of: `debug`, `info`, `warning`, `error`, `exception` |
+| `EXPORTER_ENABLE_UPLOAD` | ❌ | `false` | Enable/disable upload to the Clickhouse storage |
+| `EXPORTER_MONITORING_ENABLED` | ❌ | `false` | Enable sending statsd metrics |
+| `EXPORTER_MONITORING_HOST` | ❌ | `localhost` | Monitoring statsd hostname |
+| `EXPORTER_MONITORING_PORT` | ❌ | `8125` | Monitoring statsd UDP port |
+| `EXPORTER_MONITORING_PREFIX` | ❌ | `tracker_exporter` | Prefix for all sent metrics, i.e.: `{prefix}_{metric_name}` |
+| `EXPORTER_SENTRY_ENABLED` | ❌ | `false` | Send exceptions and errors to Sentry |
+| `EXPORTER_SENTRY_DSN` | ❌ | None | Sentry DSN like https://{id}@{sentry_url} |
+| `EXPORTER_TRACKER_TOKEN` | ✅ | None | Yandex.Tracker OAuth token |
+| `EXPORTER_TRACKER_ORG_ID` | ✅ | None | Yandex.Tracker organization ID |
+| `EXPORTER_TRACKER_ISSUES_SEARCH_RANGE` | ❌ | `2h` | The search range for recently updated issues, i.e.: `Updated >= now() - {VARIABLE}` |
+| `EXPORTER_TRACKER_ISSUES_SEARCH_QUERY` | ❌ | None | A search query string like `Queue: SRE and status: closed` |
+| `EXPORTER_TRACKER_ISSUES_FETCH_INTERVAL` | ❌ | `30` | Exporter job run interval in minutes for issues and metrics |
+| `EXPORTER_CLOSED_ISSUE_STATUSES` | ❌ | `closed,rejected,resolved,cancelled,released` | Lowercase comma-separated statuses which will be flagged as `is_closed` |
+| `EXPORTER_CLICKHOUSE_PROTO` | ❌ | `http` | Clickhouse protocol - HTTP or HTTPS |
+| `EXPORTER_CLICKHOUSE_HOST` | ❌ | `localhost` | Clickhouse hostname |
+| `EXPORTER_CLICKHOUSE_HTTP_PORT` | ❌ | `8123` | Clickhouse HTTP(S) port |
+| `EXPORTER_CLICKHOUSE_USER` | ❌ | `default` | Clickhouse read-write username |
+| `EXPORTER_CLICKHOUSE_PASSWORD` | ✅ | None | Clickhouse user password. **If your Clickhouse user works without a password, just ignore this variable.** |
+| `EXPORTER_CLICKHOUSE_CERT` | ❌ | `None` | Path to the CA certificate. Only for HTTPS |
+| `EXPORTER_CLICKHOUSE_SERVERLESS_PROXY_ID` | ❌ | `None` | Database connection ID. Only for serverless |
+| `EXPORTER_CLICKHOUSE_DATABASE` | ❌ | `agile` | Database for the exporter CH tables |
+| `EXPORTER_CLICKHOUSE_ISSUES_TABLE` | ❌ | `issues` | Table where issue metadata is stored |
+| `EXPORTER_CLICKHOUSE_ISSUE_METRICS_TABLE` | ❌ | `issue_metrics` | Table where issue metrics are stored |
+
+
+# Usage
+
+## Native
+
+### Install from source
+
+```bash
+python3 -m venv venv
+source venv/bin/activate
+python3 setup.py install
+
+export EXPORTER_TRACKER_TOKEN="xxxx"
+export EXPORTER_TRACKER_ORG_ID="123456"
+
+export EXPORTER_TRACKER_ISSUES_SEARCH_RANGE="6h"
+export EXPORTER_TRACKER_ISSUES_FETCH_INTERVAL=30
+
+export EXPORTER_CLICKHOUSE_USER="default"
+export EXPORTER_CLICKHOUSE_PASSWORD="strongpassword"
+export EXPORTER_CLICKHOUSE_HOST="clickhouse01.example.com"
+export EXPORTER_CLICKHOUSE_HTTP_PORT="8121"
+
+export EXPORTER_LOGLEVEL="info"
+export EXPORTER_ENABLE_UPLOAD=true
+
+tracker-exporter
+```
+
+### Install from pypi
+
+```bash
+pip3 install tracker-exporter
+tracker-exporter
+```
+
+### Use .env file
+
+```bash
+tracker-exporter --env-file /home/akimrx/tracker/.settings
+```
+
+
+## Docker
+
+```bash
+cd yandex-tracker-exporter
+touch .env  # prepare the environment variables file (dotenv), like the example above
+docker-compose up -d --build
+docker logs tracker-exporter -f
+```
+
+# Monitoring
+
+| Metric name | Metric type | Labels | Description |
+|-------------|-------------|--------|-------------|
+| `tracker_exporter_clickhouse_insert_time_seconds` | `time` | `project` | Insert query time |
+| `tracker_exporter_clickhouse_optimize_time_seconds` | `time` | `project` | Optimize query time |
+| `tracker_exporter_clickhouse_inserted_rows` | `gauge` | `project`, `database`, `table` | Rows inserted into Clickhouse in the last update |
+| `tracker_exporter_cycle_time_total_processing_time_seconds` | `time` | `project` | Total issues processing time |
+| `tracker_exporter_issue_transform_time_seconds` | `time` | `project` | Time to transform one issue into an object |
+| `tracker_exporter_issues_search_time_seconds` | `time` | `project` | Yandex.Tracker search time |
+| `tracker_exporter_issues_processing_time_seconds` | `time` | `project` | Time to transform a batch of issues into objects |
+| `tracker_exporter_issues_total_processed_count` | `count` | `project`, `source` | Processed issues from Yandex.Tracker |
+| `tracker_exporter_issues_without_metrics` | `gauge` | `project` | Issues with empty metrics |
+| `tracker_exporter_upload_status` | `gauge` | `project` | Status of the data upload to the storage |
+| `tracker_exporter_last_update_timestamp` | `timestamp gauge` | `project` | Timestamp of the last data upload to the storage |
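+
+To quickly eyeball the metrics during local development, you can listen on the statsd UDP port (a rough check, assuming the default `localhost:8125`):
+
+```bash
+nc -lu 8125
+```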
diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg
new file mode 100644
index 0000000..9a8769a
--- /dev/null
+++ b/ansible/ansible.cfg
@@ -0,0 +1,31 @@
+[defaults]
+forks = 30
+poll_interval = 1
+transport = smart
+gather_subset = all,!facter,!ohai
+host_key_checking = False
+stdout_callback = skippy
+callback_whitelist = profile_tasks, timer, skippy, actionable, counter_enabled
+remote_user = root
+display_ok_hosts = no
+display_skipped_hosts = no
+private_key_file = ~/.ssh/id_rsa
+ansible_managed = Generated by Ansible
+retry_files_enabled = False
+deprecation_warnings = False
+
+
+[ssh_connection]
+ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o PreferredAuthentications=publickey -o IdentitiesOnly=yes
+pipelining = True
+
+[colors]
+highlight = white
+verbose = dark gray
+warn = bright purple
+error = bright red
+changed = bright blue
+
+[inventory]
+cache=True
+cache_plugin=jsonfile
\ No newline at end of file
diff --git a/ansible/inventory/group_vars/all.yml b/ansible/inventory/group_vars/all.yml
new file mode 100644
index 0000000..e69de29
diff --git a/ansible/inventory/hosts.yml b/ansible/inventory/hosts.yml
new file mode 100644
index 0000000..a5dab98
--- /dev/null
+++ b/ansible/inventory/hosts.yml
@@ -0,0 +1,8 @@
+---
+all:
+  children:
+    agile:
+      hosts:
+        server01:
+          ansible_host: localhost
+          ansible_user: root
diff --git a/ansible/playbooks/clickhouse.yml b/ansible/playbooks/clickhouse.yml
new file mode 100644
index 0000000..944962a
--- /dev/null
+++ b/ansible/playbooks/clickhouse.yml
@@ -0,0 +1,4 @@
+---
+- hosts: agile
+  roles:
+    - role: clickhouse
diff --git a/ansible/playbooks/roles/clickhouse/defaults/main.yml b/ansible/playbooks/roles/clickhouse/defaults/main.yml
new file mode 100644
index 0000000..4fd2e45
--- /dev/null
+++ b/ansible/playbooks/roles/clickhouse/defaults/main.yml
@@ -0,0 +1,69 @@
+---
+clickhouse_general_owner: root
+clickhouse_system_user: root
+clickhouse_system_group: root
+
+clickhouse_base_root_dir: /opt
+clickhouse_base_config_dir: "{{ clickhouse_base_root_dir }}/conf"
+clickhouse_base_data_dir: "{{ clickhouse_base_root_dir }}/data"
+
+clickhouse_data_dir: "{{ clickhouse_base_data_dir }}/clickhouse"
+clickhouse_config_dir: "{{ clickhouse_base_config_dir }}/clickhouse"
+
+clickhouse_version: 22.1.3.7
+clickhouse_prompt: "{{ inventory_hostname }} - internal agile"
+clickhouse_listen_address: 0.0.0.0
+clickhouse_listen_http_port: 8123
+clickhouse_listen_tcp_port: 9000
+
+# clickhouse_zookeeper_hosts is a mapping of FQDN to port, e.g.:
+# clickhouse_zookeeper_hosts:
+#   zoo1.example.com: 2181
+#   zoo2.example.com: 2181
+clickhouse_zookeeper_hosts: {}
+clickhouse_zookeeper_enabled: false
+
+clickhouse_docker_limit_cpu: 4
+clickhouse_docker_limit_memory: 12gb
+
+clickhouse_max_concurrent_queries: 300
+clickhouse_max_connections: 4096
+
+clickhouse_users:
+  default:
+    username: default
+    password: defaultpassword  # CHANGEME
+    profile: default
+    quota: default
+  exporter:
+    username: exporter
+    password: exporterpassword  # CHANGEME
+    profile: default
+    quota: default
+  readonly:
+    username: readonly
+    password: readonlypassword  # CHANGEME
+    profile: readonly
+    quota: default
+
+clickhouse_proxy_enabled: true
+clickhouse_proxy_version: 1.16.0
+clickhouse_proxy_binary_dir: /usr/local/bin
+clickhouse_proxy_systemd_service_dir: /lib/systemd/system
+clickhouse_proxy_base_cache_dir: "{{ clickhouse_base_data_dir }}/chproxy"
+clickhouse_proxy_cache_dir: "{{ clickhouse_proxy_base_cache_dir }}/cache"
+clickhouse_proxy_binary_name: chproxy
+clickhouse_proxy_config_dir: "{{ clickhouse_config_dir }}/proxy.yml"
+clickhouse_proxy_service_name: chproxy
+clickhouse_proxy_listen_address: 0.0.0.0
+clickhouse_proxy_listen_http_port: 8080
+clickhouse_proxy_allowed_users:
+  - readonly
+clickhouse_proxy_allowed_networks:
+  - 178.154.242.176/28  # DataLens
+  - 178.154.242.192/28  # DataLens
+  - 178.154.242.208/28  # DataLens
+  - 178.154.242.128/28  # DataLens
+  - 178.154.242.144/28  # DataLens
+  - 178.154.242.160/28  # DataLens
+  - 127.0.0.1
diff --git a/ansible/playbooks/roles/clickhouse/handlers/main.yml b/ansible/playbooks/roles/clickhouse/handlers/main.yml
new file mode 100644
index 0000000..f46bca2
--- /dev/null
+++ b/ansible/playbooks/roles/clickhouse/handlers/main.yml
@@ -0,0 +1,15 @@
+---
+- name: clickhouse - restart docker composition
+  docker_compose:
+    project_src: "{{ clickhouse_config_dir }}"
+    build: no
+    restarted: true
ignore_errors: true + when: not ansible_check_mode + +- name: clickhouse - restart proxy service + systemd: + daemon_reload: true + name: "{{ clickhouse_proxy_service_name }}" + state: restarted + when: not ansible_check_mode diff --git a/ansible/playbooks/roles/clickhouse/tasks/chproxy.yml b/ansible/playbooks/roles/clickhouse/tasks/chproxy.yml new file mode 100644 index 0000000..ea560c4 --- /dev/null +++ b/ansible/playbooks/roles/clickhouse/tasks/chproxy.yml @@ -0,0 +1,72 @@ +--- +- name: clickhouse - check proxy binary exists + stat: + path: "{{ clickhouse_proxy_binary_dir }}/{{ clickhouse_proxy_binary_name }}" + register: clickhouse_proxy_binary + +- block: + - name: clickhouse - download proxy binary + get_url: + url: "https://github.com/ContentSquare/chproxy/releases/download/v{{ clickhouse_proxy_version }}/chproxy_{{ clickhouse_proxy_version }}_linux_{{ system_arch }}.tar.gz" + dest: "/tmp/chproxy-{{ clickhouse_proxy_version }}.linux-{{ system_arch }}.tar.gz" + mode: "0644" + register: download_binary + until: download_binary is succeeded + retries: 5 + delay: 2 + delegate_to: localhost + + - name: clickhouse - unpack proxy binary + become: false + unarchive: + src: "/tmp/chproxy-{{ clickhouse_proxy_version }}.linux-{{ system_arch }}.tar.gz" + dest: "/tmp" + creates: "/tmp/chproxy-{{ clickhouse_proxy_version }}.linux-{{ system_arch }}/{{ clickhouse_proxy_binary_name }}" + delegate_to: localhost + check_mode: false + + - name: clickhouse - propagate proxy binary + copy: + src: "/tmp/chproxy" + dest: "{{ clickhouse_proxy_binary_dir }}/{{ clickhouse_proxy_binary_name }}" + mode: "0755" + owner: "{{ clickhouse_general_owner }}" + group: "{{ clickhouse_general_owner }}" + + when: not clickhouse_proxy_binary.stat.exists + +- name: clickhouse - create cache dir for proxy + file: + path: "{{ item }}" + state: directory + mode: "0755" + with_items: + - "{{ clickhouse_proxy_base_cache_dir }}" + - "{{ clickhouse_proxy_cache_dir }}" + +- block: + - name: clickhouse - configure proxy systemd service + template: + src: templates/proxy/chproxy.service.j2 + dest: "{{ clickhouse_proxy_systemd_service_dir }}/{{ clickhouse_proxy_service_name }}.service" + mode: "0644" + owner: "{{ clickhouse_general_owner }}" + group: "{{ clickhouse_general_owner }}" + + - name: clickhouse - configure proxy + template: + src: templates/proxy/config.yml.j2 + dest: "{{ clickhouse_config_dir }}/proxy.yml" + owner: "{{ clickhouse_general_owner }}" + group: "{{ clickhouse_general_owner }}" + mode: "0644" + notify: + clickhouse - restart proxy service + +- name: clickhouse - start proxy + systemd: + daemon_reload: true + name: "{{ clickhouse_proxy_service_name }}" + enabled: true + state: started + when: not ansible_check_mode diff --git a/ansible/playbooks/roles/clickhouse/tasks/configure.yml b/ansible/playbooks/roles/clickhouse/tasks/configure.yml new file mode 100644 index 0000000..2f440e6 --- /dev/null +++ b/ansible/playbooks/roles/clickhouse/tasks/configure.yml @@ -0,0 +1,33 @@ +--- +- name: clickhouse - create clickhouse dirs + file: + path: "{{ item }}" + state: directory + mode: "0755" + owner: "{{ clickhouse_system_user }}" + group: "{{ clickhouse_system_group }}" + with_items: + - "{{ clickhouse_config_dir }}" + - "{{ clickhouse_data_dir }}" + tags: + - clickhouse-install + +- name: clickhouse - server configuration + template: + src: templates/server.xml.j2 + dest: "{{ clickhouse_config_dir }}/config.xml" + mode: "0644" + notify: + - clickhouse - restart docker composition + tags: + - clickhouse-config + +- name: 
clickhouse - users configuration + template: + src: templates/users.xml.j2 + dest: "{{ clickhouse_config_dir }}/users.xml" + mode: "0644" + notify: + - clickhouse - restart docker composition + tags: + - clickhouse-users diff --git a/ansible/playbooks/roles/clickhouse/tasks/install.yml b/ansible/playbooks/roles/clickhouse/tasks/install.yml new file mode 100644 index 0000000..08b40ab --- /dev/null +++ b/ansible/playbooks/roles/clickhouse/tasks/install.yml @@ -0,0 +1,44 @@ +--- +- name: clickhouse - prepare docker composition + template: + src: templates/docker/docker-compose.yml.j2 + dest: "{{ clickhouse_config_dir }}/docker-compose.yml" + mode: "0644" + register: compose_file + +- name: clickhouse - clean previous docker composition + docker_compose: + project_name: clickhouse + project_src: "{{ clickhouse_config_dir }}" + state: absent + remove_volumes: true + remove_orphans: true + ignore_errors: true + when: compose_file.changed + +- name: clickhouse - up docker composition + docker_compose: + project_name: clickhouse + project_src: "{{ clickhouse_config_dir }}" + state: present + build: false + pull: true + nocache: false + when: compose_file.changed + +- name: clickhouse - wait while docker composition become ready + wait_for: + port: "{{ item | int }}" + delay: 10 + with_items: + - "{{ clickhouse_listen_http_port }}" + - "{{ clickhouse_listen_tcp_port }}" + +- name: clickhouse - prune docker useless artifacts + docker_prune: + containers: true + images: true + networks: true + volumes: true + builder_cache: true + when: clickhouse_docker_prune_after_up diff --git a/ansible/playbooks/roles/clickhouse/tasks/main.yml b/ansible/playbooks/roles/clickhouse/tasks/main.yml new file mode 100644 index 0000000..c355e72 --- /dev/null +++ b/ansible/playbooks/roles/clickhouse/tasks/main.yml @@ -0,0 +1,26 @@ +--- + +- import_tasks: preflight.yml + tags: + - clickhouse + - clickhouse-install + - clickhouse-config + +- import_tasks: configure.yml + tags: + - clickhouse + - clickhouse-install + - clickhouse-config + +- import_tasks: install.yml + tags: + - clickhouse + - clickhouse-install + - clickhouse-docker-config + +- import_tasks: chproxy.yml + tags: + - clickhouse + - clickhouse-install + - clickhouse-proxy + when: clickhouse_proxy_enabled diff --git a/ansible/playbooks/roles/clickhouse/tasks/preflight.yml b/ansible/playbooks/roles/clickhouse/tasks/preflight.yml new file mode 100644 index 0000000..4964d1c --- /dev/null +++ b/ansible/playbooks/roles/clickhouse/tasks/preflight.yml @@ -0,0 +1,32 @@ +--- +- name: clickhouse - create base directories + file: + path: "{{ item }}" + state: directory + mode: "0755" + owner: "{{ clickhouse_general_owner }}" + group: "{{ clickhouse_general_owner }}" + with_items: + - "{{ clickhouse_base_root_dir }}" + - "{{ clickhouse_base_config_dir }}" + - "{{ clickhouse_base_data_dir }}" + tags: + - clickhouse-install + - clickhouse-config + +- name: clickhouse - create group + group: + name: "{{ clickhouse_system_group }}" + state: present + system: true + when: clickhouse_system_group != "root" + +- name: clickhouse - create user + user: + name: "{{ clickhouse_system_user }}" + groups: "{{ clickhouse_system_group }}" + append: true + shell: /usr/sbin/nologin + system: true + create_home: false + when: clickhouse_system_user != "root" diff --git a/ansible/playbooks/roles/clickhouse/templates/docker/docker-compose.yml.j2 b/ansible/playbooks/roles/clickhouse/templates/docker/docker-compose.yml.j2 new file mode 100644 index 0000000..04077e0 --- /dev/null +++ 
b/ansible/playbooks/roles/clickhouse/templates/docker/docker-compose.yml.j2 @@ -0,0 +1,21 @@ +version: '3' + +services: + agile-clickhouse: + image: "yandex/clickhouse-server:{{ clickhouse_version }}" + restart: unless-stopped + container_name: agile-clickhouse + hostname: agile-clickhouse + volumes: + - "{{ clickhouse_data_dir }}:/var/lib/clickhouse" + - "{{ clickhouse_config_dir }}/config.xml:/etc/clickhouse-server/config.xml:ro" + - "{{ clickhouse_config_dir }}/users.xml:/etc/clickhouse-server/users.xml:ro" + cpus: "{{ clickhouse_docker_limit_cpu }}" + mem_limit: "{{ clickhouse_docker_limit_memory }}" + cap_add: + - SYS_ADMIN + - SYS_NICE + - NET_ADMIN + ports: + - "{{ clickhouse_listen_tcp_port }}:{{ clickhouse_listen_tcp_port }}" + - "{{ clickhouse_listen_http_port }}:{{ clickhouse_listen_http_port }}" diff --git a/ansible/playbooks/roles/clickhouse/templates/proxy/chproxy.service.j2 b/ansible/playbooks/roles/clickhouse/templates/proxy/chproxy.service.j2 new file mode 100644 index 0000000..474fb7a --- /dev/null +++ b/ansible/playbooks/roles/clickhouse/templates/proxy/chproxy.service.j2 @@ -0,0 +1,25 @@ +# {{ ansible_managed }} + +[Unit] +Description=ClickHouse HTTP proxy and load balancer +Documentation=https://github.com/Vertamedia/chproxy +After=network-online.target systemd-resolved.service +Wants=network-online.target systemd-resolved.service + +[Service] +Type=simple +User={{ clickhouse_general_owner }} +Group={{ clickhouse_general_owner }} + +StandardOutput=syslog +StandardError=syslog +SyslogIdentifier={{ clickhouse_proxy_service_name }} + +ExecStart={{ clickhouse_proxy_binary_dir }}/{{ clickhouse_proxy_service_name }} \ + -config={{ clickhouse_config_dir }}/proxy.yml + +Restart=always +RestartSec=3 + +[Install] +WantedBy=multi-user.target diff --git a/ansible/playbooks/roles/clickhouse/templates/proxy/config.yml.j2 b/ansible/playbooks/roles/clickhouse/templates/proxy/config.yml.j2 new file mode 100644 index 0000000..1a8204f --- /dev/null +++ b/ansible/playbooks/roles/clickhouse/templates/proxy/config.yml.j2 @@ -0,0 +1,51 @@ +#jinja2: trim_blocks:True, lstrip_blocks:True +--- +hack_me_please: true # disable config security checks +log_debug: false + +server: + http: + listen_addr: "{{ clickhouse_proxy_listen_address }}:{{ clickhouse_proxy_listen_http_port }}" + read_timeout: 5m + allowed_networks: + {% for network in clickhouse_proxy_allowed_networks %} + - {{ network }} + {% endfor %} + + metrics: + allowed_networks: + {% for network in clickhouse_proxy_allowed_networks %} + - {{ network }} + {% endfor %} + +clusters: +- name: agile + nodes: + - 127.0.0.1:{{ clickhouse_listen_http_port }} + + kill_query_user: + name: "{{ clickhouse_users.default.username }}" + password: "{{ clickhouse_users.default.password }}" + + users: + {% for user in clickhouse_users %} + {% if user in clickhouse_proxy_allowed_users %} + + - name: {{ clickhouse_users[user].username }} + password: {{ clickhouse_users[user].password }} + {% endif %} + {% endfor %} + +users: +{% for user in clickhouse_users %} +{% if user in clickhouse_proxy_allowed_users %} +- name: {{ clickhouse_users[user].username }} + password: {{ clickhouse_users[user].password }} + to_cluster: agile + to_user: {{ clickhouse_users[user].username }} + allowed_networks: + {% for network in clickhouse_proxy_allowed_networks %} + - {{ network }} + {% endfor %} +{% endif %} +{% endfor %} diff --git a/ansible/playbooks/roles/clickhouse/templates/server.xml.j2 b/ansible/playbooks/roles/clickhouse/templates/server.xml.j2 new file mode 100644 
index 0000000..809f8a0
--- /dev/null
+++ b/ansible/playbooks/roles/clickhouse/templates/server.xml.j2
@@ -0,0 +1,417 @@
+<yandex>
+    <logger>
+        <level>warning</level>
+        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
+        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
+        <size>50M</size>
+        <count>3</count>
+    </logger>
+
+    <display_name>{{ clickhouse_prompt }}</display_name>
+    <http_port>{{ clickhouse_listen_http_port }}</http_port>
+    <tcp_port>{{ clickhouse_listen_tcp_port }}</tcp_port>
+
+    <openSSL>
+        <server>
+            <certificateFile>/etc/clickhouse-server/server.crt</certificateFile>
+            <privateKeyFile>/etc/clickhouse-server/server.key</privateKeyFile>
+            <dhParamsFile>/etc/clickhouse-server/dhparam.pem</dhParamsFile>
+            <verificationMode>none</verificationMode>
+            <loadDefaultCAFile>true</loadDefaultCAFile>
+            <cacheSessions>true</cacheSessions>
+            <disableProtocols>sslv2,sslv3</disableProtocols>
+            <preferServerCiphers>true</preferServerCiphers>
+        </server>
+        <client>
+            <loadDefaultCAFile>true</loadDefaultCAFile>
+            <cacheSessions>true</cacheSessions>
+            <disableProtocols>sslv2,sslv3</disableProtocols>
+            <preferServerCiphers>true</preferServerCiphers>
+            <invalidCertificateHandler>
+                <name>RejectCertificateHandler</name>
+            </invalidCertificateHandler>
+        </client>
+    </openSSL>
+
+    <interserver_http_port>5788</interserver_http_port>
+    <interserver_http_host>{{ inventory_hostname }}</interserver_http_host>
+
+    <listen_host>{{ clickhouse_listen_address }}</listen_host>
+
+    <max_connections>{{ clickhouse_max_connections | int }}</max_connections>
+    <keep_alive_timeout>3</keep_alive_timeout>
+    <max_concurrent_queries>{{ clickhouse_max_concurrent_queries | int }}</max_concurrent_queries>
+
+    <uncompressed_cache_size>8589934592</uncompressed_cache_size>
+    <mark_cache_size>5368709120</mark_cache_size>
+
+    <path>/var/lib/clickhouse/</path>
+    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
+    <user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
+
+    <users_config>users.xml</users_config>
+    <default_profile>default</default_profile>
+    <default_database>default</default_database>
+
+    {% if clickhouse_zookeeper_enabled and clickhouse_zookeeper_hosts -%}
+    <zookeeper>
+        {% for host, port in clickhouse_zookeeper_hosts.items() -%}
+        <node>
+            <host>{{ host }}</host>
+            <port>{{ port | int }}</port>
+        </node>
+        {% endfor %}
+        <session_timeout_ms>120000</session_timeout_ms>
+        <operation_timeout_ms>30000</operation_timeout_ms>
+    </zookeeper>
+    {% endif %}
+
+    <macros>
+        <shard>01</shard>
+        <replica>{{ inventory_hostname }}</replica>
+    </macros>
+
+    <builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
+    <max_session_timeout>3600</max_session_timeout>
+    <default_session_timeout>60</default_session_timeout>
+
+    <query_log>
+        <database>system</database>
+        <table>query_log</table>
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </query_log>
+
+    <dictionaries_config>*_dictionary.xml</dictionaries_config>
+
+    <distributed_ddl>
+        <path>/clickhouse/task_queue/ddl</path>
+    </distributed_ddl>
+
+    <max_table_size_to_drop>0</max_table_size_to_drop>
+
+    <graphite_rollup_example>
+        <pattern>
+            <regexp>click_cost</regexp>
+            <function>any</function>
+            <retention>
+                <age>0</age>
+                <precision>3600</precision>
+            </retention>
+            <retention>
+                <age>86400</age>
+                <precision>60</precision>
+            </retention>
+        </pattern>
+        <default>
+            <function>max</function>
+            <retention>
+                <age>0</age>
+                <precision>60</precision>
+            </retention>
+            <retention>
+                <age>3600</age>
+                <precision>300</precision>
+            </retention>
+            <retention>
+                <age>86400</age>
+                <precision>3600</precision>
+            </retention>
+        </default>
+    </graphite_rollup_example>
+
+    <format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
+</yandex>
\ No newline at end of file diff --git a/ansible/playbooks/roles/clickhouse/templates/users.xml.j2 b/ansible/playbooks/roles/clickhouse/templates/users.xml.j2 new file mode 100644 index 0000000..0b268a4 --- /dev/null +++ b/ansible/playbooks/roles/clickhouse/templates/users.xml.j2 @@ -0,0 +1,64 @@ + + + + + + + + 10000000000 + + + 0 + + + random + + + + + 2 + + + + + + {% for user in clickhouse_users -%} + + <{{ clickhouse_users[user].username }}> + {{ clickhouse_users[user].password }} + + ::/0 + + {{ clickhouse_users[user].profile }} + {{ clickhouse_users[user].quota }} + + + {% endfor %} + + + + + + + + + + + 3600 + + + 0 + 0 + 0 + 0 + 0 + + + + \ No newline at end of file diff --git a/ansible/playbooks/roles/clickhouse/vars/main.yml b/ansible/playbooks/roles/clickhouse/vars/main.yml new file mode 100644 index 0000000..bb0b723 --- /dev/null +++ b/ansible/playbooks/roles/clickhouse/vars/main.yml @@ -0,0 +1,8 @@ +--- +clickhouse_docker_prune_after_up: true + +system_arch_map: + i386: "386" + x86_64: "amd64" + +system_arch: "{{ system_arch_map[ansible_architecture] | default(ansible_architecture) }}" diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..8effd1b --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,12 @@ +version: '3.1' + +services: + tracker-exporter: + container_name: tracker-exporter + hostname: tracker-exporter + restart: unless-stopped + build: . + volumes: + - ./.env:/opt/exporter/.env:ro + command: | + tracker-exporter --env-file /opt/exporter/.env diff --git a/docs/diagrams/agile_metrics.drawio b/docs/diagrams/agile_metrics.drawio new file mode 100644 index 0000000..365504c --- /dev/null +++ b/docs/diagrams/agile_metrics.drawio @@ -0,0 +1 @@ +7Rtrd6LI8tfk4+7h6ehHIuiQY2OMmAx+uQeBIIjiRQyPX3+ruhvFaGZn7maSyW4ykwNUF931ol7duZL763KYudslSf0guZIEv7yS9StJEhVJusL/gl8xyJcOB4RZ5HOkI2Aa1QEHChy6j/xgd4KYp2mSR9tToJduNoGXn8DcLEuLU7THNDlddeuGwRlg6rnJOfQh8vMlg3ZV4Qj/GkThsllZFPjI2m2QOWC3dP20aIFk40ruZ2mas7t12Q8SFF4jF/be4IXRA2FZsMl/5AV9qlXjtBv3ojDNUtPxgsHDH6LMpnlykz3nmFObV40IsnS/8QOcRbySr4tllAfTrevhaAFKB9gyXyd8+MClAA9h4u52/H63CnJvyR8e003OdS124NnNvOYRh93FLk32eaAdwfhSlCT9NEkzeN6kmwAnzbN0FTTAK0lWpa70+MhXGLjrKEG7uw8y3924AD4XGpfjU5DlQdkCcSEOg3Qd5FkFKAeb5grlFq00Gi+O9iF3OWzZsg2pQXS5TYaHuY9qgxuuuZ/R4l8rEfSyxdtoTQ3+GvmNwMxH7iJIbtNdlEfpBsYXaZ6n6xaClkQhDuQpapq+ru227FOjumoeHqMSjeSar6D7bg4S19ijNNg9hVfSdQmGIvVvv1rSvLpWFg/l3quFyP16J3h6+jSSfdmvVJlU6pO39p5IrBWk36v9tReZX/3t/Otdejs1axKZoTu8386lpdA8++sk8YWbp0AXItLXClMnFf2NrtfuQ7m7nd7sF5KamLGyNuXlclyXhfPtLjWH8+1iWOTe5n43t4Vo/m2eLNa91bxvho5Ubr2+WPkPZQLvJ/76Hua4W8GcK3i2iO2ExCCV1VcUUhulqWshqc3QqpTSqjXJqjSgTSutFcErw4tXtambwigOa2IbFJ/OU9/o9L04VMZT+l7B8aqxTULgCecBmLcntldYsRe28AFuSKZ+7R75NWPLNhWymg/NqAuyv4ksxbF3XW84ENw+wxp9uxEXw1nPXN9L8wf1aT6cRObQ2rnftIsSmYN0bduoR/FKhVUrIjrleBaWQIk6tpMHElsmscl+bK/Uh5VQEgoPgcPlA7HvYMzDMeU+NpSpzucZbC2Yq4S5VITBWD3VE/ewTv8wJjN8pyRCyOdeIp5yimfyOTh8kEZcyz1z46fut7vkVCIjGeyxVppP5+/5B4gBf6onHkIWpTMPoV5wEOqv8g/Sv8w/WM/8g3XZP0j095f6h5D5B7BUUs/wGwZaNLBK8A3svhwbBK8cB/xAjF8IERl9hoLXSR3uie5wmNbgsHt7qcO8zZzsyvyB8mb+ADytWY1iT7R1o+TfYkX6iohejdjm3rI1AXAKGCvpdwqeC3wicGKAN1shp/Cte4gD7xPVMkgJ3lIl9oTiWHVYoocbxZPCQr8zxW9/FVIJwzj6m0ltqKN4pp6M4/x6KLF5PYXgWKRUVjNWr4RJjbSHEmoB3ivH6I1jsrd0pOtAM3jvA0/g54Cn+l4n8YTiwloVp72knh78GX1nRvmQxjrSCRZgmy38o9yor0Rr6CuyNStKRj/6TQdkRyrgDfBmhYVRIEK5eCH6WfiFaIJ+0VDRmkCuMsWplHpsWyD7CaxlCAzHU1oy5D6by5D55bYMj+MHGTZzIM2mOJ621gF5jHVHatFSAS0ClSXwADTIGM3Y+rOaJA7jVU+YDO0Vrl+2ZQiybulYa+iDiOgo3L74uNl6H2WKft8Txvd8jfutxXAmoLOJ2rJRWAf1OaA2SnRNhIjNYVRWwCc81yhrtOGJQKZ0vAB+OeyevRsjHWCnuobrC9yeSqBROMpYQ5sCHpBHg2YGh3
G0t1oDGSFtk4Iw+akWegS2NtBN5VdR+QFv1on8ZvuDrmoqA4nHaPq9oc1AfKb2gBlDe5xmKGhvzT2bt+U5bmR3mAjzqfl0GzlxMDS+3DKvC17kFSKmrD7LqL8cIugbRMzVxlGySfc/q951ZC70xKqX6h/yWYAMfKgL+WOa5cs0TDduYhyh18c6CePeEWeUYmikJUwc5HnFSxp3n6entRNIMKu+8ffpg4MPIAv+qJftQb1qqqlWYQQ10GPXCzzvUnW06KqKKhzKIGTpxWKSg3bpPvOC7+QWXFK5m4VB/h2JvmACWZC4efR0SserJ0A/UOX+oxKg1y2QJj1zJdACY1JrvOggAoQZdPUSqScCS0TAVQ2cAtwbLW6s2KEJD7iUFh6hrgnSc5gL71u4MYQqSHDAbbKiCa+JU5MVefa+1sYtmvkegMf5OtktgD+znlVj/WZ4F7Vc1tfrpT8MGc7lpC1eDJPicgpEZQAhR+PlhVnBtbiPaZlCQzmUGyKDGxXQU0FIRzcrw3PJ4FgC+YivYoiYUropvgr4Qpt+X1pu/eEsGtWNLoonT55vbkM01lcpUzryaZHytk734leqvq/TPfpZpz32yk73//GxyrmP/V4f6Jlffy+n2/nAMdR3g+7jRXV2vG6weHwTdUq/lTqVf1kM/RBNhOmlJsLsZ5oI4lkTYfpOTQSI8hMsLkXICGQi0CJYhIImxOISCvUSij8ojkATvIhnhY/BGgCUUoMXkBPFGtACSST2nU5xsPDRZworsDRWAEPxRXSTFm5ExwIVCzfIbIwCC1uRFbNLgsUxFm6cNoU2J2hxa4oNjOgkZHihhE0GKKgUKN54cd7iK3H4mMUbl6gtKN70ELIHvh5mNzZteHB6aIsZCjjCi0vCC/MJLYobXizboeswnFkJMsCGhjDWQQZ0nUZOmHV52J7mcmTZl0WbFFTWlU2L6Fl1oHtVYDF94JcV1axpwIvJmjcuJGqBNsGitbQP/BsFZHGsIYJNijgMx1B0WnZYEFqkAh3w3jMY0z8UzlAEizalm2CbCxsdItcTbwI41JrfOIs67FMesqjzzSBVvpBDyb/IS3fPvPS1662CjX/mrIHD/Nne3Ums41tsF3bdXO6tPZBbkF1w4+vI92k4v7RZeBriX9inQ3Ar6MpyrzcYvNL+nfrlWX9eFcUzpXUuJL7yr0p8e2dKc1yQUfmnnaHysg+tu8GgAz+vpLsvvWe6k7rnH5woKm+ovMYDtLTXTyJvtUz3u+BDa+41vzpZOHWU7/7NNXtyLbXpkEyOgs3uQyvtNT+3jvxMaZJ6oUfwtmq7VIZ0ElTP9kRpnf/u8WTNdRJtgj8a0jRAEf+UqHQaBLgL8WqihqDUBBRz85i5zbRA5rbB+WnLwIKxwxXyvGOg4r93+rDxBwtsmCvKEbeDbyM341PQa+RIHfHUijrnNiQKvXMjktRfZESHSv236DQdmxEvtCZe0OkbdixE3sv5YC2Lhux/Tc/is+//2ff/C2+syKfeWBHfuesvfjn7SIeZ+4hO7iPnYa+ZPCtS51RnnfOi502zsAt+9HfeqvkNAmj3YwbQ83bSZwD9DKCfAfSkKL7Q8n3TAHrePpzut0G2A0fzGUEvR1D1Qp/+V0XQi8ehlHcNoOLVm2yO/+wBs5cPjv1o5HyDQPk9Ij/j5GecPGxn8ximjmI8h73a49li4I2fKSfN2XW2+Y7brvzMOGF4uJVdM5nQK/CxChs8ehYdrwbBc9B03KRrOWyruw9zTSlcxL+bAfrL57TgVu2RFpPTcnOghZ1BpngNLQWjJdyP2RY8wBkP4wMdIT2LzQ4osHPdMAf9251JbYjIAz03PtVUyo+NW8WDBzy/TLf8cWs3cUprFiLNKD/IFczCBj7YNrP/QGI8rz07ymKqqJw+PGNeP+MTz1Ef+aTn9fHIwF3DJzvXz/A4n2bF+KRn/5t1MIep6PXAq1kcZB5rLZmbJZe5/FO5CXr43OVuAHLvY//1LNBdcKUvx77ee570u+gu1TN32d4yE26ztKz+SbnL39KferYLc0l/r5S9wOPxT5LpWOsPu2Xjfw== \ No newline at end of file diff --git a/docs/images/agile_metrics.png b/docs/images/agile_metrics.png new file mode 100644 index 0000000..0467692 Binary files /dev/null and b/docs/images/agile_metrics.png differ diff --git a/docs/images/agile_metrics_cloud.png b/docs/images/agile_metrics_cloud.png new file mode 100644 index 0000000..077bb6a Binary files /dev/null and b/docs/images/agile_metrics_cloud.png differ diff --git a/docs/images/datalens_example.png b/docs/images/datalens_example.png new file mode 100644 index 0000000..7d4b4d4 Binary files /dev/null and b/docs/images/datalens_example.png differ diff --git a/docs/images/logs.png b/docs/images/logs.png new file mode 100644 index 0000000..4b14e37 Binary files /dev/null and b/docs/images/logs.png differ diff --git a/examples/serverless/main.py b/examples/serverless/main.py new file mode 100644 index 0000000..daf86b0 --- /dev/null +++ b/examples/serverless/main.py @@ -0,0 +1,13 @@ +import logging +from tracker_exporter.main import export_cycle_time + +logging.getLogger().setLevel(logging.INFO) + +def handler(event, context): + try: + export_cycle_time(ignore_exceptions=False) + response = {"statusCode": 200, "message": "success"} + except Exception as exc: + response = {"statusCode": 500, "message": exc} + finally: + return response diff --git a/examples/serverless/requirements.txt b/examples/serverless/requirements.txt new file mode 100644 index 0000000..00843fd --- /dev/null +++ 
b/examples/serverless/requirements.txt @@ -0,0 +1 @@ +tracker-exporter diff --git a/migrations/clickhouse/000001_create_table_issues.down.sql b/migrations/clickhouse/000001_create_table_issues.down.sql new file mode 100644 index 0000000..c66583e --- /dev/null +++ b/migrations/clickhouse/000001_create_table_issues.down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS `issues`; diff --git a/migrations/clickhouse/000001_create_table_issues.up.sql b/migrations/clickhouse/000001_create_table_issues.up.sql new file mode 100644 index 0000000..e8f6951 --- /dev/null +++ b/migrations/clickhouse/000001_create_table_issues.up.sql @@ -0,0 +1,39 @@ +CREATE TABLE IF NOT EXISTS `issues` +( + `version` DateTime DEFAULT now(), + + `queue` LowCardinality(String) COMMENT 'Queue key', + `title` String DEFAULT '' COMMENT 'Issue summary', + `issue_key` String COMMENT 'Unique issue key like TEST-1', + `issue_type` LowCardinality(String) COMMENT 'Issue type', + `priority` LowCardinality(String) COMMENT 'Issue priority', + `status` LowCardinality(String) COMMENT 'Last issue status', + `resolution` LowCardinality(String) DEFAULT '' COMMENT 'Issue resolution', + + `assignee` String DEFAULT '' COMMENT 'Issue assignee', + `author` String DEFAULT '' COMMENT 'Issue creator', + `qa_engineer` String DEFAULT '' COMMENT 'QA engineer who conducted the testing', + + `tags` Array(String) COMMENT 'Issue labels', + `components` Array(String) COMMENT 'Issue components', + + `created_at` Date COMMENT 'Issue creation date', + `updated_at` Date COMMENT 'Date of the last update of the issue', + `deadline` Date DEFAULT toDate('1970-01-01') COMMENT 'Deadline for completing the issue', + `closed_at` Date DEFAULT toDate('1970-01-01') COMMENT 'Closing date of the issue without resolution, based on custom closing statuses', + `resolved_at` Date DEFAULT toDate('1970-01-01') COMMENT 'Closing date of the issue with the resolution', + `start_date` Date DEFAULT toDate('1970-01-01') COMMENT 'Start date (fact, manual field, gantt)', + `end_date` Date DEFAULT toDate('1970-01-01') COMMENT 'End date (fact, manual field, gantt)', + + `is_subtask` UInt8 DEFAULT 0 COMMENT 'Subtask flag', + `is_closed` UInt8 DEFAULT 0 COMMENT 'Issue completion flag (based on custom closing statuses)', + `is_resolved` UInt8 DEFAULT 0 COMMENT 'Issue completion flag (with resolution)', + + `story_points` Float32 DEFAULT 0.0 COMMENT 'Estimating the cost of the issue', + `sprints` Array(String) COMMENT 'Sprints in which the issue participated', + `parent_issue_key` String DEFAULT '' COMMENT 'The key of the parent issue, like TEST-1', + `epic_issue_key` String DEFAULT '' COMMENT 'Epic key, like GOAL-1' +) +ENGINE = ReplacingMergeTree(version) +PARTITION BY toYYYYMM(updated_at) +ORDER BY issue_key diff --git a/migrations/clickhouse/000002_create_table_issue_metrics.down.sql b/migrations/clickhouse/000002_create_table_issue_metrics.down.sql new file mode 100644 index 0000000..2145c18 --- /dev/null +++ b/migrations/clickhouse/000002_create_table_issue_metrics.down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS `issue_metrics`; diff --git a/migrations/clickhouse/000002_create_table_issue_metrics.up.sql b/migrations/clickhouse/000002_create_table_issue_metrics.up.sql new file mode 100644 index 0000000..c18bca1 --- /dev/null +++ b/migrations/clickhouse/000002_create_table_issue_metrics.up.sql @@ -0,0 +1,17 @@ +CREATE TABLE IF NOT EXISTS `issue_metrics` +( + `version` DateTime DEFAULT now(), + `last_seen` DateTime COMMENT 'The date when the issue was last in this status', + + `issue_key` String 
COMMENT 'Issue key',
+    `status_name` LowCardinality(String) COMMENT 'Status name',
+    `status_transitions_count` UInt8 COMMENT 'The number of transitions to this status',
+
+    `duration` UInt32 COMMENT 'Time spent in the status in seconds (for all time)',
+    `human_readable_duration` String DEFAULT '' COMMENT 'Human-readable format for duration',
+    `busdays_duration` UInt32 COMMENT 'Time spent in the status in seconds (busdays only)',
+    `human_readable_busdays_duration` String DEFAULT '' COMMENT 'Human-readable format for busdays_duration'
+)
+ENGINE = ReplacingMergeTree(version)
+PARTITION BY toYYYYMM(last_seen)
+ORDER BY (issue_key, status_name, last_seen)
diff --git a/migrations/clickhouse/000003_create_view_issues_view.down.sql b/migrations/clickhouse/000003_create_view_issues_view.down.sql
new file mode 100644
index 0000000..2d25829
--- /dev/null
+++ b/migrations/clickhouse/000003_create_view_issues_view.down.sql
@@ -0,0 +1 @@
+DROP VIEW IF EXISTS `issues_view`;
\ No newline at end of file
diff --git a/migrations/clickhouse/000003_create_view_issues_view.up.sql b/migrations/clickhouse/000003_create_view_issues_view.up.sql
new file mode 100644
index 0000000..7b2636b
--- /dev/null
+++ b/migrations/clickhouse/000003_create_view_issues_view.up.sql
@@ -0,0 +1,4 @@
+CREATE VIEW IF NOT EXISTS `issues_view` AS
+SELECT *
+FROM `issues`
+FINAL
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..872513a
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,7 @@
+wheel
+twine
+ansible==2.10.*
+molecule==3.6.*
+molecule-docker==1.1.*
+pytest==7.1.*
+pytest-testinfra==6.7.*
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0deb057
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+yandex_tracker_client==2.3
+clickhouse_driver==0.2.*
+datadog==0.44.*
+apscheduler==3.9.*
+requests==2.27.*
+pandas==1.3.*
+numpy==1.21.*
+businesstime==0.3.*
+holidays==0.14.*
+sentry-sdk==1.6.*
+python-dotenv==0.20.*
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..d1329ff
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,40 @@
+[metadata]
+name = tracker-exporter
+description-file = README.md
+
+[flake8]
+ignore = D203, W503, E722, W605
+extend-ignore = E203
+exclude =
+    ansible,
+    scripts,
+    docs,
+    migrations,
+    .git,
+    .env,
+    build,
+    dist,
+    venv,
+    .eggs,
+    tests,
+    setup.py,
+    .example,
+    .yaml,
+    .vscode
+max-complexity = 15
+max-line-length = 120
+
+[pylint.message-control]
+disable =
+    W0511,
+    C0114,
+    C0115,
+    C0116,
+    W1203,
+    W0703,
+    R0903,
+    R0913,
+    R0902,
+    R1719
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..793b603
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+
+from os import path
+from setuptools import find_packages, setup
+
+
+def readme():
+    with open("README.md", "r") as fh:
+        long_description = fh.read()
+
+    return long_description
+
+cwd = path.abspath(path.dirname(__file__))
+
+
+def metadata():
+    meta = {}
+    with open(path.join(cwd, "tracker_exporter", "__version__.py"), "r") as fh:
+        exec(fh.read(), meta)
+    return meta
+
+
+def requirements():
+    requirements_list = []
+
+    with open("requirements.txt") as requirements:
+        for install in requirements:
+            requirements_list.append(install.strip())
+
+    return requirements_list
+
+metadata = metadata()
+readme = readme()
+packages = find_packages()
+requirements = requirements()
+
+
+def main():
+    setup(
name="tracker-exporter", + version=metadata.get("version"), + author=metadata.get("author"), + author_email=metadata.get("author_email"), + license=metadata.get("license"), + description=metadata.get("description"), + long_description=readme, + long_description_content_type="text/markdown", + url=metadata.get("url"), + download_url=metadata.get("download_url"), + keywords=["yandex tracker exporter", "agile", "cycle time"], + platforms=["osx", "linux"], + packages=packages, + classifiers = [ + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + ], + install_requires=requirements, + include_package_data=True, + python_requires=">=3.7", + entry_points={ + "console_scripts": [ + "tracker-exporter=tracker_exporter.main:main" + ] + }, + zip_safe=False + ) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tracker_exporter/__init__.py b/tracker_exporter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tracker_exporter/__version__.py b/tracker_exporter/__version__.py new file mode 100644 index 0000000..7a2218d --- /dev/null +++ b/tracker_exporter/__version__.py @@ -0,0 +1,9 @@ +# pylint: disable=C0103,W0622 +version = "0.1.17" +url = "https://github.com/akimrx/yandex-tracker-exporter" +download_url = "https://pypi.org/project/tracker-exporter/" +appname = "yandex_tracker_exporter" +description = "Yandex.Tracker issue metrics exporter" +author = "Akim Faskhutdinov" +author_email = "a.faskhutdinov@yclients.tech" +license = "MIT" diff --git a/tracker_exporter/defaults.py b/tracker_exporter/defaults.py new file mode 100644 index 0000000..14b9586 --- /dev/null +++ b/tracker_exporter/defaults.py @@ -0,0 +1,62 @@ +import os +import datetime as dt + +# Common settings +LOGLEVEL = os.environ.get("EXPORTER_LOGLEVEL", "info") +UPLOAD_TO_STORAGE = os.environ.get("EXPORTER_ENABLE_UPLOAD", "false").lower() in ("true", "yes") + +# Business days settings +BUSINESS_HOURS_START = dt.time(9) +BUSINESS_HOURS_END = dt.time(22) +WEEKENDS = (5, 6,) # Monday is 0, Sunday is 6 + +# Monitoring settings +MONITORING_ENABLED = os.environ.get("EXPORTER_MONITORING_ENABLED", "false").lower() in ("true", "yes") +MONITORING_HOST = os.environ.get("EXPORTER_MONITORING_HOST", "localhost") +MONITORING_PORT = os.environ.get("EXPORTER_MONITORING_PORT", 8125) +MONITORING_METRIC_BASE_PREFIX = os.environ.get("MONITORING_METRIC_PREFIX", "tracker_exporter") +MONITORING_BASE_LABELS = [ + "project:internal", +] +SENTRY_ENABLED = os.environ.get("EXPORTER_SENTRY_ENABLED", "false").lower() in ("true", "yes") +SENTRY_DSN = os.environ.get("EXPORTER_SENTRY_DSN") + +# Tracker settings +TRACKER_DEFAULT_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" +TRACKER_BULK_CYCLE_TIME_ISSUES_LIMIT = 1000 +TRACKER_TOKEN = os.environ.get("EXPORTER_TRACKER_TOKEN") +TRACKER_ORG_ID = os.environ.get("EXPORTER_TRACKER_ORG_ID") +TRACKER_ISSUES_UPDATE_INTERVAL = os.environ.get("EXPORTER_TRACKER_ISSUES_FETCH_INTERVAL", 30) # min +TRACKER_ISSUES_SEARCH_QUERY = os.environ.get("EXPORTER_TRACKER_ISSUES_SEARCH_QUERY") +TRACKER_ISSUES_SEARCH_RANGE = os.environ.get("EXPORTER_TRACKER_ISSUES_SEARCH_RANGE", "2h") + +# Clickhouse settings +CLICKHOUSE_HOST = os.environ.get("EXPORTER_CLICKHOUSE_HOST", "localhost") +CLICKHOUSE_PROTO = os.environ.get("EXPORTER_CLICKHOUSE_PROTO", "http") +CLICKHOUSE_HTTP_PORT = os.environ.get("EXPORTER_CLICKHOUSE_HTTP_PORT", 8123) +CLICKHOUSE_CACERT_PATH = os.environ.get("EXPORTER_CLICKHOUSE_CERT", None) +CLICKHOUSE_SERVERLESS_PROXY_ID = 
os.environ.get("EXPORTER_CLICKHOUSE_SERVERLESS_PROXY_ID", None) +CLICKHOUSE_USER = os.environ.get("EXPORTER_CLICKHOUSE_USER", "default") +CLICKHOUSE_PASSWORD = os.environ.get("EXPORTER_CLICKHOUSE_PASSWORD") +CLICKHOUSE_DATABASE = os.environ.get("EXPORTER_CLICKHOUSE_DATABASE", "agile") +CLICKHOUSE_ISSUES_TABLE = os.environ.get("EXPORTER_CLICKHOUSE_ISSUES_TABLE", "issues") +CLICKHOUSE_ISSUE_METRICS_TABLE = os.environ.get("EXPORTER_CLICKHOUSE_ISSUE_METRICS_TABLE", "issue_metrics") + +# Exporter settings +_DEFAULT_CLOSED_ISSUE_STATUSES = "closed,rejected,resolved,cancelled,released" +CLOSED_ISSUE_STATUSES = os.environ.get("EXPORTER_CLOSED_ISSUE_STATUES", _DEFAULT_CLOSED_ISSUE_STATUSES).split(",") +EXCLUDE_QUEUES = ( + "TEST", +) +NOT_NULLABLE_FIELDS = ( + "created_at", + "resolved_at", + "closed_at", + "updated_at", + "released_at", + "deadline", + "start_date", + "end_date", + "start_time", + "end_time", +) diff --git a/tracker_exporter/errors.py b/tracker_exporter/errors.py new file mode 100644 index 0000000..72e0709 --- /dev/null +++ b/tracker_exporter/errors.py @@ -0,0 +1,23 @@ +class TrackerExporterError(Exception): + pass + + +class ClickhouseError(TrackerExporterError): + pass + + +class TrackerError(TrackerExporterError): + pass + + +class NetworkError(TrackerExporterError): + pass + + +class ExportError(TrackerExporterError): + pass + + +class TimedOut(TrackerExporterError): + def __init__(self): + super().__init__("Timed out") diff --git a/tracker_exporter/exporter.py b/tracker_exporter/exporter.py new file mode 100644 index 0000000..15f9372 --- /dev/null +++ b/tracker_exporter/exporter.py @@ -0,0 +1,117 @@ +import time +import logging + +from typing import Union, Tuple, List +from tracker_exporter.services.tracker import YandexTrackerClient +from tracker_exporter.services.clickhouse import ClickhouseClient +from tracker_exporter.services.monitoring import DogStatsdClient +from tracker_exporter.utils.helpers import to_human_time +from tracker_exporter.defaults import ( + CLICKHOUSE_HOST, + CLICKHOUSE_HTTP_PORT, + CLICKHOUSE_USER, + CLICKHOUSE_PASSWORD, + CLICKHOUSE_DATABASE, + CLICKHOUSE_ISSUES_TABLE, + CLICKHOUSE_ISSUE_METRICS_TABLE, + TRACKER_BULK_CYCLE_TIME_ISSUES_LIMIT, + EXCLUDE_QUEUES, + MONITORING_ENABLED, + TRACKER_ORG_ID, + TRACKER_TOKEN, + TRACKER_ISSUES_SEARCH_RANGE, + TRACKER_ISSUES_SEARCH_QUERY +) + +logger = logging.getLogger(__name__) +tracker = YandexTrackerClient(token=TRACKER_TOKEN, org_id=TRACKER_ORG_ID) +monitoring = DogStatsdClient(enabled=MONITORING_ENABLED) + + +class Exporter: + # TODO: configure class instance + # TODO: parse migration from sprint to sprint by changelog (field changed), + # by default exported only last sprint (tracker logic) + def __init__(self): + self.clickhouse = ClickhouseClient( + host=CLICKHOUSE_HOST, + port=CLICKHOUSE_HTTP_PORT, + user=CLICKHOUSE_USER, + password=CLICKHOUSE_PASSWORD + ) + + @monitoring.send_time_metric("issues_processing_time_seconds") + def _bulk_issue_cycle_time(self, + query: str, + limit: int = 50) -> Tuple[List]: + """Collects and transforms metrics for found tasks.""" + issues = [] + metrics = [] + issues_without_metrics = 0 + found_issues = tracker.search_issues(query=query, limit=limit) + logger.info("Prepare cycle time metrics...") + + for tracker_issue in found_issues: + try: + issue, metric = tracker.issue_cycle_time(tracker_issue.key) + if metric is None: + logger.debug(f"Ignore {tracker_issue.key} because metrics is empty") + issues_without_metrics += 1 + issues.append(issue) + else: + 
+                    issues.append(issue)
+                    for m in metric:  # pylint: disable=C0103
+                        metrics.append(m)
+            except Exception as exc:
+                logger.exception(f"Issue {tracker_issue.key} can't be transformed, details: {exc}")
+
+        monitoring.send_gauge_metric("issues_without_metrics", value=issues_without_metrics)
+        logger.info(
+            f"Total issues: {len(issues)}, total cycle time metrics: {len(metrics)}, "
+            f"ignored issues with empty metrics: {issues_without_metrics}"
+        )
+        return issues, metrics
+
+
+    def _upload_data_to_storage(self, payload: list, table: str) -> None:
+        """Inserts a batch of data into Clickhouse with deduplication."""
+        logger.info(f"Inserting batch ({len(payload)} rows) to Clickhouse ({table})...")
+        self.clickhouse.insert_batch(CLICKHOUSE_DATABASE, table, payload)
+
+        logger.info(f"Optimizing table '{table}' for deduplication...")
+        self.clickhouse.deduplicate(CLICKHOUSE_DATABASE, table)
+
+
+    @monitoring.send_time_metric("cycle_time_total_processing_time_seconds")
+    def cycle_time(self,
+                   query: str = TRACKER_ISSUES_SEARCH_QUERY,
+                   exclude_queues: Union[list, tuple] = EXCLUDE_QUEUES,
+                   search_range: str = TRACKER_ISSUES_SEARCH_RANGE,
+                   upload: bool = True) -> int:
+        """Exports issue cycle time and uploads it to the storage."""
+        logger.info("Started processing issues...")
+        if query:
+            logger.warning("Arguments `exclude_queues`, `search_range` have no effect if a `query` is passed")
+        queues = ", ".join([f"!{q}" for q in exclude_queues])
+        _default_query = f"Queue: {queues} AND Updated: >= now() - {search_range}"
+        search_query = query or _default_query
+        start_time = time.time()
+
+        issues, metrics = self._bulk_issue_cycle_time(
+            search_query,
+            limit=TRACKER_BULK_CYCLE_TIME_ISSUES_LIMIT
+        )
+
+        if upload:
+            self._upload_data_to_storage(issues, table=CLICKHOUSE_ISSUES_TABLE)
+            self._upload_data_to_storage(metrics, table=CLICKHOUSE_ISSUE_METRICS_TABLE)
+        else:
+            logger.debug("Upload to Clickhouse is disabled")
+
+        elapsed_time = time.time() - start_time
+        logger.info(
+            f"Processing issues completed. Elapsed time: {to_human_time(elapsed_time)}, "
+            f"total issues processed: {len(issues)}"
+        )
+
+        return len(issues) if upload else 0
diff --git a/tracker_exporter/main.py b/tracker_exporter/main.py
new file mode 100644
index 0000000..c4e866e
--- /dev/null
+++ b/tracker_exporter/main.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import time
+import signal
+import logging
+import warnings
+import argparse
+
+from datetime import datetime, timedelta
+from dotenv import load_dotenv
+
+import sentry_sdk
+from apscheduler.schedulers.background import BackgroundScheduler
+
+parser = argparse.ArgumentParser("tracker-exporter")
+parser.add_argument(
+    "-e", "--env-file",
+    metavar="file",
+    dest="env_file",
+    type=str,
+    required=False,
+    help="Path to .env file"
+)
+args = parser.parse_args()
+load_dotenv(args.env_file)
+warnings.filterwarnings("ignore")
+
+# The settings are read from the environment at import time,
+# so these imports must happen after load_dotenv()
+# pylint: disable=C0413
+from .errors import ExportError
+from .services.monitoring import DogStatsdClient, sentry_events_filter
+from .exporter import Exporter
+from .__version__ import appname, version
+from .defaults import (
+    EXCLUDE_QUEUES,
+    LOGLEVEL,
+    UPLOAD_TO_STORAGE,
+    TRACKER_ISSUES_UPDATE_INTERVAL,
+    SENTRY_ENABLED,
+    SENTRY_DSN,
+)
+
+logging.basicConfig(
+    level=LOGLEVEL.upper(),
+    datefmt="%Y-%m-%d %H:%M:%S",
+    format="%(asctime)s [%(levelname)s] [%(name)s.%(funcName)s] %(message)s"
+)
+logging.getLogger("yandex_tracker_client").setLevel(logging.WARNING)
+logger = logging.getLogger(__name__)
+scheduler = BackgroundScheduler()
+monitoring = DogStatsdClient()
+exporter = Exporter()
+logger.debug(f"Environment: {os.environ.items()}")
+
+
+def signal_handler(sig, frame) -> None:  # pylint: disable=W0613
+    if sig == signal.SIGINT:
+        logger.warning("Received SIGINT, graceful shutdown...")
+        scheduler.shutdown()
+        sys.exit(0)
+
+
+def configure_sentry() -> None:
+    if SENTRY_ENABLED:
+        assert SENTRY_DSN is not None
+        sentry_sdk.init(
+            dsn=SENTRY_DSN,
+            traces_sample_rate=1.0,
+            release=f"{appname}@{version}",
+            before_send=sentry_events_filter
+        )
+    logger.info(f"Sentry trace sending is {'enabled' if SENTRY_ENABLED else 'disabled'}")
+
+
+def export_cycle_time(exclude_queues: tuple = EXCLUDE_QUEUES,
+                      upload: bool = UPLOAD_TO_STORAGE,
+                      ignore_exceptions: bool = True) -> None:
+    try:
+        inserted_rows = exporter.cycle_time(exclude_queues=exclude_queues, upload=upload)
+        if inserted_rows > 0:
+            monitoring.send_gauge_metric("last_update_timestamp", value=int(time.time()))
+            monitoring.send_gauge_metric("upload_status", value=1)
+    except Exception as exc:
+        monitoring.send_gauge_metric("upload_status", value=2)
+        logger.exception(f"An error occurred during the export: {exc}")
+        if not ignore_exceptions:
+            raise ExportError(exc) from exc
+
+
+def main() -> None:
+    configure_sentry()
+    signal.signal(signal.SIGINT, signal_handler)
+    scheduler.start()
+    scheduler.add_job(
+        export_cycle_time,
+        trigger="interval",
+        name="issues_cycle_time_exporter",
+        minutes=int(TRACKER_ISSUES_UPDATE_INTERVAL),
+        max_instances=1,
+        next_run_time=datetime.now() + timedelta(seconds=5)
+    )
+    signal.pause()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tracker_exporter/models/__init__.py b/tracker_exporter/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tracker_exporter/models/base.py b/tracker_exporter/models/base.py
new file mode 100644
index 0000000..f0d0184
--- /dev/null
+++ b/tracker_exporter/models/base.py
@@ -0,0 +1,44 @@
+import json
+from abc import ABCMeta
+
+
+class Base(metaclass=ABCMeta):
+    """Base class for objects."""
+
diff --git a/tracker_exporter/models/base.py b/tracker_exporter/models/base.py
new file mode 100644
index 0000000..f0d0184
--- /dev/null
+++ b/tracker_exporter/models/base.py
@@ -0,0 +1,42 @@
+import json
+from abc import ABCMeta
+
+
+class Base(metaclass=ABCMeta):
+    """Base class for objects."""
+
+    def __str__(self) -> str:
+        return str(self.to_dict())
+
+    def __repr__(self) -> str:
+        return str(self)
+
+    def __getitem__(self, item):
+        return self.__dict__[item]
+
+    @classmethod
+    def de_json(cls, data) -> dict:
+        """Deserialize object."""
+        if not data:
+            return None
+
+        data = data.copy()
+        return data
+
+    def to_json(self) -> str:
+        """Serialize object to a JSON string."""
+        return json.dumps(self.to_dict())
+
+    def to_dict(self) -> dict:
+        """Recursively serialize object."""
+        null_cleaner = lambda value: "" if value is None else value  # pylint: disable=C3001
+
+        def parse(val):
+            if isinstance(val, list):
+                return [parse(it) for it in val]
+            if isinstance(val, dict):
+                return {key: null_cleaner(parse(value)) for key, value in val.items() if not key.startswith("_")}
+            return val
+
+        data = self.__dict__.copy()
+        return parse(data)
diff --git a/tracker_exporter/models/enums.py b/tracker_exporter/models/enums.py
new file mode 100644
index 0000000..bea1b11
--- /dev/null
+++ b/tracker_exporter/models/enums.py
@@ -0,0 +1,22 @@
+class TrackerChangelogEvents:
+    ISSUE_WORKFLOW = "IssueWorkflow"
+
+
+class TrackerWorkflowTypes:
+    TRANSITION = "status"
+    RESOLVE_ISSUE = "resolution"
+
+
+class YandexTrackerLanguages:
+    RU = "ru"
+    EN = "en"
+
+
+class TimeDeltaOut:
+    SECONDS = "seconds"
+    MINUTES = "minutes"
+
+
+class ClickhouseProto:
+    HTTPS = "https"
+    HTTP = "http"
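Base centralizes serialization for all the model objects that follow: to_dict() walks __dict__ recursively, drops underscore-prefixed (private) keys, and coerces None values to empty strings. A hypothetical subclass demonstrates the behavior:

    from tracker_exporter.models.base import Base

    class Example(Base):
        def __init__(self):
            self._private = "hidden"           # dropped: key starts with "_"
            self.name = "demo"
            self.nested = {"a": None, "b": 1}  # None becomes ""

    print(Example().to_dict())  # {'name': 'demo', 'nested': {'a': '', 'b': 1}}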
diff --git a/tracker_exporter/models/issue.py b/tracker_exporter/models/issue.py
new file mode 100644
index 0000000..e2c3c43
--- /dev/null
+++ b/tracker_exporter/models/issue.py
@@ -0,0 +1,134 @@
+import logging
+
+from tracker_exporter.models.base import Base
+from tracker_exporter.models.enums import (
+    TrackerChangelogEvents,
+    TrackerWorkflowTypes,
+)
+from tracker_exporter.services.monitoring import DogStatsdClient
+from tracker_exporter.utils.helpers import (
+    calculate_time_spent,
+    string_normalize,
+    validate_resource,
+    to_simple_datetime,
+    to_snake_case,
+    to_human_time,
+)
+from tracker_exporter.defaults import CLOSED_ISSUE_STATUSES
+
+logger = logging.getLogger(__name__)
+monitoring = DogStatsdClient()
+
+
+class TrackerIssueMetric(Base):
+    """This object represents issue metrics for a TrackerIssue object."""
+    def __init__(self,
+                 issue_key: str,
+                 status_name: str,
+                 status_transitions_count: int,
+                 duration: int,
+                 busdays_duration: int,
+                 last_seen: str):
+
+        self.issue_key = issue_key
+        self.status_name = status_name
+        self.status_transitions_count = status_transitions_count
+        self.duration = duration
+        self.human_readable_duration = to_human_time(self.duration)
+        self.busdays_duration = busdays_duration
+        self.human_readable_busdays_duration = to_human_time(self.busdays_duration)
+        self.last_seen = last_seen
+
+
+class TrackerIssue(Base):
+    """This object represents an issue from Yandex.Tracker."""
+
+    def __init__(self, issue: object) -> None:
+        self._issue = issue
+        self._transform(self._issue)
+
+    def _transform(self, issue: object) -> None:
+        """Build the issue object from its metadata."""
+        self.queue = issue.queue.key
+        self.issue_key = issue.key
+        self.title = string_normalize(issue.summary)
+        self.issue_type = to_snake_case(validate_resource(issue.type, "name"))
+        self.priority = validate_resource(issue.priority, "name")
+        self.assignee = validate_resource(issue.assignee, "email")
+        self.author = validate_resource(issue.createdBy, "email")
+        self.status = to_snake_case(validate_resource(issue.status, "name"))
+        self.resolution = to_snake_case(validate_resource(issue.resolution, "name"))
+        self.tags = issue.tags or []
+        self.components = [c.name for c in (issue.components or [])]
+        self.created_at = to_simple_datetime(issue.createdAt, date_only=True)
+        self.updated_at = to_simple_datetime(issue.updatedAt, date_only=True)
+        self.deadline = validate_resource(issue, "deadline")
+        self.resolved_at = to_simple_datetime(issue.resolvedAt, date_only=True)
+        self.start_date = validate_resource(issue, "start")
+        self.end_date = validate_resource(issue, "end")
+        self.story_points = validate_resource(issue, "storyPoints") or 0
+        self.sprints = [s.name for s in (issue.sprint or [])]
+        self.parent_issue_key = validate_resource(issue.parent, "key", low=False)
+        self.epic_issue_key = validate_resource(issue.epic, "key", low=False)
+        self.is_subtask = bool(self.parent_issue_key)
+        self.is_closed = self.status in CLOSED_ISSUE_STATUSES
+        self.is_resolved = self.resolution is not None
+        self.qa_engineer = validate_resource(issue.qaEngineer, "email")
+
+    @monitoring.send_count_metric("issues_total_processed_count", 1, tags=["source:issues"])
+    def metrics(self) -> list:
+        """
+        All metrics are based on status change events in the issue history.
+
+        Time in a status is recorded only after the issue leaves that
+        status: for example, when an issue moves from "Open" to
+        "In progress", only the metric for "Open" is recorded, and
+        "In progress" is recorded once it changes to any other status.
+        The issue's current status is therefore never counted, because
+        that interval has not ended yet.
+ """ + metrics_storage = {} + + for event in self._issue.changelog: + if event.type == TrackerChangelogEvents.ISSUE_WORKFLOW: + logger.debug(f"Issue workflow fields found: {event.fields}") + worklow_type = event.fields[0].get("field").id + # Keep only status transition events, drop otherwise + if worklow_type != TrackerWorkflowTypes.TRANSITION: + logger.debug(f"Skipping {event.fields[0].get('field').id} for {self.issue_key}") + continue + + status = to_snake_case(event.fields[0].get("from").name.lower()) + event_start_time = event.fields[1].get("from") + event_end_time = event.fields[1].get("to") + + # Custom logic for calculating the completion date of the task, + # because not everyone uses resolutions, sadly + transition_status = to_snake_case(event.fields[0].get("to").name.lower()) + if transition_status in CLOSED_ISSUE_STATUSES and self.status in CLOSED_ISSUE_STATUSES: + self.closed_at = to_simple_datetime(event_end_time, date_only=True) # pylint: disable=W0201 + + if event_start_time is None or event_end_time is None: + continue + + # Calculation of the time spent in the status + start_time = to_simple_datetime(event_start_time) + end_time = to_simple_datetime(event_end_time) + total_status_time = calculate_time_spent(start_time, end_time) + busdays_status_time = calculate_time_spent(start_time, end_time, busdays_only=True) + try: + metrics_storage[status]["duration"] += total_status_time + metrics_storage[status]["busdays_duration"] += busdays_status_time + metrics_storage[status]["status_transitions_count"] += 1 + except (KeyError, AttributeError): + metrics_storage[status] = dict( + issue_key=self.issue_key, + status_name=status, + status_transitions_count=1, + duration=total_status_time, + busdays_duration=busdays_status_time, + last_seen=to_simple_datetime(event_end_time) + ) + + logger.debug(f"Metrics for {self.issue_key}: {metrics_storage}") + metrics = [TrackerIssueMetric(**metric) for _, metric in metrics_storage.items()] + return metrics diff --git a/tracker_exporter/services/__init__.py b/tracker_exporter/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tracker_exporter/services/clickhouse.py b/tracker_exporter/services/clickhouse.py new file mode 100644 index 0000000..adaec02 --- /dev/null +++ b/tracker_exporter/services/clickhouse.py @@ -0,0 +1,114 @@ +import json +import logging + +from typing import List, Dict, Union + +from requests import Response +import requests + +from tracker_exporter.errors import ClickhouseError, NetworkError, TimedOut +from tracker_exporter.utils.helpers import retry +from tracker_exporter.services.monitoring import DogStatsdClient +from tracker_exporter.models.enums import ClickhouseProto +from tracker_exporter.defaults import ( + CLICKHOUSE_PROTO, + CLICKHOUSE_CACERT_PATH, + CLICKHOUSE_SERVERLESS_PROXY_ID +) + +logger = logging.getLogger(__name__) +monitoring = DogStatsdClient() + + +class ClickhouseClient: + """This class provide simple facade interface for Clickhouse.""" + + def __init__(self, # pylint: disable=W0102 + host: str, + port: int = 8123, + user: str = "default", + password: str = None, + proto: ClickhouseProto = CLICKHOUSE_PROTO, + cacert: str = CLICKHOUSE_CACERT_PATH, + serverless_proxy_id: str = CLICKHOUSE_SERVERLESS_PROXY_ID, + params: dict = {}, + http_timeout: int = 10) -> None: + + self.host = host + self.port = port + self.user = user + self.password = password + self.proto = proto + self.cacert = cacert + self.serverless_proxy_id = serverless_proxy_id + self.params = params + self.timeout = 
diff --git a/tracker_exporter/services/clickhouse.py b/tracker_exporter/services/clickhouse.py
new file mode 100644
index 0000000..adaec02
--- /dev/null
+++ b/tracker_exporter/services/clickhouse.py
@@ -0,0 +1,114 @@
+import json
+import logging
+
+from typing import List, Dict, Union
+
+from requests import Response
+import requests
+
+from tracker_exporter.errors import ClickhouseError, NetworkError, TimedOut
+from tracker_exporter.utils.helpers import retry
+from tracker_exporter.services.monitoring import DogStatsdClient
+from tracker_exporter.models.enums import ClickhouseProto
+from tracker_exporter.defaults import (
+    CLICKHOUSE_PROTO,
+    CLICKHOUSE_CACERT_PATH,
+    CLICKHOUSE_SERVERLESS_PROXY_ID
+)
+
+logger = logging.getLogger(__name__)
+monitoring = DogStatsdClient()
+
+
+class ClickhouseClient:
+    """This class provides a simple facade interface for Clickhouse."""
+
+    def __init__(self,  # pylint: disable=W0102
+                 host: str,
+                 port: int = 8123,
+                 user: str = "default",
+                 password: str = None,
+                 proto: ClickhouseProto = CLICKHOUSE_PROTO,
+                 cacert: str = CLICKHOUSE_CACERT_PATH,
+                 serverless_proxy_id: str = CLICKHOUSE_SERVERLESS_PROXY_ID,
+                 params: dict = {},
+                 http_timeout: int = 10) -> None:
+
+        self.host = host
+        self.port = port
+        self.user = user
+        self.password = password
+        self.proto = proto
+        self.cacert = cacert
+        self.serverless_proxy_id = serverless_proxy_id
+        self.params = params
+        self.timeout = int(http_timeout)
+        self.headers = {"Content-Type": "application/json"}
+
+        if self.proto == ClickhouseProto.HTTPS:
+            assert self.cacert is not None
+
+    @retry((NetworkError, TimedOut))
+    def execute(self, query: str) -> Union[None, Response]:
+        url = f"{self.proto}://{self.host}:{self.port}"
+
+        if self.proto != ClickhouseProto.HTTPS:
+            url += f"?user={self.user}"
+            if self.password is not None:
+                url += f"&password={self.password}"
+        else:
+            self.headers["X-Clickhouse-User"] = self.user
+            self.headers["X-Clickhouse-Key"] = self.password
+
+        if self.serverless_proxy_id:
+            self.params["database"] = self.serverless_proxy_id
+
+        if self.params:
+            params = "&".join([f"{k}={v}" for k, v in self.params.items()])
+            url += f"&{params}" if self.proto != ClickhouseProto.HTTPS else f"?{params}"
+
+        try:
+            if self.proto == ClickhouseProto.HTTPS:
+                response = requests.post(
+                    url=url, headers=self.headers, data=query,
+                    timeout=self.timeout, verify=self.cacert
+                )
+            else:
+                response = requests.post(
+                    url=url, headers=self.headers, data=query, timeout=self.timeout
+                )
+        except requests.Timeout as exc:
+            raise TimedOut() from exc
+        except requests.ConnectionError as exc:
+            raise NetworkError(exc) from exc
+        except Exception as exc:
+            logger.exception(
+                f"Could not execute query in Clickhouse: {exc}"
+            )
+            raise ClickhouseError(exc) from exc
+        else:
+            if not response.ok:
+                msg = (
+                    f"Could not execute query in Clickhouse. "
+                    f"Status: {response.status_code}. {response.text}"
+                )
+                logger.error(msg)
+                raise ClickhouseError(msg)
+            return response
+
+    # TODO: add sort by partition key (i.e. `updated_at`) for best insert performance
+    @monitoring.send_time_metric("clickhouse_insert_time_seconds")
+    def insert_batch(self, database: str, table: str, payload: List[Dict]) -> Union[None, Response]:
+        if not isinstance(payload, list):
+            raise ClickhouseError("Payload must be a list")
+
+        _tags = [f"database:{database}", f"table:{table}"]
+        data = " ".join([json.dumps(row) for row in payload])
+        logger.debug(f"Inserting batch: {data}")
+        query_result = self.execute(f"INSERT INTO {database}.{table} FORMAT JSONEachRow {data}")
+        monitoring.send_gauge_metric("clickhouse_inserted_rows", len(payload), _tags)
+        return query_result
+
+    @monitoring.send_time_metric("clickhouse_deduplicate_time_seconds")
+    def deduplicate(self, database: str, table: str) -> Union[None, Response]:
+        return self.execute(f"OPTIMIZE TABLE {database}.{table} FINAL")
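insert_batch serializes every row to JSON and ships the whole batch in a single INSERT ... FORMAT JSONEachRow statement over HTTP, and deduplicate then collapses duplicates via OPTIMIZE ... FINAL. A usage sketch, assuming the default HTTP proto and made-up host, database, and table names:

    from tracker_exporter.services.clickhouse import ClickhouseClient

    client = ClickhouseClient(host="clickhouse.local", port=8123,
                              user="default", password="secret")
    rows = [
        {"issue_key": "TEST-1", "status_name": "open", "duration": 5400},
        {"issue_key": "TEST-1", "status_name": "in_progress", "duration": 7200},
    ]
    client.insert_batch(database="agile", table="issue_metrics", payload=rows)
    # Collapse duplicate rows left behind by repeated exports
    client.deduplicate(database="agile", table="issue_metrics")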
diff --git a/tracker_exporter/services/monitoring.py b/tracker_exporter/services/monitoring.py
new file mode 100644
index 0000000..f197ae4
--- /dev/null
+++ b/tracker_exporter/services/monitoring.py
@@ -0,0 +1,102 @@
+# pylint: disable=W0102
+import logging
+
+from functools import wraps
+from datadog import DogStatsd
+
+from yandex_tracker_client.exceptions import (
+    TrackerError,
+    TrackerServerError,
+    TrackerRequestError,
+    TrackerClientError,
+)
+from tracker_exporter.defaults import (
+    MONITORING_METRIC_BASE_PREFIX,
+    MONITORING_HOST,
+    MONITORING_PORT,
+    MONITORING_BASE_LABELS,
+    MONITORING_ENABLED
+)
+
+logger = logging.getLogger(__name__)
+
+
+class DogStatsdClient:
+    """This class represents an interface for the DataDog statsd UDP client."""
+
+    def __init__(self,
+                 host: str = MONITORING_HOST,
+                 port: int = MONITORING_PORT,
+                 base_labels: list = MONITORING_BASE_LABELS,  # pylint: disable=W0102
+                 metric_name_prefix: str = MONITORING_METRIC_BASE_PREFIX,
+                 use_ms: bool = True,
+                 enabled: bool = MONITORING_ENABLED):
+
+        self.host = host
+        self.port = int(port)
+        self.base_labels = base_labels
+        self.prefix = metric_name_prefix
+        self._enabled = enabled
+        self.client = DogStatsd(
+            host=self.host,
+            port=self.port,
+            use_ms=use_ms,
+            constant_tags=self.base_labels
+        )
+        if self._enabled:
+            assert self.host is not None
+            assert self.port is not None
+        logger.info(f"Monitoring metrics sending is {'enabled' if self._enabled else 'disabled'}")
+
+    def send_count_metric(self,
+                          name: str,
+                          value: int,
+                          tags: list = []):
+        metric = f"{self.prefix}_{name}"
+        def metric_wrapper(func):
+            @wraps(func)
+            def wrapper(*args, **kwargs):
+                if not self._enabled:
+                    return func(*args, **kwargs)
+                self.client.increment(metric, value, tags=tags)
+                logger.debug(f"Successfully sent count metric: {metric}")
+                return func(*args, **kwargs)
+            return wrapper
+        return metric_wrapper
+
+    def send_gauge_metric(self,
+                          name: str,
+                          value: int,
+                          tags: list = []):
+        if not self._enabled:
+            return
+        metric = f"{self.prefix}_{name}"
+        self.client.gauge(metric, value, tags=tags)
+        logger.debug(f"Successfully sent gauge metric: {metric}")
+
+    def send_time_metric(self,
+                         name: str,
+                         tags: list = []):
+        metric = f"{self.prefix}_{name}"
+        def metric_wrapper(func):
+            @wraps(func)
+            def wrapper(*args, **kwargs):
+                if not self._enabled:
+                    return func(*args, **kwargs)
+                logger.debug(f"Starting timer for metric: {metric}")
+                with self.client.timed(metric, tags=tags):
+                    return func(*args, **kwargs)
+            return wrapper
+        return metric_wrapper
+
+
+def sentry_events_filter(event, hint):  # pylint: disable=R1710
+    # Drop all events without exception trace
+    if "exc_info" not in hint:
+        return
+
+    exception = hint["exc_info"][1]
+    if isinstance(exception, (TrackerError, TrackerClientError, TrackerRequestError, TrackerServerError)):
+        event["fingerprint"] = ["tracker-error"]
+
+    return event
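DogStatsdClient exposes send_count_metric and send_time_metric as decorators and send_gauge_metric as a plain call; all three become no-ops when monitoring is disabled. A sketch with an illustrative function and metric names:

    from tracker_exporter.services.monitoring import DogStatsdClient

    monitoring = DogStatsdClient()

    @monitoring.send_count_metric("batches_processed_count", 1, tags=["source:example"])
    @monitoring.send_time_metric("batch_processing_time_seconds")
    def process_batch(batch: list) -> int:
        return len(batch)

    process_batch([1, 2, 3])  # emits a counter and a timer per call when enabled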
diff --git a/tracker_exporter/services/tracker.py b/tracker_exporter/services/tracker.py
new file mode 100644
index 0000000..af92657
--- /dev/null
+++ b/tracker_exporter/services/tracker.py
@@ -0,0 +1,57 @@
+import logging
+
+from typing import List, Optional, Tuple
+from yandex_tracker_client import TrackerClient
+from yandex_tracker_client.collections import Issues, IssueComments
+
+from tracker_exporter.models.issue import TrackerIssue
+from tracker_exporter.models.enums import YandexTrackerLanguages
+from tracker_exporter.utils.helpers import fix_null_dates
+from tracker_exporter.services.monitoring import DogStatsdClient
+from tracker_exporter.errors import TrackerError
+
+logger = logging.getLogger(__name__)
+monitoring = DogStatsdClient()
+
+
+class YandexTrackerClient:
+    """This class provides a simple facade interface for Yandex.Tracker."""
+
+    def __init__(self,
+                 org_id: str,
+                 token: str,
+                 lang: YandexTrackerLanguages = YandexTrackerLanguages.EN):
+
+        self.token = token
+        self.org_id = str(org_id)
+        self.lang = lang
+
+        if self.lang.lower() not in ("en", "ru"):
+            raise TrackerError("Tracker client language must be 'en' or 'ru'")
+
+        self.client = TrackerClient(
+            token=self.token,
+            org_id=self.org_id,
+            headers={"Accept-Language": self.lang}
+        )
+
+    def get_issue(self, issue_key: str) -> Issues:
+        return self.client.issues[issue_key]
+
+    def get_comments(self, issue_key: str) -> IssueComments:
+        return self.client.issues[issue_key].comments.get_all()
+
+    @monitoring.send_time_metric("issues_search_time_seconds")
+    def search_issues(self, query: str, limit: int = 100) -> List[Issues]:
+        found_issues = self.client.issues.find(query, per_page=limit)
+        logger.info(f"Found {len(found_issues)} issues by query '{query}'")
+        return found_issues
+
+    @monitoring.send_time_metric("issue_transform_time_seconds")
+    def issue_cycle_time(self, issue_key: str) -> Tuple[dict, Optional[List[dict]]]:
+        issue = TrackerIssue(self.get_issue(issue_key))
+        metrics = issue.metrics()
+
+        if not metrics:
+            return fix_null_dates(issue.to_dict()), None
+        return fix_null_dates(issue.to_dict()), [m.to_dict() for m in metrics]
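issue_cycle_time ties the pieces together: it fetches one issue, builds a TrackerIssue, and returns the serialized issue alongside its per-status metrics (or None when the changelog yields nothing). A sketch, assuming valid credentials and an existing issue key:

    from tracker_exporter.services.tracker import YandexTrackerClient

    tracker = YandexTrackerClient(org_id="123456", token="<oauth-token>")  # hypothetical
    issue, metrics = tracker.issue_cycle_time("TEST-1")
    print(issue["status"], len(metrics or []))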
+ """ + if not isinstance(start_date, datetime): + start_date = pd.to_datetime(start_date) + if not isinstance(end_date, datetime): + end_date = pd.to_datetime(end_date) + + if busdays_only: + bt = BusinessTime(business_hours=business_hours, weekends=weekends, holidays=holidays.RU()) # pylint: disable=C0103 + result = bt.businesstimedelta(start_date, end_date).total_seconds() + else: + result = (end_date - start_date).total_seconds() + + return abs(int(result)) + + +def fix_null_dates(data: dict) -> dict: + to_remove = [] + + for key, value in data.items(): + if key in NOT_NULLABLE_FIELDS and (value is None or value == ""): + to_remove.append(key) + + for key in to_remove: + del data[key] + + return data + + +# pylint: disable=R1710 +def validate_resource(resource: object, + attribute: str, + low: bool = True) -> Union[str, list, bool, int, None]: + """Validate Yandex.Tracker object attribute and return it if exists as string.""" + if hasattr(resource, attribute): + _attr = getattr(resource, attribute) + if isinstance(_attr, str): + if low: + return _attr.lower() + return _attr + return _attr + + +def to_snake_case(text: str) -> str: + """Convert any string to `snake_case` format.""" + if text is None or text == "": + return text + + text = re.sub(r"('|\")", "", text) + string = re.sub(r"(_|-)+", " ", text).title().replace(" ", "") + output = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', string) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', output).lower() + + +def to_simple_datetime(dtime: str, + source_dt_format: str = TRACKER_DEFAULT_DATETIME_FORMAT, + date_only: bool = False, + shift: int = 3) -> str: + """Return (Unicode) date format `YYYY-MM-DD HH:mm:ss` or `YYYY-MM-DD` if `date_only`.""" + if dtime is None: + logger.debug("dtime is empty, can't transform date to simple string.") + return + + if date_only: + fmt = "%Y-%m-%d" + else: + fmt = "%Y-%m-%d %H:%M:%S" + + timestamp = time.mktime(datetime.strptime(dtime.split(".")[0], source_dt_format).timetuple()) + if shift > 0: + timestamp += 60 * 60 * shift + elif shift < 0: + timestamp -= 60 * 60 * shift + return datetime.fromtimestamp(int(timestamp)).strftime(fmt) + + +def retry(exceptions: tuple, tries: int = 3, delay: Union[float, int] = 1, backoff: int = 3): + """Decorator to retry the execution of the func, if you have received the errors listed.""" + def retry_decorator(func): + @wraps(func) + def func_retry(*args, **kwargs): + mtries, mdelay = tries, delay + counter = 0 + while mtries > 0: + try: + counter += 1 + return func(*args, **kwargs) + except exceptions as err: + mtries -= 1 + if mtries == 0: + logger.warning(f"{func.__name__} has failed {counter} times") + raise err + logger.warning( + f"Error in func {func.__name__}, cause: {err}. " + f"Retrying ({counter}) in {mdelay} seconds..." 
+                    )
+                    time.sleep(mdelay)
+                    mdelay *= backoff
+        return func_retry
+    return retry_decorator
+
+
+def to_human_time(seconds: Union[int, float], verbosity: int = 2) -> str:
+    """Convert seconds to a human-readable timedelta, e.g. `2w 3d 1h 20m`."""
+    seconds = int(seconds)
+    if seconds == 0:
+        return "0s"
+
+    negative = False
+    if seconds < 0:
+        negative = True
+        seconds = abs(seconds)
+
+    result = []
+    intervals = (
+        ("y", 31104000),
+        ("mo", 2592000),
+        ("w", 604800),
+        ("d", 86400),
+        ("h", 3600),
+        ("m", 60),
+        ("s", 1),
+    )
+    for name, count in intervals:
+        value = seconds // count
+        if value:
+            seconds -= value * count
+            result.append(f"{value}{name}")
+    delta = " ".join(result[:verbosity])
+    return f"-{delta}" if negative else delta
+
+
+def string_normalize(text: str) -> str:
+    """Remove all incompatible symbols."""
+    emoji_pattern = re.compile("["
+        u"\U0001F600-\U0001F64F"  # emoticons
+        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
+        u"\U0001F680-\U0001F6FF"  # transport & map symbols
+        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
+        "]+", flags=re.UNICODE)
+    return emoji_pattern.sub(r"", text)
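The helpers compose well on their own: retry wraps any flaky callable with exponential backoff, and to_human_time renders the second-level durations produced by calculate_time_spent. A quick demonstration:

    from tracker_exporter.utils.helpers import retry, to_human_time

    print(to_human_time(93784))             # "1d 2h" (verbosity=2 keeps the two largest units)
    print(to_human_time(-45, verbosity=3))  # "-45s"

    @retry((ConnectionError,), tries=3, delay=1, backoff=3)
    def flaky_call():
        ...  # delays between attempts: 1s, then 3s, before the error is re-raised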