Skip to content

Commit

Permalink
Dependencies: Minimize dependencies of core installation
Browse files Browse the repository at this point in the history
- Defer `polars` to `cratedb-toolkit[io]` and `cratedb-toolkit[cfr]`
- Add dedicated testing slot for CrateDB CFR
  • Loading branch information
amotl committed Nov 5, 2024
1 parent eae9593 commit 2baa480
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 6 deletions.
65 changes: 65 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,71 @@ jobs:
fail_ci_if_error: true


tests-cfr:
  # Dedicated test job for the CFR subsystem: installs the package with the
  # `cfr` extra and runs only the tests selected by the `cfr` pytest marker.

  runs-on: ${{ matrix.os }}
  strategy:
    fail-fast: false
    matrix:
      os: ["ubuntu-latest"]
      python-version: ["3.9", "3.12"]

  env:
    # Forwarded to Codecov through `env_vars` in the upload step below.
    OS: ${{ matrix.os }}
    PYTHON: ${{ matrix.python-version }}
    # Do not tear down Testcontainers
    # Environment values reach the process as strings, so quote explicitly.
    TC_KEEPALIVE: "true"

  # https://docs.github.com/en/actions/using-containerized-services/about-service-containers
  services:
    cratedb:
      image: crate/crate:nightly
      ports:
        - "4200:4200"
        - "5432:5432"

  name: "
    CFR:
    Python ${{ matrix.python-version }} on OS ${{ matrix.os }}"
  steps:

    - name: Acquire sources
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
        architecture: x64
        cache: 'pip'
        cache-dependency-path: 'pyproject.toml'

    - name: Set up project
      run: |
        # `setuptools 64.0.0` adds support for editable install hooks (PEP 660).
        # https://github.com/pypa/setuptools/blob/main/CHANGES.rst#v6400
        pip install "setuptools>=64" --upgrade
        # Install package in editable mode.
        pip install --use-pep517 --prefer-binary --editable=.[cfr,test,develop]

    # Only the tests carrying the `cfr` marker are selected; no linter runs here.
    - name: Run software tests
      run: |
        pytest -m cfr

    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v4
      env:
        CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
      with:
        files: ./coverage.xml
        flags: cfr
        env_vars: OS,PYTHON
        name: codecov-umbrella
        fail_ci_if_error: true


tests-pymongo:

runs-on: ${{ matrix.os }}
Expand Down
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Changelog

## Unreleased
- Dependencies: Minimize dependencies of core installation,
defer `polars` to `cratedb-toolkit[io]` and `cratedb-toolkit[cfr]`.

## 2024/10/13 v0.0.29
- MongoDB: Added Zyp transformations to the CDC subsystem,
Expand Down
16 changes: 11 additions & 5 deletions cratedb_toolkit/cfr/systable.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
import typing as t
from pathlib import Path

import polars as pl
if t.TYPE_CHECKING:
import polars as pl

Check warning on line 27 in cratedb_toolkit/cfr/systable.py

View check run for this annotation

Codecov / codecov/patch

cratedb_toolkit/cfr/systable.py#L27

Added line #L27 was not covered by tests

import sqlalchemy as sa
from tqdm import tqdm

Expand Down Expand Up @@ -125,7 +127,9 @@ def __init__(self, dburi: str, target: t.Union[Path], data_format: DataFormat =
self.info = InfoContainer(adapter=self.adapter)
self.inspector = SystemTableInspector(dburi=self.dburi)

def read_table(self, tablename: str) -> pl.DataFrame:
def read_table(self, tablename: str) -> "pl.DataFrame":
import polars as pl

sql = f'SELECT * FROM "{SystemTableKnowledge.SYS_SCHEMA}"."{tablename}"' # noqa: S608
logger.debug(f"Running SQL: {sql}")
return pl.read_database(
Expand All @@ -134,7 +138,7 @@ def read_table(self, tablename: str) -> pl.DataFrame:
infer_schema_length=1000,
)

def dump_table(self, frame: pl.DataFrame, file: t.Union[t.TextIO, None] = None):
def dump_table(self, frame: "pl.DataFrame", file: t.Union[t.TextIO, None] = None):
if self.data_format == "csv":
# polars.exceptions.ComputeError: CSV format does not support nested data
# return df.write_csv() # noqa: ERA001
Expand Down Expand Up @@ -235,15 +239,17 @@ def load(self):

# Load data.
try:
df: pl.DataFrame = self.load_table(path_table_data)
df: "pl.DataFrame" = self.load_table(path_table_data)
df.write_database(table_name=tablename_restored, connection=self.dburi, if_table_exists="append")
except Exception as ex:
error_logger(self.debug)(f"Importing table failed: {tablename}. Reason: {ex}")

logger.info(f"Successfully imported {table_count} system tables")
# df.to_pandas().to_sql(name=tablename, con=self.adapter.engine, if_exists="append", index=False) # noqa: ERA001, E501

def load_table(self, path: Path) -> pl.DataFrame:
def load_table(self, path: Path) -> "pl.DataFrame":
import polars as pl

if path.suffix in [".jsonl"]:
return pl.read_ndjson(path)
elif path.suffix in [".parquet", ".pq"]:
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ dependencies = [
'importlib-metadata; python_version < "3.8"',
'importlib-resources; python_version < "3.9"',
"orjsonl<2",
"polars<1.11",
"pympler<1.2",
"python-dateutil<3",
"python-dotenv<2",
Expand All @@ -115,6 +114,7 @@ all = [
]
cfr = [
"pandas<2.3",
"polars<1.11",
"pyarrow<18.1",
]
cloud = [
Expand Down Expand Up @@ -157,6 +157,7 @@ io = [
"dask[dataframe]>=2020",
"fsspec[s3,http]",
"pandas<2.3,>=1",
"polars<1.11",
"sqlalchemy>=2",
"universal-pathlib<0.3",
]
Expand Down Expand Up @@ -268,6 +269,7 @@ testpaths = [
]
xfail_strict = true
markers = [
"cfr",
"examples",
"dynamodb",
"influxdb",
Expand Down
7 changes: 7 additions & 0 deletions tests/cfr/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
# ruff: noqa: E402
import json
import os.path
import re
import shutil
import sys
import tarfile

import pytest

# Skip every test in this module when the `polars` package cannot be imported.
polars = pytest.importorskip("polars", reason="Skipping tests because polars is not installed")

import tests

if sys.version_info < (3, 9):
Expand All @@ -17,6 +22,8 @@

from cratedb_toolkit.cfr.cli import cli

pytestmark = pytest.mark.cfr


def filenames(path: Path):
return sorted([item.name for item in path.iterdir()])
Expand Down

0 comments on commit 2baa480

Please sign in to comment.