Skip to content

Commit

Permalink
Upgrade deltalake and reduce chunk size
Browse files Browse the repository at this point in the history
  • Loading branch information
Gilbert09 committed Dec 19, 2024
1 parent df3ea8d commit 62fd630
Show file tree
Hide file tree
Showing 6 changed files with 11 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(
self,
engine: Engine,
table: Table,
- chunk_size: int = 1000,
+ chunk_size: int = DEFAULT_CHUNK_SIZE,
incremental: Optional[dlt.sources.incremental[Any]] = None,
connect_args: Optional[list[str]] = None,
db_incremental_field_last_value: Optional[Any] = None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from dlt.sources.credentials import ConnectionStringCredentials

from posthog.settings.utils import get_from_env
from posthog.temporal.data_imports.pipelines.sql_database_v2.settings import DEFAULT_CHUNK_SIZE
from posthog.temporal.data_imports.pipelines.sql_database_v2._json import BigQueryJSON
from posthog.utils import str_to_bool
from posthog.warehouse.models import ExternalDataSource
Expand Down Expand Up @@ -252,7 +253,7 @@ def sql_database(
schema: Optional[str] = dlt.config.value,
metadata: Optional[MetaData] = None,
table_names: Optional[list[str]] = dlt.config.value,
- chunk_size: int = 50000,
+ chunk_size: int = DEFAULT_CHUNK_SIZE,
backend: TableBackend = "pyarrow",
detect_precision_hints: Optional[bool] = False,
reflection_level: Optional[ReflectionLevel] = "full",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

from dlt.sources.credentials import ConnectionStringCredentials

from posthog.temporal.data_imports.pipelines.sql_database_v2.settings import DEFAULT_CHUNK_SIZE

from .arrow_helpers import row_tuples_to_arrow
from .schema_types import (
default_table_adapter,
Expand All @@ -44,7 +46,7 @@ def __init__(
backend: TableBackend,
table: Table,
columns: TTableSchemaColumns,
- chunk_size: int = 1000,
+ chunk_size: int = DEFAULT_CHUNK_SIZE,
incremental: Optional[dlt.sources.incremental[Any]] = None,
db_incremental_field_last_value: Optional[Any] = None,
query_adapter_callback: Optional[TQueryAdapter] = None,
Expand Down Expand Up @@ -302,7 +304,7 @@ class SqlTableResourceConfiguration(BaseConfiguration):
table: Optional[str] = None
schema: Optional[str] = None
incremental: Optional[dlt.sources.incremental] = None # type: ignore[type-arg]
- chunk_size: int = 50000
+ chunk_size: int = DEFAULT_CHUNK_SIZE
backend: TableBackend = "sqlalchemy"
detect_precision_hints: Optional[bool] = None
defer_table_reflect: Optional[bool] = False
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
DEFAULT_CHUNK_SIZE = 10_000
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ clickhouse-driver==0.2.7
clickhouse-pool==0.5.3
conditional-cache==1.2
cryptography==39.0.2
deltalake==0.22.3
dj-database-url==0.5.0
Django~=4.2.15
django-axes==5.9.0
Expand All @@ -34,7 +35,6 @@ djangorestframework==3.15.1
djangorestframework-csv==2.1.1
djangorestframework-dataclasses==1.2.0
dlt==1.3.0
- dlt[deltalake]==1.3.0
dnspython==2.2.1
drf-exceptions-hog==0.4.0
drf-extensions==0.7.0
Expand Down
7 changes: 2 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ defusedxml==0.6.0
# via
# python3-openid
# social-auth-core
- deltalake==0.19.1
- # via dlt
+ deltalake==0.22.3
+ # via -r requirements.in
distro==1.9.0
# via openai
dj-database-url==0.5.0
Expand Down Expand Up @@ -273,8 +273,6 @@ googleapis-common-protos==1.60.0
# via
# google-api-core
# grpcio-status
- greenlet==3.1.1
- # via sqlalchemy
grpcio==1.63.2
# via
# -r requirements.in
Expand Down Expand Up @@ -505,7 +503,6 @@ pyarrow==17.0.0
# via
# -r requirements.in
# deltalake
# dlt
# sqlalchemy-bigquery
pyasn1==0.5.0
# via
Expand Down

0 comments on commit 62fd630

Please sign in to comment.