Skip to content

Commit

Permalink
Fix issues with duckdb local run (#2493)
Browse files Browse the repository at this point in the history
* Fix issues with duckdb local run

* Add additional test

* Improve test docs
  • Loading branch information
ravenac95 authored Nov 21, 2024
1 parent 568b98b commit 87480be
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 7 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
test_developer_active_days_to_artifact_over_60_day_window_with_cumulative_active_days:
  # Tests rolling count of active days when the user is active 4 of the 5 days
  # in the test interval
  gateway: local
  model: metrics.developer_active_days_to_artifact_over_60_day_window
  vars:
    # Sample dates span 2024-01-01 .. 2024-01-05; the developer is active on
    # the first four days only, so the final day exercises the "hold" case.
    start: 2024-01-01
    end: 2024-01-05
  inputs:
    # One COMMIT_CODE event per active day from dev_0 to repo_0. The `amount`
    # values differ on purpose: the metric should count distinct active days,
    # not sum event volume.
    metrics.events_daily_to_artifact:
      rows:
        - to_artifact_id: repo_0
          from_artifact_id: dev_0
          event_source: SOURCE_PROVIDER
          event_type: COMMIT_CODE
          bucket_day: 2024-01-01
          amount: 1
        - to_artifact_id: repo_0
          from_artifact_id: dev_0
          event_source: SOURCE_PROVIDER
          event_type: COMMIT_CODE
          bucket_day: 2024-01-02
          amount: 10
        - to_artifact_id: repo_0
          from_artifact_id: dev_0
          event_source: SOURCE_PROVIDER
          event_type: COMMIT_CODE
          bucket_day: 2024-01-03
          amount: 12
        - to_artifact_id: repo_0
          from_artifact_id: dev_0
          event_source: SOURCE_PROVIDER
          event_type: COMMIT_CODE
          bucket_day: 2024-01-04
          amount: 20
  outputs:
    # `partial: true` asserts only the listed columns, not the full row shape.
    partial: true
    query:
      partial: true
      # Expected rolling active-day counts: +1 per active day, then the count
      # holds at 4 on 2024-01-05 (inactive, but still within the 60-day window).
      rows:
        - to_artifact_id: repo_0
          from_artifact_id: dev_0
          metrics_sample_date: 2024-01-01
          amount: 1
        - to_artifact_id: repo_0
          from_artifact_id: dev_0
          metrics_sample_date: 2024-01-02
          amount: 2
        - to_artifact_id: repo_0
          from_artifact_id: dev_0
          metrics_sample_date: 2024-01-03
          amount: 3
        - to_artifact_id: repo_0
          from_artifact_id: dev_0
          metrics_sample_date: 2024-01-04
          amount: 4
        - to_artifact_id: repo_0
          from_artifact_id: dev_0
          metrics_sample_date: 2024-01-05
          amount: 4

test_developer_active_days_to_artifact_over_60_day_window_with_1_active_day:
  # Tests rolling count of active days when the user is active on only 1 day
  # of the test interval
  gateway: local
  model: metrics.developer_active_days_to_artifact_over_60_day_window
  vars:
    # Sample dates span 2024-01-01 .. 2024-01-03; activity occurs on the first
    # day only, so later days exercise the window carrying the count forward.
    start: 2024-01-01
    end: 2024-01-03
  inputs:
    # Single COMMIT_CODE event from dev_0 to repo_0 on 2024-01-01. `amount: 10`
    # is deliberately > 1 to show event volume does not inflate the day count.
    metrics.events_daily_to_artifact:
      rows:
        - to_artifact_id: repo_0
          from_artifact_id: dev_0
          event_source: SOURCE_PROVIDER
          event_type: COMMIT_CODE
          bucket_day: 2024-01-01
          amount: 10
  outputs:
    # `partial: true` asserts only the listed columns, not the full row shape.
    partial: true
    query:
      partial: true
      # Expected count stays 1 for every sample date: the single active day
      # remains inside the 60-day window for the whole interval.
      rows:
        - to_artifact_id: repo_0
          from_artifact_id: dev_0
          metrics_sample_date: 2024-01-01
          amount: 1
        - to_artifact_id: repo_0
          from_artifact_id: dev_0
          metrics_sample_date: 2024-01-02
          amount: 1
        - to_artifact_id: repo_0
          from_artifact_id: dev_0
          metrics_sample_date: 2024-01-03
          amount: 1
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

test_events_daily_to_artifact:
gateway: local
model: metrics.events_daily_to_artifact
vars:
start: 2024-01-01
Expand Down
13 changes: 10 additions & 3 deletions warehouse/metrics_tools/factory/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ def generate_models(self, calling_file: str):
)
},
)
logger.debug("model generation complete")
logger.info("model generation complete")

def generate_model_for_rendered_query(
self,
Expand Down Expand Up @@ -567,7 +567,7 @@ def timeseries_metrics(
):
add_metrics_tools_to_sqlmesh_logging()

logger.debug("loading timeseries metrics")
logger.info("loading timeseries metrics")
calling_file = inspect.stack()[1].filename
timeseries_metrics = TimeseriesMetrics.from_raw_options(**raw_options)
return timeseries_metrics.generate_models(calling_file)
Expand Down Expand Up @@ -646,7 +646,14 @@ def generated_rolling_query(
locals.update(sqlmesh_vars)

runner = MetricsRunner.from_sqlmesh_context(context, query, ref, locals)
yield runner.run_rolling(start, end)
df = runner.run_rolling(start, end)
# If the rolling window is empty we need to yield from an empty tuple
# otherwise sqlmesh fails. See:
# https://sqlmesh.readthedocs.io/en/latest/concepts/models/python_models/#returning-empty-dataframes
if df.empty:
yield from ()
else:
yield df


def generated_rolling_query_proxy(
Expand Down
11 changes: 8 additions & 3 deletions warehouse/metrics_tools/transformer/tables.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
"""Transforms table references from an execution context
"""

import logging
import typing as t

from sqlglot import exp
from sqlmesh.core.context import ExecutionContext
from .base import Transform

logger = logging.getLogger(__name__)


class TableTransform(Transform):
def transform_table_name(self, table: exp.Table) -> exp.Table | None:
Expand All @@ -19,10 +22,9 @@ def transform_tables(node: exp.Expression):
actual_table = self.transform_table_name(node)
if not actual_table:
return node
table_kwargs = {}
if node.alias:
table_kwargs["alias"] = node.alias
return exp.to_table(actual_table.this.this, **table_kwargs)
actual_table = actual_table.as_(node.alias)
return actual_table

transformed_expressions = []
for expression in query:
Expand Down Expand Up @@ -53,6 +55,9 @@ def __init__(
def transform_table_name(self, table: exp.Table) -> exp.Table | None:
table_name = f"{table.db}.{table.this.this}"
try:
logger.debug(
f"Transforming tables for query {self._context.table(table_name)}"
)
return exp.to_table(self._context.table(table_name))
except KeyError:
return None
3 changes: 3 additions & 0 deletions warehouse/metrics_tools/utils/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

connected_to_sqlmesh_logs = False

logger = logging.getLogger(__name__)


def add_metrics_tools_to_sqlmesh_logging():
"""sqlmesh won't automatically add metrics_tools logging. This will enable
Expand All @@ -15,6 +17,7 @@ def add_metrics_tools_to_sqlmesh_logging():
if app_name == "sqlmesh" and not connected_to_sqlmesh_logs:
add_metrics_tools_to_existing_logger(app_name)
connected_to_sqlmesh_logs = True
logger.info("metrics_tools logs connected to sqlmesh")


def add_metrics_tools_to_existing_logger(logger_name: str):
Expand Down

0 comments on commit 87480be

Please sign in to comment.