Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added docs for no timestamp_column #1151

Merged
merged 10 commits into from
Sep 10, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ Elementary anomaly detection tests will use this column to create time buckets a
It is highly recommended to configure a timestamp column (if there is one).
The best column for this would be an `updated_at`/`created_at`/`loaded_at` timestamp for each row (date type also works).

- When you specify a `timestamp_column`, each time the test runs it splits the data into buckets according to the timestamp in this column, calculates the metric for each bucket, and checks for anomalies between these buckets.
This also means that if the table has enough historical data, the test can start working right away.
- When you do not specify a `timestamp_column`, each time the test runs it calculates the metric for all of the data in the table, and checks for anomalies between this metric and the metrics collected in previous runs.
This also means that it will take the test `days_back` days to start working, as it needs that time to collect the necessary metrics.

If undefined, default is null (no time buckets).

- _Default: none_
Expand Down
14 changes: 12 additions & 2 deletions elementary/monitor/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,16 +159,26 @@ def get_cli_properties() -> dict:
if params is None:
return dict()

reload_monitoring_configuration = params.get("reload_monitoring_configuration")
target_path = params.get("target_path")
update_dbt_package = params.get("update_dbt_package")
full_refresh_dbt_package = params.get("full_refresh_dbt_package")
select = params.get("select")
days_back = params.get("days_back")
timezone = params.get("timezone")
group_by = params.get("group_by")
suppression_interval = params.get("suppression_interval")
override_dbt_project_config = params.get("override_dbt_project_config")

return {
"target_path": target_path,
"reload_monitoring_configuration": reload_monitoring_configuration,
"update_dbt_package": update_dbt_package,
"full_refresh_dbt_package": full_refresh_dbt_package,
"select": select,
"days_back": days_back,
"timezone": timezone,
"group_by": group_by,
"suppression_interval": suppression_interval,
"override_dbt_project_config": override_dbt_project_config,
}


Expand Down
6 changes: 6 additions & 0 deletions elementary/monitor/data_monitoring/selector_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,21 @@ def _parse_selector(self, selector: Optional[str] = None) -> SelectorFilterSchem
resource_types_match = resource_types_regex.search(selector)

if last_invocation_match:
if self.tracking:
self.tracking.set_env("select_method", "last_invocation")
data_monitoring_filter = SelectorFilterSchema(
last_invocation=True, selector=selector
)
elif invocation_id_match:
if self.tracking:
self.tracking.set_env("select_method", "invocation_id")
data_monitoring_filter = SelectorFilterSchema(
invocation_id=invocation_id_match.group(1),
selector=selector,
)
elif invocation_time_match:
if self.tracking:
self.tracking.set_env("select_method", "invocation_time")
data_monitoring_filter = SelectorFilterSchema(
invocation_time=invocation_time_match.group(1),
selector=selector,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{# Parse the JSON-serialized dbt source-freshness results and append them to the
   dbt_source_freshness_results table.
   results: JSON string of freshness result dicts (as produced by `dbt source freshness`).
   NOTE: the rendered diff contained both the old misspelled variable
   (`source_freshess_results_dicts`) and its corrected replacement; this is the
   corrected, self-consistent version using `source_freshness_results_dicts` throughout. #}
{% macro upload_source_freshness(results) %}
    {% set source_freshness_results_dicts = fromjson(results) %}
    {% set source_freshness_results_relation = ref('dbt_source_freshness_results') %}
    {# append=True accumulates results across invocations; should_commit=true persists immediately #}
    {% do elementary.upload_artifacts_to_table(source_freshness_results_relation, source_freshness_results_dicts, elementary.flatten_source_freshness, append=True, should_commit=true) %}
{% endmacro %}
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
{% set timestamp_column_names = [
"updated_at",
"updated_at_utc",
"created_at",
"created_at_utc",
"inserted_at",
"inserted_at_utc",
"updated_at",
"updated_at_utc",
"_fivetran_synced",
"_airbyte_emitted_at",
"create_date",
Expand Down
Loading