diff --git a/docs/guides/anomaly-detection-configuration/timestamp-column.mdx b/docs/guides/anomaly-detection-configuration/timestamp-column.mdx
index 0c92e7292..8427e1c88 100644
--- a/docs/guides/anomaly-detection-configuration/timestamp-column.mdx
+++ b/docs/guides/anomaly-detection-configuration/timestamp-column.mdx
@@ -14,6 +14,11 @@ Elementary anomaly detection tests will use this column to create time buckets a
 It is highly recommended to configure a timestamp column (if there is one).
 The best column for this would be an `updated_at`/`created_at`/`loaded_at` timestamp for each row (date type also works).
 
+- When you specify a `timestamp_column`, each time the test runs it splits the data into buckets according to the timestamps in this column, calculates the metric for each bucket, and checks for anomalies between these buckets.
+  This also means that if the table has enough historical data, the test can start working right away.
+- When you do not specify a `timestamp_column`, each time the test runs it calculates the metric for all of the data in the table, and checks for anomalies against the metrics from previous runs.
+  This also means that it will take the test `days_back` days to start working, as it needs that time to collect the necessary metrics.
+
 If undefined, default is null (no time buckets).
 
 - _Default: none_
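The two behaviors described in the docs change above are worth a concrete illustration. The following is a minimal conceptual sketch, not Elementary's implementation: pandas stands in for the warehouse, daily buckets for the configured time bucket, and row count for whichever metric the test tracks.

    import pandas as pd

    def bucketed_metrics(df: pd.DataFrame, timestamp_column: str) -> pd.Series:
        # With a timestamp_column (assumed datetime-typed): split rows into
        # daily buckets and compute the metric per bucket, so a single run
        # over a table with enough history already has a baseline to detect
        # anomalies against.
        buckets = df[timestamp_column].dt.floor("D")
        return df.groupby(buckets).size()

    def full_table_metric(df: pd.DataFrame) -> int:
        # Without a timestamp_column: one metric for the whole table per run;
        # a baseline only accumulates across `days_back` days of repeated runs.
        return len(df)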
diff --git a/elementary/monitor/cli.py b/elementary/monitor/cli.py
index 95a736d6b..4d616e32d 100644
--- a/elementary/monitor/cli.py
+++ b/elementary/monitor/cli.py
@@ -159,16 +159,26 @@ def get_cli_properties() -> dict:
     if params is None:
         return dict()
 
-    reload_monitoring_configuration = params.get("reload_monitoring_configuration")
     target_path = params.get("target_path")
     update_dbt_package = params.get("update_dbt_package")
     full_refresh_dbt_package = params.get("full_refresh_dbt_package")
+    select = params.get("select")
+    days_back = params.get("days_back")
+    timezone = params.get("timezone")
+    group_by = params.get("group_by")
+    suppression_interval = params.get("suppression_interval")
+    override_dbt_project_config = params.get("override_dbt_project_config")
 
     return {
         "target_path": target_path,
-        "reload_monitoring_configuration": reload_monitoring_configuration,
         "update_dbt_package": update_dbt_package,
         "full_refresh_dbt_package": full_refresh_dbt_package,
+        "select": select,
+        "days_back": days_back,
+        "timezone": timezone,
+        "group_by": group_by,
+        "suppression_interval": suppression_interval,
+        "override_dbt_project_config": override_dbt_project_config,
     }
diff --git a/elementary/monitor/data_monitoring/selector_filter.py b/elementary/monitor/data_monitoring/selector_filter.py
index f11834106..3961b9eed 100644
--- a/elementary/monitor/data_monitoring/selector_filter.py
+++ b/elementary/monitor/data_monitoring/selector_filter.py
@@ -58,15 +58,21 @@ def _parse_selector(self, selector: Optional[str] = None) -> SelectorFilterSchem
         resource_types_match = resource_types_regex.search(selector)
 
         if last_invocation_match:
+            if self.tracking:
+                self.tracking.set_env("select_method", "last_invocation")
             data_monitoring_filter = SelectorFilterSchema(
                 last_invocation=True, selector=selector
             )
         elif invocation_id_match:
+            if self.tracking:
+                self.tracking.set_env("select_method", "invocation_id")
             data_monitoring_filter = SelectorFilterSchema(
                 invocation_id=invocation_id_match.group(1),
                 selector=selector,
             )
         elif invocation_time_match:
+            if self.tracking:
+                self.tracking.set_env("select_method", "invocation_time")
             data_monitoring_filter = SelectorFilterSchema(
                 invocation_time=invocation_time_match.group(1),
                 selector=selector,
diff --git a/elementary/monitor/dbt_project/macros/upload_source_freshness.sql b/elementary/monitor/dbt_project/macros/upload_source_freshness.sql
index 14aa52ec7..3a4ae9a93 100644
--- a/elementary/monitor/dbt_project/macros/upload_source_freshness.sql
+++ b/elementary/monitor/dbt_project/macros/upload_source_freshness.sql
@@ -1,5 +1,5 @@
 {% macro upload_source_freshness(results) %}
-    {% set source_freshess_results_dicts = fromjson(results) %}
+    {% set source_freshness_results_dicts = fromjson(results) %}
     {% set source_freshness_results_relation = ref('dbt_source_freshness_results') %}
-    {% do elementary.upload_artifacts_to_table(source_freshness_results_relation, source_freshess_results_dicts, elementary.flatten_source_freshness, append=True, should_commit=true) %}
+    {% do elementary.upload_artifacts_to_table(source_freshness_results_relation, source_freshness_results_dicts, elementary.flatten_source_freshness, append=True, should_commit=true) %}
 {% endmacro %}
diff --git a/elementary/monitor/dbt_project/models/tests_recommendation/table_timestamp_columns.sql b/elementary/monitor/dbt_project/models/tests_recommendation/table_timestamp_columns.sql
index 3093479f3..08b74cb4e 100644
--- a/elementary/monitor/dbt_project/models/tests_recommendation/table_timestamp_columns.sql
+++ b/elementary/monitor/dbt_project/models/tests_recommendation/table_timestamp_columns.sql
@@ -1,8 +1,10 @@
 {% set timestamp_column_names = [
-    "updated_at",
-    "updated_at_utc",
     "created_at",
     "created_at_utc",
+    "inserted_at",
+    "inserted_at_utc",
+    "updated_at",
+    "updated_at_utc",
     "_fivetran_synced",
     "_airbyte_emitted_at",
     "create_date",
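The reordering in table_timestamp_columns.sql above reads as a priority change: candidates earlier in the list are preferred when recommending a timestamp column. Assuming first-match-wins semantics (the actual resolution happens in the SQL model; this Python helper is purely illustrative), the new order prefers the `created_at`/`inserted_at` variants over `updated_at`:

    # Hypothetical first-match lookup over the (reordered) candidate list,
    # using the entries visible in the hunk above.
    TIMESTAMP_COLUMN_NAMES = [
        "created_at",
        "created_at_utc",
        "inserted_at",
        "inserted_at_utc",
        "updated_at",
        "updated_at_utc",
        "_fivetran_synced",
        "_airbyte_emitted_at",
        "create_date",
    ]

    def recommend_timestamp_column(table_columns: list[str]) -> str | None:
        # Return the highest-priority candidate that exists on the table.
        existing = {column.lower() for column in table_columns}
        for candidate in TIMESTAMP_COLUMN_NAMES:
            if candidate in existing:
                return candidate
        return None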