From 3e691d568984b39fb229a52ed588f912196aaafc Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Fri, 27 Sep 2024 08:43:38 -0500 Subject: [PATCH 01/23] Adding Hourly Workflow Counts Realtime CAgg and change DailyWorkflowCount to Materialized Only View --- .../hourly_workflow_classification_count.rb | 12 +++++++++ app/queries/count_classifications.rb | 9 +++++++ ...te_hourly_workflow_classification_count.rb | 25 +++++++++++++++++++ ...efresh_policy_for_hourly_workflow_count.rb | 8 ++++++ ...ention_policy_for_hourly_workflow_count.rb | 8 ++++++ ...assification_count_to_materialized_only.rb | 8 ++++++ 6 files changed, 70 insertions(+) create mode 100644 app/models/classification_counts/hourly_workflow_classification_count.rb create mode 100644 db/migrate/20240926225916_create_hourly_workflow_classification_count.rb create mode 100644 db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb create mode 100644 db/migrate/20240926231325_create_data_retention_policy_for_hourly_workflow_count.rb create mode 100644 db/migrate/20240926233924_alter_daily_workflow_classification_count_to_materialized_only.rb diff --git a/app/models/classification_counts/hourly_workflow_classification_count.rb b/app/models/classification_counts/hourly_workflow_classification_count.rb new file mode 100644 index 0000000..53ec7e4 --- /dev/null +++ b/app/models/classification_counts/hourly_workflow_classification_count.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +module ClassificationCounts + class HourlyWorkflowClassificationCount < ApplicationRecord + self.table_name = 'hourly_classification_count_per_workflow' + attribute :classification_count, :integer + + def readonly? 
+ true + end + end + end diff --git a/app/queries/count_classifications.rb b/app/queries/count_classifications.rb index 2abd588..0e6a498 100644 --- a/app/queries/count_classifications.rb +++ b/app/queries/count_classifications.rb @@ -22,6 +22,15 @@ def initial_scope(relation, period) relation.select(select_and_time_bucket_by(period, 'classification')).group('period').order('period') end + def end_date_includes_today?(params) + includes_today = true + if params[:end_date] + end_date = Date.parse(params[:end_date]) + includes_today = end_date >= Date.today + end + return includes_today + end + def relation(params) if params[:workflow_id] ClassificationCounts::DailyWorkflowClassificationCount diff --git a/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb b/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb new file mode 100644 index 0000000..e312c5e --- /dev/null +++ b/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb @@ -0,0 +1,25 @@ +class CreateHourlyWorkflowClassificationCount < ActiveRecord::Migration[7.0] + # we have to disable the migration transaction because creating materialized views within it is not allowed. + + # Due to how the front end pulls project stats (and workflow stats) all in one go, we hit performance issues; especially if a project has multiple workflows. + # We have discovered that having a non-realtime/materialized only continuous aggregate for our daily workflow count cagg is more performant than real time. + # We plan to do the following: + # - Update the daily_classification_count_per_workflow to be materialized only (i.e. non-realtime) + # - Create a subsequent realtime cagg that buckets hourly that we will create data retention policies for. The plan is for up to 72 hours worth of hourly workflow classification counts of data. + # - Update workflow query to query the daily counts first and then query the hourly counts for just the current date. 
+ disable_ddl_transaction! + def change + execute <<~SQL + create materialized view hourly_classification_count_per_workflow + with ( + timescaledb.continuous + ) as + select + time_bucket('1 hour', event_time) as hour, + workflow_id, + count(*) as classification_count + from classification_events where event_time > now() - INTERVAL '5 days' + group by hour, workflow_id; + SQL + end +end diff --git a/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb b/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb new file mode 100644 index 0000000..3960e70 --- /dev/null +++ b/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb @@ -0,0 +1,8 @@ +class AddRefreshPolicyForHourlyWorkflowCount < ActiveRecord::Migration[7.0] + disable_ddl_transaction! + def change + execute <<~SQL + SELECT add_continuous_aggregate_policy('hourly_classification_count_per_workflow',start_offset => INTERVAL '5 days', end_offset => INTERVAL '30 minutes', schedule_interval => INTERVAL '1 h'); + SQL + end +end diff --git a/db/migrate/20240926231325_create_data_retention_policy_for_hourly_workflow_count.rb b/db/migrate/20240926231325_create_data_retention_policy_for_hourly_workflow_count.rb new file mode 100644 index 0000000..1f26b0b --- /dev/null +++ b/db/migrate/20240926231325_create_data_retention_policy_for_hourly_workflow_count.rb @@ -0,0 +1,8 @@ +class CreateDataRetentionPolicyForHourlyWorkflowCount < ActiveRecord::Migration[7.0] + disable_ddl_transaction! 
+ def change + execute <<~SQL + SELECT add_retention_policy('hourly_classification_count_per_workflow', drop_after => INTERVAL '3 days'); + SQL + end +end diff --git a/db/migrate/20240926233924_alter_daily_workflow_classification_count_to_materialized_only.rb b/db/migrate/20240926233924_alter_daily_workflow_classification_count_to_materialized_only.rb new file mode 100644 index 0000000..468bf1e --- /dev/null +++ b/db/migrate/20240926233924_alter_daily_workflow_classification_count_to_materialized_only.rb @@ -0,0 +1,8 @@ +class AlterDailyWorkflowClassificationCountToMaterializedOnly < ActiveRecord::Migration[7.0] + disable_ddl_transaction! + def change + execute <<~SQL + ALTER MATERIALIZED VIEW daily_classification_count_per_workflow set (timescaledb.materialized_only = true); + SQL + end +end From 4f1e82a273370e60ae0c81312fcb8e9f5cbdfe1a Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Thu, 3 Oct 2024 13:33:58 -0500 Subject: [PATCH 02/23] initial go on using hourly classifications for workflows --- app/queries/count_classifications.rb | 66 +++++++++++++++++++++++++--- db/schema.rb | 2 +- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/app/queries/count_classifications.rb b/app/queries/count_classifications.rb index 0e6a498..71db89b 100644 --- a/app/queries/count_classifications.rb +++ b/app/queries/count_classifications.rb @@ -13,7 +13,19 @@ def call(params={}) scoped = @counts scoped = filter_by_workflow_id(scoped, params[:workflow_id]) scoped = filter_by_project_id(scoped, params[:project_id]) - filter_by_date_range(scoped, params[:start_date], params[:end_date]) + if params[:workflow_id].present? 
+ if end_date_includes_today?(params[:end_date]) + scoped_upto_yesterday = filter_by_date_range(scoped, params[:start_date], Date.yesterday.to_s) + + puts scoped_upto_yesterday + scoped = append_to_scoped(scoped_upto_yesterday, params[:workflow_id], params[:period]) + else + scoped = filter_by_date_range(scoped, params[:start_date], params[:end_date]) + end + else + scoped = filter_by_date_range(scoped, params[:start_date], params[:end_date]) + end + return scoped end private @@ -22,12 +34,54 @@ def initial_scope(relation, period) relation.select(select_and_time_bucket_by(period, 'classification')).group('period').order('period') end - def end_date_includes_today?(params) - includes_today = true - if params[:end_date] - end_date = Date.parse(params[:end_date]) - includes_today = end_date >= Date.today + def append_to_scoped(scoped_upto_yesterday, workflow_id, period) + todays_classifications = current_date_workflow_classifications(workflow_id) + most_recent_period_from_scoped = scoped_upto_yesterday[-1].period&.to_date + most_recent_count = scoped_upto_yesterday[-1].count + case period + when 'day' + scoped_upto_yesterday + todays_classifications + when 'week' + if (Date.today - most_recent_period_from_scoped).to_i < 7 + add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications) + else + scoped_upto_yesterday + todays_classifications + end + when 'month' + if (Date.today.month == most_recent_period_from_scoped.month) + add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications) + else + scoped_upto_yesterday + todays_classifications + end + when 'year' + if (Date.today.year == most_recent_period_from_scoped.year) + add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications) + else + scoped_upto_yesterday + todays_classifications + end end + end + + def add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications) + current_period_counts = 
scoped_upto_yesterday[-1].count + todays_classifications[0].count + scoped_upto_yesterday[-1].count = current_period_counts + scoped_upto_yesterday + end + + def current_date_workflow_classifications(workflow_id) + current_day_str = Date.today.to_s + current_hourly_classifications = ClassificationCounts::HourlyWorkflowClassificationCount.select("time_bucket('1 day', hour) AS period, SUM(classification_count)::integer AS count").group('period').order('period').where("hour >= '#{current_day_str}'") + filter_by_workflow_id(current_hourly_classifications, workflow_id) + end + + # if period is day append today's result as a result + # if period is week/month or year first check if today is in the week or month or year + # if it is, add the count to the last count + # if it isn't add a new entry to result + + def end_date_includes_today?(end_date) + includes_today = true + includes_today = Date.parse(end_date) >= Date.today if end_date.present? return includes_today end diff --git a/db/schema.rb b/db/schema.rb index 612a347..4be333d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.0].define(version: 2024_03_28_183306) do +ActiveRecord::Schema[7.0].define(version: 2024_09_26_233924) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" enable_extension "timescaledb" From 73f8249b828346abccb8bd4e5f55eea08ff490f7 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Thu, 3 Oct 2024 13:53:10 -0500 Subject: [PATCH 03/23] remove print statement --- app/queries/count_classifications.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/app/queries/count_classifications.rb b/app/queries/count_classifications.rb index 71db89b..7e033f2 100644 --- a/app/queries/count_classifications.rb +++ b/app/queries/count_classifications.rb @@ -16,8 +16,6 @@ def call(params={}) if params[:workflow_id].present? 
if end_date_includes_today?(params[:end_date]) scoped_upto_yesterday = filter_by_date_range(scoped, params[:start_date], Date.yesterday.to_s) - - puts scoped_upto_yesterday scoped = append_to_scoped(scoped_upto_yesterday, params[:workflow_id], params[:period]) else scoped = filter_by_date_range(scoped, params[:start_date], params[:end_date]) From 77cb2629d17d3c6bf81ed1f3db2bc7d75fbb8cdd Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Thu, 3 Oct 2024 16:53:27 -0500 Subject: [PATCH 04/23] Update count_classifications.rb --- app/queries/count_classifications.rb | 46 ++++++++++++++-------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/app/queries/count_classifications.rb b/app/queries/count_classifications.rb index 7e033f2..948d41c 100644 --- a/app/queries/count_classifications.rb +++ b/app/queries/count_classifications.rb @@ -16,7 +16,7 @@ def call(params={}) if params[:workflow_id].present? if end_date_includes_today?(params[:end_date]) scoped_upto_yesterday = filter_by_date_range(scoped, params[:start_date], Date.yesterday.to_s) - scoped = append_to_scoped(scoped_upto_yesterday, params[:workflow_id], params[:period]) + scoped = include_today_to_scoped(scoped_upto_yesterday, params[:workflow_id], params[:period]) else scoped = filter_by_date_range(scoped, params[:start_date], params[:end_date]) end @@ -32,38 +32,38 @@ def initial_scope(relation, period) relation.select(select_and_time_bucket_by(period, 'classification')).group('period').order('period') end - def append_to_scoped(scoped_upto_yesterday, workflow_id, period) + def include_today_to_scoped(scoped_upto_yesterday, workflow_id, period) todays_classifications = current_date_workflow_classifications(workflow_id) - most_recent_period_from_scoped = scoped_upto_yesterday[-1].period&.to_date + most_recent_date_from_scoped = scoped_upto_yesterday[-1].period&.to_date most_recent_count = scoped_upto_yesterday[-1].count + if is_today_part_of_recent_period?(most_recent_date_from_scoped, 
period) + add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications) + else + append_today_to_scoped(scoped_upto_yesterday, todays_classifications) + end + end + + def is_today_part_of_recent_period?(most_recent_date, period) case period when 'day' - scoped_upto_yesterday + todays_classifications + false when 'week' - if (Date.today - most_recent_period_from_scoped).to_i < 7 - add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications) - else - scoped_upto_yesterday + todays_classifications - end + (Date.today - most_recent_date).to_i < 7 when 'month' - if (Date.today.month == most_recent_period_from_scoped.month) - add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications) - else - scoped_upto_yesterday + todays_classifications - end + Date.today.month == most_recent_date.month when 'year' - if (Date.today.year == most_recent_period_from_scoped.year) - add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications) - else - scoped_upto_yesterday + todays_classifications - end + Date.today.year == most_recent_date.year end end - def add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications) - current_period_counts = scoped_upto_yesterday[-1].count + todays_classifications[0].count - scoped_upto_yesterday[-1].count = current_period_counts - scoped_upto_yesterday + def append_today_to_scoped(count_records_up_to_yesterday, todays_count) + count_records_up_to_yesterday + todays_count + end + + def add_todays_counts_to_recent_period_counts(count_records_up_to_yesterday, todays_counts + current_period_counts = count_records_up_to_yesterday[-1].count + todays_count[0].count + count_records_up_to_yesterday[-1].count = current_period_counts + count_records_up_to_yesterday end def current_date_workflow_classifications(workflow_id) From 2e9ec6c0f51898d7b211b74ded90fcc7539fb9fe Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Thu, 3 
Oct 2024 16:54:24 -0500 Subject: [PATCH 05/23] Update count_classifications.rb --- app/queries/count_classifications.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/queries/count_classifications.rb b/app/queries/count_classifications.rb index 948d41c..64c23d5 100644 --- a/app/queries/count_classifications.rb +++ b/app/queries/count_classifications.rb @@ -60,7 +60,7 @@ def append_today_to_scoped(count_records_up_to_yesterday, todays_count) count_records_up_to_yesterday + todays_count end - def add_todays_counts_to_recent_period_counts(count_records_up_to_yesterday, todays_counts + def add_todays_counts_to_recent_period_counts(count_records_up_to_yesterday, todays_count) current_period_counts = count_records_up_to_yesterday[-1].count + todays_count[0].count count_records_up_to_yesterday[-1].count = current_period_counts count_records_up_to_yesterday From 435992afd00704bef939a0648e22d4b23bbd5d5b Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Sat, 5 Oct 2024 19:56:03 -0500 Subject: [PATCH 06/23] remove unused var --- app/queries/count_classifications.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/queries/count_classifications.rb b/app/queries/count_classifications.rb index 64c23d5..013261d 100644 --- a/app/queries/count_classifications.rb +++ b/app/queries/count_classifications.rb @@ -35,7 +35,6 @@ def initial_scope(relation, period) def include_today_to_scoped(scoped_upto_yesterday, workflow_id, period) todays_classifications = current_date_workflow_classifications(workflow_id) most_recent_date_from_scoped = scoped_upto_yesterday[-1].period&.to_date - most_recent_count = scoped_upto_yesterday[-1].count if is_today_part_of_recent_period?(most_recent_date_from_scoped, period) add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications) else @@ -46,11 +45,11 @@ def include_today_to_scoped(scoped_upto_yesterday, workflow_id, period) def is_today_part_of_recent_period?(most_recent_date, 
period) case period when 'day' - false + Date.today == most_recent_date when 'week' (Date.today - most_recent_date).to_i < 7 when 'month' - Date.today.month == most_recent_date.month + (Date.today.month == most_recent_date.month) && (Date.today.year == most_recent_date.year) when 'year' Date.today.year == most_recent_date.year end From 14850841cdebbfdd44a8d354fda976987abdd71d Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 13:59:29 -0500 Subject: [PATCH 07/23] taking care of blank case/ no entry found case --- app/queries/count_classifications.rb | 49 +++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/app/queries/count_classifications.rb b/app/queries/count_classifications.rb index 013261d..a19d04e 100644 --- a/app/queries/count_classifications.rb +++ b/app/queries/count_classifications.rb @@ -13,9 +13,22 @@ def call(params={}) scoped = @counts scoped = filter_by_workflow_id(scoped, params[:workflow_id]) scoped = filter_by_project_id(scoped, params[:project_id]) + # Because of how the FE, calls out to this endpoint when querying for a project's workflow's classifications count + # And because of our use of Real Time Aggregates + # Querying the DailyClassificationCountByWorkflow becomes not as performant + + # Because we are limited in resources, we do the following mitigaion for ONLY querying workflow classification counts: + # 1. Create a New HourlyClassificationCountByWorkflow which is RealTime and Create a Data Retention for this new aggregate (this should limit the amount of data the query planner has to sift through) + # 2. Turn off Real Time aggreation for the DailyClassificationCount + # 3. For workflow classification count queries that include the current date's counts, we query current date's counts via the HourlyClassificationCountByWorkflow and query the DailyClassificationCountByWorkflow for everything before the current date's + if params[:workflow_id].present? 
if end_date_includes_today?(params[:end_date]) scoped_upto_yesterday = filter_by_date_range(scoped, params[:start_date], Date.yesterday.to_s) + puts "MDY114 LAST" + puts scoped_upto_yesterday + puts scoped_upto_yesterday.blank? + # puts scoped_upto_yesterday.last.period scoped = include_today_to_scoped(scoped_upto_yesterday, params[:workflow_id], params[:period]) else scoped = filter_by_date_range(scoped, params[:start_date], params[:end_date]) @@ -34,7 +47,26 @@ def initial_scope(relation, period) def include_today_to_scoped(scoped_upto_yesterday, workflow_id, period) todays_classifications = current_date_workflow_classifications(workflow_id) - most_recent_date_from_scoped = scoped_upto_yesterday[-1].period&.to_date + puts "MDY114 TDOAYS CLASSIFICATIONS" + puts todays_classifications.blank? + # todays_classifications + return scoped_upto_yesterday if todays_classifications.blank? + + if scoped_upto_yesterday.blank? + puts todays_classifications + puts "mdy114 hits here" + # append new entry where period is start of the week + puts start_of_current_period(period) + todays_classifications[0].period = start_of_current_period(period) + return todays_classifications + end + + most_recent_date_from_scoped = scoped_upto_yesterday[-1].period.to_date + + # If period=week, month, or year, the current date could be part of that week, month or year; + # we check if the current date is part of the period + # if so, we add the count to the most recent period pulled from db + # if not, we append as a new entry for the current period if is_today_part_of_recent_period?(most_recent_date_from_scoped, period) add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications) else @@ -42,6 +74,21 @@ def include_today_to_scoped(scoped_upto_yesterday, workflow_id, period) end end + def start_of_current_period(period) + today = Date.today + case period + when 'day' + today + when 'week' + # Returns Monday of current week + today.at_beginning_of_week + when 'month' + 
today.at_beginning_of_month + when 'year' + today.at_beginning_of_year + end + end + def is_today_part_of_recent_period?(most_recent_date, period) case period when 'day' From b4eb0b4edf592abee1c3adc3773714ef533580cd Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 14:22:10 -0500 Subject: [PATCH 08/23] remove logs --- app/queries/count_classifications.rb | 31 +++++++------------ ...assification_count_to_materialized_only.rb | 8 ++++- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/app/queries/count_classifications.rb b/app/queries/count_classifications.rb index a19d04e..ea8ed1e 100644 --- a/app/queries/count_classifications.rb +++ b/app/queries/count_classifications.rb @@ -25,10 +25,6 @@ def call(params={}) if params[:workflow_id].present? if end_date_includes_today?(params[:end_date]) scoped_upto_yesterday = filter_by_date_range(scoped, params[:start_date], Date.yesterday.to_s) - puts "MDY114 LAST" - puts scoped_upto_yesterday - puts scoped_upto_yesterday.blank? - # puts scoped_upto_yesterday.last.period scoped = include_today_to_scoped(scoped_upto_yesterday, params[:workflow_id], params[:period]) else scoped = filter_by_date_range(scoped, params[:start_date], params[:end_date]) @@ -47,16 +43,10 @@ def initial_scope(relation, period) def include_today_to_scoped(scoped_upto_yesterday, workflow_id, period) todays_classifications = current_date_workflow_classifications(workflow_id) - puts "MDY114 TDOAYS CLASSIFICATIONS" - puts todays_classifications.blank? - # todays_classifications return scoped_upto_yesterday if todays_classifications.blank? if scoped_upto_yesterday.blank? 
- puts todays_classifications - puts "mdy114 hits here" # append new entry where period is start of the week - puts start_of_current_period(period) todays_classifications[0].period = start_of_current_period(period) return todays_classifications end @@ -90,16 +80,17 @@ def start_of_current_period(period) end def is_today_part_of_recent_period?(most_recent_date, period) - case period - when 'day' - Date.today == most_recent_date - when 'week' - (Date.today - most_recent_date).to_i < 7 - when 'month' - (Date.today.month == most_recent_date.month) && (Date.today.year == most_recent_date.year) - when 'year' - Date.today.year == most_recent_date.year - end + most_recent_date == start_of_current_period(period) + # case period + # when 'day' + # Date.today == most_recent_date + # when 'week' + # (Date.today - most_recent_date).to_i < 7 + # when 'month' + # (Date.today.month == most_recent_date.month) && (Date.today.year == most_recent_date.year) + # when 'year' + # Date.today.year == most_recent_date.year + # end end def append_today_to_scoped(count_records_up_to_yesterday, todays_count) diff --git a/db/migrate/20240926233924_alter_daily_workflow_classification_count_to_materialized_only.rb b/db/migrate/20240926233924_alter_daily_workflow_classification_count_to_materialized_only.rb index 468bf1e..660c042 100644 --- a/db/migrate/20240926233924_alter_daily_workflow_classification_count_to_materialized_only.rb +++ b/db/migrate/20240926233924_alter_daily_workflow_classification_count_to_materialized_only.rb @@ -1,8 +1,14 @@ class AlterDailyWorkflowClassificationCountToMaterializedOnly < ActiveRecord::Migration[7.0] disable_ddl_transaction! 
- def change + def up execute <<~SQL ALTER MATERIALIZED VIEW daily_classification_count_per_workflow set (timescaledb.materialized_only = true); SQL end + + def down + execute <<~SQL + ALTER MATERIALIZED VIEW daily_classification_count_per_workflow set (timescaledb.materialized_only = false); + SQL + end end From 3d0831c4cae68ba49f3258ff89776af01da85f49 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 17:13:05 -0500 Subject: [PATCH 09/23] add frames for test --- app/queries/count_classifications.rb | 16 -------- spec/queries/count_classifications_spec.rb | 44 ++++++++++++++++++++++ 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/app/queries/count_classifications.rb b/app/queries/count_classifications.rb index ea8ed1e..e1299bb 100644 --- a/app/queries/count_classifications.rb +++ b/app/queries/count_classifications.rb @@ -16,7 +16,6 @@ def call(params={}) # Because of how the FE, calls out to this endpoint when querying for a project's workflow's classifications count # And because of our use of Real Time Aggregates # Querying the DailyClassificationCountByWorkflow becomes not as performant - # Because we are limited in resources, we do the following mitigaion for ONLY querying workflow classification counts: # 1. Create a New HourlyClassificationCountByWorkflow which is RealTime and Create a Data Retention for this new aggregate (this should limit the amount of data the query planner has to sift through) # 2. 
Turn off Real Time aggreation for the DailyClassificationCount @@ -81,16 +80,6 @@ def start_of_current_period(period) def is_today_part_of_recent_period?(most_recent_date, period) most_recent_date == start_of_current_period(period) - # case period - # when 'day' - # Date.today == most_recent_date - # when 'week' - # (Date.today - most_recent_date).to_i < 7 - # when 'month' - # (Date.today.month == most_recent_date.month) && (Date.today.year == most_recent_date.year) - # when 'year' - # Date.today.year == most_recent_date.year - # end end def append_today_to_scoped(count_records_up_to_yesterday, todays_count) @@ -109,11 +98,6 @@ def current_date_workflow_classifications(workflow_id) filter_by_workflow_id(current_hourly_classifications, workflow_id) end - # if period is day append today's result as a result - # if period is week/month or year first check if today is in the week or month or year - # if it is, add the count to the last count - # if it isn't add a new entry to result - def end_date_includes_today?(end_date) includes_today = true includes_today = Date.parse(end_date) >= Date.today if end_date.present? 
diff --git a/spec/queries/count_classifications_spec.rb b/spec/queries/count_classifications_spec.rb index 14c8c1f..b97ea63 100644 --- a/spec/queries/count_classifications_spec.rb +++ b/spec/queries/count_classifications_spec.rb @@ -91,5 +91,49 @@ expect(counts.length).to eq(1) expect(counts[0].count).to eq(1) end + + context 'when params[:workflow_id] present' do + context 'when params[:end_date] is before current date' do + it 'returns counts of events of given date range from DailyWorkflowCounts' do + + end + end + + context 'when params[:end_date] includes current date' do + context 'when no classification count up to previous day' do + context 'when no classifications for current day' do + it 'returns from DailyWorkflowCount' do + end + end + + context 'when there are classifications for current day' do + it 'returns from HourlyWorkflowCount' do + end + + it 'returns proper start of period' do + end + end + end + + context 'when there are classifications up to previous day' do + context 'when there are no classifications for current day' do + it 'returns from DailyWorkflowCount (scoped up to yesterday)' do + end + end + + context 'when there are classifications for current day' do + context 'when current day is part of the most recently pulled period' do + it 'adds the most recent period to the most recently pulled period counts' do + end + end + + context 'when current day is not part of the most recently pulled period' do + it 'appends a new entry to scoped from HourlyWorkflowCount query' do + end + end + end + end + end + end end end From 4168da8fac62969a0220fc8a2e34307423733b70 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 18:33:36 -0500 Subject: [PATCH 10/23] adding testing for cases when end_date is before and after current day --- spec/queries/count_classifications_spec.rb | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/spec/queries/count_classifications_spec.rb 
b/spec/queries/count_classifications_spec.rb index b97ea63..4c0cee0 100644 --- a/spec/queries/count_classifications_spec.rb +++ b/spec/queries/count_classifications_spec.rb @@ -94,15 +94,27 @@ context 'when params[:workflow_id] present' do context 'when params[:end_date] is before current date' do - it 'returns counts of events of given date range from DailyWorkflowCounts' do - + it 'returns counts from DailyWorkflowClassificationCount'do + yesterday = Date.today - 1 + params[:workflow_id] = diff_time_event.workflow_id.to_s + params[:end_date] = yesterday.to_s + counts = count_classifications.call(params) + expect(counts.model).to be(ClassificationCounts::DailyWorkflowClassificationCount) + expect(counts.length).to eq(1) + expect(counts[0].count).to eq(1) end end context 'when params[:end_date] includes current date' do context 'when no classification count up to previous day' do context 'when no classifications for current day' do - it 'returns from DailyWorkflowCount' do + it 'returns from DailyWorkflowClassificationCount' do + # Select a workflow id that has no classification + params[:workflow_id] = '100' + params[:end_date] = Date.today.to_s + counts = count_classifications.call(params) + expect(counts.model).to be(ClassificationCounts::DailyWorkflowClassificationCount) + expect(counts.length).to eq(0) end end From 77a14c316ab4a8ba0de3cc218bea708887f27952 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 21:42:01 -0500 Subject: [PATCH 11/23] add tests for testing period and change period format to match that of data pull --- app/queries/count_classifications.rb | 2 +- spec/queries/count_classifications_spec.rb | 48 +++++++++++++++------- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/app/queries/count_classifications.rb b/app/queries/count_classifications.rb index e1299bb..3558f51 100644 --- a/app/queries/count_classifications.rb +++ b/app/queries/count_classifications.rb @@ -46,7 +46,7 @@ def 
include_today_to_scoped(scoped_upto_yesterday, workflow_id, period) if scoped_upto_yesterday.blank? # append new entry where period is start of the week - todays_classifications[0].period = start_of_current_period(period) + todays_classifications[0].period = start_of_current_period(period)&.to_time&.utc return todays_classifications end diff --git a/spec/queries/count_classifications_spec.rb b/spec/queries/count_classifications_spec.rb index 4c0cee0..1af7ef7 100644 --- a/spec/queries/count_classifications_spec.rb +++ b/spec/queries/count_classifications_spec.rb @@ -22,15 +22,14 @@ end describe 'select_and_time_bucket_by' do + let(:counts) { count_classifications.call(params) } it 'buckets counts by year by default' do - counts = count_classifications.call(params) expected_select_query = "SELECT time_bucket('1 year', day) AS period, SUM(classification_count)::integer AS count FROM \"daily_classification_count\" GROUP BY period ORDER BY period" expect(counts.to_sql).to eq(expected_select_query) end it 'buckets counts by given period' do params[:period] = 'week' - counts = count_classifications.call(params) expected_select_query = "SELECT time_bucket('1 week', day) AS period, SUM(classification_count)::integer AS count FROM \"daily_classification_count\" GROUP BY period ORDER BY period" expect(counts.to_sql).to eq(expected_select_query) end @@ -41,13 +40,13 @@ let!(:diff_workflow_event) { create(:classification_with_diff_workflow) } let!(:diff_project_event) { create(:classification_with_diff_project) } let!(:diff_time_event) { create(:classification_created_yesterday) } + let(:counts) { count_classifications.call(params) } it_behaves_like 'is filterable by workflow' it_behaves_like 'is filterable by project' it_behaves_like 'is filterable by date range' it 'returns counts of all events when no params given' do - counts = count_classifications.call(params) # because default is bucket by year and all data created in the same year, we expect counts to look something 
like # [] current_year = Date.today.year @@ -58,7 +57,6 @@ it 'returns counts bucketed by given period' do params[:period] = 'day' - counts = count_classifications.call(params) expect(counts.length).to eq(2) expect(counts[0].count).to eq(1) expect(counts[0].period).to eq((Date.today - 1).to_s) @@ -69,7 +67,6 @@ it 'returns counts of events with given workflow' do workflow_id = diff_workflow_event.workflow_id params[:workflow_id] = workflow_id.to_s - counts = count_classifications.call(params) expect(counts.length).to eq(1) expect(counts[0].count).to eq(1) end @@ -77,7 +74,6 @@ it 'returns counts of events with given project' do project_id = diff_project_event.project_id params[:project_id] = project_id.to_s - counts = count_classifications.call(params) expect(counts.length).to eq(1) expect(counts[0].count).to eq(1) end @@ -87,7 +83,6 @@ yesterday = Date.today - 1 params[:start_date] = last_week.to_s params[:end_date] = yesterday.to_s - counts = count_classifications.call(params) expect(counts.length).to eq(1) expect(counts[0].count).to eq(1) end @@ -98,7 +93,6 @@ yesterday = Date.today - 1 params[:workflow_id] = diff_time_event.workflow_id.to_s params[:end_date] = yesterday.to_s - counts = count_classifications.call(params) expect(counts.model).to be(ClassificationCounts::DailyWorkflowClassificationCount) expect(counts.length).to eq(1) expect(counts[0].count).to eq(1) @@ -106,23 +100,49 @@ end context 'when params[:end_date] includes current date' do - context 'when no classification count up to previous day' do - context 'when no classifications for current day' do + before do + params[:end_date] = Date.today.to_s + end + + context 'when 0 classifications up to previous day' do + context 'when 0 classifications for current day' do it 'returns from DailyWorkflowClassificationCount' do # Select a workflow id that has no classification params[:workflow_id] = '100' - params[:end_date] = Date.today.to_s - counts = count_classifications.call(params) 
expect(counts.model).to be(ClassificationCounts::DailyWorkflowClassificationCount) expect(counts.length).to eq(0) end end context 'when there are classifications for current day' do - it 'returns from HourlyWorkflowCount' do + before do + params[:workflow_id] = diff_workflow_event.workflow_id.to_s + end + + it "returns today's classifications from HourlyWorkflowClassificationCount" do + expect(counts.model).to be(ClassificationCounts::HourlyWorkflowClassificationCount) + expect(counts.length).to eq(1) + expect(counts[0].count).to eq(1) + end + + it 'returns current date when period is day' do + params[:period] = 'day' + expect(counts[0].period).to eq(Date.today.to_time.utc) + end + + it 'returns start of week when period is week' do + params[:period] = 'week' + expect(counts[0].period).to eq(Date.today.at_beginning_of_week.to_time.utc) + end + + it 'returns start of month when period is month' do + params[:period] = 'month' + expect(counts[0].period).to eq(Date.today.at_beginning_of_month.to_time.utc) end - it 'returns proper start of period' do + it 'returns start of year when period is year' do + params[:period] = 'year' + expect(counts[0].period).to eq(Date.today.at_beginning_of_year.to_time.utc) end end end From b1d4a66c5539ba3ce1a5d93c77bbee254e7e8abc Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 23:04:52 -0500 Subject: [PATCH 12/23] adding tests for the case when there are classifications from previous day --- app/queries/count_classifications.rb | 6 ++++-- spec/queries/count_classifications_spec.rb | 23 +++++++++++++++++++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/app/queries/count_classifications.rb b/app/queries/count_classifications.rb index 3558f51..5fe562b 100644 --- a/app/queries/count_classifications.rb +++ b/app/queries/count_classifications.rb @@ -41,12 +41,13 @@ def initial_scope(relation, period) end def include_today_to_scoped(scoped_upto_yesterday, workflow_id, period) + period = 'year' if period.nil? 
todays_classifications = current_date_workflow_classifications(workflow_id) return scoped_upto_yesterday if todays_classifications.blank? if scoped_upto_yesterday.blank? - # append new entry where period is start of the week - todays_classifications[0].period = start_of_current_period(period)&.to_time&.utc + # append new entry where period is start of the period + todays_classifications[0].period = start_of_current_period(period).to_time.utc return todays_classifications end @@ -59,6 +60,7 @@ def include_today_to_scoped(scoped_upto_yesterday, workflow_id, period) if is_today_part_of_recent_period?(most_recent_date_from_scoped, period) add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications) else + todays_classifications[0].period = start_of_current_period(period).to_time.utc append_today_to_scoped(scoped_upto_yesterday, todays_classifications) end end diff --git a/spec/queries/count_classifications_spec.rb b/spec/queries/count_classifications_spec.rb index 1af7ef7..9023062 100644 --- a/spec/queries/count_classifications_spec.rb +++ b/spec/queries/count_classifications_spec.rb @@ -148,19 +148,40 @@ end context 'when there are classifications up to previous day' do - context 'when there are no classifications for current day' do + context 'when there are 0 classifications for current day' do + let!(:classification_created_yesterday_diff_workflow) { create(:classification_created_yesterday, workflow_id: 4, classification_id: 100)} it 'returns from DailyWorkflowCount (scoped up to yesterday)' do + params[:workflow_id] = classification_created_yesterday_diff_workflow.workflow_id.to_s + expect(counts.model).to be(ClassificationCounts::DailyWorkflowClassificationCount) + expect(counts.length).to eq(1) + expect(counts[0].count).to eq(1) end end context 'when there are classifications for current day' do + before do + allow(Date).to receive(:today).and_return Date.new(2022,10,21) + params[:workflow_id] = diff_workflow_event.workflow_id.to_s + 
params[:period] = 'year' + end + context 'when current day is part of the most recently pulled period' do it 'adds the most recent period to the most recently pulled period counts' do + create(:classification_with_diff_workflow, classification_id: 1000, event_time: Date.new(2022,01,02)) + expect(counts.length).to eq(1) + expect(counts[0].count).to eq(2) + expect(counts[0].period).to eq(Date.today.at_beginning_of_year) end end context 'when current day is not part of the most recently pulled period' do it 'appends a new entry to scoped from HourlyWorkflowCount query' do + create(:classification_with_diff_workflow, classification_id: 1000, event_time: Date.new(2021,01,02)) + expect(counts.length).to eq(2) + counts.each { |c| expect(c.count).to eq(1) } + expect(counts[0].class).to be(ClassificationCounts::DailyWorkflowClassificationCount) + expect(counts[1].class).to be(ClassificationCounts::HourlyWorkflowClassificationCount) + expect(counts.last.period).to eq(Date.today.at_beginning_of_year) end end end From d9d5e39d2d405389263f9566b2914a2a3a89034b Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 23:06:53 -0500 Subject: [PATCH 13/23] update comment on migration --- ...0240926225916_create_hourly_workflow_classification_count.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb b/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb index e312c5e..ef0c334 100644 --- a/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb +++ b/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb @@ -6,7 +6,7 @@ class CreateHourlyWorkflowClassificationCount < ActiveRecord::Migration[7.0] # We plan to do the following: # - Update the daily_classification_count_per_workflow to be materialized only (i.e. non-realtime) # - Create a subsequent realtime cagg that buckets hourly that we will create data retention policies for. 
The plan is for up to 72 hours worth of hourly workflow classification counts of data. - # - Update workflow query to first query the daily counts first and the query the hourly counts for just the specific date of NOw. + # - Update workflow query to first query the daily counts first and the query the hourly counts for just the specific date of now. disable_ddl_transaction! def change execute <<~SQL From 4ae1eeecbdb5bfbb630c935f586f263889d5b672 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 23:27:06 -0500 Subject: [PATCH 14/23] update hound comments --- app/queries/count_classifications.rb | 4 ++-- spec/queries/count_classifications_spec.rb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/app/queries/count_classifications.rb b/app/queries/count_classifications.rb index 5fe562b..b0e0f5b 100644 --- a/app/queries/count_classifications.rb +++ b/app/queries/count_classifications.rb @@ -57,7 +57,7 @@ def include_today_to_scoped(scoped_upto_yesterday, workflow_id, period) # we check if the current date is part of the period # if so, we add the count to the most recent period pulled from db # if not, we append as a new entry for the current period - if is_today_part_of_recent_period?(most_recent_date_from_scoped, period) + if today_part_of_recent_period?(most_recent_date_from_scoped, period) add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications) else todays_classifications[0].period = start_of_current_period(period).to_time.utc @@ -80,7 +80,7 @@ def start_of_current_period(period) end end - def is_today_part_of_recent_period?(most_recent_date, period) + def today_part_of_recent_period?(most_recent_date, period) most_recent_date == start_of_current_period(period) end diff --git a/spec/queries/count_classifications_spec.rb b/spec/queries/count_classifications_spec.rb index 9023062..bdeba0b 100644 --- a/spec/queries/count_classifications_spec.rb +++ b/spec/queries/count_classifications_spec.rb @@ 
-167,7 +167,7 @@ context 'when current day is part of the most recently pulled period' do it 'adds the most recent period to the most recently pulled period counts' do - create(:classification_with_diff_workflow, classification_id: 1000, event_time: Date.new(2022,01,02)) + create(:classification_with_diff_workflow, classification_id: 1000, event_time: Date.new(2022, 1, 2)) expect(counts.length).to eq(1) expect(counts[0].count).to eq(2) expect(counts[0].period).to eq(Date.today.at_beginning_of_year) @@ -176,7 +176,7 @@ context 'when current day is not part of the most recently pulled period' do it 'appends a new entry to scoped from HourlyWorkflowCount query' do - create(:classification_with_diff_workflow, classification_id: 1000, event_time: Date.new(2021,01,02)) + create(:classification_with_diff_workflow, classification_id: 1000, event_time: Date.new(2021, 1, 2)) expect(counts.length).to eq(2) counts.each { |c| expect(c.count).to eq(1) } expect(counts[0].class).to be(ClassificationCounts::DailyWorkflowClassificationCount) From 6ee4f6858476b7585ada004381edce19c82053fc Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 23:34:39 -0500 Subject: [PATCH 15/23] update db.rake with new caggs --- lib/tasks/db.rake | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/tasks/db.rake b/lib/tasks/db.rake index 472a835..0178155 100644 --- a/lib/tasks/db.rake +++ b/lib/tasks/db.rake @@ -35,7 +35,7 @@ namespace :db do ActiveRecord::Base.connection.execute <<-SQL CREATE MATERIALIZED VIEW IF NOT EXISTS daily_classification_count_per_workflow - WITH (timescaledb.continuous) AS + WITH (timescaledb.continuous, timescaledb.materialized_only) AS SELECT time_bucket('1 day', event_time) AS day, workflow_id, count(*) as classification_count @@ -183,6 +183,16 @@ namespace :db do FROM classification_user_groups WHERE user_group_id IS NOT NULL GROUP BY day, user_group_id, user_id, workflow_id; SQL + + ActiveRecord::Base.connection.execute <<-SQL 
+ CREATE MATERIALIZED VIEW IF NOT EXISTS hourly_classification_count_per_workflow + WITH (timescaledb.continuous) AS + SELECT time_bucket('1 hour', event_time) AS hour, + workflow_id, + count(*) as classification_count + FROM classification_events + GROUP BY hour, workflow_id; + SQL end desc 'Drop Continuous Aggregates Views' @@ -203,6 +213,7 @@ namespace :db do DROP MATERIALIZED VIEW IF EXISTS daily_group_classification_count_and_time_per_user CASCADE; DROP MATERIALIZED VIEW IF EXISTS daily_group_classification_count_and_time_per_user_per_project CASCADE; DROP MATERIALIZED VIEW IF EXISTS daily_group_classification_count_and_time_per_user_per_workflow CASCADE; + DROP MATERIALIZED VIEW IF EXISTS hourly_classification_count_per_workflow CASCADE; SQL end From 789b5553eac5bbb77aa9695dcd18ce214cb41819 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 23:37:40 -0500 Subject: [PATCH 16/23] Update hourly_workflow_classification_count.rb --- .../hourly_workflow_classification_count.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/app/models/classification_counts/hourly_workflow_classification_count.rb b/app/models/classification_counts/hourly_workflow_classification_count.rb index 53ec7e4..04cd787 100644 --- a/app/models/classification_counts/hourly_workflow_classification_count.rb +++ b/app/models/classification_counts/hourly_workflow_classification_count.rb @@ -1,12 +1,12 @@ # frozen_string_literal: true module ClassificationCounts - class HourlyWorkflowClassificationCount < ApplicationRecord - self.table_name = 'hourly_classification_count_per_workflow' - attribute :classification_count, :integer + class HourlyWorkflowClassificationCount < ApplicationRecord + self.table_name = 'hourly_classification_count_per_workflow' + attribute :classification_count, :integer - def readonly? - true - end + def readonly? 
+ true end end +end From db35f9bde02531f6198a836395df0c7f2e71c674 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 23:39:01 -0500 Subject: [PATCH 17/23] Update db.rake --- lib/tasks/db.rake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tasks/db.rake b/lib/tasks/db.rake index 0178155..8b0de7d 100644 --- a/lib/tasks/db.rake +++ b/lib/tasks/db.rake @@ -35,7 +35,7 @@ namespace :db do ActiveRecord::Base.connection.execute <<-SQL CREATE MATERIALIZED VIEW IF NOT EXISTS daily_classification_count_per_workflow - WITH (timescaledb.continuous, timescaledb.materialized_only) AS + WITH (timescaledb.continuous) AS SELECT time_bucket('1 day', event_time) AS day, workflow_id, count(*) as classification_count From 967974bc4af82e6e8185d51ddb2f68e7afebfef1 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 23:40:19 -0500 Subject: [PATCH 18/23] remove redundant returns --- app/queries/count_classifications.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/queries/count_classifications.rb b/app/queries/count_classifications.rb index b0e0f5b..d75c094 100644 --- a/app/queries/count_classifications.rb +++ b/app/queries/count_classifications.rb @@ -31,7 +31,7 @@ def call(params={}) else scoped = filter_by_date_range(scoped, params[:start_date], params[:end_date]) end - return scoped + scoped end private @@ -103,7 +103,7 @@ def current_date_workflow_classifications(workflow_id) def end_date_includes_today?(end_date) includes_today = true includes_today = Date.parse(end_date) >= Date.today if end_date.present? 
- return includes_today + includes_today end def relation(params) From 7ba19d4a23b6a8c44d2110faad3c71fa00260651 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 23:41:39 -0500 Subject: [PATCH 19/23] add frozen string literal true --- ...240926225916_create_hourly_workflow_classification_count.rb | 2 ++ ...40926231010_add_refresh_policy_for_hourly_workflow_count.rb | 3 +++ ...5_create_data_retention_policy_for_hourly_workflow_count.rb | 2 ++ ...daily_workflow_classification_count_to_materialized_only.rb | 2 ++ 4 files changed, 9 insertions(+) diff --git a/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb b/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb index ef0c334..e4be895 100644 --- a/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb +++ b/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + class CreateHourlyWorkflowClassificationCount < ActiveRecord::Migration[7.0] # we have to disable the migration transaction because creating materialized views within it is not allowed. diff --git a/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb b/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb index 3960e70..e273a34 100644 --- a/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb +++ b/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb @@ -1,3 +1,6 @@ +# frozen_string_literal: true + + class AddRefreshPolicyForHourlyWorkflowCount < ActiveRecord::Migration[7.0] disable_ddl_transaction!
def change diff --git a/db/migrate/20240926231325_create_data_retention_policy_for_hourly_workflow_count.rb b/db/migrate/20240926231325_create_data_retention_policy_for_hourly_workflow_count.rb index 1f26b0b..23c7aca 100644 --- a/db/migrate/20240926231325_create_data_retention_policy_for_hourly_workflow_count.rb +++ b/db/migrate/20240926231325_create_data_retention_policy_for_hourly_workflow_count.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + class CreateDataRetentionPolicyForHourlyWorkflowCount < ActiveRecord::Migration[7.0] disable_ddl_transaction! def change diff --git a/db/migrate/20240926233924_alter_daily_workflow_classification_count_to_materialized_only.rb b/db/migrate/20240926233924_alter_daily_workflow_classification_count_to_materialized_only.rb index 660c042..9418a27 100644 --- a/db/migrate/20240926233924_alter_daily_workflow_classification_count_to_materialized_only.rb +++ b/db/migrate/20240926233924_alter_daily_workflow_classification_count_to_materialized_only.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + class AlterDailyWorkflowClassificationCountToMaterializedOnly < ActiveRecord::Migration[7.0] disable_ddl_transaction! 
def up From b751b38dcedb043ee84423942fcaadff3ace2774 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 7 Oct 2024 23:44:12 -0500 Subject: [PATCH 20/23] rubocop fix hound --- ...26231010_add_refresh_policy_for_hourly_workflow_count.rb | 1 - spec/queries/count_classifications_spec.rb | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb b/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb index e273a34..ec4364e 100644 --- a/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb +++ b/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true - class AddRefreshPolicyForHourlyWorkflowCount < ActiveRecord::Migration[7.0] disable_ddl_transaction! def change diff --git a/spec/queries/count_classifications_spec.rb b/spec/queries/count_classifications_spec.rb index bdeba0b..67d1e24 100644 --- a/spec/queries/count_classifications_spec.rb +++ b/spec/queries/count_classifications_spec.rb @@ -89,7 +89,7 @@ context 'when params[:workflow_id] present' do context 'when params[:end_date] is before current date' do - it 'returns counts from DailyWorkflowClassificationCount'do + it 'returns counts from DailyWorkflowClassificationCount' do yesterday = Date.today - 1 params[:workflow_id] = diff_time_event.workflow_id.to_s params[:end_date] = yesterday.to_s @@ -149,7 +149,7 @@ context 'when there are classifications up to previous day' do context 'when there are 0 classifications for current day' do - let!(:classification_created_yesterday_diff_workflow) { create(:classification_created_yesterday, workflow_id: 4, classification_id: 100)} + let!(:classification_created_yesterday_diff_workflow) { create(:classification_created_yesterday, workflow_id: 4, classification_id: 100) } it 'returns from DailyWorkflowCount (scoped up to yesterday)' do params[:workflow_id] = 
classification_created_yesterday_diff_workflow.workflow_id.to_s expect(counts.model).to be(ClassificationCounts::DailyWorkflowClassificationCount) @@ -160,7 +160,7 @@ context 'when there are classifications for current day' do before do - allow(Date).to receive(:today).and_return Date.new(2022,10,21) + allow(Date).to receive(:today).and_return Date.new(2022, 10, 21) params[:workflow_id] = diff_workflow_event.workflow_id.to_s params[:period] = 'year' end From d932fdf482cafa87b9d985a170ea15af90628fda Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Tue, 8 Oct 2024 00:07:52 -0500 Subject: [PATCH 21/23] adding comment on spec --- spec/queries/count_classifications_spec.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/queries/count_classifications_spec.rb b/spec/queries/count_classifications_spec.rb index 67d1e24..be04b2c 100644 --- a/spec/queries/count_classifications_spec.rb +++ b/spec/queries/count_classifications_spec.rb @@ -169,6 +169,7 @@ it 'adds the most recent period to the most recently pulled period counts' do create(:classification_with_diff_workflow, classification_id: 1000, event_time: Date.new(2022, 1, 2)) expect(counts.length).to eq(1) + # the 2 classifications counted is the one created in L170 as well as diff_workflow_event classification. 
expect(counts[0].count).to eq(2) expect(counts[0].period).to eq(Date.today.at_beginning_of_year) end From 862dd9b0cbbd1f48dfe15a0283a8ec9df465942f Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Tue, 8 Oct 2024 00:08:48 -0500 Subject: [PATCH 22/23] rename spec to note adding counts --- spec/queries/count_classifications_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/queries/count_classifications_spec.rb b/spec/queries/count_classifications_spec.rb index be04b2c..f8a12fe 100644 --- a/spec/queries/count_classifications_spec.rb +++ b/spec/queries/count_classifications_spec.rb @@ -166,7 +166,7 @@ end context 'when current day is part of the most recently pulled period' do - it 'adds the most recent period to the most recently pulled period counts' do + it 'adds the current day counts to the most recently pulled period counts' do create(:classification_with_diff_workflow, classification_id: 1000, event_time: Date.new(2022, 1, 2)) expect(counts.length).to eq(1) # the 2 classifications counted is the one created in L170 as well as diff_workflow_event classification. 
From 3bc6875cbf649130a81c124e7920f606e3dc846c Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Tue, 8 Oct 2024 12:24:12 -0500 Subject: [PATCH 23/23] update migrations to be reversible --- ...6225916_create_hourly_workflow_classification_count.rb | 8 +++++++- ...231010_add_refresh_policy_for_hourly_workflow_count.rb | 8 +++++++- ...ate_data_retention_policy_for_hourly_workflow_count.rb | 8 +++++++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb b/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb index e4be895..24f9bf0 100644 --- a/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb +++ b/db/migrate/20240926225916_create_hourly_workflow_classification_count.rb @@ -10,7 +10,7 @@ class CreateHourlyWorkflowClassificationCount < ActiveRecord::Migration[7.0] # - Create a subsequent realtime cagg that buckets hourly that we will create data retention policies for. The plan is for up to 72 hours worth of hourly workflow classification counts of data. # - Update workflow query to first query the daily counts first and the query the hourly counts for just the specific date of now. disable_ddl_transaction! 
- def change + def up execute <<~SQL create materialized view hourly_classification_count_per_workflow with ( @@ -24,4 +24,10 @@ def change group by hour, workflow_id; SQL end + + def down + execute <<~SQL + DROP materialized view hourly_classification_count_per_workflow; + SQL + end end diff --git a/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb b/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb index ec4364e..2996462 100644 --- a/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb +++ b/db/migrate/20240926231010_add_refresh_policy_for_hourly_workflow_count.rb @@ -2,9 +2,15 @@ class AddRefreshPolicyForHourlyWorkflowCount < ActiveRecord::Migration[7.0] disable_ddl_transaction! - def change + def up execute <<~SQL SELECT add_continuous_aggregate_policy('hourly_classification_count_per_workflow',start_offset => INTERVAL '5 days', end_offset => INTERVAL '30 minutes', schedule_interval => INTERVAL '1 h'); SQL end + + def down + execute <<~SQL + SELECT remove_continuous_aggregate_policy('hourly_classification_count_per_workflow'); + SQL + end end diff --git a/db/migrate/20240926231325_create_data_retention_policy_for_hourly_workflow_count.rb b/db/migrate/20240926231325_create_data_retention_policy_for_hourly_workflow_count.rb index 23c7aca..592ff11 100644 --- a/db/migrate/20240926231325_create_data_retention_policy_for_hourly_workflow_count.rb +++ b/db/migrate/20240926231325_create_data_retention_policy_for_hourly_workflow_count.rb @@ -2,9 +2,15 @@ class CreateDataRetentionPolicyForHourlyWorkflowCount < ActiveRecord::Migration[7.0] disable_ddl_transaction! - def change + def up execute <<~SQL SELECT add_retention_policy('hourly_classification_count_per_workflow', drop_after => INTERVAL '3 days'); SQL end + + def down + execute <<~SQL + SELECT remove_retention_policy('hourly_classification_count_per_workflow'); + SQL + end end