diff --git a/posthog/management/commands/send_usage_report.py b/posthog/management/commands/send_usage_report.py index 4c67d451c2a8a..03e4b4a102da4 100644 --- a/posthog/management/commands/send_usage_report.py +++ b/posthog/management/commands/send_usage_report.py @@ -1,5 +1,3 @@ -import pprint - from django.core.management.base import BaseCommand from posthog.tasks.usage_report import send_all_org_usage_reports @@ -10,7 +8,6 @@ class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument("--dry-run", type=bool, help="Print information instead of sending it") - parser.add_argument("--print-reports", type=bool, help="Print the reports in full") parser.add_argument("--date", type=str, help="The date to be ran in format YYYY-MM-DD") parser.add_argument("--event-name", type=str, help="Override the event name to be sent - for testing") parser.add_argument( @@ -28,20 +25,14 @@ def handle(self, *args, **options): run_async = options["async"] if run_async: - results = send_all_org_usage_reports.delay( + send_all_org_usage_reports.delay( dry_run, date, event_name, skip_capture_event=skip_capture_event, only_organization_id=organization_id ) else: - results = send_all_org_usage_reports( + send_all_org_usage_reports( dry_run, date, event_name, skip_capture_event=skip_capture_event, only_organization_id=organization_id ) - if options["print_reports"]: - print("") # noqa T201 - pprint.pprint(results) # noqa T203 - print("") # noqa T201 if dry_run: print("Dry run so not sent.") # noqa T201 - else: - print(f"{len(results)} Reports sent!") # noqa T201 print("Done!") # noqa T201 diff --git a/posthog/tasks/test/test_usage_report.py b/posthog/tasks/test/test_usage_report.py index e43c7ddb817fc..636b3e76b93e9 100644 --- a/posthog/tasks/test/test_usage_report.py +++ b/posthog/tasks/test/test_usage_report.py @@ -20,6 +20,7 @@ from posthog.hogql.query import execute_hogql_query from posthog.models import Organization, Plugin, Team from posthog.models.dashboard import Dashboard +from posthog.models.event.util import create_event from posthog.models.feature_flag import FeatureFlag from posthog.models.group.util import create_group from posthog.models.group_type_mapping import GroupTypeMapping @@ -27,7 +28,16 @@ from posthog.models.sharing_configuration import SharingConfiguration from posthog.schema import EventsQuery from posthog.session_recordings.test.test_factory import create_snapshot -from posthog.tasks.usage_report import capture_event, send_all_org_usage_reports +from posthog.tasks.usage_report import ( + _get_all_org_reports, + _get_all_usage_data_as_team_rows, + _get_full_org_usage_report, + _get_full_org_usage_report_as_dict, + _get_team_report, + capture_event, + get_instance_metadata, + send_all_org_usage_reports, +) from posthog.test.base import ( APIBaseTest, ClickhouseDestroyTablesMixin, @@ -37,8 +47,7 @@ flush_persons_and_events, snapshot_clickhouse_queries, ) -from posthog.models.event.util import create_event -from posthog.utils import get_machine_id +from posthog.utils import get_machine_id, get_previous_day logger = structlog.get_logger(__name__) @@ -296,16 +305,20 @@ def _test_usage_report(self) -> List[dict]: self._create_plugin("Installed but not enabled", False) self._create_plugin("Installed and enabled", True) - all_reports = send_all_org_usage_reports(dry_run=False) + period = get_previous_day() + period_start, period_end = period + all_reports = _get_all_org_reports(period_start, period_end) + report = _get_full_org_usage_report_as_dict( + 
_get_full_org_usage_report(all_reports[str(self.organization.id)], get_instance_metadata(period)) + ) - report = all_reports[0] assert report["table_sizes"] assert report["table_sizes"]["posthog_event"] < 10**7 # <10MB assert report["table_sizes"]["posthog_sessionrecordingevent"] < 10**7 # <10MB assert len(all_reports) == 2 - expectation = [ + expectations = [ { "deployment_infrastructure": "tests", "realm": "hosted-clickhouse", @@ -316,12 +329,12 @@ def _test_usage_report(self) -> List[dict]: "site_url": "http://test.posthog.com", "product": "open source", "helm": {}, - "clickhouse_version": all_reports[0]["clickhouse_version"], + "clickhouse_version": report["clickhouse_version"], "users_who_logged_in": [], "users_who_logged_in_count": 0, "users_who_signed_up": [], "users_who_signed_up_count": 0, - "table_sizes": all_reports[0]["table_sizes"], + "table_sizes": report["table_sizes"], "plugins_installed": {"Installed and enabled": 1, "Installed but not enabled": 1}, "plugins_enabled": {"Installed and enabled": 1}, "instance_tag": "none", @@ -441,12 +454,12 @@ def _test_usage_report(self) -> List[dict]: "site_url": "http://test.posthog.com", "product": "open source", "helm": {}, - "clickhouse_version": all_reports[1]["clickhouse_version"], + "clickhouse_version": report["clickhouse_version"], "users_who_logged_in": [], "users_who_logged_in_count": 0, "users_who_signed_up": [], "users_who_signed_up_count": 0, - "table_sizes": all_reports[1]["table_sizes"], + "table_sizes": report["table_sizes"], "plugins_installed": {"Installed and enabled": 1, "Installed but not enabled": 1}, "plugins_enabled": {"Installed and enabled": 1}, "instance_tag": "none", @@ -525,18 +538,22 @@ def _test_usage_report(self) -> List[dict]: }, ] - for item in expectation: + for item in expectations: item.update(**self.expected_properties) # tricky: list could be in different order assert len(all_reports) == 2 - for report in all_reports: - if report["organization_id"] == expectation[0]["organization_id"]: - assert report == expectation[0] - elif report["organization_id"] == expectation[1]["organization_id"]: - assert report == expectation[1] + full_reports = [] + for expectation in expectations: + report = _get_full_org_usage_report_as_dict( + _get_full_org_usage_report( + all_reports[expectation["organization_id"]], get_instance_metadata(period) + ) + ) + assert report == expectation + full_reports.append(report) - return all_reports + return full_reports @freeze_time("2022-01-10T00:01:00Z") @patch("os.environ", {"DEPLOYMENT": "tests"}) @@ -552,6 +569,8 @@ def test_unlicensed_usage_report(self, mock_post: MagicMock, mock_client: MagicM mock_client.return_value = mock_posthog all_reports = self._test_usage_report() + with self.settings(SITE_URL="http://test.posthog.com"): + send_all_org_usage_reports() # Check calls to other services mock_post.assert_not_called() @@ -597,20 +616,21 @@ def test_usage_report_hogql_queries(self) -> None: run_events_query(query=EventsQuery(select=["event"], limit=50), team=self.team) sync_execute("SYSTEM FLUSH LOGS") - all_reports = send_all_org_usage_reports(dry_run=False, at=str(now() + relativedelta(days=1))) - assert len(all_reports) == 1 + period = get_previous_day(at=now() + relativedelta(days=1)) + period_start, period_end = period + all_reports = _get_all_usage_data_as_team_rows(period_start, period_end) - report = all_reports[0]["teams"][str(self.team.pk)] + report = _get_team_report(all_reports, self.team) # We selected 200 or 50 rows, but still read 100 rows to return the query 
- assert report["hogql_app_rows_read"] == 100 - assert report["hogql_app_bytes_read"] > 0 - assert report["event_explorer_app_rows_read"] == 100 - assert report["event_explorer_app_bytes_read"] > 0 + assert report.hogql_app_rows_read == 100 + assert report.hogql_app_bytes_read > 0 + assert report.event_explorer_app_rows_read == 100 + assert report.event_explorer_app_bytes_read > 0 # Nothing was read via the API - assert report["hogql_api_rows_read"] == 0 - assert report["event_explorer_api_rows_read"] == 0 + assert report.hogql_api_rows_read == 0 + assert report.event_explorer_api_rows_read == 0 @freeze_time("2022-01-10T00:01:00Z") @@ -680,21 +700,19 @@ def test_usage_report_decide_requests(self, billing_task_mock: MagicMock, postho flush_persons_and_events() with self.settings(DECIDE_BILLING_ANALYTICS_TOKEN="correct"): - all_reports = send_all_org_usage_reports(dry_run=False, at=str(now() + relativedelta(days=1))) + period = get_previous_day(at=now() + relativedelta(days=1)) + period_start, period_end = period + all_reports = _get_all_org_reports(period_start, period_end) assert len(all_reports) == 3 - all_reports = sorted(all_reports, key=lambda x: x["organization_name"]) - - assert [all_reports["organization_name"] for all_reports in all_reports] == [ - "Org 1", - "Org 2", - "PostHog", - ] - - org_1_report = all_reports[0] - org_2_report = all_reports[1] - analytics_report = all_reports[2] + org_1_report = _get_full_org_usage_report_as_dict( + _get_full_org_usage_report(all_reports[str(self.org_1.id)], get_instance_metadata(period)) + ) + assert org_1_report["organization_name"] == "Org 1" + org_2_report = _get_full_org_usage_report_as_dict( + _get_full_org_usage_report(all_reports[str(self.org_2.id)], get_instance_metadata(period)) + ) assert org_1_report["organization_name"] == "Org 1" assert org_1_report["decide_requests_count_in_period"] == 11 @@ -721,26 +739,6 @@ def test_usage_report_decide_requests(self, billing_task_mock: MagicMock, postho assert org_2_report["teams"]["5"]["billable_feature_flag_requests_count_in_period"] == 0 assert org_2_report["teams"]["5"]["billable_feature_flag_requests_count_in_month"] == 0 - # billing service calls are made only for org1, which has decide requests, and analytics org - which has decide usage events. 
- calls = [ - call( - org_1_report["organization_id"], - ANY, - ), - call( - analytics_report["organization_id"], - ANY, - ), - ] - assert billing_task_mock.delay.call_count == 2 - billing_task_mock.delay.assert_has_calls( - calls, - any_order=True, - ) - - # capture usage report calls are made for all orgs - assert posthog_capture_mock.return_value.capture.call_count == 3 - @patch("posthog.tasks.usage_report.Client") @patch("posthog.tasks.usage_report.send_report_to_billing_service") def test_usage_report_local_evaluation_requests( @@ -792,21 +790,19 @@ def test_usage_report_local_evaluation_requests( flush_persons_and_events() with self.settings(DECIDE_BILLING_ANALYTICS_TOKEN="correct"): - all_reports = send_all_org_usage_reports(dry_run=False, at=str(now() + relativedelta(days=1))) + period = get_previous_day(at=now() + relativedelta(days=1)) + period_start, period_end = period + all_reports = _get_all_org_reports(period_start, period_end) assert len(all_reports) == 3 - all_reports = sorted(all_reports, key=lambda x: x["organization_name"]) - - assert [all_reports["organization_name"] for all_reports in all_reports] == [ - "Org 1", - "Org 2", - "PostHog", - ] - - org_1_report = all_reports[0] - org_2_report = all_reports[1] - analytics_report = all_reports[2] + org_1_report = _get_full_org_usage_report_as_dict( + _get_full_org_usage_report(all_reports[str(self.org_1.id)], get_instance_metadata(period)) + ) + assert org_1_report["organization_name"] == "Org 1" + org_2_report = _get_full_org_usage_report_as_dict( + _get_full_org_usage_report(all_reports[str(self.org_2.id)], get_instance_metadata(period)) + ) assert org_1_report["organization_name"] == "Org 1" assert org_1_report["local_evaluation_requests_count_in_period"] == 11 @@ -837,26 +833,6 @@ def test_usage_report_local_evaluation_requests( assert org_2_report["teams"]["5"]["billable_feature_flag_requests_count_in_period"] == 0 assert org_2_report["teams"]["5"]["billable_feature_flag_requests_count_in_month"] == 0 - # billing service calls are made only for org1, which has decide requests, and analytics org - which has local evaluation usage events. 
- calls = [ - call( - org_1_report["organization_id"], - ANY, - ), - call( - analytics_report["organization_id"], - ANY, - ), - ] - assert billing_task_mock.delay.call_count == 2 - billing_task_mock.delay.assert_has_calls( - calls, - any_order=True, - ) - - # capture usage report calls are made for all orgs - assert posthog_capture_mock.return_value.capture.call_count == 3 - class SendUsageTest(LicensedTestMixin, ClickhouseDestroyTablesMixin, APIBaseTest): def setUp(self) -> None: @@ -907,18 +883,26 @@ def test_send_usage(self, mock_post: MagicMock, mock_client: MagicMock) -> None: mock_posthog = MagicMock() mock_client.return_value = mock_posthog - all_reports = send_all_org_usage_reports(dry_run=False) + period = get_previous_day() + period_start, period_end = period + all_reports = _get_all_org_reports(period_start, period_end) + full_report_as_dict = _get_full_org_usage_report_as_dict( + _get_full_org_usage_report(all_reports[str(self.organization.id)], get_instance_metadata(period)) + ) + send_all_org_usage_reports(dry_run=False) license = License.objects.first() assert license token = build_billing_token(license, self.organization) mock_post.assert_called_once_with( - f"{BILLING_SERVICE_URL}/api/usage", json=all_reports[0], headers={"Authorization": f"Bearer {token}"} + f"{BILLING_SERVICE_URL}/api/usage", + json=full_report_as_dict, + headers={"Authorization": f"Bearer {token}"}, ) mock_posthog.capture.assert_any_call( get_machine_id(), "organization usage report", - {**all_reports[0], "scope": "machine"}, + {**full_report_as_dict, "scope": "machine"}, groups={"instance": ANY}, timestamp=None, ) @@ -935,18 +919,26 @@ def test_send_usage_cloud(self, mock_post: MagicMock, mock_client: MagicMock) -> mock_posthog = MagicMock() mock_client.return_value = mock_posthog - all_reports = send_all_org_usage_reports(dry_run=False) + period = get_previous_day() + period_start, period_end = period + all_reports = _get_all_org_reports(period_start, period_end) + full_report_as_dict = _get_full_org_usage_report_as_dict( + _get_full_org_usage_report(all_reports[str(self.organization.id)], get_instance_metadata(period)) + ) + send_all_org_usage_reports(dry_run=False) license = License.objects.first() assert license token = build_billing_token(license, self.organization) mock_post.assert_called_once_with( - f"{BILLING_SERVICE_URL}/api/usage", json=all_reports[0], headers={"Authorization": f"Bearer {token}"} + f"{BILLING_SERVICE_URL}/api/usage", + json=full_report_as_dict, + headers={"Authorization": f"Bearer {token}"}, ) mock_posthog.capture.assert_any_call( self.user.distinct_id, "organization usage report", - {**all_reports[0], "scope": "user"}, + {**full_report_as_dict, "scope": "user"}, groups={"instance": "http://localhost:8000", "organization": str(self.organization.id)}, timestamp=None, ) diff --git a/posthog/tasks/usage_report.py b/posthog/tasks/usage_report.py index 45f82b9882374..612213086629e 100644 --- a/posthog/tasks/usage_report.py +++ b/posthog/tasks/usage_report.py @@ -534,6 +534,281 @@ def convert_team_usage_rows_to_dict(rows: List[Union[dict, Tuple[int, int]]]) -> return team_id_map +def _get_all_usage_data(period_start: datetime, period_end: datetime) -> Dict[str, Any]: + """ + Gets all usage data for the specified period. 
Clickhouse is good at counting things so + we count across all teams rather than doing it one by one + """ + return dict( + teams_with_event_count_lifetime=get_teams_with_event_count_lifetime(), + teams_with_event_count_in_period=get_teams_with_billable_event_count_in_period( + period_start, period_end, count_distinct=True + ), + teams_with_event_count_in_month=get_teams_with_billable_event_count_in_period( + period_start.replace(day=1), period_end + ), + teams_with_event_count_with_groups_in_period=get_teams_with_event_count_with_groups_in_period( + period_start, period_end + ), + # teams_with_event_count_by_lib=get_teams_with_event_count_by_lib(period_start, period_end), + # teams_with_event_count_by_name=get_teams_with_event_count_by_name(period_start, period_end), + teams_with_recording_count_in_period=get_teams_with_recording_count_in_period(period_start, period_end), + teams_with_recording_count_total=get_teams_with_recording_count_total(), + teams_with_decide_requests_count_in_period=get_teams_with_feature_flag_requests_count_in_period( + period_start, period_end, FlagRequestType.DECIDE + ), + teams_with_decide_requests_count_in_month=get_teams_with_feature_flag_requests_count_in_period( + period_start.replace(day=1), period_end, FlagRequestType.DECIDE + ), + teams_with_local_evaluation_requests_count_in_period=get_teams_with_feature_flag_requests_count_in_period( + period_start, period_end, FlagRequestType.LOCAL_EVALUATION + ), + teams_with_local_evaluation_requests_count_in_month=get_teams_with_feature_flag_requests_count_in_period( + period_start.replace(day=1), period_end, FlagRequestType.LOCAL_EVALUATION + ), + teams_with_group_types_total=list( + GroupTypeMapping.objects.values("team_id").annotate(total=Count("id")).order_by("team_id") + ), + teams_with_dashboard_count=list( + Dashboard.objects.values("team_id").annotate(total=Count("id")).order_by("team_id") + ), + teams_with_dashboard_template_count=list( + Dashboard.objects.filter(creation_mode="template") + .values("team_id") + .annotate(total=Count("id")) + .order_by("team_id") + ), + teams_with_dashboard_shared_count=list( + Dashboard.objects.filter(sharingconfiguration__enabled=True) + .values("team_id") + .annotate(total=Count("id")) + .order_by("team_id") + ), + teams_with_dashboard_tagged_count=list( + Dashboard.objects.filter(tagged_items__isnull=False) + .values("team_id") + .annotate(total=Count("id")) + .order_by("team_id") + ), + teams_with_ff_count=list(FeatureFlag.objects.values("team_id").annotate(total=Count("id")).order_by("team_id")), + teams_with_ff_active_count=list( + FeatureFlag.objects.filter(active=True).values("team_id").annotate(total=Count("id")).order_by("team_id") + ), + teams_with_hogql_app_bytes_read=get_teams_with_hogql_metric( + period_start, + period_end, + metric="read_bytes", + query_types=["hogql_query", "HogQLQuery"], + access_method="", + ), + teams_with_hogql_app_rows_read=get_teams_with_hogql_metric( + period_start, + period_end, + metric="read_rows", + query_types=["hogql_query", "HogQLQuery"], + access_method="", + ), + teams_with_hogql_app_duration_ms=get_teams_with_hogql_metric( + period_start, + period_end, + metric="query_duration_ms", + query_types=["hogql_query", "HogQLQuery"], + access_method="", + ), + teams_with_hogql_api_bytes_read=get_teams_with_hogql_metric( + period_start, + period_end, + metric="read_bytes", + query_types=["hogql_query", "HogQLQuery"], + access_method="personal_api_key", + ), + teams_with_hogql_api_rows_read=get_teams_with_hogql_metric( + 
period_start, + period_end, + metric="read_rows", + query_types=["hogql_query", "HogQLQuery"], + access_method="personal_api_key", + ), + teams_with_hogql_api_duration_ms=get_teams_with_hogql_metric( + period_start, + period_end, + metric="query_duration_ms", + query_types=["hogql_query", "HogQLQuery"], + access_method="personal_api_key", + ), + teams_with_event_explorer_app_bytes_read=get_teams_with_hogql_metric( + period_start, + period_end, + metric="read_bytes", + query_types=["EventsQuery"], + access_method="", + ), + teams_with_event_explorer_app_rows_read=get_teams_with_hogql_metric( + period_start, + period_end, + metric="read_rows", + query_types=["EventsQuery"], + access_method="", + ), + teams_with_event_explorer_app_duration_ms=get_teams_with_hogql_metric( + period_start, + period_end, + metric="query_duration_ms", + query_types=["EventsQuery"], + access_method="", + ), + teams_with_event_explorer_api_bytes_read=get_teams_with_hogql_metric( + period_start, + period_end, + metric="read_bytes", + query_types=["EventsQuery"], + access_method="personal_api_key", + ), + teams_with_event_explorer_api_rows_read=get_teams_with_hogql_metric( + period_start, + period_end, + metric="read_rows", + query_types=["EventsQuery"], + access_method="personal_api_key", + ), + teams_with_event_explorer_api_duration_ms=get_teams_with_hogql_metric( + period_start, + period_end, + metric="query_duration_ms", + query_types=["EventsQuery"], + access_method="personal_api_key", + ), + ) + + +def _get_all_usage_data_as_team_rows(period_start: datetime, period_end: datetime) -> Dict[str, Any]: + """ + Gets all usage data for the specified period as a map of team_id -> value. This makes it faster + to access the data than looping over all_data to find what we want. + """ + all_data = _get_all_usage_data(period_start, period_end) + # convert it to a map of team_id -> value + for key, rows in all_data.items(): + all_data[key] = convert_team_usage_rows_to_dict(rows) + return all_data + + +def _get_teams_for_usage_reports() -> Sequence[Team]: + return list( + Team.objects.select_related("organization").exclude( + Q(organization__for_internal_metrics=True) | Q(is_demo=True) + ) + ) + + +def _get_team_report(all_data: Dict[str, Any], team: Team) -> UsageReportCounters: + decide_requests_count_in_month = all_data["teams_with_decide_requests_count_in_month"].get(team.id, 0) + decide_requests_count_in_period = all_data["teams_with_decide_requests_count_in_period"].get(team.id, 0) + local_evaluation_requests_count_in_period = all_data["teams_with_local_evaluation_requests_count_in_period"].get( + team.id, 0 + ) + local_evaluation_requests_count_in_month = all_data["teams_with_local_evaluation_requests_count_in_month"].get( + team.id, 0 + ) + return UsageReportCounters( + event_count_lifetime=all_data["teams_with_event_count_lifetime"].get(team.id, 0), + event_count_in_period=all_data["teams_with_event_count_in_period"].get(team.id, 0), + event_count_in_month=all_data["teams_with_event_count_in_month"].get(team.id, 0), + event_count_with_groups_in_period=all_data["teams_with_event_count_with_groups_in_period"].get(team.id, 0), + # event_count_by_lib: Di all_data["teams_with_#"].get(team.id, 0), + # event_count_by_name: Di all_data["teams_with_#"].get(team.id, 0), + recording_count_in_period=all_data["teams_with_recording_count_in_period"].get(team.id, 0), + recording_count_total=all_data["teams_with_recording_count_total"].get(team.id, 0), + group_types_total=all_data["teams_with_group_types_total"].get(team.id, 0), + 
decide_requests_count_in_period=decide_requests_count_in_period, + decide_requests_count_in_month=decide_requests_count_in_month, + local_evaluation_requests_count_in_period=local_evaluation_requests_count_in_period, + local_evaluation_requests_count_in_month=local_evaluation_requests_count_in_month, + billable_feature_flag_requests_count_in_month=decide_requests_count_in_month + + (local_evaluation_requests_count_in_month * 10), + billable_feature_flag_requests_count_in_period=decide_requests_count_in_period + + (local_evaluation_requests_count_in_period * 10), + dashboard_count=all_data["teams_with_dashboard_count"].get(team.id, 0), + dashboard_template_count=all_data["teams_with_dashboard_template_count"].get(team.id, 0), + dashboard_shared_count=all_data["teams_with_dashboard_shared_count"].get(team.id, 0), + dashboard_tagged_count=all_data["teams_with_dashboard_tagged_count"].get(team.id, 0), + ff_count=all_data["teams_with_ff_count"].get(team.id, 0), + ff_active_count=all_data["teams_with_ff_active_count"].get(team.id, 0), + hogql_app_bytes_read=all_data["teams_with_hogql_app_bytes_read"].get(team.id, 0), + hogql_app_rows_read=all_data["teams_with_hogql_app_rows_read"].get(team.id, 0), + hogql_app_duration_ms=all_data["teams_with_hogql_app_duration_ms"].get(team.id, 0), + hogql_api_bytes_read=all_data["teams_with_hogql_api_bytes_read"].get(team.id, 0), + hogql_api_rows_read=all_data["teams_with_hogql_api_rows_read"].get(team.id, 0), + hogql_api_duration_ms=all_data["teams_with_hogql_api_duration_ms"].get(team.id, 0), + event_explorer_app_bytes_read=all_data["teams_with_event_explorer_app_bytes_read"].get(team.id, 0), + event_explorer_app_rows_read=all_data["teams_with_event_explorer_app_rows_read"].get(team.id, 0), + event_explorer_app_duration_ms=all_data["teams_with_event_explorer_app_duration_ms"].get(team.id, 0), + event_explorer_api_bytes_read=all_data["teams_with_event_explorer_api_bytes_read"].get(team.id, 0), + event_explorer_api_rows_read=all_data["teams_with_event_explorer_api_rows_read"].get(team.id, 0), + event_explorer_api_duration_ms=all_data["teams_with_event_explorer_api_duration_ms"].get(team.id, 0), + ) + + +def _add_team_report_to_org_reports( + org_reports: Dict[str, OrgReport], team: Team, team_report: UsageReportCounters, period_start: datetime +) -> None: + org_id = str(team.organization.id) + if org_id not in org_reports: + org_report = OrgReport( + date=period_start.strftime("%Y-%m-%d"), + organization_id=org_id, + organization_name=team.organization.name, + organization_created_at=team.organization.created_at.isoformat(), + organization_user_count=get_org_user_count(org_id), + team_count=1, + teams={str(team.id): team_report}, + **dataclasses.asdict(team_report), # Clone the team report as the basis + ) + org_reports[org_id] = org_report + else: + org_report = org_reports[org_id] + org_report.teams[str(team.id)] = team_report + org_report.team_count += 1 + + # Iterate on all fields of the UsageReportCounters and add the values from the team report to the org report + for field in dataclasses.fields(UsageReportCounters): + if hasattr(team_report, field.name): + setattr( + org_report, + field.name, + getattr(org_report, field.name) + getattr(team_report, field.name), + ) + + +def _get_all_org_reports(period_start: datetime, period_end: datetime) -> Dict[str, OrgReport]: + all_data = _get_all_usage_data_as_team_rows(period_start, period_end) + + teams = _get_teams_for_usage_reports() + + org_reports: Dict[str, OrgReport] = {} + + print("Generating reports for 
teams...") # noqa T201 + time_now = datetime.now() + for team in teams: + team_report = _get_team_report(all_data, team) + _add_team_report_to_org_reports(org_reports, team, team_report, period_start) + + time_since = datetime.now() - time_now + print(f"Generating reports for teams took {time_since.total_seconds()} seconds.") # noqa T201 + return org_reports + + +def _get_full_org_usage_report(org_report: OrgReport, instance_metadata: InstanceMetadata) -> FullUsageReport: + return FullUsageReport( + **dataclasses.asdict(org_report), + **dataclasses.asdict(instance_metadata), + ) + + +def _get_full_org_usage_report_as_dict(full_report: FullUsageReport) -> Dict[str, Any]: + return dataclasses.asdict(full_report) + + @app.task(ignore_result=True, max_retries=3, autoretry_for=(Exception,)) def send_all_org_usage_reports( dry_run: bool = False, @@ -541,7 +816,7 @@ def send_all_org_usage_reports( capture_event_name: Optional[str] = None, skip_capture_event: bool = False, only_organization_id: Optional[str] = None, -) -> List[dict]: # Dict[str, OrgReport]: +) -> None: capture_event_name = capture_event_name or "organization usage report" at_date = parser.parse(at) if at else None @@ -550,250 +825,8 @@ def send_all_org_usage_reports( instance_metadata = get_instance_metadata(period) - # Clickhouse is good at counting things so we count across all teams rather than doing it one by one try: - all_data = dict( - teams_with_event_count_lifetime=get_teams_with_event_count_lifetime(), - teams_with_event_count_in_period=get_teams_with_billable_event_count_in_period( - period_start, period_end, count_distinct=True - ), - teams_with_event_count_in_month=get_teams_with_billable_event_count_in_period( - period_start.replace(day=1), period_end - ), - teams_with_event_count_with_groups_in_period=get_teams_with_event_count_with_groups_in_period( - period_start, period_end - ), - # teams_with_event_count_by_lib=get_teams_with_event_count_by_lib(period_start, period_end), - # teams_with_event_count_by_name=get_teams_with_event_count_by_name(period_start, period_end), - teams_with_recording_count_in_period=get_teams_with_recording_count_in_period(period_start, period_end), - teams_with_recording_count_total=get_teams_with_recording_count_total(), - teams_with_decide_requests_count_in_period=get_teams_with_feature_flag_requests_count_in_period( - period_start, period_end, FlagRequestType.DECIDE - ), - teams_with_decide_requests_count_in_month=get_teams_with_feature_flag_requests_count_in_period( - period_start.replace(day=1), period_end, FlagRequestType.DECIDE - ), - teams_with_local_evaluation_requests_count_in_period=get_teams_with_feature_flag_requests_count_in_period( - period_start, period_end, FlagRequestType.LOCAL_EVALUATION - ), - teams_with_local_evaluation_requests_count_in_month=get_teams_with_feature_flag_requests_count_in_period( - period_start.replace(day=1), period_end, FlagRequestType.LOCAL_EVALUATION - ), - teams_with_group_types_total=list( - GroupTypeMapping.objects.values("team_id").annotate(total=Count("id")).order_by("team_id") - ), - teams_with_dashboard_count=list( - Dashboard.objects.values("team_id").annotate(total=Count("id")).order_by("team_id") - ), - teams_with_dashboard_template_count=list( - Dashboard.objects.filter(creation_mode="template") - .values("team_id") - .annotate(total=Count("id")) - .order_by("team_id") - ), - teams_with_dashboard_shared_count=list( - Dashboard.objects.filter(sharingconfiguration__enabled=True) - .values("team_id") - .annotate(total=Count("id")) - 
.order_by("team_id") - ), - teams_with_dashboard_tagged_count=list( - Dashboard.objects.filter(tagged_items__isnull=False) - .values("team_id") - .annotate(total=Count("id")) - .order_by("team_id") - ), - teams_with_ff_count=list( - FeatureFlag.objects.values("team_id").annotate(total=Count("id")).order_by("team_id") - ), - teams_with_ff_active_count=list( - FeatureFlag.objects.filter(active=True) - .values("team_id") - .annotate(total=Count("id")) - .order_by("team_id") - ), - teams_with_hogql_app_bytes_read=get_teams_with_hogql_metric( - period_start, - period_end, - metric="read_bytes", - query_types=["hogql_query", "HogQLQuery"], - access_method="", - ), - teams_with_hogql_app_rows_read=get_teams_with_hogql_metric( - period_start, - period_end, - metric="read_rows", - query_types=["hogql_query", "HogQLQuery"], - access_method="", - ), - teams_with_hogql_app_duration_ms=get_teams_with_hogql_metric( - period_start, - period_end, - metric="query_duration_ms", - query_types=["hogql_query", "HogQLQuery"], - access_method="", - ), - teams_with_hogql_api_bytes_read=get_teams_with_hogql_metric( - period_start, - period_end, - metric="read_bytes", - query_types=["hogql_query", "HogQLQuery"], - access_method="personal_api_key", - ), - teams_with_hogql_api_rows_read=get_teams_with_hogql_metric( - period_start, - period_end, - metric="read_rows", - query_types=["hogql_query", "HogQLQuery"], - access_method="personal_api_key", - ), - teams_with_hogql_api_duration_ms=get_teams_with_hogql_metric( - period_start, - period_end, - metric="query_duration_ms", - query_types=["hogql_query", "HogQLQuery"], - access_method="personal_api_key", - ), - teams_with_event_explorer_app_bytes_read=get_teams_with_hogql_metric( - period_start, - period_end, - metric="read_bytes", - query_types=["EventsQuery"], - access_method="", - ), - teams_with_event_explorer_app_rows_read=get_teams_with_hogql_metric( - period_start, - period_end, - metric="read_rows", - query_types=["EventsQuery"], - access_method="", - ), - teams_with_event_explorer_app_duration_ms=get_teams_with_hogql_metric( - period_start, - period_end, - metric="query_duration_ms", - query_types=["EventsQuery"], - access_method="", - ), - teams_with_event_explorer_api_bytes_read=get_teams_with_hogql_metric( - period_start, - period_end, - metric="read_bytes", - query_types=["EventsQuery"], - access_method="personal_api_key", - ), - teams_with_event_explorer_api_rows_read=get_teams_with_hogql_metric( - period_start, - period_end, - metric="read_rows", - query_types=["EventsQuery"], - access_method="personal_api_key", - ), - teams_with_event_explorer_api_duration_ms=get_teams_with_hogql_metric( - period_start, - period_end, - metric="query_duration_ms", - query_types=["EventsQuery"], - access_method="personal_api_key", - ), - ) - - # The data is all as raw rows which will dramatically slow down the upcoming loop - # so we convert it to a map of team_id -> value - for key, rows in all_data.items(): - all_data[key] = convert_team_usage_rows_to_dict(rows) - - teams: Sequence[Team] = list( - Team.objects.select_related("organization").exclude( - Q(organization__for_internal_metrics=True) | Q(is_demo=True) - ) - ) - - org_reports: Dict[str, OrgReport] = {} - - print("Generating reports for teams...") # noqa T201 - time_now = datetime.now() - for team in teams: - decide_requests_count_in_month = all_data["teams_with_decide_requests_count_in_month"].get(team.id, 0) - decide_requests_count_in_period = all_data["teams_with_decide_requests_count_in_period"].get(team.id, 
0) - local_evaluation_requests_count_in_period = all_data[ - "teams_with_local_evaluation_requests_count_in_period" - ].get(team.id, 0) - local_evaluation_requests_count_in_month = all_data[ - "teams_with_local_evaluation_requests_count_in_month" - ].get(team.id, 0) - - team_report = UsageReportCounters( - event_count_lifetime=all_data["teams_with_event_count_lifetime"].get(team.id, 0), - event_count_in_period=all_data["teams_with_event_count_in_period"].get(team.id, 0), - event_count_in_month=all_data["teams_with_event_count_in_month"].get(team.id, 0), - event_count_with_groups_in_period=all_data["teams_with_event_count_with_groups_in_period"].get( - team.id, 0 - ), - # event_count_by_lib: Di all_data["teams_with_#"].get(team.id, 0), - # event_count_by_name: Di all_data["teams_with_#"].get(team.id, 0), - recording_count_in_period=all_data["teams_with_recording_count_in_period"].get(team.id, 0), - recording_count_total=all_data["teams_with_recording_count_total"].get(team.id, 0), - group_types_total=all_data["teams_with_group_types_total"].get(team.id, 0), - decide_requests_count_in_period=decide_requests_count_in_period, - decide_requests_count_in_month=decide_requests_count_in_month, - local_evaluation_requests_count_in_period=local_evaluation_requests_count_in_period, - local_evaluation_requests_count_in_month=local_evaluation_requests_count_in_month, - billable_feature_flag_requests_count_in_month=decide_requests_count_in_month - + (local_evaluation_requests_count_in_month * 10), - billable_feature_flag_requests_count_in_period=decide_requests_count_in_period - + (local_evaluation_requests_count_in_period * 10), - dashboard_count=all_data["teams_with_dashboard_count"].get(team.id, 0), - dashboard_template_count=all_data["teams_with_dashboard_template_count"].get(team.id, 0), - dashboard_shared_count=all_data["teams_with_dashboard_shared_count"].get(team.id, 0), - dashboard_tagged_count=all_data["teams_with_dashboard_tagged_count"].get(team.id, 0), - ff_count=all_data["teams_with_ff_count"].get(team.id, 0), - ff_active_count=all_data["teams_with_ff_active_count"].get(team.id, 0), - hogql_app_bytes_read=all_data["teams_with_hogql_app_bytes_read"].get(team.id, 0), - hogql_app_rows_read=all_data["teams_with_hogql_app_rows_read"].get(team.id, 0), - hogql_app_duration_ms=all_data["teams_with_hogql_app_duration_ms"].get(team.id, 0), - hogql_api_bytes_read=all_data["teams_with_hogql_api_bytes_read"].get(team.id, 0), - hogql_api_rows_read=all_data["teams_with_hogql_api_rows_read"].get(team.id, 0), - hogql_api_duration_ms=all_data["teams_with_hogql_api_duration_ms"].get(team.id, 0), - event_explorer_app_bytes_read=all_data["teams_with_event_explorer_app_bytes_read"].get(team.id, 0), - event_explorer_app_rows_read=all_data["teams_with_event_explorer_app_rows_read"].get(team.id, 0), - event_explorer_app_duration_ms=all_data["teams_with_event_explorer_app_duration_ms"].get(team.id, 0), - event_explorer_api_bytes_read=all_data["teams_with_event_explorer_api_bytes_read"].get(team.id, 0), - event_explorer_api_rows_read=all_data["teams_with_event_explorer_api_rows_read"].get(team.id, 0), - event_explorer_api_duration_ms=all_data["teams_with_event_explorer_api_duration_ms"].get(team.id, 0), - ) - - org_id = str(team.organization.id) - - if org_id not in org_reports: - org_report = OrgReport( - date=period_start.strftime("%Y-%m-%d"), - organization_id=org_id, - organization_name=team.organization.name, - organization_created_at=team.organization.created_at.isoformat(), - 
organization_user_count=get_org_user_count(org_id), - team_count=1, - teams={str(team.id): team_report}, - **dataclasses.asdict(team_report), # Clone the team report as the basis - ) - org_reports[org_id] = org_report - else: - org_report = org_reports[org_id] - org_report.teams[str(team.id)] = team_report - org_report.team_count += 1 - - # Iterate on all fields of the UsageReportCounters and add the values from the team report to the org report - for field in dataclasses.fields(UsageReportCounters): - if hasattr(team_report, field.name): - setattr( - org_report, - field.name, - getattr(org_report, field.name) + getattr(team_report, field.name), - ) - time_since = datetime.now() - time_now - print(f"Generating reports for teams took {time_since.total_seconds()} seconds.") # noqa T201 - - all_reports = [] + org_reports = _get_all_org_reports(period_start, period_end) print("Sending usage reports to PostHog and Billing...") # noqa T201 time_now = datetime.now() @@ -803,12 +836,8 @@ def send_all_org_usage_reports( if only_organization_id and only_organization_id != org_id: continue - full_report = FullUsageReport( - **dataclasses.asdict(org_report), - **dataclasses.asdict(instance_metadata), - ) - full_report_dict = dataclasses.asdict(full_report) - all_reports.append(full_report_dict) + full_report = _get_full_org_usage_report(org_report, instance_metadata) + full_report_dict = _get_full_org_usage_report_as_dict(full_report) if dry_run: continue @@ -823,7 +852,6 @@ def send_all_org_usage_reports( send_report_to_billing_service.delay(org_id, full_report_dict) time_since = datetime.now() - time_now print(f"Sending usage reports to PostHog and Billing took {time_since.total_seconds()} seconds.") # noqa T201 - return all_reports except Exception as err: capture_exception(err) raise err