From e77037b48084bb8944456cddc9891605e114190e Mon Sep 17 00:00:00 2001
From: Marius Andra
Date: Wed, 18 Oct 2023 09:32:06 +0200
Subject: [PATCH] fix(queries): export limit (#18046)

---
 frontend/src/models/cohortsModel.ts           |  1 -
 .../nodes/DataTable/DataTableExport.tsx       | 40 ++++++++++-------
 frontend/src/queries/query.test.ts            |  1 -
 frontend/src/queries/query.ts                 |  3 --
 frontend/src/scenes/persons/personsLogic.tsx  |  1 -
 .../src/scenes/retention/RetentionModal.tsx   |  1 -
 frontend/src/types.ts                         |  2 -
 posthog/api/query.py                          |  8 ++--
 posthog/api/test/test_exports.py              |  1 -
 posthog/api/test/test_query.py                | 44 ++++++++++++++++++-
 posthog/hogql/constants.py                    |  2 +-
 posthog/hogql/query.py                        | 10 +++--
 posthog/hogql_queries/events_query_runner.py  | 10 +++--
 .../insights/lifecycle_query_runner.py        | 10 ++++-
 .../insights/trends/trends_query_runner.py    | 10 ++++-
 posthog/hogql_queries/persons_query_runner.py | 10 ++++-
 posthog/hogql_queries/query_runner.py         | 42 ++++++++++++++----
 posthog/tasks/exporter.py                     |  3 +-
 posthog/tasks/exports/csv_exporter.py         | 16 +++----
 .../tasks/exports/test/test_csv_exporter.py   |  3 +-
 20 files changed, 149 insertions(+), 69 deletions(-)

diff --git a/frontend/src/models/cohortsModel.ts b/frontend/src/models/cohortsModel.ts
index 3d09a903db3432..6d0ca78e3b38d8 100644
--- a/frontend/src/models/cohortsModel.ts
+++ b/frontend/src/models/cohortsModel.ts
@@ -85,7 +85,6 @@ export const cohortsModel = kea<cohortsModelType>({
                     export_format: ExporterFormat.CSV,
                     export_context: {
                         path: `/api/cohort/${id}/persons`,
-                        max_limit: 10000,
                     },
                 }
                 if (columns && columns.length > 0) {
diff --git a/frontend/src/queries/nodes/DataTable/DataTableExport.tsx b/frontend/src/queries/nodes/DataTable/DataTableExport.tsx
index 5dd37916349d12..4970f0fd8e429a 100644
--- a/frontend/src/queries/nodes/DataTable/DataTableExport.tsx
+++ b/frontend/src/queries/nodes/DataTable/DataTableExport.tsx
@@ -4,7 +4,11 @@ import { IconExport } from 'lib/lemon-ui/icons'
 import { triggerExport } from 'lib/components/ExportButton/exporter'
 import { ExporterFormat } from '~/types'
 import { DataNode, DataTableNode, NodeKind } from '~/queries/schema'
-import { defaultDataTableColumns, extractExpressionComment } from '~/queries/nodes/DataTable/utils'
+import {
+    defaultDataTableColumns,
+    extractExpressionComment,
+    removeExpressionComment,
+} from '~/queries/nodes/DataTable/utils'
 import { isEventsQuery, isHogQLQuery, isPersonsNode } from '~/queries/utils'
 import { getPersonsEndpoint } from '~/queries/query'
 import { ExportWithConfirmation } from '~/queries/nodes/DataTable/ExportWithConfirmation'
@@ -18,26 +22,28 @@ const EXPORT_MAX_LIMIT = 10000
 
 function startDownload(query: DataTableNode, onlySelectedColumns: boolean): void {
     const exportContext = isPersonsNode(query.source)
-        ? { path: getPersonsEndpoint(query.source), max_limit: EXPORT_MAX_LIMIT }
-        : { source: query.source, max_limit: EXPORT_MAX_LIMIT }
+        ? { path: getPersonsEndpoint(query.source) }
+        : { source: query.source }
     if (!exportContext) {
         throw new Error('Unsupported node type')
     }
 
-    const columnMapping = {
-        url: ['properties.$current_url', 'properties.$screen_name'],
-        time: 'timestamp',
-        event: 'event',
-        source: 'properties.$lib',
-        person: isPersonsNode(query.source)
-            ? ['distinct_ids.0', 'properties.email']
-            : ['person.distinct_ids.0', 'person.properties.email'],
-    }
-
     if (onlySelectedColumns) {
-        exportContext['columns'] = (query.columns ?? defaultDataTableColumns(query.source.kind))
-            ?.flatMap((c) => columnMapping[c] || c)
-            .filter((c) => c !== 'person.$delete')
+        exportContext['columns'] = (
+            (isEventsQuery(query.source) ? query.source.select : null) ??
+            query.columns ??
+            defaultDataTableColumns(query.source.kind)
+        )?.filter((c) => c !== 'person.$delete')
+
+        if (isEventsQuery(query.source)) {
+            exportContext['columns'] = exportContext['columns'].map((c: string) =>
+                removeExpressionComment(c) === 'person' ? 'person.properties.email' : c
+            )
+        } else if (isPersonsNode(query.source)) {
+            exportContext['columns'] = exportContext['columns'].map((c: string) =>
+                removeExpressionComment(c) === 'person' ? 'properties.email' : c
+            )
+        }
     }
     triggerExport({
         export_format: ExporterFormat.CSV,
@@ -185,7 +191,7 @@ export function DataTableExport({ query }: DataTableExportProps): JSX.Element |
         (isEventsQuery(source) || isPersonsNode(source) ? source.properties?.length || 0 : 0) +
         (isEventsQuery(source) && source.event ? 1 : 0) +
         (isPersonsNode(source) && source.search ? 1 : 0)
-    const canExportAllColumns = isEventsQuery(source) || isPersonsNode(source)
+    const canExportAllColumns = (isEventsQuery(source) && source.select.includes('*')) || isPersonsNode(source)
     const showExportClipboardButtons = isPersonsNode(source) || isEventsQuery(source) || isHogQLQuery(source)
 
     return (
diff --git a/frontend/src/queries/query.test.ts b/frontend/src/queries/query.test.ts
index 286af39f048e00..5ae28a1a657e09 100644
--- a/frontend/src/queries/query.test.ts
+++ b/frontend/src/queries/query.test.ts
@@ -67,7 +67,6 @@ describe('query', () => {
                     'timestamp',
                 ],
             },
-            max_limit: 10000,
         })
     })
diff --git a/frontend/src/queries/query.ts b/frontend/src/queries/query.ts
index ea3ccd5476dfa9..646cfc180feff2 100644
--- a/frontend/src/queries/query.ts
+++ b/frontend/src/queries/query.ts
@@ -32,8 +32,6 @@ import { currentSessionId } from 'lib/internalMetrics'
 import { featureFlagLogic } from 'lib/logic/featureFlagLogic'
 import { FEATURE_FLAGS } from 'lib/constants'
 
-const EXPORT_MAX_LIMIT = 10000
-
 //get export context for a given query
 export function queryExportContext<N extends DataNode = DataNode>(
     query: N,
     } else if (isEventsQuery(query) || isPersonsQuery(query)) {
         return {
             source: query,
-            max_limit: EXPORT_MAX_LIMIT,
         }
     } else if (isHogQLQuery(query)) {
         return { source: query }
diff --git a/frontend/src/scenes/persons/personsLogic.tsx b/frontend/src/scenes/persons/personsLogic.tsx
index c660dd15aabea3..005924ec569240 100644
--- a/frontend/src/scenes/persons/personsLogic.tsx
+++ b/frontend/src/scenes/persons/personsLogic.tsx
@@ -170,7 +170,6 @@ export const personsLogic = kea<personsLogicType>({
                         path: cohort
                             ? api.cohorts.determineListUrl(cohort, listFilters)
                             : api.persons.determineListUrl(listFilters),
-                        max_limit: 10000,
                     },
                 },
             ],
diff --git a/frontend/src/scenes/retention/RetentionModal.tsx b/frontend/src/scenes/retention/RetentionModal.tsx
index b47409b554953d..a47a895d40fffa 100644
--- a/frontend/src/scenes/retention/RetentionModal.tsx
+++ b/frontend/src/scenes/retention/RetentionModal.tsx
@@ -47,7 +47,6 @@ export function RetentionModal(): JSX.Element | null {
                                 export_format: ExporterFormat.CSV,
                                 export_context: {
                                     path: row?.people_url,
-                                    max_limit: 10000,
                                 },
                             })
                         }
diff --git a/frontend/src/types.ts b/frontend/src/types.ts
index 6322a3fb6c4d09..a1652c4a75eb58 100644
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@@ -2911,13 +2911,11 @@ export type OnlineExportContext = {
     query?: any
     body?: any
     filename?: string
-    max_limit?: number
 }
 
 export type QueryExportContext = {
     source: Record<string, any>
     filename?: string
-    max_limit?: number
 }
 
 export type ExportContext = OnlineExportContext | LocalExportContext | QueryExportContext
diff --git a/posthog/api/query.py b/posthog/api/query.py
index 6608ed82bd73da..387d6d75acf22d 100644
--- a/posthog/api/query.py
+++ b/posthog/api/query.py
@@ -212,7 +212,7 @@ def _unwrap_pydantic_dict(response: Any) -> Dict:
 
 
 def process_query(
-    team: Team, query_json: Dict, default_limit: Optional[int] = None, request: Optional[Request] = None
+    team: Team, query_json: Dict, in_export_context: Optional[bool] = False, request: Optional[Request] = None
 ) -> Dict:
     # query_json has been parsed by QuerySchemaParser
     # it _should_ be impossible to end up in here with a "bad" query
@@ -221,10 +221,10 @@
 
     if query_kind in QUERY_WITH_RUNNER:
         refresh_requested = refresh_requested_by_client(request) if request else False
-        query_runner = get_query_runner(query_json, team)
+        query_runner = get_query_runner(query_json, team, in_export_context=in_export_context)
         return _unwrap_pydantic_dict(query_runner.run(refresh_requested=refresh_requested))
     elif query_kind in QUERY_WITH_RUNNER_NO_CACHE:
-        query_runner = get_query_runner(query_json, team)
+        query_runner = get_query_runner(query_json, team, in_export_context=in_export_context)
         return _unwrap_pydantic_dict(query_runner.calculate())
     elif query_kind == "HogQLQuery":
         hogql_query = HogQLQuery.model_validate(query_json)
@@ -240,7 +240,7 @@ def process_query(
             filters=hogql_query.filters,
             modifiers=hogql_query.modifiers,
             placeholders=values,
-            default_limit=default_limit,
+            in_export_context=in_export_context,
             explain=hogql_query.explain,
         )
         return _unwrap_pydantic_dict(hogql_response)
diff --git a/posthog/api/test/test_exports.py b/posthog/api/test/test_exports.py
index 7db2b4525a24d1..448afe2a06da9e 100644
--- a/posthog/api/test/test_exports.py
+++ b/posthog/api/test/test_exports.py
@@ -411,7 +411,6 @@ def requests_side_effect(*args, **kwargs):
             f"/api/projects/{self.team.pk}/exports/",
             {
                 "export_context": {
-                    "max_limit": 10000,
                     "path": path,
                 },
                 "export_format": "text/csv",
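
The thread running through the hunks above: callers no longer smuggle a max_limit number inside each export_context; the exporter passes a single in_export_context flag and the row limit is resolved in one place on the backend. A minimal sketch of the resulting rule (standalone Python, not PostHog code; constant values mirror the ones in this patch):

    DEFAULT_RETURNED_ROWS = 100  # interactive API queries without an explicit LIMIT
    MAX_SELECT_RETURNED_ROWS = 10000  # hard ceiling for every query, default for exports


    def resolve_limit(requested_limit, in_export_context):
        # No explicit LIMIT: exports get the big default, API reads get the small one.
        if requested_limit is None:
            return MAX_SELECT_RETURNED_ROWS if in_export_context else DEFAULT_RETURNED_ROWS
        # An explicit LIMIT is honoured but still clamped to the hard ceiling.
        return min(MAX_SELECT_RETURNED_ROWS, requested_limit)


    assert resolve_limit(None, in_export_context=False) == 100
    assert resolve_limit(None, in_export_context=True) == 10000
    assert resolve_limit(50_000, in_export_context=True) == 10000
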
with freeze_time("2020-01-10 12:00:00"): for _ in range(20): @@ -439,7 +440,28 @@ def test_full_hogql_query_limit(self, DEFAULT_RETURNED_ROWS=10): self.assertEqual(len(response.get("results", [])), 10) @patch("posthog.hogql.constants.DEFAULT_RETURNED_ROWS", 10) - def test_full_events_query_limit(self, DEFAULT_RETURNED_ROWS=10): + @patch("posthog.hogql.constants.MAX_SELECT_RETURNED_ROWS", 15) + def test_full_hogql_query_limit_exported(self, MAX_SELECT_RETURNED_ROWS=15, DEFAULT_RETURNED_ROWS=10): + random_uuid = str(UUIDT()) + with freeze_time("2020-01-10 12:00:00"): + for _ in range(20): + _create_event(team=self.team, event="sign up", distinct_id=random_uuid, properties={"key": "test_val1"}) + flush_persons_and_events() + + with freeze_time("2020-01-10 12:14:00"): + response = process_query( + team=self.team, + query_json={ + "kind": "HogQLQuery", + "query": f"select event from events where distinct_id='{random_uuid}'", + }, + in_export_context=True, # This is the only difference + ) + self.assertEqual(len(response.get("results", [])), 15) + + @patch("posthog.hogql.constants.DEFAULT_RETURNED_ROWS", 10) + @patch("posthog.hogql.constants.MAX_SELECT_RETURNED_ROWS", 15) + def test_full_events_query_limit(self, MAX_SELECT_RETURNED_ROWS=15, DEFAULT_RETURNED_ROWS=10): random_uuid = str(UUIDT()) with freeze_time("2020-01-10 12:00:00"): for _ in range(20): @@ -454,6 +476,24 @@ def test_full_events_query_limit(self, DEFAULT_RETURNED_ROWS=10): self.assertEqual(len(response.get("results", [])), 10) + @patch("posthog.hogql.constants.DEFAULT_RETURNED_ROWS", 10) + @patch("posthog.hogql.constants.MAX_SELECT_RETURNED_ROWS", 15) + def test_full_events_query_limit_exported(self, MAX_SELECT_RETURNED_ROWS=15, DEFAULT_RETURNED_ROWS=10): + random_uuid = str(UUIDT()) + with freeze_time("2020-01-10 12:00:00"): + for _ in range(20): + _create_event(team=self.team, event="sign up", distinct_id=random_uuid, properties={"key": "test_val1"}) + flush_persons_and_events() + + with freeze_time("2020-01-10 12:14:00"): + response = process_query( + team=self.team, + query_json={"kind": "EventsQuery", "select": ["event"], "where": [f"distinct_id = '{random_uuid}'"]}, + in_export_context=True, + ) + + self.assertEqual(len(response.get("results", [])), 15) + def test_property_definition_annotation_does_not_break_things(self): PropertyDefinition.objects.create(team=self.team, name="$browser", property_type=PropertyType.String) diff --git a/posthog/hogql/constants.py b/posthog/hogql/constants.py index 452eaa2fa63bfe..cd08da81fca308 100644 --- a/posthog/hogql/constants.py +++ b/posthog/hogql/constants.py @@ -20,7 +20,7 @@ # Limit applied to SELECT statements without LIMIT clause when queried via the API DEFAULT_RETURNED_ROWS = 100 # Max limit for all SELECT queries, and the default for CSV exports. 
diff --git a/posthog/hogql/constants.py b/posthog/hogql/constants.py
index 452eaa2fa63bfe..cd08da81fca308 100644
--- a/posthog/hogql/constants.py
+++ b/posthog/hogql/constants.py
@@ -20,7 +20,7 @@
 # Limit applied to SELECT statements without LIMIT clause when queried via the API
 DEFAULT_RETURNED_ROWS = 100
 # Max limit for all SELECT queries, and the default for CSV exports.
-MAX_SELECT_RETURNED_ROWS = 10000
+MAX_SELECT_RETURNED_ROWS = 10000  # sync with CSV_EXPORT_LIMIT
 
 
 # Settings applied at the SELECT level
diff --git a/posthog/hogql/query.py b/posthog/hogql/query.py
index 7938b4673a9cf5..723476b0ab5e4a 100644
--- a/posthog/hogql/query.py
+++ b/posthog/hogql/query.py
@@ -27,7 +27,7 @@ def execute_hogql_query(
     workload: Workload = Workload.ONLINE,
     settings: Optional[HogQLGlobalSettings] = None,
     modifiers: Optional[HogQLQueryModifiers] = None,
-    default_limit: Optional[int] = None,
+    in_export_context: Optional[bool] = False,
     timings: Optional[HogQLTimings] = None,
     explain: Optional[bool] = False,
 ) -> HogQLQueryResponse:
@@ -61,15 +61,17 @@ def execute_hogql_query(
         select_query = replace_placeholders(select_query, placeholders)
 
     with timings.measure("max_limit"):
-        from posthog.hogql.constants import DEFAULT_RETURNED_ROWS
+        from posthog.hogql.constants import DEFAULT_RETURNED_ROWS, MAX_SELECT_RETURNED_ROWS
 
         select_queries = (
             select_query.select_queries if isinstance(select_query, ast.SelectUnionQuery) else [select_query]
        )
         for one_query in select_queries:
             if one_query.limit is None:
-                # One more "max" of MAX_SELECT_RETURNED_ROWS (100k) in applied in the query printer.
-                one_query.limit = ast.Constant(value=default_limit or DEFAULT_RETURNED_ROWS)
+                # One more "max" of MAX_SELECT_RETURNED_ROWS (10k) is applied in the query printer.
+                one_query.limit = ast.Constant(
+                    value=MAX_SELECT_RETURNED_ROWS if in_export_context else DEFAULT_RETURNED_ROWS
+                )
 
     # Get printed HogQL query, and returned columns. Using a cloned query.
     with timings.measure("hogql"):
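
The hunk above applies the default to every member of a UNION when no LIMIT was written. A toy model of that loop (plain dataclasses standing in for posthog.hogql.ast, which this sketch does not import):

    from dataclasses import dataclass, field
    from typing import List, Optional

    DEFAULT_RETURNED_ROWS = 100
    MAX_SELECT_RETURNED_ROWS = 10000


    @dataclass
    class SelectQuery:
        limit: Optional[int] = None


    @dataclass
    class SelectUnionQuery:
        select_queries: List[SelectQuery] = field(default_factory=list)


    def apply_default_limits(node, in_export_context: bool) -> None:
        queries = node.select_queries if isinstance(node, SelectUnionQuery) else [node]
        for q in queries:
            if q.limit is None:  # only queries without an explicit LIMIT are touched
                q.limit = MAX_SELECT_RETURNED_ROWS if in_export_context else DEFAULT_RETURNED_ROWS


    union = SelectUnionQuery([SelectQuery(), SelectQuery(limit=25)])
    apply_default_limits(union, in_export_context=True)
    assert [q.limit for q in union.select_queries] == [10000, 25]
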
diff --git a/posthog/hogql_queries/events_query_runner.py b/posthog/hogql_queries/events_query_runner.py
index d2097f4ebb2d96..ff85691b983d36 100644
--- a/posthog/hogql_queries/events_query_runner.py
+++ b/posthog/hogql_queries/events_query_runner.py
@@ -43,14 +43,13 @@ def __init__(
         query: EventsQuery | Dict[str, Any],
         team: Team,
         timings: Optional[HogQLTimings] = None,
-        default_limit: Optional[int] = None,
+        in_export_context: Optional[bool] = False,
     ):
-        super().__init__(query, team, timings)
+        super().__init__(query, team, timings, in_export_context)
         if isinstance(query, EventsQuery):
             self.query = query
         else:
             self.query = EventsQuery.model_validate(query)
-        self.default_limit = default_limit
 
     def to_query(self) -> ast.SelectQuery:
         # Note: This code is inefficient and problematic, see https://github.com/PostHog/posthog/issues/13485 for details.
@@ -193,6 +192,7 @@ def calculate(self) -> EventsQueryResponse:
             workload=Workload.ONLINE,
             query_type="EventsQuery",
             timings=self.timings,
+            in_export_context=self.in_export_context,
         )
 
         # Convert star field from tuple to dict in each result
@@ -267,7 +267,9 @@ def limit(self) -> int:
         return (
             min(
                 MAX_SELECT_RETURNED_ROWS,
-                self.default_limit or DEFAULT_RETURNED_ROWS if self.query.limit is None else self.query.limit,
+                (MAX_SELECT_RETURNED_ROWS if self.in_export_context else DEFAULT_RETURNED_ROWS)
+                if self.query.limit is None
+                else self.query.limit,
             )
             + 1
         )
diff --git a/posthog/hogql_queries/insights/lifecycle_query_runner.py b/posthog/hogql_queries/insights/lifecycle_query_runner.py
index e1cf92e7314727..fcd41df697735c 100644
--- a/posthog/hogql_queries/insights/lifecycle_query_runner.py
+++ b/posthog/hogql_queries/insights/lifecycle_query_runner.py
@@ -22,8 +22,14 @@ class LifecycleQueryRunner(QueryRunner):
     query: LifecycleQuery
     query_type = LifecycleQuery
 
-    def __init__(self, query: LifecycleQuery | Dict[str, Any], team: Team, timings: Optional[HogQLTimings] = None):
-        super().__init__(query, team, timings)
+    def __init__(
+        self,
+        query: LifecycleQuery | Dict[str, Any],
+        team: Team,
+        timings: Optional[HogQLTimings] = None,
+        in_export_context: Optional[bool] = False,
+    ):
+        super().__init__(query, team, timings, in_export_context)
 
     def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
         placeholders = {
diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py
index 675f320b2f7ac8..672007501c8b6c 100644
--- a/posthog/hogql_queries/insights/trends/trends_query_runner.py
+++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py
@@ -30,8 +30,14 @@ class TrendsQueryRunner(QueryRunner):
     query_type = TrendsQuery
     series: List[SeriesWithExtras]
 
-    def __init__(self, query: TrendsQuery | Dict[str, Any], team: Team, timings: Optional[HogQLTimings] = None):
-        super().__init__(query, team, timings)
+    def __init__(
+        self,
+        query: TrendsQuery | Dict[str, Any],
+        team: Team,
+        timings: Optional[HogQLTimings] = None,
+        in_export_context: Optional[bool] = False,
+    ):
+        super().__init__(query, team, timings, in_export_context)
         self.series = self.setup_series()
 
     def _is_stale(self, cached_result_package):
diff --git a/posthog/hogql_queries/persons_query_runner.py b/posthog/hogql_queries/persons_query_runner.py
index ce6cf4130bbf01..f1a23f28ea8086 100644
--- a/posthog/hogql_queries/persons_query_runner.py
+++ b/posthog/hogql_queries/persons_query_runner.py
@@ -19,8 +19,14 @@ class PersonsQueryRunner(QueryRunner):
     query: PersonsQuery
     query_type = PersonsQuery
 
-    def __init__(self, query: PersonsQuery | Dict[str, Any], team: Team, timings: Optional[HogQLTimings] = None):
-        super().__init__(query, team, timings)
+    def __init__(
+        self,
+        query: PersonsQuery | Dict[str, Any],
+        team: Team,
+        timings: Optional[HogQLTimings] = None,
+        in_export_context: Optional[bool] = False,
+    ):
+        super().__init__(query=query, team=team, timings=timings, in_export_context=in_export_context)
         if isinstance(query, PersonsQuery):
             self.query = query
         else:
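
EventsQueryRunner.limit() above still returns one row more than will be shown, presumably so calculate() can tell whether a further page exists without issuing a second query (the trimming itself happens outside these hunks). The pattern in isolation, with toy data:

    def paginate(rows, limit):
        # Fetch limit + 1 rows, then use the surplus row only as a "has more" signal.
        page = rows[: limit + 1]
        has_more = len(page) > limit
        return page[:limit], has_more


    results, has_more = paginate(list(range(12)), limit=10)
    assert len(results) == 10 and has_more

    results, has_more = paginate(list(range(7)), limit=10)
    assert len(results) == 7 and not has_more
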
diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py
index d89202883607ae..fe5fc6ee28118d 100644
--- a/posthog/hogql_queries/query_runner.py
+++ b/posthog/hogql_queries/query_runner.py
@@ -78,7 +78,7 @@ def get_query_runner(
     query: Dict[str, Any] | RunnableQueryNode,
     team: Team,
     timings: Optional[HogQLTimings] = None,
-    default_limit: Optional[int] = None,
+    in_export_context: Optional[bool] = False,
 ) -> "QueryRunner":
     kind = None
     if isinstance(query, dict):
@@ -89,21 +89,39 @@
     if kind == "LifecycleQuery":
         from .insights.lifecycle_query_runner import LifecycleQueryRunner
 
-        return LifecycleQueryRunner(query=cast(LifecycleQuery | Dict[str, Any], query), team=team, timings=timings)
+        return LifecycleQueryRunner(
+            query=cast(LifecycleQuery | Dict[str, Any], query),
+            team=team,
+            timings=timings,
+            in_export_context=in_export_context,
+        )
     if kind == "TrendsQuery":
         from .insights.trends.trends_query_runner import TrendsQueryRunner
 
-        return TrendsQueryRunner(query=cast(TrendsQuery | Dict[str, Any], query), team=team, timings=timings)
+        return TrendsQueryRunner(
+            query=cast(TrendsQuery | Dict[str, Any], query),
+            team=team,
+            timings=timings,
+            in_export_context=in_export_context,
+        )
     if kind == "EventsQuery":
         from .events_query_runner import EventsQueryRunner
 
         return EventsQueryRunner(
-            query=cast(EventsQuery | Dict[str, Any], query), team=team, timings=timings, default_limit=default_limit
+            query=cast(EventsQuery | Dict[str, Any], query),
+            team=team,
+            timings=timings,
+            in_export_context=in_export_context,
         )
     if kind == "PersonsQuery":
         from .persons_query_runner import PersonsQueryRunner
 
-        return PersonsQueryRunner(query=cast(PersonsQuery | Dict[str, Any], query), team=team, timings=timings)
+        return PersonsQueryRunner(
+            query=cast(PersonsQuery | Dict[str, Any], query),
+            team=team,
+            timings=timings,
+            in_export_context=in_export_context,
+        )
     if kind == "WebOverviewStatsQuery":
         from .web_analytics.overview_stats import WebOverviewStatsQueryRunner
 
@@ -125,10 +143,18 @@ class QueryRunner(ABC):
     query_type: Type[RunnableQueryNode]
     team: Team
     timings: HogQLTimings
-
-    def __init__(self, query: RunnableQueryNode | Dict[str, Any], team: Team, timings: Optional[HogQLTimings] = None):
+    in_export_context: bool
+
+    def __init__(
+        self,
+        query: RunnableQueryNode | Dict[str, Any],
+        team: Team,
+        timings: Optional[HogQLTimings] = None,
+        in_export_context: Optional[bool] = False,
+    ):
         self.team = team
         self.timings = timings or HogQLTimings()
+        self.in_export_context = in_export_context or False
         if isinstance(query, self.query_type):
             self.query = query  # type: ignore
         else:
@@ -141,7 +167,7 @@ def calculate(self) -> BaseModel:
         raise NotImplementedError()
 
     def run(self, refresh_requested: Optional[bool] = None) -> CachedQueryResponse:
-        cache_key = self._cache_key()
+        cache_key = self._cache_key() + ("_export" if self.in_export_context else "")
         tag_queries(cache_key=cache_key)
 
         if not refresh_requested:
diff --git a/posthog/tasks/exporter.py b/posthog/tasks/exporter.py
index 87456edb021659..01c85537602f01 100644
--- a/posthog/tasks/exporter.py
+++ b/posthog/tasks/exporter.py
@@ -47,8 +47,7 @@ def export_asset(exported_asset_id: int, limit: Optional[int] = None) -> None:
         is_csv_export = exported_asset.export_format == ExportedAsset.ExportFormat.CSV
         if is_csv_export:
-            max_limit = exported_asset.export_context.get("max_limit", 10000)
-            csv_exporter.export_csv(exported_asset, limit=limit, max_limit=max_limit)
+            csv_exporter.export_csv(exported_asset, limit=limit)
             EXPORT_QUEUED_COUNTER.labels(type="csv").inc()
         else:
             image_exporter.export_image(exported_asset)
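
The csv_exporter changes that follow replace the per-export max_limit parameter with the shared CSV_EXPORT_LIMIT constant in the path-based pagination loop. Roughly the shape of that loop (toy fetcher; the value 10000 is an assumption, inferred from the "sync with CSV_EXPORT_LIMIT" comment on MAX_SELECT_RETURNED_ROWS):

    CSV_EXPORT_LIMIT = 10000  # assumed value, kept in sync with MAX_SELECT_RETURNED_ROWS


    def fetch_all_rows(fetch_page, page_size=1000):
        rows, next_url = [], "/api/initial"
        # Keep following pagination links until the API runs dry or the cap is hit.
        while next_url and len(rows) < CSV_EXPORT_LIMIT:
            page, next_url = fetch_page(next_url, page_size)
            rows.extend(page)
        return rows[:CSV_EXPORT_LIMIT]


    pages = iter([(["row"] * 6, "/api/next"), (["row"] * 6, "/api/next"), (["row"] * 6, None)])
    assert len(fetch_all_rows(lambda url, n: next(pages))) == 18
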
diff --git a/posthog/tasks/exports/csv_exporter.py b/posthog/tasks/exports/csv_exporter.py
index 64cb23cbac0acd..96432441196689 100644
--- a/posthog/tasks/exports/csv_exporter.py
+++ b/posthog/tasks/exports/csv_exporter.py
@@ -13,6 +13,7 @@
 from posthog.utils import absolute_uri
 from .ordered_csv_renderer import OrderedCsvRenderer
 from ..exporter import EXPORT_FAILED_COUNTER, EXPORT_ASSET_UNKNOWN_COUNTER, EXPORT_SUCCEEDED_COUNTER, EXPORT_TIMER
+from ...constants import CSV_EXPORT_LIMIT
 
 logger = structlog.get_logger(__name__)
 
@@ -164,20 +165,15 @@ class UnexpectedEmptyJsonResponse(Exception):
     pass
 
 
-def _export_to_csv(exported_asset: ExportedAsset, limit: int = 1000, max_limit: int = 3_500) -> None:
+def _export_to_csv(exported_asset: ExportedAsset, limit: int = 1000) -> None:
     resource = exported_asset.export_context
 
     columns: List[str] = resource.get("columns", [])
-
     all_csv_rows: List[Any] = []
 
     if resource.get("source"):
-        from posthog.hogql.constants import MAX_SELECT_RETURNED_ROWS
-
         query = resource.get("source")
-        query_response = process_query(
-            team=exported_asset.team, query_json=query, default_limit=MAX_SELECT_RETURNED_ROWS
-        )
+        query_response = process_query(team=exported_asset.team, query_json=query, in_export_context=True)
 
         all_csv_rows = _convert_response_to_csv_data(query_response)
     else:
@@ -189,7 +185,7 @@ def _export_to_csv(exported_asset: ExportedAsset, limit: int = 1000, max_limit:
             {"id": exported_asset.created_by_id}, datetime.timedelta(minutes=15), PosthogJwtAudience.IMPERSONATED_USER
         )
 
-        while len(all_csv_rows) < max_limit:
+        while len(all_csv_rows) < CSV_EXPORT_LIMIT:
             response = make_api_call(access_token, body, limit, method, next_url, path)
 
             if response.status_code != 200:
@@ -269,14 +265,14 @@ def make_api_call(
         raise ex
 
 
-def export_csv(exported_asset: ExportedAsset, limit: Optional[int] = None, max_limit: int = 3_500) -> None:
+def export_csv(exported_asset: ExportedAsset, limit: Optional[int] = None) -> None:
     if not limit:
         limit = 1000
 
     try:
         if exported_asset.export_format == "text/csv":
             with EXPORT_TIMER.labels(type="csv").time():
-                _export_to_csv(exported_asset, limit, max_limit)
+                _export_to_csv(exported_asset, limit)
             EXPORT_SUCCEEDED_COUNTER.labels(type="csv").inc()
         else:
             EXPORT_ASSET_UNKNOWN_COUNTER.labels(type="csv").inc()
diff --git a/posthog/tasks/exports/test/test_csv_exporter.py b/posthog/tasks/exports/test/test_csv_exporter.py
index 0df5c1cf2759b7..62ca713517f0e0 100644
--- a/posthog/tasks/exports/test/test_csv_exporter.py
+++ b/posthog/tasks/exports/test/test_csv_exporter.py
@@ -280,8 +280,9 @@ def test_raises_expected_error_when_json_is_none(self, patched_api_call) -> None
         csv_exporter.export_csv(self._create_asset())
 
     @patch("posthog.hogql.constants.MAX_SELECT_RETURNED_ROWS", 10)
+    @patch("posthog.hogql.constants.DEFAULT_RETURNED_ROWS", 5)
     @patch("posthog.models.exported_asset.UUIDT")
-    def test_csv_exporter_hogql_query(self, mocked_uuidt, MAX_SELECT_RETURNED_ROWS=10) -> None:
+    def test_csv_exporter_hogql_query(self, mocked_uuidt, DEFAULT_RETURNED_ROWS=5, MAX_SELECT_RETURNED_ROWS=10) -> None:
         random_uuid = str(UUIDT())
         for i in range(15):
             _create_event(