Skip to content

Commit

Permalink
feat(data-warehouse): virtual data warehouse table (#20888)
Browse files Browse the repository at this point in the history
* create virtual table definition with the shape of events

* add conditionals

* update schema

* add distinct_id field

* maths working

* add prop math selection

* add field

* Update UI snapshots for `chromium` (2)

* Update UI snapshots for `chromium` (2)

* typing

* clean up snapshot

* cleanup

* remove unnecessary

* more cleanup

* add breakdown

* format

* typing

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
EDsCODE and github-actions[bot] authored Mar 15, 2024
1 parent bb9b0ee commit 9f52e06
Show file tree
Hide file tree
Showing 23 changed files with 468 additions and 539 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,18 @@ function DefinitionView({ group }: { group: TaxonomicFilterGroup }): JSX.Element
onChange={(value) => setLocalDefinition({ id_field: value })}
/>

<label className="definition-popover-edit-form-label" htmlFor="ID Field">
<label className="definition-popover-edit-form-label" htmlFor="Distinct Id Field">
<span className="label-text">Distinct ID field</span>
</label>
<LemonSelect
value={
'distinct_id_field' in localDefinition ? localDefinition.distinct_id_field : undefined
}
options={columnOptions}
onChange={(value) => setLocalDefinition({ distinct_id_field: value })}
/>

<label className="definition-popover-edit-form-label" htmlFor="Timestamp Field">
<span className="label-text">Timestamp field</span>
</label>
<LemonSelect
Expand All @@ -313,7 +324,9 @@ function DefinitionView({ group }: { group: TaxonomicFilterGroup }): JSX.Element
'id_field' in localDefinition &&
localDefinition.id_field &&
'timestamp_field' in localDefinition &&
localDefinition.timestamp_field
localDefinition.timestamp_field &&
'distinct_id_field' in localDefinition &&
localDefinition.distinct_id_field
? null
: 'Field mappings must be specified'
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { LemonButton, LemonButtonProps } from 'lib/lemon-ui/LemonButton'
import { LemonDropdown } from 'lib/lemon-ui/LemonDropdown'
import { useEffect, useState } from 'react'

import { AnyDataNode } from '~/queries/schema'
import { AnyDataNode, DatabaseSchemaQueryResponseField } from '~/queries/schema'

export interface TaxonomicPopoverProps<ValueType extends TaxonomicFilterValue = TaxonomicFilterValue>
extends Omit<LemonButtonProps, 'children' | 'onClick' | 'sideIcon' | 'sideAction'> {
Expand All @@ -19,6 +19,7 @@ export interface TaxonomicPopoverProps<ValueType extends TaxonomicFilterValue =
placeholder?: React.ReactNode
placeholderClass?: string
dropdownMatchSelectWidth?: boolean
schemaColumns?: DatabaseSchemaQueryResponseField[]
allowClear?: boolean
style?: React.CSSProperties
excludedProperties?: { [key in TaxonomicFilterGroupType]?: TaxonomicFilterValue[] }
Expand Down Expand Up @@ -49,6 +50,7 @@ export function TaxonomicPopover<ValueType extends TaxonomicFilterValue = Taxono
allowClear = false,
excludedProperties,
metadataSource,
schemaColumns,
...buttonPropsRest
}: TaxonomicPopoverProps<ValueType>): JSX.Element {
const [localValue, setLocalValue] = useState<ValueType>(value || ('' as ValueType))
Expand Down Expand Up @@ -85,6 +87,7 @@ export function TaxonomicPopover<ValueType extends TaxonomicFilterValue = Taxono
}}
taxonomicGroupTypes={groupTypes ?? [groupType]}
eventNames={eventNames}
schemaColumns={schemaColumns}
metadataSource={metadataSource}
excludedProperties={excludedProperties}
/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ export const legacyEntityToNode = (
custom_name: entity.custom_name || undefined,
id_field: 'id_field' in entity ? entity.id_field : undefined,
timestamp_field: 'timestamp_field' in entity ? entity.timestamp_field : undefined,
distinct_id_field: 'distinct_id_field' in entity ? entity.distinct_id_field : undefined,
table_name: 'table_name' in entity ? entity.table_name : undefined,
}

Expand All @@ -103,6 +104,7 @@ export const legacyEntityToNode = (
...shared,
id_field: entity.id_field || undefined,
timestamp_field: entity.timestamp_field || undefined,
distinct_id_field: entity.distinct_id_field || undefined,
table_name: entity.table_name || undefined,
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,12 @@ export const seriesNodeToFilter = (
math_group_type_index: node.math_group_type_index,
properties: node.properties as any, // TODO,
...(isDataWarehouseNode(node)
? { table_name: node.table_name, id_field: node.id_field, timestamp_field: node.timestamp_field }
? {
table_name: node.table_name,
id_field: node.id_field,
timestamp_field: node.timestamp_field,
distinct_id_field: node.distinct_id_field,
}
: {}),
})
return entity
Expand Down
30 changes: 29 additions & 1 deletion frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -764,12 +764,34 @@
"required": ["kind", "source"],
"type": "object"
},
"DataWarehouseEventsModifier": {
"additionalProperties": false,
"properties": {
"distinct_id_field": {
"type": "string"
},
"id_field": {
"type": "string"
},
"table_name": {
"type": "string"
},
"timestamp_field": {
"type": "string"
}
},
"required": ["table_name", "timestamp_field", "distinct_id_field", "id_field"],
"type": "object"
},
"DataWarehouseNode": {
"additionalProperties": false,
"properties": {
"custom_name": {
"type": "string"
},
"distinct_id_field": {
"type": "string"
},
"fixedProperties": {
"description": "Fixed properties in the query, can't be edited in the interface (e.g. scoping down by person)",
"items": {
Expand Down Expand Up @@ -837,7 +859,7 @@
"type": "string"
}
},
"required": ["id", "id_field", "kind", "table_name", "timestamp_field"],
"required": ["distinct_id_field", "id", "id_field", "kind", "table_name", "timestamp_field"],
"type": "object"
},
"DataWarehousePropertyFilter": {
Expand Down Expand Up @@ -2526,6 +2548,12 @@
"additionalProperties": false,
"description": "HogQL Query Options are automatically set per team. However, they can be overriden in the query.",
"properties": {
"dataWarehouseEventsModifiers": {
"items": {
"$ref": "#/definitions/DataWarehouseEventsModifier"
},
"type": "array"
},
"inCohortVia": {
"enum": ["auto", "leftjoin", "subquery", "leftjoin_conjoined"],
"type": "string"
Expand Down
9 changes: 9 additions & 0 deletions frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,14 @@ export interface HogQLQueryModifiers {
personsArgMaxVersion?: 'auto' | 'v1' | 'v2'
inCohortVia?: 'auto' | 'leftjoin' | 'subquery' | 'leftjoin_conjoined'
materializationMode?: 'auto' | 'legacy_null_as_string' | 'legacy_null_as_null' | 'disabled'
dataWarehouseEventsModifiers?: DataWarehouseEventsModifier[]
}

// Field mapping for a single data warehouse table, supplied through
// `HogQLQueryModifiers.dataWarehouseEventsModifiers`.
// The backend's `create_hogql_database` uses each entry to add `id`, `timestamp`,
// `distinct_id` and `person_id` expression fields to the named table when the table
// does not already define them.
export interface DataWarehouseEventsModifier {
// Name of the warehouse table the mapping applies to (used as the key into the database's tables).
table_name: string
// Column that is wrapped in `toDateTime(...)` to form the table's `timestamp` field.
timestamp_field: string
// Expression parsed to form the table's `distinct_id` field; currently also reused for `person_id`.
distinct_id_field: string
// Expression parsed to form the table's `id` field.
id_field: string
}

export interface HogQLQueryResponse {
Expand Down Expand Up @@ -376,6 +384,7 @@ export interface DataWarehouseNode extends EntityNode {
id_field: string
table_name: string
timestamp_field: string
distinct_id_field: string
}

export interface ActionsNode extends EntityNode {
Expand Down
1 change: 1 addition & 0 deletions frontend/src/scenes/data-warehouse/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ export interface DataWarehouseTableBaseType {
// used for selecting in trends series
id_field?: string
timestamp_field?: string
distinct_id_field?: string
}

export interface DataWarehousePostHogTableType extends DataWarehouseTableBaseType {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,6 @@ export function ActionFilterRow({
const { setNodeRef, attributes, transform, transition, listeners, isDragging } = useSortable({ id: filter.uuid })

const propertyFiltersVisible = typeof filter.order === 'number' ? entityFilterVisible[filter.order] : false
const mathDisabledReason =
filter.type === EntityTypes.DATA_WAREHOUSE ? 'Data Warehouse Series only supports total counts' : ''

let name: string | null | undefined, value: PropertyFilterValue
const {
Expand Down Expand Up @@ -239,6 +237,7 @@ export function ActionFilterRow({
name: item?.name ?? '',
id_field: item?.id_field,
timestamp_field: item?.timestamp_field,
distinct_id_field: item?.distinct_id_field,
table_name: item?.name,
index,
})
Expand Down Expand Up @@ -375,7 +374,6 @@ export function ActionFilterRow({
index={index}
onMathSelect={onMathSelect}
disabled={readOnly}
disabledReason={mathDisabledReason}
style={{ maxWidth: '100%', width: 'initial' }}
mathAvailability={mathAvailability}
/>
Expand All @@ -385,9 +383,15 @@ export function ActionFilterRow({
<TaxonomicStringPopover
groupType={TaxonomicFilterGroupType.NumericalEventProperties}
groupTypes={[
TaxonomicFilterGroupType.DataWarehouseProperties,
TaxonomicFilterGroupType.NumericalEventProperties,
TaxonomicFilterGroupType.Sessions,
]}
schemaColumns={
filter.type == TaxonomicFilterGroupType.DataWarehouse && filter.name
? externalTablesMap[filter.name]?.columns
: []
}
value={mathProperty}
onChange={(currentValue) => onMathPropertySelect(index, currentValue)}
eventNames={name ? [name] : []}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export type LocalFilter = ActionFilter & {
uuid: string
id_field?: string
timestamp_field?: string
distinct_id_field?: string
table_name?: string
}

Expand Down Expand Up @@ -104,6 +105,7 @@ export const entityFilterLogic = kea<entityFilterLogicType>([
index: number
id_field?: string
timestamp_field?: string
distinct_id_field?: string
table_name?: string
}
) => ({
Expand Down Expand Up @@ -196,7 +198,17 @@ export const entityFilterLogic = kea<entityFilterLogicType>([
hideModal: () => {
actions.selectFilter(null)
},
updateFilter: async ({ type, index, name, id, custom_name, id_field, timestamp_field, table_name }) => {
updateFilter: async ({
type,
index,
name,
id,
custom_name,
id_field,
timestamp_field,
distinct_id_field,
table_name,
}) => {
actions.setFilters(
values.localFilters.map((filter, i) => {
if (i === index) {
Expand All @@ -210,11 +222,16 @@ export const entityFilterLogic = kea<entityFilterLogicType>([
id_field: typeof id_field === 'undefined' ? filter.id_field : id_field,
timestamp_field:
typeof timestamp_field === 'undefined' ? filter.timestamp_field : timestamp_field,
distinct_id_field:
typeof distinct_id_field === 'undefined'
? filter.distinct_id_field
: distinct_id_field,
table_name: typeof table_name === 'undefined' ? filter.table_name : table_name,
}
} else {
delete filter.id_field
delete filter.timestamp_field
delete filter.distinct_id_field
delete filter.table_name
return {
...filter,
Expand Down
1 change: 1 addition & 0 deletions frontend/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -944,6 +944,7 @@ export interface ActionFilter extends EntityFilter {
/**
 * An `ActionFilter` that targets a data warehouse table.
 *
 * Carries the table name together with the column mappings that are copied to and
 * from a data warehouse series node when converting between filters and query nodes.
 */
export interface DataWarehouseFilter extends ActionFilter {
/** Column mapped to the series' `id_field`. */
id_field: string
/** Column mapped to the series' `timestamp_field`. */
timestamp_field: string
/** Column mapped to the series' `distinct_id_field`. */
distinct_id_field: string
/** Name of the data warehouse table backing this filter. */
table_name: string
}

Expand Down
28 changes: 28 additions & 0 deletions posthog/hogql/database/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,34 @@ def create_hogql_database(
for table in DataWarehouseTable.objects.filter(team_id=team.pk).exclude(deleted=True):
tables[table.name] = table.hogql_definition()

if modifiers.dataWarehouseEventsModifiers:
for warehouse_modifier in modifiers.dataWarehouseEventsModifiers:
# TODO: add all field mappings
if "id" not in tables[warehouse_modifier.table_name].fields.keys():
tables[warehouse_modifier.table_name].fields["id"] = ExpressionField(
name="id",
expr=parse_expr(warehouse_modifier.id_field),
)

if "timestamp" not in tables[warehouse_modifier.table_name].fields.keys():
tables[warehouse_modifier.table_name].fields["timestamp"] = ExpressionField(
name="timestamp",
expr=ast.Call(name="toDateTime", args=[ast.Field(chain=[warehouse_modifier.timestamp_field])]),
)

# TODO: Need to decide how the distinct_id and person_id fields are going to be handled
if "distinct_id" not in tables[warehouse_modifier.table_name].fields.keys():
tables[warehouse_modifier.table_name].fields["distinct_id"] = ExpressionField(
name="distinct_id",
expr=parse_expr(warehouse_modifier.distinct_id_field),
)

if "person_id" not in tables[warehouse_modifier.table_name].fields.keys():
tables[warehouse_modifier.table_name].fields["person_id"] = ExpressionField(
name="person_id",
expr=parse_expr(warehouse_modifier.distinct_id_field),
)

for saved_query in DataWarehouseSavedQuery.objects.filter(team_id=team.pk).exclude(deleted=True):
tables[saved_query.name] = saved_query.hogql_definition()

Expand Down
14 changes: 14 additions & 0 deletions posthog/hogql_queries/insights/data_warehouse_mixin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from posthog.hogql import ast
from posthog.models.filters.mixins.utils import cached_property
from posthog.schema import ActionsNode, EventsNode, DataWarehouseNode


class DataWarehouseInsightQueryMixin:
    """Mixin for insight query builders whose ``series`` may be a data warehouse node.

    Host classes are expected to set ``series``; the mixin only reads it.
    """

    series: EventsNode | ActionsNode | DataWarehouseNode

    @cached_property
    def _table_expr(self) -> ast.Field:
        """Return the table reference to select from for the current series.

        A ``DataWarehouseNode`` series resolves to its own ``table_name``; any other
        series type resolves to the ``events`` table.
        """
        source_table = self.series.table_name if isinstance(self.series, DataWarehouseNode) else "events"
        return ast.Field(chain=[source_table])
14 changes: 13 additions & 1 deletion posthog/hogql_queries/insights/trends/aggregation_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from posthog.models.team.team import Team
from posthog.schema import EventsNode, ActionsNode, DataWarehouseNode
from posthog.models.filters.mixins.utils import cached_property
from posthog.hogql_queries.insights.data_warehouse_mixin import DataWarehouseInsightQueryMixin


class QueryAlternator:
Expand Down Expand Up @@ -48,7 +49,7 @@ def replace_select_from(self, join_expr: ast.JoinExpr) -> None:
self._select_from = join_expr


class AggregationOperations:
class AggregationOperations(DataWarehouseInsightQueryMixin):
team: Team
series: Union[EventsNode, ActionsNode, DataWarehouseNode]
query_date_range: QueryDateRange
Expand Down Expand Up @@ -155,6 +156,8 @@ def _math_func(self, method: str, override_chain: Optional[List[str | int]]) ->

if self.series.math_property == "$session_duration":
chain = ["session_duration"]
elif isinstance(self.series, DataWarehouseNode) and self.series.math_property:
chain = [self.series.math_property]
else:
chain = ["properties", self.series.math_property]

Expand Down Expand Up @@ -344,6 +347,14 @@ def _events_query(

query = parse_select(
"""
SELECT
count({id_field}) AS total
FROM {table} AS e
WHERE {events_where_clause}
GROUP BY {person_field}
"""
if isinstance(self.series, DataWarehouseNode)
else """
SELECT
count({id_field}) AS total
FROM events AS e
Expand All @@ -353,6 +364,7 @@ def _events_query(
""",
placeholders={
"id_field": self._id_field,
"table": self._table_expr,
"events_where_clause": where_clause_combined,
"sample": sample_value,
"person_field": ast.Field(
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql_queries/insights/trends/breakdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ def _all_breakdown_values(self) -> List[str | int | float | None]:
chart_display_type=self._trends_display().display_type,
breakdown_filter=self.query.breakdownFilter,
query_date_range=self.query_date_range,
modifiers=self.modifiers,
)
return cast(List[str | int | float | None], breakdown.get_breakdown_values())

Expand Down
Loading

0 comments on commit 9f52e06

Please sign in to comment.