diff --git a/apps/framework-docs/public/Avatars/Logos-avatar=5-Color-SM.png b/apps/framework-docs/public/Avatars/Logos-avatar=5-Color-SM.png new file mode 100644 index 000000000..108d68766 Binary files /dev/null and b/apps/framework-docs/public/Avatars/Logos-avatar=5-Color-SM.png differ diff --git a/apps/framework-docs/src/components/language-wrappers.tsx b/apps/framework-docs/src/components/language-wrappers.tsx index 4175c3541..40ce2b3bf 100644 --- a/apps/framework-docs/src/components/language-wrappers.tsx +++ b/apps/framework-docs/src/components/language-wrappers.tsx @@ -30,7 +30,6 @@ export const Python: React.FC = ({ {children} ); - //return language === "python" ? <>{children} : null; }; export const LanguageSwitch: React.FC<{ diff --git a/apps/framework-docs/src/pages/consumption-apis.mdx b/apps/framework-docs/src/pages/consumption-apis.mdx index f778ca9a3..19e187dcf 100644 --- a/apps/framework-docs/src/pages/consumption-apis.mdx +++ b/apps/framework-docs/src/pages/consumption-apis.mdx @@ -1,18 +1,41 @@ -import { Callout } from "../components"; -import { Tabs } from "nextra/components"; +import { Callout, TypeScript, Python, LanguageSwitcher } from "../components"; +import { Tabs, FileTree } from "nextra/components"; # Introduction to Consumption APIs + -Moose **Consumption APIs** offer a powerful and flexible way to create custom API endpoints, allowing your applications to access and retrieve data from your OLAP database. These APIs act as the final layer in your Moose application, dynamically generating and executing SQL queries based on parameters received from incoming requests from your data consumers. +Consumption APIs offer a powerful and flexible way to create custom API endpoints, allowing your applications to access and retrieve data from your OLAP database. These APIs act as the final layer in your Moose application, dynamically generating and executing SQL queries based on parameters received from incoming requests from your data consumers. 
## Core Concepts ### File and Folder Conventions -Consumption APIs are defined as individual files within the `/apis` folder of your Moose application. These files, written in TypeScript (`.ts`) or Python (`.py`), are automatically mapped to API endpoints based on their filenames. +Consumption APIs are defined as individual `.ts``.py` files within the `/apis` folder of your Moose application. These files are automatically mapped to API endpoints based on their filenames. + + + + + + + + + + + + + + + + + + + + + + - A file named `myMooseApi.ts` would correspond to the `/consumption/myMooseApi` + A file named `myMooseApi.ts``myMooseApi.py` would correspond to the `/consumption/myMooseApi` endpoint. @@ -25,13 +48,9 @@ Each API file contains a route handler function that processes requests to its c 3. Executing the queries against your database 4. Processing and formatting the results before sending the response -#### Language-Specific Conventions - -The definition of route handler functions follows language-specific conventions: - - - For Typescript, you must export the function as the default export of the file: + + This route handler function must be the default export of the file: ```ts filename="/apis/dailyActiveUsers.ts" copy {8} import { ConsumptionUtil } from "@514labs/moose-lib"; @@ -54,11 +73,11 @@ sql`SELECT ); } -```` - +``` + - -For Python, you must define a single function named `run()` in the file. 
+ +Consumption APIs are defined as a function named `run()` in the file: ```python filename="/apis/dailyActiveUsers.py" copy {1} def run(client, params): minDailyActiveUsers = int(params.get('minDailyActiveUsers', [0])[0]) @@ -79,10 +98,8 @@ def run(client, params): } ) -```` - - - +``` + #### Query Parameters and Dynamic Data Retrieval diff --git a/apps/framework-docs/src/pages/consumption-apis/creating-endpoints.mdx b/apps/framework-docs/src/pages/consumption-apis/creating-endpoints.mdx index 5f6091400..617f64daa 100644 --- a/apps/framework-docs/src/pages/consumption-apis/creating-endpoints.mdx +++ b/apps/framework-docs/src/pages/consumption-apis/creating-endpoints.mdx @@ -1,12 +1,31 @@ -import { Callout } from "../../components"; +import { Callout, LanguageSwitcher, TypeScript, Python } from "../../components"; import { FileTree } from "nextra/components"; # Initialize Your API Endpoint + + ## via the CLI -To create a new Consumption API endpoint, leverage the `moose-cli consumption init` command. This will generate a `.ts` or `.py` file in the `/apis` directory. +To create a new Consumption API endpoint, leverage the `moose-cli consumption init` command. + + +```bash filename="Terminal" copy +npx moose-cli consumption init +``` + + + +```bash filename="Terminal" copy +moose-cli consumption init +``` + + +- ``: The name of the route you want to create. + +This command will generate a `.ts``.py` file in the `/apis` directory. + @@ -14,13 +33,23 @@ To create a new Consumption API endpoint, leverage the `moose-cli consumption in + + + + + + + + + + For detailed information about the moose block init command, you can run the following in your terminal: -```bash copy +```bash filename="Terminal" copy npx moose-cli consumption init --help ``` ## Manually -You can alternatively manually create a new `.ts` or `.py` file inside the `/apis` folder of your project. +You can alternatively manually create a new `.ts``.py` file inside the `/apis` folder of your project. 
diff --git a/apps/framework-docs/src/pages/consumption-apis/implement-route-handler.mdx b/apps/framework-docs/src/pages/consumption-apis/implement-route-handler.mdx index 715d4cc45..9324e4d04 100644 --- a/apps/framework-docs/src/pages/consumption-apis/implement-route-handler.mdx +++ b/apps/framework-docs/src/pages/consumption-apis/implement-route-handler.mdx @@ -115,4 +115,4 @@ Once your route handler is defined, you can test it by making a request to your https://localhost:4000/consumption/dailyActiveUsers?limit=10&minDailyActiveUsers=5 ``` -This example passes limit and minDailyActiveUsers as query parameters. Be sure that the endpoint URL matches the filename of your API handler, as Moose maps endpoints based on the filenames in the /apis folder. +This example passes `limit` and `minDailyActiveUsers` as query parameters. Be sure that the endpoint URL matches the filename of your API handler, as Moose maps endpoints based on the filenames in the `/apis` folder. diff --git a/apps/framework-docs/src/pages/data-models.mdx b/apps/framework-docs/src/pages/data-models.mdx index 584d5bf6f..1e537c4af 100644 --- a/apps/framework-docs/src/pages/data-models.mdx +++ b/apps/framework-docs/src/pages/data-models.mdx @@ -1,5 +1,5 @@ import { FileTree, Tabs } from "nextra/components"; -import { Callout, LanguageSwitcher, TypeScript, Python } from "../components"; +import { LanguageSwitcher, TypeScript, Python, Callout } from "../components"; # Introduction to Data Models @@ -58,33 +58,221 @@ export interface UserActivity { activity: string; } ``` - -Data Models are represented as `dataclasses` (using the `@dataclass` decorator), and must be defined in a `.py` file: +Data Models are defined as Python dataclasses using the `@dataclass` decorator and must be located within a `.py` file. 
To ensure Moose automatically detects and registers the dataclass as a Data Model, apply the `@moose_data_model` decorator to your dataclass: + + + Import the `@moose_data_model` decorator from the `moose_lib` package. + ```py filename="datamodels/models.py" copy +from moose_lib import Key, moose_data_model from dataclasses import dataclass from typing import List +@moose_data_model @dataclass class UserActivity: - event_id: str + event_id: Key[str] timestamp: str user_id: str activity: str -``` +``` - - You can define multiple Data Models within a single file. - - The name of the file is not important, but it must have a `.ts` `.py` extension. - - Any exported interface dataclass that adheres to the appropriate file and folder conventions will be automatically detected by Moose and used to generate the necessary infrastructure. - + - You can define multiple Data Models within a single file. + - The file name is flexible, but it must have a `.ts` or `.py` extension. + - Moose automatically detects and generates the necessary infrastructure for any exported interfacesdataclasses decorated with the `@moose_data_model` decorator that adhere to the prescribed file and folder structure conventions. ### How Moose Interprets Data Models -Moose will automatically detect and use any exported interfaces or dataclasses that adhere to the file and folder conventions. The property names and data types in the Data Model are interpreted and translated into the infrastructure components (SDK, Ingestion API, Streaming Topic, Database Landing Table, Database View). +Moose will automatically detect and use any exported interfacesdataclasses decorated with the `@moose_data_model` decorator that adhere to the file and folder conventions. The property names and data types in the Data Model are interpreted and translated into the infrastructure components (SDK, Ingestion API, Streaming Topic, Database Landing Table, Database View). 
+ + + +## Data Model Examples + + +### Basic Data Model + +```json filename="sample_data.json" copy +{ + "example_UUID": "123e4567-e89b-12d3-a456-426614174000", + "example_string": "string", + "example_number": 123, + "example_boolean": true, + "example_array": [1, 2, 3] +} +``` + +```ts filename="datamodels/models.ts" copy +import { Key } from "@514labs/moose-lib"; + +export interface BasicDataModel { + example_UUID: Key; + example_string: string; + example_number: number; + example_boolean: boolean; + example_array: number[]; +} +``` + + + +```py filename="datamodels/models.py" copy +from dataclasses import dataclass +from moose_lib import Key, moose_data_model + +@moose_data_model +@dataclass +class BasicDataModel: + example_UUID: Key[str] + example_string: str + example_number: int + example_boolean: bool + example_array: list[int] +``` + + +### Optional Fields +```json filename="sample.json" copy +[ + { + "example_UUID": "123e4567-e89b-12d3-a456-426614174000", + "example_string": "string", + "example_number": 123, + "example_boolean": true, + "example_array": [1, 2, 3], + "example_optional_string": "optional" + }, + { + "example_UUID": "123e4567-e89b-12d3-a456-426614174000", + "example_string": "string", + "example_number": 123, + "example_boolean": true, + "example_array": [1, 2, 3] + } +] +``` + +```ts filename="datamodels/models.ts" copy {9} +import { Key } from "@514labs/moose-lib"; + +export interface DataModelWithOptionalField { + example_UUID: Key; + example_string: string; + example_number: number; + example_boolean: boolean; + example_array: number[]; + example_optional_string?: string; // Use the `?` operator to mark a field as optional +} +``` + + + +```py filename="datamodels/models.py" copy {12} +from dataclasses import dataclass +from typing import Optional +from moose_lib import Key, moose_data_model + +@moose_data_model +@dataclass +class DataModelWithOptionalField: + example_UUID: Key[str] + example_string: str + example_number: int + 
example_boolean: bool + example_array: list[int] + example_optional_string: Optional[str] # Use the Optional type to mark a field as optional +``` + + +### Nested Fields +```json filename="sample.json" copy +{ + "example_UUID": "123e4567-e89b-12d3-a456-426614174000", + "example_string": "string", + "example_number": 123, + "example_boolean": true, + "example_array": [1, 2, 3], + "example_nested_object": { + "example_nested_number": 456, + "example_nested_boolean": true, + "example_nested_array": [4, 5, 6] + } +} +``` + + + +```ts filename="datamodels/models.ts" copy {3-8, 16} +import { Key } from "@514labs/moose-lib"; + +// Define the nested object interface separately +interface NestedObject { + example_nested_number: number; + example_nested_boolean: boolean; + example_nested_array: number[]; +} + +export interface DataModelWithNestedObject { + example_UUID: Key; + example_string: string; + example_number: number; + example_boolean: boolean; + example_array: number[]; + example_nested_object: NestedObject; // Reference nested object interface +} +``` + + +```ts filename="datamodels/models.ts" copy {9-13} +import { Key } from "@514labs/moose-lib"; + +export interface DataModelWithInlineNestedObject { + example_UUID: Key; + example_string: string; + example_number: number; + example_boolean: boolean; + example_array: number[]; + example_nested_object: { // Define the nested object inline + example_nested_number: number; + example_nested_boolean: boolean; + example_nested_array: number[]; + }; +} +``` + + + + + +```py filename="datamodels/models.py" copy {4-8, 17} +from moose_lib import Key, moose_data_model +from dataclasses import dataclass + +@dataclass +class ExampleNestedObject: + example_nested_number: int + example_nested_boolean: bool + example_nested_array: list[int] + +@moose_data_model ## Only register the outer dataclass +@dataclass +class DataModelWithNestedObject: + example_UUID: Key[str] + example_string: str + example_number: int + example_boolean: 
bool + example_array: list[int] + example_nested_object: ExampleNestedObject +``` +The `example_nested_object` field in the Data Model is a nested object that is defined using the `ExampleNestedObject` dataclass. The `@moose_data_model` decorator is used to register the `DataModelWithNestedObject` dataclass with Moose. + + +Moose offers a CLI helper to help automatically generate a Data Model and infer its schema based on a sample JSON file containing the data you want to ingest. The next section will provide more details on how to use this helper. diff --git a/apps/framework-docs/src/pages/data-models/_meta.json b/apps/framework-docs/src/pages/data-models/_meta.json index 8385db02e..20b5ad16d 100644 --- a/apps/framework-docs/src/pages/data-models/_meta.json +++ b/apps/framework-docs/src/pages/data-models/_meta.json @@ -1,6 +1,9 @@ { "init-model": "Creating Data Models", - "model-data": "Define Data Model Schema", + "model-data": "Supported Data Types", "configure-model": "Configuring Infrastructure", - "dcm": "Changing Data Models" + "dcm": "Changing Data Models", + "examples": { + "display": "hidden" + } } diff --git a/apps/framework-docs/src/pages/data-models/configure-model.mdx b/apps/framework-docs/src/pages/data-models/configure-model.mdx index bc1d272d9..fd0f19a44 100644 --- a/apps/framework-docs/src/pages/data-models/configure-model.mdx +++ b/apps/framework-docs/src/pages/data-models/configure-model.mdx @@ -1,44 +1,80 @@ import { Tabs } from "nextra/components"; +import { Callout, LanguageSwitcher, TypeScript, Python } from "../../components"; # Configuring Data Models + + While Moose provides sensible defaults for provisioning OLAP storage and infrastructure based on your Data Models, it also supports flexible customization for configurations like ingestion formats and storage options. 
-### `DataModelConfig` -To set the configuration for a Data Model, export a configuration object from the model file with the data model name as a prefix and `Config` as the suffix, like so: + +### `DataModelConfig` +Instantiate and export a `DataModelConfig` with your desired configuration options, using the type parameter `T` to specify the Data Model you wish to configure: ```ts filename="datamodels/models.ts" copy -import { Key, DataModelConfig, IngestionFormat } from "@514labs/moose-lib"; +import { DataModelConfig, IngestionFormat } from "@514labs/moose-lib"; -export const UserActivityConfig: DataModelConfig = { +export const ExampleDataModelConfig: DataModelConfig = { ingestion: { format: IngestionFormat.JSON, }, storage: { enabled: true, - order_by_fields: ["timestamp"], + order_by_fields: ["example_timestamp"], }, }; -export interface UserActivity { - eventId: Key; - timestamp: string; - userId: string; - activity: string; +export interface ExampleDataModel { + example_timestamp: Date; + example_string: string; + example_number: number; + example_boolean: boolean; + example_string_array: string[]; } ``` + + + +### `DataModelConfig` +Use the `DataModelConfig` class to specify configuration options for your Python Data Model. 
+ +```py filename="datamodels/models.py" copy +from moose_lib import DataModelConfig, IngestionConfig, IngestionFormat, StorageConfig, moose_data_model +from dataclasses import dataclass +from datetime import datetime + +@dataclass +class ExampleDataModel: + example_timestamp: datetime + example_string: str + example_int: int + example_float: float + example_bool: bool + example_string_array: list[str] + +example_config = DataModelConfig( + ingestion=IngestionConfig(format=IngestionFormat.JSON), + storage=StorageConfig(enabled=False, order_by_fields=["example_timestamp"]) +) + +moose_data_model(ExampleDataModel, example_config) +``` + -## Config Options + +If you configure the `storage` option to specify a field name for the `order_by_fields` option, then you are not required to specify a `Key` field in the Data Model. + -The `DataModelConfig` object supports the following fields: +## Config Options -- `ingestion` -- `storage` +`DataModelConfig` supports the following parameters: +- `ingestion` (`IngestionConfig`): Configures the ingestion endpoint for the data model. +- `storage` (`StorageConfig`): Configures the storage layer for the data model. ### `ingestion` -The ingestion configuration object is used to define how the API endpoint will receive data. The configuration supports the following options: +Set the `ingestion` parameter to define how the API endpoint will receive data. The configuration supports the following options: - **`format`**: Specifies the format of the data that the ingestion endpoint will accept. - **Type**: `IngestionFormat` @@ -49,66 +85,80 @@ The ingestion configuration object is used to define how the API endpoint will r #### Examples - - - ```ts filename="datamodels/models.ts" copy - export const UserActivityConfig: DataModelConfig = { - ingestion: { - format: IngestionFormat.JSON, - }, - }; - - export interface UserActivity { - ... 
- } - ``` - - - ```ts filename="datamodels/models.ts" copy - export const UserActivityConfig: DataModelConfig = { - ingestion: { - format: IngestionFormat.JSON_ARRAY, - }, - }; - - ``` - - + + +```ts filename="datamodels/models.ts" copy {3} +export const UserActivityConfig: DataModelConfig = { + ingestion: { + format: IngestionFormat.JSON, + }, +}; + +export interface UserActivity { + ... +} +``` + + +```ts filename="datamodels/models.ts" copy {3} +export const UserActivityConfig: DataModelConfig = { + ingestion: { + format: IngestionFormat.JSON_ARRAY, + }, +}; + +export interface UserActivity { + ... +} +``` + + ### `storage` The `storage` field configures the persistence layer for the data model. It supports the following options: - **`enabled`**: A boolean value that determines whether to enable a table in the OLAP storage for the data model. - - **Type**: `boolean` - - **Default**: `true` + + - **Type**:`boolean` + - **Default**: `true` - **Description**: When set to `true`, a table will be created in the OLAP storage for the data model, allowing data to be stored and queried. Setting it to `false` will disable this table creation. -- **`order_by_fields`**: An array of strings that adds an `ORDER BY` clause to the table in the OLAP storage. This is useful for optimizing queries that sort the data model by specific fields. - - **Type**: `string[]` - - **Default**: `[]` - - **Description**: This is an array of field names by which the table data should be ordered. Ordering the data on disk can improve query performance for specific use cases, such as time-series analysis or sorting by frequently queried fields. + + + - **Type**: `bool` + - **Default**: `True` + - **Description**: When set to `True`, a table will be created in the OLAP storage for the data model, allowing data to be stored and queried. Setting it to `False` will disable this table creation. 
+ + +- **`order_by_fields`**: An array of strings that adds an `ORDER BY` clause to the table in the OLAP storage. This is useful for optimizing queries that sort the Data Model by specific fields. + + - **Type**: `string[]` + + + - **Type**: `list[str]` + + - **Default**: `[]` + - **Description**: This is an array of field names by which the table data should be ordered. The strings must be a subset of the fields in the Data Model. #### Examples - - ```ts filename="/datamodels/models.ts" copy - export const UserActivityConfig: DataModelConfig = { - storage: { - enabled: false, - }, - }; - - ``` - - - ```ts filename="datamodels/models.ts" copy - export const UserActivityConfig: DataModelConfig = { - storage: { - order_by_fields: ["timestamp"], - }, - }; - - ``` - + +```ts filename="/datamodels/models.ts" copy +export const UserActivityConfig: DataModelConfig = { + storage: { + enabled: false, + }, +}; +``` + + +```ts filename="datamodels/models.ts" copy + export const UserActivityConfig: DataModelConfig = { + storage: { + order_by_fields: ["timestamp"], + }, + }; +``` + diff --git a/apps/framework-docs/src/pages/data-models/init-model.mdx b/apps/framework-docs/src/pages/data-models/init-model.mdx index 002bf9102..5197b5e18 100644 --- a/apps/framework-docs/src/pages/data-models/init-model.mdx +++ b/apps/framework-docs/src/pages/data-models/init-model.mdx @@ -11,31 +11,31 @@ import { ## Using the CLI -To create a new Data Model in your Moose application, use the `moose data-model init` command. This will generate a `.ts` `.py` file in the `/datamodels` directory and define your Data Model within it. +To create a new Data Model in your Moose application, use the `data-model init` command. This will generate a file in the `/datamodels` directory of your Moose application and define your Data Model within it. 
-For detailed information about the `moose data-model init` command, you can run the following in your terminal: ```bash filename="Terminal" copy -npx moose-cli data-model init --help +npx moose-cli data-model init --sample ``` ```bash filename="Terminal" copy -moose-cli data-model init --help +moose-cli data-model init --sample ``` - - If you have sample data in a `.json` or `.csv` file, you can use the - `--sample` flag to specify the path to that file. The CLI will then infer the - Data Model schema from the provided file and create the Data Model for you. +- ``: The path to the sample data file. +- ``: The name of the Data Model. + + +If you have a `.json` or `.csv` file with sample data representing the structure of the data you want to ingest, you can use the `--sample` flag to specify the path to that file. The CLI will then infer the Data Model schema from the provided file and create the Data Model for you. ## Creating a new Data Model Manually There are two additional ways to initialize a new Data Model in your Moose project: -- By creating a new `.ts` `.py` file in the `/datamodels` directory of your Moose application and defining a new interface dataclass inside this file. -- By appending a new interface or dataclass to an existing `.ts` `.py` file in your `/datamodels` directory +- By creating a new `.ts` `.py` file in the `/datamodels` directory of your Moose application and defining a new Data Model inside this file. +- By appending a new Data Model to an existing `.ts` `.py` file in your `/datamodels` directory. 
diff --git a/apps/framework-docs/src/pages/data-models/model-data.mdx b/apps/framework-docs/src/pages/data-models/model-data.mdx index 1bf043d75..1c3a51d96 100644 --- a/apps/framework-docs/src/pages/data-models/model-data.mdx +++ b/apps/framework-docs/src/pages/data-models/model-data.mdx @@ -6,7 +6,7 @@ import { } from "../../components"; import { Tabs } from "nextra/components"; -# Defining Data Model Schemas +# Supported Data Types @@ -20,16 +20,9 @@ The schema defines the names and data types of the properties in your Data Model ### `Key[T: (str, int)] = T` -The `Key``Key[T: (str, int)] = T` type is specific to Moose and is used to define a primary key for your Data Model. If your Data Model requires a composite key, you can apply the `Key` type to multiple columns. +The `Key` type is specific to Moose and is used to define a [primary key](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#primary-keys-and-indexes-in-queries) in the OLAP storage table for your Data Model. If your Data Model requires a composite key, you can apply the `Key` type to multiple columns. -When provisioning the OLAP storage table, the `Key` type will be used to define the primary key. The meaning of a primary key depends on the database you are using. For instance, in Clickhouse, it will define the [primary key](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#primary-keys-and-indexes-in-queries) of the table. 
- - - It is mandatory for every Data Model to designate at least one field as the - `Key` type - - -## Other Supported Types +## Supported Data Types Moose supports various data types for Data Models, as summarized in the table below: @@ -52,6 +45,7 @@ Moose supports various data types for Data Models, as summarized in the table be All Typescript number types are mapped to Float64 + @@ -66,11 +60,10 @@ Moose supports various data types for Data Models, as summarized in the table be | DateTime | datetime | ✅ | | Json | dict | ❌ | | Bytes | bytes | ❌ | - | Array | list | ✅ | - | nullable | nullable | ✅ | + | Array | list[] | ✅ | + | nullable | Optional[T] | ✅ | - It is mandatory for every Data Model to designate at least one field as the - `Key` type +If you do not specify a `Key` type, you must set up a `DataModelConfig` to specify the properties that will be used for the `order_by_fields`. Learn more about [DataModelConfig](./configure-model.mdx). diff --git a/apps/framework-docs/src/pages/db-processing.mdx b/apps/framework-docs/src/pages/db-processing.mdx index 50d44ae08..b7a129fda 100644 --- a/apps/framework-docs/src/pages/db-processing.mdx +++ b/apps/framework-docs/src/pages/db-processing.mdx @@ -1,8 +1,10 @@ import { Callout } from "../components"; +import { TypeScript, Python, LanguageSwitcher } from "../components"; # Introduction to Blocks + -The Blocks primitive is central to managing database operations in your Moose application. A Blocks object facilitates both setup and teardown operations, allowing you to define complex workflows that interact with your database. The setup and teardown parameters within the Blocks object accept arrays of SQL statements, which Moose executes sequentially to manage the state of your database. +The Blocks primitive is central to managing database operations in your Moose application. 
The Blocks primitive facilitates both setup and teardown operations, allowing you to define complex workflows that interact with your database. The setup and teardown parameters within the Blocks object accept arrays of SQL statements, which Moose executes sequentially to manage the state of your database. Some key applications of Blocks are to facilitate: @@ -16,16 +18,21 @@ Some key applications of Blocks are to facilitate: ### File and Folder Conventions -In a Moose project, Blocks are stored in the `/blocks` folder. Each block is defined as a `.ts` or `.py` file within this directory. The file naming convention should be clear and descriptive, reflecting the block’s purpose or function (e.g., user_activity_aggregation.ts). +In a Moose project, Blocks are stored in the `/blocks` folder. Each block is defined as a `.ts``.py` file within this directory. + + +The file name itself is not important, but the file name should be clear and descriptive. + ### Blocks Definition -Within each block file, you define a `Blocks` object that serves as a container for SQL operations related to setting up and tearing down database structures. This is commonly used to manage materialized views, tables, and other database resources: +Within each block file, you instantiate a `Blocks` object, making it the default export of the file,class that serves as a container for SQL operations related to setting up and tearing down database structures. 
This is commonly used to manage materialized views, tables, and other database resources: + ```ts filename="/blocks/.ts" copy import { Blocks } from "@514labs/moose-lib"; -const blocks: Blocks = { +export default { setup: [ // Array of SQL statements for setting up database resources `CREATE TABLE my_table (...)`, @@ -36,10 +43,30 @@ const blocks: Blocks = { `DROP VIEW IF EXISTS my_view`, `DROP TABLE IF EXISTS my_table`, ], -}; +} as Blocks; +``` + + + +```py filename="/blocks/.py" copy +from moose_lib import Blocks + +block = Blocks( + setup=[ + # Array of SQL statements for setting up database resources + "CREATE TABLE my_table (...)", + "CREATE MATERIALIZED VIEW my_view AS SELECT ...", + ], + teardown=[ + # Array of SQL statements for tearing down resources + "DROP VIEW IF EXISTS my_view", + "DROP TABLE IF EXISTS my_table", + ], +) ``` + -Object Parameters +`Blocks` Parameters - `setup`: An array of SQL statements that will be executed when the setup phase is triggered. This parameter is typically used to define operations such as creating tables, indexes, and materialized views. - `teardown`: An array of SQL statements that will be executed during the teardown phase. This parameter is typically used to drop or remove tables, views, or any other resources created during setup. diff --git a/apps/framework-docs/src/pages/db-processing/create-views.mdx b/apps/framework-docs/src/pages/db-processing/create-views.mdx index ceeaa6027..c7e5fe06a 100644 --- a/apps/framework-docs/src/pages/db-processing/create-views.mdx +++ b/apps/framework-docs/src/pages/db-processing/create-views.mdx @@ -1,32 +1,44 @@ -import { Callout } from "../../components"; +import { Callout, LanguageSwitcher, TypeScript, Python } from "../../components"; # Creating Materialized Views -One powerful use of the `Blocks` object in Moose is setting up materialized views, which summarize and transform large datasets into easily queryable tables. 
This setup precomputes the results of your queries, leading to more efficient data retrieval from your Moose application. + -## Setting up Materialized Views with `createAggregation()` +Materialized views help summarize and transform large datasets into easily queryable tables, precomputing results for efficient data retrieval. + +## Setting up Materialized Views with `createAggregation()``create_aggregation()` - Before setting up your aggregation, make sure you have already initialized a - Block. Learn how to [initialize a new block](./init-block.mdx). + Before setting up your aggregation, make sure you have already [initialized a new block](./init-block.mdx). -The `createAggregation()` function in Moose makes it easy to define a materialized view by specifying a query and a destination table for your aggregated data. +The `createAggregation()``create_aggregation()` function in Moose makes it easy to define a materialized view by specifying a query and a destination table for your aggregated data. ### Example -Using the `ParsedActivity_0_0` table from the starter code provided by `create-moose-app`, here’s how to create a `UserActivitySummary` table and a corresponding `UserActivitySummaryMV` materialized view. 
This view tracks the number of unique users (`unique_user_count`) and total activities (`activity_count`) for each activity type: - -```typescript filename="/blocks/example-aggregation.ts" copy + +```ts filename="/blocks/example-aggregation.ts" copy import { createAggregation, + TableCreateOptions, Blocks, ClickHouseEngines, } from "@514labs/moose-lib"; -// Define the destination table and materialized view -const DESTINATION_TABLE = "UserActivitySummary"; -const MATERIALIZED_VIEW = "UserActivitySummaryMV"; +const destinationTable = "UserActivitySummary"; +const materializedView = "UserActivitySummaryMV"; + +// Define the table options +const tableCreateOptions = { + name: destinationTable, + columns: { + activity: "String", + unique_user_count: "AggregateFunction(uniq, String)", + activity_count: "AggregateFunction(count, String)", + }, + orderBy: "activity", + engine: ClickHouseEngines.AggregatingMergeTree, +} as TableCreateOptions; // Define the query to aggregate the data const selectQuery = ` @@ -43,34 +55,77 @@ const selectQuery = ` // Create the aggregation export default { setup: createAggregation({ - tableCreateOptions: { - name: DESTINATION_TABLE, // Name of the destination table - columns: { - activity: "String", // Column type for activity name - unique_user_count: "AggregateFunction(uniq, String)", // Column type for unique user count - activity_count: "AggregateFunction(count, String)", // Column type for total activity count - }, - orderBy: "activity", // Order the table by activity name - engine: ClickHouseEngines.AggregatingMergeTree, // Use the AggregatingMergeTree engine for efficient aggregate funciton performance - }, - materializedViewName: MATERIALIZED_VIEW, // Name of the materialized view - select: selectQuery, // SQL query for aggregating data + tableCreateOptions: tableCreateOptions, + materializedViewName: materializedView, + select: selectQuery, }), } as Blocks; ``` + + + +```py filename="/blocks/example-aggregation.py" copy +from 
moose_lib import ( + AggregationCreateOptions, + AggregationDropOptions, + Blocks, + ClickHouseEngines, + TableCreateOptions, + create_aggregation, + drop_aggregation, +) + +destination_table = "DailyActiveUsers" +materialized_view = "DailyActiveUsers_mv" + +select_sql = """ +SELECT + toStartOfDay(timestamp) as date, + uniqState(userId) as dailyActiveUsers +FROM ParsedActivity_0_0 +WHERE activity = 'Login' +GROUP BY toStartOfDay(timestamp) +""" + +teardown_queries = drop_aggregation( + AggregationDropOptions(materialized_view, destination_table) +) + +table_options = TableCreateOptions( + name=destination_table, + columns={"date": "Date", "dailyActiveUsers": "AggregateFunction(uniq, String)"}, + engine=ClickHouseEngines.MergeTree, + order_by="date", +) + +aggregation_options = AggregationCreateOptions( + table_create_options=table_options, + materialized_view_name=materialized_view, + select=select_sql, +) + +setup_queries = create_aggregation(aggregation_options) + +block = Blocks(teardown=teardown_queries, setup=setup_queries) +``` + -### Explanation of Parameters - -- `tableCreateOptions`: Defines the schema for the destination table: +### Example Explanation - - `name`: The table name where the aggregated data will be stored. - - `columns`: An object specifying column names and data types. - - `orderBy`: Defines the column by which the table will be ordered. In this case, it’s the `activity` column. - - `engine`: Specifies the ClickHouse engine to use, such as `MergeTree` for efficient storage and querying. +- `tableCreateOptions``table_create_options`: Defines the destination table schema + - `name`: Table name for aggregated data + - `columns`: Column names and data types + - `orderBy``order_by`: Column for table ordering + - `engine`: ClickHouse engine (e.g., `AggregatingMergeTree` for efficient aggregations) -- `materializedViewName`: The name of the materialized view that will be created. 
+ +- `materializedViewName`: Name of the materialized view + + +- `materialized_view_name`: Name of the materialized view + -- `select`: A SQL query that determines how data will be aggregated from the source table and inserted into the materialized view. +- `select`: SQL query for data aggregation from the source table Test your query in a SQL explorer for a more efficient workflow. Once @@ -80,173 +135,68 @@ export default { to ensure it adheres to ClickHouse's syntax. -### How the Materialized View is Created +### Materialized View Creation Process -When you save the file, Moose will: +When you save the file, Moose: +1. Creates a new table using the defined structure in `tableCreateOptions``table_create_options` +2. Executes the `select` query to aggregate data +3. Populates the destination table with the results -1. Create a new table in your database using the structure defined in the `tableCreateOptions`. -2. Execute the `select` query to aggregate data from the source table. -3. Populate the destination table with the aggregated result set. - - - Materialized Views in ClickHouse automatically refresh your target table with - the latest query results whenever new data is added to your source tables. - [Learn more about ClickHouse Materialized - Views.](https://clickhouse.com/blog/using-materialized-views-in-clickhouse) + + ClickHouse Materialized Views automatically refresh your target table when new data is added to source tables. [Learn more](https://clickhouse.com/blog/using-materialized-views-in-clickhouse) --- ## API Reference -This section provides detailed reference information for the `Blocks` object and its key methods: `createAggregation()` and `dropAggregation()`. - -### Defining a `Blocks` Object - -The `Blocks` object in Moose handles the setup and teardown of data aggregations, such as materialized views. It consists of two main parameters: `setup` and `teardown`. 
Each accepts an array of SQL statements that Moose will execute against the database to manage the lifecycle of the data. - -#### Example Usage - -```typescript -import { Blocks } from "@514labs/moose-lib"; - -export default { - setup: [...createAggregation({...})], - teardown: [...dropAggregation({...})], -} as Blocks; -``` - ---- - -### `createAggregation()` Function +### `Blocks` Object +- `setup` _(list[str])__(string[])_: SQL statements to set up the aggregation +- `teardown` _(list[str])__(string[])_: SQL statements to tear down the aggregation -The `createAggregation()` function sets up a materialized view based on a SQL query. It specifies the structure of the destination table and the query that will populate it. - -#### Signature - -```typescript -createAggregation(options: AggregationCreateOptions): string[] -``` +### `createAggregation()``create_aggregation()` Function +Sets up a materialized view based on a SQL query. #### Parameters +- `options` (`AggregationCreateOptions`): + - `tableCreateOptions``table_create_options`: Destination table creation options + - `materializedViewName``materialized_view_name`: Materialized view name + - `select`: Aggregation SQL query -- `options` _(AggregationCreateOptions)_: - An object defining how the aggregation will be set up, including table creation options, the materialized view name, and the aggregation query. - -##### `AggregationCreateOptions` Object Properties - -- `tableCreateOptions` _(TableCreateOptions)_: - Defines how the destination table should be created, including: - - - `name`: Name of the table. - - `columns`: A list of column names and their data types. - - `engine`: The ClickHouse engine to use (e.g., `MergeTree`). - - `orderBy`: The column used to order the table. - -- `materializedViewName` _(string)_: - The name of the materialized view to create. - -- `select` _(string)_: - The SQL query that defines how the data will be aggregated and inserted into the materialized view. 
- -##### Example Usage - -```typescript -createAggregation({ - tableCreateOptions: { - name: "UserActionSummary", - columns: { - activity: "String", - unique_user_count: "AggregateFunction(uniq, String)", - activity_count: "AggregateFunction(count, String)", - }, - orderBy: "activity", - engine: ClickHouseEngines.AggregatingMergeTree, - }, - materializedViewName: "UserActionSummaryMV", - select: `SELECT - activity, - uniqState(userId) as unique_user_count, - countState(activity) AS activity_count - FROM ParsedActivity_0_0 - GROUP BY activity`, -}); -``` - -#### **Return Value** +#### Returns +`string[]``list[str]`: SQL statements to set up the materialized view and destination table -- `string[]`: - Returns an array of SQL statements that will be executed to set up the materialized view and its destination table. - ---- - -### `dropAggregation()` Function - -The `dropAggregation()` function removes an existing aggregation by dropping the materialized view and the associated table. - -#### Signature - -```typescript -dropAggregation(options: AggregationDropOptions): string[] -``` +### `dropAggregation()``drop_aggregation()` Function +Removes an existing aggregation. #### Parameters +- `options` (`AggregationDropOptions`): + - `viewName``view_name`: Materialized view name to drop + - `tableName``table_name`: Destination table name to drop -- `options` _(AggregationDropOptions)_: - An object specifying which materialized view and table should be dropped. - -##### `AggregationDropOptions` Object Properties +#### Returns +`string[]``list[str]`: SQL statements to drop the specified materialized view and table -- `viewName` _(string)_: - The name of the materialized view to drop. - -- `tableName` _(string)_: - The name of the destination table to drop. - -##### Example Usage +### `ClickHouseEngines` +Enum defining available ClickHouse storage engines in Moose. 
+ ```typescript -dropAggregation({ - viewName: "UserActivitySummaryMV", - tableName: "UserActivitySummary", -}); -``` - -#### Return Value - -- **`string[]`**: - Returns an array of SQL statements that, when executed in order, will drop the specified materialized view and its associated table. - ---- - -### ClickHouseEngines Enum - -The `ClickHouseEngines` enum defines the storage engines available in Moose for creating tables in ClickHouse. These engines determine how data is stored, indexed, and queried. - -#### Available Engines - -```typescript filename="moose-lib-ts/blocks.ts" enum ClickHouseEngines { - MergeTree = "MergeTree", - ReplacingMergeTree = "ReplacingMergeTree", - SummingMergeTree = "SummingMergeTree", - AggregatingMergeTree = "AggregatingMergeTree", - CollapsingMergeTree = "CollapsingMergeTree", - VersionedCollapsingMergeTree = "VersionedCollapsingMergeTree", - GraphiteMergeTree = "GraphiteMergeTree", + MergeTree, + ReplacingMergeTree, + SummingMergeTree, + AggregatingMergeTree, + CollapsingMergeTree, + VersionedCollapsingMergeTree, + GraphiteMergeTree, } ``` + -#### Usage + + Learn more about ClickHouse table engine types and use cases in the [ClickHouse documentation](https://clickhouse.com/docs/en/engines/table-engines/). + -```typescript -import { ClickHouseEngines } from "@514labs/moose-lib"; -const engine = ClickHouseEngines.ReplacingMergeTree; -``` - - Want to learn more about the different ClickHouse table engine types and their - use cases? Check out the [ClickHouse - documentation](https://clickhouse.com/docs/en/engines/table-engines/). - diff --git a/apps/framework-docs/src/pages/index.mdx b/apps/framework-docs/src/pages/index.mdx index eabd11612..cca780fd0 100644 --- a/apps/framework-docs/src/pages/index.mdx +++ b/apps/framework-docs/src/pages/index.mdx @@ -22,7 +22,7 @@ import { # Introduction -Welcome to [Moose](https://www.moosejs.com/)! Moose is still in early development, and we're talking +Welcome to [Moose](https://getmoose.dev)! 
Moose is still in early development, and we're talking to users and potential users all the time to make sure we're creating something valuable. If you're at all interested in what we're building here, then we'd love to talk to you! Please reach out over [email](mailto:hello@moosejs.dev), diff --git a/apps/framework-docs/src/pages/learn-moose/create-project.mdx b/apps/framework-docs/src/pages/learn-moose/create-project.mdx index bd97af613..0042d15bd 100644 --- a/apps/framework-docs/src/pages/learn-moose/create-project.mdx +++ b/apps/framework-docs/src/pages/learn-moose/create-project.mdx @@ -48,11 +48,9 @@ moose-cli dev You should expect to see the following printed to your terminal: -{" "} - -{" "} + You have set up a Moose instance at `http://localhost:4000` diff --git a/apps/framework-docs/src/pages/quickstart.mdx b/apps/framework-docs/src/pages/quickstart.mdx index 4cc836ff9..36d1239b9 100644 --- a/apps/framework-docs/src/pages/quickstart.mdx +++ b/apps/framework-docs/src/pages/quickstart.mdx @@ -46,7 +46,7 @@ git --version && python3 --version && docker --version Navigate to where you want to create your project and run the following command to create a new Python virtual environment: ```bash filename="Terminal" copy -python -m venv venv +python3 -m venv venv ``` Create a new Python virtual environment for each project to avoid conflicts. @@ -86,15 +86,21 @@ npx create-moose-app@latest my-moose-app ts This will automatically create a new project folder and initialize a skeleton Moose app with the entire project structure you need to get started. - + ### Navigate to your Project Directory and Install Project Dependencies + ```bash filename="Terminal" copy cd my-moose-app && npm install ``` - + +```bash filename="Terminal" copy +cd my-moose-app && pip install -e . 
+``` + + ### Start Local Dev Server @@ -193,11 +199,11 @@ export interface ParsedActivity { ```python filename="/datamodels/models.py" copy +from moose_lib import Key, moose_data_model from dataclasses import dataclass -import datetime - -type Key[T: (str, int)] = T +from datetime import datetime +@moose_data_model @dataclass class UserActivity: eventId: Key[str] @@ -205,17 +211,16 @@ timestamp: str userId: str activity: str +@moose_data_model @dataclass class ParsedActivity: eventId: Key[str] timestamp: datetime userId: str activity: str - ``` - The file defines `UserActivity` and `ParsedActivity` data models, which structure data for ingestion into your Moose infrastructure.

Moose automatically created database tables and ingestion points for these models, as seen in the `moose-ls` output. diff --git a/apps/framework-docs/src/pages/stream-processing.mdx b/apps/framework-docs/src/pages/stream-processing.mdx index be2f1133a..8da4cbad3 100644 --- a/apps/framework-docs/src/pages/stream-processing.mdx +++ b/apps/framework-docs/src/pages/stream-processing.mdx @@ -1,6 +1,6 @@ import { FileTree, Tabs } from "nextra/components"; import { Callout } from "../components"; -import { LanguageSwitcher } from "../components"; +import { LanguageSwitcher, TypeScript, Python } from "../components"; # Introduction to Streaming Functions @@ -14,7 +14,7 @@ Streaming Functions in Moose enable real-time data processing, allowing you to t - Filter out unwanted data - Implement arbitrary logic to handle and respond to incoming data -They are defined as regular functions in TypeScriptPython files, and are executed in real-time as data flows into your Moose application. +They are defined as regular functions in TypeScriptPython files, and are executed in real-time as data flows into your Moose application. ## Core Concepts @@ -96,38 +96,30 @@ The function body contains the processing logic. We will go into more detail on
-A Streaming Function is defined as a `Flow` object with a `run` attribute that is assigned the function: +A Streaming Function is defined as a `StreamingFunction` object with a `run` attribute that is assigned the function: ```python filename="/functions/SourceDataModel__DestinationDataModel.py" copy +from moose_lib import StreamingFunction from app.datamodels.models import SourceDataModel, DestinationDataModel -from dataclasses import dataclass -from typing import Callable - - -@dataclass -class Flow: - run: Callable - def function_name(source: SourceDataModel) -> DestinationDataModel: # Transformation logic pass -my_flow = Flow( +streaming_function = StreamingFunction( run=function_name ) ``` -- The `Flow` object is how Moose identifies the entry point for the Streaming Function. +- The `StreamingFunction` object is how Moose identifies the entry point for the Streaming Function. - The `run` attribute is assigned the function that Moose will call when it receives a new message on the streaming topic associated with the source Data Model. - - The function accepts a single parameter `source` of type `SourceDataModel`. This is the data that Moose will pass to the function when it receives a new message on the streaming topic associated with the source Data Model. - The return type is `DestinationDataModel`. If a value is returned Moose automatically serializes it to JSON and writes it to the streaming topic associated with the destination Data Model. The function name itself is not important, as Moose identifies the function by - whatever is assigned to the `run` attribute of the Flow object. + whatever is assigned to the `run` attribute of the `StreamingFunction` object. 
diff --git a/apps/framework-docs/src/pages/stream-processing/implement-function.mdx b/apps/framework-docs/src/pages/stream-processing/implement-function.mdx index 980e796d4..07c194cf1 100644 --- a/apps/framework-docs/src/pages/stream-processing/implement-function.mdx +++ b/apps/framework-docs/src/pages/stream-processing/implement-function.mdx @@ -1,9 +1,11 @@ import { Steps } from "nextra/components"; -import { Callout } from "../../components"; +import { Callout, LanguageSwitcher, TypeScript, Python } from "../../components"; # Implementing Streaming Functions -With the Streaming Function file set up, you can now define the processing logic to transform the data from the source Data Model to the destination Data Model. Since Streaming Functions are just ordinary Typescript/Python functions, you can leverage the full power of the language to manipulate the data as needed. + + +Streaming Functions are just ordinary TypescriptPython functions. Inside the function, you can leverage the full power of the language to process, enrich, filter, or otherwise manipulate the source data as needed. This guide introduces some common operations you might perform within a Streaming Function. @@ -15,60 +17,194 @@ This guide introduces some common operations you might perform within a Streamin guide.
-## Examples -### Transforming Data +### Basic Data Manipulation + -Here's an example Streaming Function that converts timestamps to UTC: + +```ts filename="/functions/SourceDataModel__ManipulatedDestinationDataModel.ts" copy {15} +import { SourceDataModel } from "datamodels/path/to/SourceDataModel.ts"; +import { ManipulatedDestinationDataModel } from "datamodels/path/to/ManipulatedDestinationDataModel.ts"; -```ts filename="/functions/UserActivity__ParsedActivity.ts" copy {12} -// Example streaming function: Converts local timestamps in UserEvent data to UTC. -// Imports: Source (UserActivity) and Destination (ParsedActivity) data models. -import { UserActivity, ParsedActivity } from "datamodels/models.ts"; +export default function manipulateData(source: SourceDataModel): ManipulatedDestinationDataModel { + // Manipulate multiple fields from the source data into the destination format. + const manipulatedNumber = source.numberField1 + source.numberField2; + const manipulatedString = source.stringField.toUpperCase(); + const manipulatedDate = new Date(source.dateField).toISOString(); -// The 'convertUtc' function transforms UserActivity data to ParsedActivity format. -export default function convertUtc(source: UserActivity): ParsedActivity { - // Convert local timestamp to UTC and return new ParsedActivity object. return { - id: source.id, // Retain original event ID. - userId: "puid" + source.userId, // Example: Prefix user ID. - activity: source.activity, // Copy activity unchanged. - timestamp: new Date(source.timestamp), // Convert timestamp to UTC. 
+ id: source.id, + manipulatedNumber: manipulatedNumber, + manipulatedString: manipulatedString, + manipulatedDate: manipulatedDate, }; } ``` + + + +```python filename="/functions/SourceDataModel__ManipulatedDestinationDataModel.py" copy {15} +from moose_lib import StreamingFunction +from app.datamodels.models import SourceDataModel, ManipulatedDestinationDataModel + +def manipulate_data(source: SourceDataModel) -> ManipulatedDestinationDataModel: + # Manipulate multiple fields from the source data into the destination format. + manipulated_integer = source.integer_field1 + source.integer_field2 + manipulated_string = source.string_field.upper() + manipulated_datetime = source.datetime_field.isoformat() + + return ManipulatedDestinationDataModel( + id=source.id, + manipulated_integer=manipulated_integer, + manipulated_string=manipulated_string, + manipulated_datetime=manipulated_datetime, + ) + +streaming_function = StreamingFunction( + run=manipulate_data +) +``` + -Adjust function specifics according to your data transformation needs. - -### Data Filtering +### Data Validation and Filtering -Here's an example streaming function file that filters out unwanted data based on a specific condition: +By returning `null``None` you can discard the data. -```ts filename="/functions/UserActivity__ParsedActivity.ts" copy {7-9} -// Example streaming function: Filters out UserEvent data with 'activity' field set to 'ignore'. -// Imports: Source (UserActivity) and Destination (ParsedActivity) data models. -import { UserActivity, ParsedActivity } from "datamodels/models.ts"; + +```ts filename="/functions/SourceDataModel__ValidatedDestinationDataModel.ts" copy {12} +import { SourceDataModel } from "datamodels/models.ts"; +import { DestinationDataModel } from "datamodels/models.ts"; -export default function filterActivity( - source: UserActivity, -): ParsedActivity | null { - // Filter out UserActivity data with 'activity' field set to 'ignore'. 
- if (source.activity === "ignore") { +export default function validateData(source: SourceDataModel): ValidatedDestinationDataModel | null { + // Validate the source data before processing. + if (!source.requiredField) { + // Discard the data if validation fails. return null; } - // Return ParsedActivity object for all other UserActivity data. return { id: source.id, - userId: "puid" + source.userId, - activity: source.activity, + validField: source.requiredField, timestamp: new Date(source.timestamp), }; } ``` + + + +```python filename="/functions/SourceDataModel__ValidatedDestinationDataModel.py" copy {12} +from moose_lib import StreamingFunction +from app.datamodels.models import SourceDataModel, ValidatedDestinationDataModel + +def validate_data(source: SourceDataModel) -> ValidatedDestinationDataModel | None: + # Validate the source data before processing. + if not source.required_field: + # Discard the data if validation fails. + return None + + return ValidatedDestinationDataModel( + id=source.id, + valid_field=source.required_field, + timestamp=source.timestamp + ) + +streaming_function = StreamingFunction( + run=validate_data +) +``` + + +### Data Augmentation with External API +You can use the `fetch``requests` module to enrich your source data with additional data from an external API. 
+
+```ts filename="/functions/SourceDataModel__AugmentedDestinationDataModel.ts" copy {20}
+import { SourceDataModel } from "datamodels/path/to/SourceDataModel.ts";
+import { AugmentedDestinationDataModel } from "datamodels/path/to/AugmentedDestinationDataModel.ts";
+
+export default async function augmentData(source: SourceDataModel): Promise<AugmentedDestinationDataModel> {
+  // Fetch additional information from an external API
+  const response = await fetch(`https://api.example.com/data/${source.id}`);
+  const extraData = await response.json();
+
+  // Combine source data with fetched extra data
+  return {
+    ...source,
+    extraField: extraData.field,
+    additionalInfo: extraData.info,
+  };
+}
+```
+
+  Make sure to make the function `async` and use `await` when fetching data from the external API, so that the function returns a `Promise` of the destination Data Model.
+
+
+
+```python filename="/functions/SourceDataModel__AugmentedDestinationDataModel.py" copy {20}
+from moose_lib import StreamingFunction
+from app.datamodels.models import SourceDataModel, AugmentedDestinationDataModel
+import requests
+
+def augment_data(source: SourceDataModel) -> AugmentedDestinationDataModel:
+    # Fetch additional information from an external API
+    response = requests.get(f"https://api.example.com/data/{source.id}")
+    extra_data = response.json()
+
+    # Combine source data with fetched extra data
+    return AugmentedDestinationDataModel(
+        id=source.id,
+        extra_field=extra_data['field'],
+        additional_info=extra_data['info'],
+        # Add other necessary fields here
+    )
+
+streaming_function = StreamingFunction(
+    run=augment_data
+)
+```
+
 
 ### Splitting Data Into Multiple Entries
+To create multiple entries, you can return an array of the destination Data Model. A common use case is when you have a Data Model with a nested list of objects that you want to unnest and store in their own table. Moose will convert each object inside the array into a separate database entry.
-To create multiple entries you can return a array of the destination data model.
-For example using the code above you can return an array with a type of `ParsedActivity[]`.
-Moose would convert each object inside of the array into a seperate database entry.
+
+```ts filename="/functions/SourceDataModel__UnnestedDestinationDataModel.ts" copy {15}
+import { SourceDataModel } from "datamodels/models.ts";
+import { UnnestedDestinationDataModel } from "datamodels/models.ts";
+
+export default function reshapeData(source: SourceDataModel): UnnestedDestinationDataModel[] {
+  // Unnest a list of objects into their own table rows
+  const objects = source.nestedListOfObjects;
+
+  return objects.map((object) => ({
+    id: source.id, // Keep the ID of the parent object
+    ...object,
+  }));
+}
+```
+
+
+
+```python filename="/functions/SourceDataModel__UnnestedDestinationDataModel.py" copy {15}
+from moose_lib import StreamingFunction
+from app.datamodels.models import SourceDataModel, UnnestedDestinationDataModel
+
+def reshape_data(source: SourceDataModel) -> list[UnnestedDestinationDataModel]:
+
+    objects = source.nested_list_of_objects
+
+    return [
+        UnnestedDestinationDataModel(
+            id=source.id,
+            **item
+        )
+        for item in objects
+    ]
+
+streaming_function = StreamingFunction(
+    run=reshape_data
+)
+```
+
diff --git a/apps/framework-docs/src/pages/stream-processing/init-function.mdx b/apps/framework-docs/src/pages/stream-processing/init-function.mdx
index 055763792..52445f065 100644
--- a/apps/framework-docs/src/pages/stream-processing/init-function.mdx
+++ b/apps/framework-docs/src/pages/stream-processing/init-function.mdx
@@ -1,29 +1,58 @@
 import { FileTree } from "nextra/components";
-import { Callout } from "../../components";
+import { Callout, LanguageSwitcher, TypeScript, Python } from "../../components";
 
 # Initialize a Streaming Function
 
+
+
 ## Using the CLI
 
 Initialize your Streaming Function with the `moose function init` CLI command:
 
+
+```bash filename="Terminal" copy
+npx moose-cli function init
--source --destination +``` + + + ```bash filename="Terminal" copy -moose function init --source --destination +moose-cli function init --source --destination ``` + This command generates a new Streaming Function file in the `/functions` directory of your Moose app according to the [file structure described previously](../process-data). + + + + + + + + + + + + + + + + + + `SOURCE_DATA_MODEL` and `DESTINATION_DATA_MODEL` must be valid data models that exist in your `/datamodels` directory.
-
You can find a list of your existing data models by running `moose ls`. +
You can find a list of your existing data models by running `moose-cli ls`.
## Manual Initialization -You can alternatively create a new Streaming Function file manually in the `/functions` directory of your Moose app. The file name should be in the format `SourceDataModel__DestinationDataModel.ts`. +You can alternatively create a new Streaming Function file manually in the `/functions` directory of your Moose app. The file name should be in the format `SourceDataModel__DestinationDataModel.ts``SourceDataModel__DestinationDataModel.py`. Inside the file, define the function as follows: + ```ts filename="/functions/SourceDataModel__DestinationDataModel.ts" copy import { SourceDataModel } from "../datamodels/path/to/SourceDataModel"; import { DestinationDataModel } from "../datamodels/path/to/DestinationDataModel"; @@ -34,28 +63,35 @@ export default function functionName( // Transformation logic } ``` + + + +```py filename="/functions/SourceDataModel__DestinationDataModel.py" copy +from app.datamodels.path.to.SourceDataModel import SourceDataModel +from app.datamodels.path.to.DestinationDataModel import DestinationDataModel +from moose_lib import StreamingFunction + +def functionName(source: SourceDataModel) -> DestinationDataModel | list[DestinationDataModel] | None: + # Transformation logic + + +my_streaming_function = StreamingFunction(run=functionName) +``` + -
    -
  • - Make sure to import the source and destination Data Models from the - correct paths in your app -
  • -
  • - Replace `functionName` with whatever descriptive name you choose for your - function -
  • -
  • - Replace `SourceDataModel` and `DestinationDataModel` with the actual names - of your Data Models -
  • -
  • - The default export is important because Moose uses it to identify the - entry point into the Streaming Function -
  • -
+- Ensure you import the source and destination Data Models from the correct paths within your application. +- Replace `functionName` with a descriptive name that clearly reflects the function's purpose. +- Substitute `SourceDataModel` and `DestinationDataModel` with the actual names of your Data Models. + +- The default export is crucial because Moose uses it to identify the entry point of the Streaming Function. + + +- Assign your function to the `run` parameter in the `StreamingFunction` class to designate it as the entry point. +
+ ## Next Steps With your Streaming Function initialized, you can now start defining the transformation logic to process data from the source Data Model to the destination Data Model. We'll cover this in more detail in the next section. diff --git a/apps/framework-docs/src/pages/stream-processing/test-function.mdx b/apps/framework-docs/src/pages/stream-processing/test-function.mdx index 124610504..d4a68df22 100644 --- a/apps/framework-docs/src/pages/stream-processing/test-function.mdx +++ b/apps/framework-docs/src/pages/stream-processing/test-function.mdx @@ -40,7 +40,7 @@ After the streaming function processes the data, check the destination table in [Learn more about how to connect to your local OLAP - database](/getting-started/quickstart). + database](/quickstart). diff --git a/apps/framework-landing/app/layout.tsx b/apps/framework-landing/app/layout.tsx index f1163b6cc..9491cda01 100644 --- a/apps/framework-landing/app/layout.tsx +++ b/apps/framework-landing/app/layout.tsx @@ -3,7 +3,7 @@ import "@514labs/design-system-base/globals.css"; import localFont from "next/font/local"; import { ThemeProvider } from "@514labs/design-system-components/components"; import { cn } from "@514labs/design-system-components/utils"; -import { Nav } from "@514labs/design-system-components/trackable-components"; +import { Nav } from "./nav"; import Script from "next/script"; import { GoogleTagManager } from "@next/third-parties/google"; @@ -26,18 +26,14 @@ export const metadata: Metadata = { }; const default_navigation = [ - { name: "docs", href: "https://docs.moosejs.com" }, - { name: "boreal", href: "https://boreal.cloud" }, + { name: "Host with Boreal", href: "https://boreal.cloud" }, + { name: "Docs", href: "https://docs.getmoose.dev" }, + { name: "GitHub", href: "https://github.com/514-labs/moose" }, { - name: "templates", - href: "/templates", - items: [ - { name: "product analytics", href: "/templates/product-analytics" }, - ], + name: "Slack", + href: 
"https://join.slack.com/t/moose-community/shared_invite/zt-2345678901-23456789012345678901234567890123", }, - { name: "community", href: "/community" }, - { name: "blog", href: "https://www.fiveonefour.com/blog" }, - { name: "github", href: "https://github.com/514-labs/moose" }, + { name: "Blog", href: "https://www.fiveonefour.com/blog" }, ]; export default async function RootLayout({ @@ -71,12 +67,7 @@ export default async function RootLayout({ enableSystem disableTransitionOnChange > -