Skip to content

Commit

Permalink
feature: synthetic datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
geclos committed Sep 27, 2024
1 parent 6319fa9 commit 71880b3
Show file tree
Hide file tree
Showing 18 changed files with 810 additions and 69 deletions.
3 changes: 3 additions & 0 deletions .cursorignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Add directories or file patterns to ignore during indexing (e.g. foo/ or *.csv)
node_modules
drizzle
102 changes: 102 additions & 0 deletions apps/web/src/actions/datasets/generateDataset.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
'use server'

import {
ChainObjectResponse,
Dataset,
StreamEventTypes,
} from '@latitude-data/core/browser'
import { BadRequestError } from '@latitude-data/core/lib/errors'
import { createDataset } from '@latitude-data/core/services/datasets/create'
import { ChainEventDto, Latitude } from '@latitude-data/sdk'
import env from '$/env'
import { getCurrentUser } from '$/services/auth/getCurrentUser'
import { createStreamableValue } from 'ai/rsc'

/**
 * Input accepted by `generateDatasetAction`.
 */
type GenerateDatasetActionProps = {
// Forwarded untouched to the generator document as its `parameters` prompt parameter.
parameters: Record<string, unknown>
// Sent to the generator document as the `user_message` prompt parameter.
description: string
// Sent to the generator document as the `row_count` prompt parameter.
rowCount: number
// Name of the dataset to create; also used as the base name of the generated `.csv` file.
name: string
}

/**
 * Server action that runs the dataset-generator document through the Latitude
 * SDK and stores the CSV it returns as a new dataset for the current
 * user's workspace.
 *
 * The generator is called with `row_count`, `parameters` and `user_message`
 * prompt parameters built from the action input. The CSV is read from the
 * `object.csv` field of the run's response and handed to `createDataset`
 * with a `,` delimiter.
 *
 * Failures that occur after the SDK call resolves are reported through the
 * returned stream instead of being thrown. The stream is closed at most once:
 * closing an already-closed streamable value throws, so every close goes
 * through a guard.
 *
 * @returns `output`, the streamable value that signals completion or failure,
 *   and `response`, the created dataset (undefined when generation or
 *   creation failed).
 * @throws BadRequestError when any of the dataset-generator environment
 *   variables is not configured.
 */
export async function generateDatasetAction({
  parameters,
  description,
  rowCount,
  name,
}: GenerateDatasetActionProps) {
  if (!env.DATASET_GENERATOR_PROJECT_ID) {
    // The message names the variable that is actually checked.
    throw new BadRequestError('DATASET_GENERATOR_PROJECT_ID is not set')
  }
  if (!env.DATASET_GENERATOR_DOCUMENT_PATH) {
    throw new BadRequestError('DATASET_GENERATOR_DOCUMENT_PATH is not set')
  }
  if (!env.DATASET_GENERATOR_WORKSPACE_APIKEY) {
    throw new BadRequestError('DATASET_GENERATOR_WORKSPACE_APIKEY is not set')
  }

  let response: Dataset | undefined
  const { user, workspace } = await getCurrentUser()
  const stream = createStreamableValue<
    { event: StreamEventTypes; data: ChainEventDto },
    Error
  >()

  // Tracks whether the stream was already finalized, so that an error
  // reported by the SDK callback is not followed by a second close.
  let streamClosed = false
  const failStream = (failure: Pick<Error, 'name' | 'message' | 'stack'>) => {
    if (streamClosed) return
    streamClosed = true
    stream.error({
      name: failure.name,
      message: failure.message,
      stack: failure.stack,
    })
  }

  const gateway = {
    host: env.GATEWAY_HOSTNAME,
    port: env.GATEWAY_PORT,
    ssl: env.GATEWAY_SSL,
  }
  const sdk = new Latitude(env.DATASET_GENERATOR_WORKSPACE_APIKEY, {
    gateway,
    projectId: env.DATASET_GENERATOR_PROJECT_ID,
  })
  const sdkResult = await sdk.run(env.DATASET_GENERATOR_DOCUMENT_PATH, {
    parameters: {
      row_count: rowCount,
      parameters,
      user_message: description,
    },
    onError: (error) => failStream(error),
  })

  try {
    const chainResponse = sdkResult?.response as ChainObjectResponse | undefined
    const csv = chainResponse?.object?.csv

    if (csv == null) {
      // No CSV came back. If onError already reported the cause this is a
      // no-op; otherwise surface an explicit failure instead of a TypeError.
      failStream(new Error('Dataset generator did not return any CSV data'))
    } else {
      const result = await createDataset({
        author: user,
        workspace,
        data: {
          name,
          file: new File([csv], `${name}.csv`, { type: 'text/csv' }),
          csvDelimiter: ',',
        },
      })

      if (result.error) {
        failStream(result.error)
      } else {
        response = result.value
        if (!streamClosed) {
          streamClosed = true
          stream.done()
        }
      }
    }
  } catch (error) {
    failStream(error instanceof Error ? error : new Error(String(error)))
  }

  return {
    output: stream.value,
    response,
  }
}
68 changes: 68 additions & 0 deletions apps/web/src/actions/sdk/generateDatasetPreviewAction.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
'use server'

import { StreamEventTypes } from '@latitude-data/core/browser'
import { BadRequestError } from '@latitude-data/core/lib/errors'
import { ChainEventDto, Latitude } from '@latitude-data/sdk'
import env from '$/env'
import { createStreamableValue } from 'ai/rsc'

/**
 * Input accepted by `generateDatasetPreviewAction`.
 */
type RunDocumentActionProps = {
// NOTE(review): required by this type but not read by generateDatasetPreviewAction.
projectId: number
// NOTE(review): required by this type but not read by generateDatasetPreviewAction.
documentUuid: string
// Forwarded untouched to the generator document as its `parameters` prompt parameter.
parameters: Record<string, unknown>
// Sent to the generator document as the `user_message` prompt parameter.
description: string
}

/**
 * Server action that runs the dataset-generator document with a fixed
 * `row_count` of 10 and streams the SDK events back to the caller, so a
 * preview can be shown before a full dataset is generated.
 *
 * Every SDK event is pushed into the returned stream. The stream is closed
 * either with the SDK error or when the SDK reports the run as finished.
 * Closing or updating an already-closed streamable value throws, so all
 * stream writes go through a guard that makes sure it is finalized only once.
 *
 * @returns `output`, the streamable value carrying the SDK events, and
 *   `response`, whatever the awaited `sdk.run` call resolved to.
 * @throws BadRequestError when any of the dataset-generator environment
 *   variables is not configured.
 */
export async function generateDatasetPreviewAction({
  parameters,
  description,
}: RunDocumentActionProps) {
  if (!env.DATASET_GENERATOR_PROJECT_ID) {
    // The message names the variable that is actually checked.
    throw new BadRequestError('DATASET_GENERATOR_PROJECT_ID is not set')
  }
  if (!env.DATASET_GENERATOR_DOCUMENT_PATH) {
    throw new BadRequestError('DATASET_GENERATOR_DOCUMENT_PATH is not set')
  }
  if (!env.DATASET_GENERATOR_WORKSPACE_APIKEY) {
    throw new BadRequestError('DATASET_GENERATOR_WORKSPACE_APIKEY is not set')
  }

  // Created only after the configuration checks, so no stream is allocated
  // when the action is going to throw anyway.
  const stream = createStreamableValue<
    { event: StreamEventTypes; data: ChainEventDto },
    Error
  >()
  let streamClosed = false

  const gateway = {
    host: env.GATEWAY_HOSTNAME,
    port: env.GATEWAY_PORT,
    ssl: env.GATEWAY_SSL,
  }
  const sdk = new Latitude(env.DATASET_GENERATOR_WORKSPACE_APIKEY, {
    gateway,
    projectId: env.DATASET_GENERATOR_PROJECT_ID,
  })
  const response = await sdk.run(env.DATASET_GENERATOR_DOCUMENT_PATH, {
    parameters: {
      row_count: 10,
      parameters,
      user_message: description,
    },
    onEvent: (event) => {
      if (streamClosed) return
      stream.update(event)
    },
    onError: (error) => {
      if (streamClosed) return
      streamClosed = true
      stream.error({
        name: error.name,
        message: error.message,
        stack: error.stack,
      })
    },
    onFinished: () => {
      if (streamClosed) return
      streamClosed = true
      stream.done()
    },
  })

  return {
    output: stream.value,
    response,
  }
}
48 changes: 48 additions & 0 deletions apps/web/src/app/(private)/datasets/generate/CsvPreviewTable.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import {
Table,
TableBody,
TableCell,
TableHead,
TableHeader,
TableRow,
Text,
} from '@latitude-data/web-ui'

/**
 * Props for `CsvPreviewTable`.
 */
interface CsvPreviewTableProps {
csvData: {
// Column names; rendered as the table heading cells, in this order.
headers: string[]
// One entry per table row.
data: {
// Cell values for the row, looked up by header name when rendering.
record: Record<string, string>
// Column metadata for the row; not read by CsvPreviewTable.
info: { columns: { name: string }[] }
}[]
}
}

/**
 * Renders parsed CSV data as a table: one heading cell per entry in
 * `csvData.headers` and one row per entry in `csvData.data`. Each cell shows
 * the row's `record` value stored under the matching header name.
 */
export function CsvPreviewTable({ csvData }: CsvPreviewTableProps) {
  const { headers, data } = csvData

  // Heading cells, in the order the headers were provided.
  const headingCells = headers.map((title, columnIdx) => (
    <TableHead key={columnIdx}>
      <Text.H5>{title}</Text.H5>
    </TableHead>
  ))

  // One table row per CSV row; cells follow the header order.
  const bodyRows = data.map((row, rowIdx) => (
    <TableRow key={rowIdx} hoverable={false}>
      {headers.map((column, columnIdx) => (
        <TableCell key={columnIdx}>
          <div className='py-1'>
            <Text.H5>{row.record[column]}</Text.H5>
          </div>
        </TableCell>
      ))}
    </TableRow>
  ))

  return (
    <Table>
      <TableHeader>
        <TableRow>{headingCells}</TableRow>
      </TableHeader>
      <TableBody>{bodyRows}</TableBody>
    </Table>
  )
}
Loading

0 comments on commit 71880b3

Please sign in to comment.