Skip to content

Commit

Permalink
feature: synthetic datasets (#295)
Browse files Browse the repository at this point in the history
  • Loading branch information
geclos authored Sep 27, 2024
1 parent 1ca36f5 commit 9094fb5
Show file tree
Hide file tree
Showing 32 changed files with 922 additions and 89 deletions.
3 changes: 3 additions & 0 deletions .cursorignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Add directories or file patterns to ignore during indexing (e.g. foo/ or *.csv)
node_modules
drizzle
2 changes: 2 additions & 0 deletions apps/infra/Pulumi.core.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ encryptionsalt: v1:K+dTqOgU40c=:v1:xzsAOOiJEEAsdCQ4:Oe7NKHjXdYdZrcrrBa1/0yolY5M3
config:
infra:DATABASE_PASSWORD:
secure: v1:WABt/tJjfsAKMplU:RRLtj5mTu301x4sn2Tr91u4O0Msd3mWVJutcDg==
infra:DATASET_GENERATOR_WORKSPACE_APIKEY:
secure: v1:l9AShyNWMRBbgHre:cKM7YpzKRoAxChL40q2RixG0mm081qGrkpKSAitb9E5TMhXR7AVOtDaG9TiwYG9NXQNsCw==
infra:DEFAULT_PROJECT_ID:
secure: v1:M0giTdD2+Mjre0ps:YIaUlExLI41EFdcpf+6Fxec=
infra:DEFAULT_PROVIDER_API_KEY:
Expand Down
13 changes: 13 additions & 0 deletions apps/infra/src/app/production/shared.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ const defaultProviderApiKeyArn = coreStack.requireOutput(
'defaultProviderApiKeyArn',
)
const postHogApiKeyArn = coreStack.requireOutput('postHogApiKeyArn')
const datasetGeneratorWorkspaceApiKeyArn = coreStack.requireOutput(
'datasetGeneratorWorkspaceApiKeyArn',
)

const getSecretString = (arn: pulumi.Output<any>) => {
return arn.apply((secretId) =>
Expand Down Expand Up @@ -59,6 +62,9 @@ export const sentryOrg = getSecretString(sentryOrgArn)
export const sentryProject = getSecretString(sentryProjectArn)
export const defaultProviderApiKey = getSecretString(defaultProviderApiKeyArn)
export const postHogApiKey = getSecretString(postHogApiKeyArn)
export const datasetGeneratorWorkspaceApiKey = getSecretString(
datasetGeneratorWorkspaceApiKeyArn,
)

export const dbUrl = pulumi.interpolate`postgresql://${dbUsername}:${dbPassword}@${dbEndpoint}/${dbName}?sslmode=verify-full&sslrootcert=/app/packages/core/src/assets/eu-central-1-bundle.pem`
export const environment = pulumi
Expand All @@ -75,6 +81,7 @@ export const environment = pulumi
defaultProjectId,
defaultProviderApiKey,
postHogApiKey,
datasetGeneratorWorkspaceApiKey,
])
.apply(() => {
return [
Expand Down Expand Up @@ -115,5 +122,11 @@ export const environment = pulumi
{ name: 'DEFAULT_PROVIDER_API_KEY', value: defaultProviderApiKey },
{ name: 'NEXT_PUBLIC_POSTHOG_KEY', value: postHogApiKey },
{ name: 'NEXT_PUBLIC_POSTHOG_HOST', value: 'https://eu.i.posthog.com' },
{
name: 'DATASET_GENERATOR_WORKSPACE_APIKEY',
value: datasetGeneratorWorkspaceApiKey,
},
{ name: 'DATASET_GENERATOR_PROJECT_ID', value: '74' },
{ name: 'DATASET_GENERATOR_DOCUMENT_PATH', value: 'generator' },
]
})
6 changes: 6 additions & 0 deletions apps/infra/src/core/secrets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ const postHogApiKey = createSecretWithVersion(
'NEXT_PUBLIC_POSTHOG_KEY',
'Posthog API Key for product analytics',
)
const datasetGeneratorWorkspaceApiKey = createSecretWithVersion(
'DATASET_GENERATOR_WORKSPACE_APIKEY',
'API key for the dataset generator',
)

export const mailerApiKeyArn = mailerApiKey.arn
export const sentryDsnArn = sentryDsn.arn
Expand All @@ -83,3 +87,5 @@ export const workersWebsocketsSecretTokenArn = workersWebsocketsSecretToken.arn
export const defaultProjectIdArn = defaultProjectId.arn
export const defaultProviderApiKeyArn = defaultProviderApiKey.arn
export const postHogApiKeyArn = postHogApiKey.arn
export const datasetGeneratorWorkspaceApiKeyArn =
datasetGeneratorWorkspaceApiKey.arn
14 changes: 7 additions & 7 deletions apps/web/appspec.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
version: 0.0
Resources:
- TargetService:
Type: AWS::ECS::Service
Properties:
TaskDefinition: arn:aws:ecs:eu-central-1:442420265876:task-definition/LatitudeLLMTaskFamily:91
LoadBalancerInfo:
ContainerName: "LatitudeLLMAppContainer"
ContainerPort: 8080
- TargetService:
Type: AWS::ECS::Service
Properties:
TaskDefinition: arn:aws:ecs:eu-central-1:442420265876:task-definition/LatitudeLLMTaskFamily:92
LoadBalancerInfo:
ContainerName: 'LatitudeLLMAppContainer'
ContainerPort: 8080
1 change: 1 addition & 0 deletions apps/web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"@monaco-editor/react": "^4.6.0",
"@sentry/nextjs": "^8",
"@sentry/utils": "^8.30.0",
"@sindresorhus/slugify": "^2.2.1",
"@t3-oss/env-nextjs": "^0.10.1",
"ai": "^3.2.42",
"bullmq": "^5.8.5",
Expand Down
99 changes: 99 additions & 0 deletions apps/web/src/actions/datasets/generateDataset.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
'use server'

import {
ChainObjectResponse,
Dataset,
StreamEventTypes,
} from '@latitude-data/core/browser'
import { BadRequestError } from '@latitude-data/core/lib/errors'
import { createDataset } from '@latitude-data/core/services/datasets/create'
import { ChainEventDto } from '@latitude-data/sdk'
import slugify from '@sindresorhus/slugify'
import { createSdk } from '$/app/(private)/_lib/createSdk'
import env from '$/env'
import { getCurrentUser } from '$/services/auth/getCurrentUser'
import { createStreamableValue } from 'ai/rsc'

// Input accepted by `generateDatasetAction`.
type GenerateDatasetActionProps = {
// Forwarded as-is to the generator document under the `parameters` key.
parameters: Record<string, unknown>
// Forwarded to the generator document as `user_message`.
description: string
// Forwarded to the generator document as `row_count`.
rowCount: number
// Name of the dataset to create; its slugified form names the generated CSV file.
name: string
}

/**
 * Server action that runs the dataset-generator document and stores the CSV
 * it returns as a new dataset in the current user's workspace.
 *
 * @param parameters - Forwarded to the generator document as `parameters`.
 * @param description - Forwarded to the generator document as `user_message`.
 * @param rowCount - Forwarded to the generator document as `row_count`.
 * @param name - Name of the dataset; its slug is used as the CSV file name.
 * @returns `output`, a streamable value that is either completed or errored,
 *   and `response`, the created dataset (undefined when generation failed).
 * @throws BadRequestError when any DATASET_GENERATOR_* setting is missing.
 *
 * Failures that happen once the stream exists (generator run, missing CSV,
 * dataset creation) are reported through the stream rather than thrown.
 */
export async function generateDatasetAction({
  parameters,
  description,
  rowCount,
  name,
}: GenerateDatasetActionProps) {
  if (!env.DATASET_GENERATOR_PROJECT_ID) {
    throw new BadRequestError('DATASET_GENERATOR_PROJECT_ID is not set')
  }
  if (!env.DATASET_GENERATOR_DOCUMENT_PATH) {
    throw new BadRequestError('DATASET_GENERATOR_DOCUMENT_PATH is not set')
  }
  if (!env.DATASET_GENERATOR_WORKSPACE_APIKEY) {
    throw new BadRequestError('DATASET_GENERATOR_WORKSPACE_APIKEY is not set')
  }

  const { user, workspace } = await getCurrentUser()
  // Build the SDK before opening the stream so a failure here cannot leave an
  // unclosed streamable value behind.
  const sdk = await createSdk({
    apiKey: env.DATASET_GENERATOR_WORKSPACE_APIKEY,
    projectId: env.DATASET_GENERATOR_PROJECT_ID,
  }).then((r) => r.unwrap())

  const stream = createStreamableValue<
    { event: StreamEventTypes; data: ChainEventDto },
    Error
  >()
  let response: Dataset | undefined

  // A streamable value throws when written to after it has been closed, so
  // every terminal transition goes through this flag.
  let closed = false
  const fail = (error: unknown) => {
    if (closed) return
    closed = true
    const err = error instanceof Error ? error : new Error(String(error))
    // Sent as a plain object, as the original code did.
    // NOTE(review): presumably so the error serializes to the client — confirm.
    stream.error({
      name: err.name,
      message: err.message,
      stack: err.stack,
    })
  }

  try {
    const sdkResult = await sdk.run(env.DATASET_GENERATOR_DOCUMENT_PATH, {
      parameters: {
        row_count: rowCount,
        parameters,
        user_message: description,
      },
      onError: fail,
    })

    // When onError already reported a failure there is nothing to persist.
    if (!closed) {
      const csv = (sdkResult?.response as ChainObjectResponse | undefined)
        ?.object?.csv
      if (typeof csv !== 'string') {
        throw new Error('Dataset generator did not return any CSV data')
      }

      const result = await createDataset({
        author: user,
        workspace,
        data: {
          name,
          file: new File([csv], `${slugify(name)}.csv`, { type: 'text/csv' }),
          csvDelimiter: ',',
        },
      })
      if (result.error) {
        fail(result.error)
      } else {
        response = result.value
        closed = true
        stream.done()
      }
    }
  } catch (error) {
    fail(error)
  }

  return {
    output: stream.value,
    response,
  }
}
64 changes: 64 additions & 0 deletions apps/web/src/actions/sdk/generateDatasetPreviewAction.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
'use server'

import { StreamEventTypes } from '@latitude-data/core/browser'
import { BadRequestError } from '@latitude-data/core/lib/errors'
import { ChainEventDto } from '@latitude-data/sdk'
import { createSdk } from '$/app/(private)/_lib/createSdk'
import env from '$/env'
import { createStreamableValue } from 'ai/rsc'

/**
 * Input accepted by `generateDatasetPreviewAction`.
 *
 * `projectId` and `documentUuid` are not read by the action (the generator
 * project and document come from the DATASET_GENERATOR_* settings), so they
 * are optional; they remain accepted so existing callers keep compiling.
 */
type RunDocumentActionProps = {
  /** Unused by the preview action; kept for backward compatibility. */
  projectId?: number
  /** Unused by the preview action; kept for backward compatibility. */
  documentUuid?: string
  /** Forwarded as-is to the generator document under the `parameters` key. */
  parameters: Record<string, unknown>
  /** Forwarded to the generator document as `user_message`. */
  description: string
}

/** Number of rows requested from the generator for a preview. */
const PREVIEW_ROW_COUNT = 10

/**
 * Server action that runs the dataset-generator document for a small preview
 * and forwards the run's events through a streamable value.
 *
 * @param parameters - Forwarded to the generator document as `parameters`.
 * @param description - Forwarded to the generator document as `user_message`.
 * @returns `output`, the streamable value fed by the run's events, and
 *   `response`, the awaited result of `sdk.run`.
 * @throws BadRequestError when any DATASET_GENERATOR_* setting is missing.
 */
export async function generateDatasetPreviewAction({
  parameters,
  description,
}: RunDocumentActionProps) {
  if (!env.DATASET_GENERATOR_PROJECT_ID) {
    throw new BadRequestError('DATASET_GENERATOR_PROJECT_ID is not set')
  }
  if (!env.DATASET_GENERATOR_DOCUMENT_PATH) {
    throw new BadRequestError('DATASET_GENERATOR_DOCUMENT_PATH is not set')
  }
  if (!env.DATASET_GENERATOR_WORKSPACE_APIKEY) {
    throw new BadRequestError('DATASET_GENERATOR_WORKSPACE_APIKEY is not set')
  }

  const sdk = await createSdk({
    apiKey: env.DATASET_GENERATOR_WORKSPACE_APIKEY,
    projectId: env.DATASET_GENERATOR_PROJECT_ID,
  }).then((r) => r.unwrap())

  // Opened only after validation and SDK creation succeed, so an early throw
  // cannot leave an unclosed streamable value behind.
  const stream = createStreamableValue<
    { event: StreamEventTypes; data: ChainEventDto },
    Error
  >()

  // A streamable value throws when written to after it has been closed; the
  // flag makes the callbacks safe whichever of them the SDK fires, and in
  // whatever order.
  let closed = false

  const response = await sdk.run(env.DATASET_GENERATOR_DOCUMENT_PATH, {
    parameters: {
      row_count: PREVIEW_ROW_COUNT,
      parameters,
      user_message: description,
    },
    onEvent: (event) => {
      if (closed) return
      stream.update(event)
    },
    onError: (error) => {
      if (closed) return
      closed = true
      stream.error({
        name: error.name,
        message: error.message,
        stack: error.stack,
      })
    },
    onFinished: () => {
      if (closed) return
      closed = true
      stream.done()
    },
  })

  return {
    output: stream.value,
    response,
  }
}
2 changes: 1 addition & 1 deletion apps/web/src/actions/sdk/runDocumentAction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export async function runDocumentAction({
commitUuid,
parameters,
}: RunDocumentActionProps) {
const sdk = await createSdk(projectId).then((r) => r.unwrap())
const sdk = await createSdk({ projectId }).then((r) => r.unwrap())
const stream = createStreamableValue<
{ event: StreamEventTypes; data: ChainEventDto },
Error
Expand Down
20 changes: 13 additions & 7 deletions apps/web/src/app/(private)/_lib/createSdk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,24 @@ async function getLatitudeApiKey() {
return Result.ok(firstApiKey)
}

export async function createSdk(projectId?: number) {
const result = await getLatitudeApiKey()
if (result.error) return result
export async function createSdk({
projectId,
apiKey,
}: {
projectId?: number
apiKey?: string
} = {}) {
if (!apiKey) {
const result = await getLatitudeApiKey()
if (result.error) return result

const latitudeApiKey = result.value.token
apiKey = result.value.token
}

const gateway = {
host: env.GATEWAY_HOSTNAME,
port: env.GATEWAY_PORT,
ssl: env.GATEWAY_SSL,
}
return Result.ok(
new Latitude(latitudeApiKey, compactObject({ gateway, projectId })),
)
return Result.ok(new Latitude(apiKey, compactObject({ gateway, projectId })))
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ function PreviewModal({
) : (
<Table maxHeight={TABLE_MAX_HEIGHT}>
<TableHeader>
<TableRow verticalPadding>
<TableRow verticalPadding hoverable={false}>
<TableHead>
<Text.H4>#</Text.H4>
</TableHead>
Expand All @@ -68,7 +68,7 @@ function PreviewModal({
<TableBody>
{rows.map((row, rowIndex) => {
return (
<TableRow key={rowIndex} verticalPadding>
<TableRow key={rowIndex} verticalPadding hoverable={false}>
{row.map((cell, cellIndex) => (
<TableCell key={cellIndex}>{cell}</TableCell>
))}
Expand Down
48 changes: 48 additions & 0 deletions apps/web/src/app/(private)/datasets/generate/CsvPreviewTable.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import {
Table,
TableBody,
TableCell,
TableHead,
TableHeader,
TableRow,
Text,
} from '@latitude-data/web-ui'

// Props for `CsvPreviewTable`.
interface CsvPreviewTableProps {
csvData: {
// Column names; rendered as the table header and used to look up each cell.
headers: string[]
// One entry per table row.
data: {
// Cell values keyed by column name.
record: Record<string, string>
// Not read by CsvPreviewTable.
info: { columns: { name: string }[] }
}[]
}
}

/**
 * Renders parsed CSV data as a read-only table: one header cell per entry in
 * `csvData.headers`, and one row per entry in `csvData.data` whose cells are
 * looked up in the row's `record` by header name.
 */
export function CsvPreviewTable({ csvData }: CsvPreviewTableProps) {
  const { headers, data } = csvData

  return (
    <Table>
      <TableHeader>
        {/* Non-interactive preview: hover is disabled on the header row too,
            matching the body rows below. */}
        <TableRow hoverable={false}>
          {headers.map((header, index) => (
            <TableHead key={index}>
              <Text.H5>{header}</Text.H5>
            </TableHead>
          ))}
        </TableRow>
      </TableHeader>
      <TableBody>
        {data.map(({ record }, rowIndex) => (
          <TableRow key={rowIndex} hoverable={false}>
            {headers.map((header, cellIndex) => (
              <TableCell key={cellIndex}>
                <div className='py-1'>
                  <Text.H5>{record[header]}</Text.H5>
                </div>
              </TableCell>
            ))}
          </TableRow>
        ))}
      </TableBody>
    </Table>
  )
}
Loading

0 comments on commit 9094fb5

Please sign in to comment.