Skip to content

Commit

Permalink
feature: evaluation result types and configuration
Browse files Browse the repository at this point in the history
Evaluations and evaluation templates now have a configuration that specifies
the type of result we expect, stored in a jsonb column. EvaluationResults now
reference a polymorphic table that stores the result according to its type
– this is so that we can aggregate results in SQL later on.
  • Loading branch information
geclos committed Sep 12, 2024
1 parent 2a9b942 commit 71d6c40
Show file tree
Hide file tree
Showing 40 changed files with 3,027 additions and 93 deletions.
14 changes: 13 additions & 1 deletion apps/web/src/actions/evaluations/create.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
'use server'

import { EvaluationMetadataType } from '@latitude-data/core/browser'
import {
EvaluationMetadataType,
EvaluationResultableType,
} from '@latitude-data/core/browser'
import { createEvaluation } from '@latitude-data/core/services/evaluations/create'
import { z } from 'zod'

Expand All @@ -16,6 +19,12 @@ export const createEvaluationAction = authProcedure
.nativeEnum(EvaluationMetadataType)
.optional()
.default(EvaluationMetadataType.LlmAsJudge),
configuration: z.object({
type: z.nativeEnum(EvaluationResultableType),
detail: z
.object({ range: z.object({ from: z.number(), to: z.number() }) })
.optional(),
}),
metadata: z
.object({
prompt: z.string(),
Expand All @@ -25,11 +34,14 @@ export const createEvaluationAction = authProcedure
{ type: 'json' },
)
.handler(async ({ input, ctx }) => {
console.log(input)

const result = await createEvaluation({
workspace: ctx.workspace,
name: input.name,
description: input.description,
metadata: input.metadata,
configuration: input.configuration,
type: input.type,
})

Expand Down
11 changes: 10 additions & 1 deletion apps/web/src/app/(private)/error.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import { useEffect } from 'react'

import { ErrorComponent, useSession } from '@latitude-data/web-ui/browser'
import { NAV_LINKS } from '$/app/(private)/_lib/constants'
import BreadcrumpLink from '$/components/BreadcrumpLink'
import { AppLayout } from '$/components/layouts'
import { ROUTES } from '$/services/routes'

export default function Error({
error,
Expand All @@ -19,7 +21,14 @@ export default function Error({
return (
<AppLayout
currentUser={session.currentUser}
breadcrumbs={[{ name: session.workspace.name }, { name: 'Error' }]}
breadcrumbs={[
{
name: (
<BreadcrumpLink name={session.workspace.name} href={ROUTES.root} />
),
},
{ name: 'Error' },
]}
navigationLinks={NAV_LINKS}
>
<ErrorComponent
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
import { useCallback, useEffect, useMemo, useState } from 'react'

import {
EvaluationResultableType,
EvaluationResultConfiguration,
} from '@latitude-data/core/browser'
import {
ConfirmModal,
FormField,
Input,
ReactStateDispatch,
Text,
TabSelector,
TextArea,
} from '@latitude-data/web-ui'
import { ROUTES } from '$/services/routes'
Expand All @@ -27,6 +32,10 @@ export default function CreateEvaluationModal({
const [title, setTitle] = useState(initialData?.title ?? '')
const [description, setDescription] = useState(initialData?.description ?? '')
const [prompt, setPrompt] = useState(initialData?.prompt ?? '')
const [configuration, setConfiguration] =
useState<EvaluationResultConfiguration>({
type: EvaluationResultableType.Text,
})

const router = useRouter()

Expand Down Expand Up @@ -54,9 +63,10 @@ export default function CreateEvaluationModal({
name: title,
description,
metadata: { prompt },
configuration,
})
onClose(null)
}, [create, onClose, title, description, prompt])
}, [create, onClose, title, description, prompt, configuration])

const titleError = useMemo<string | undefined>(() => {
if (!title) return 'Please enter a name for your evaluation.'
Expand All @@ -82,18 +92,16 @@ export default function CreateEvaluationModal({
}}
>
<div className='w-full flex flex-col gap-4'>
<div className='w-full flex flex-col gap-4'>
<Text.H5M>Name</Text.H5M>
<FormField label='Title'>
<Input
value={title}
errors={titleError ? [titleError] : undefined}
onChange={(e) => setTitle(e.target.value)}
placeholder='Enter title'
className='w-full'
/>
</div>
<div className='w-full flex flex-col gap-4'>
<Text.H5M>Description</Text.H5M>
</FormField>
<FormField label='Description'>
<TextArea
value={description}
minRows={4}
Expand All @@ -102,7 +110,103 @@ export default function CreateEvaluationModal({
placeholder='Describe what is the purpose of this evaluation'
className='w-full'
/>
</div>
</FormField>
<FormField label='Type'>
<TabSelector
options={[
{ label: 'Text', value: EvaluationResultableType.Text },
{ label: 'Number', value: EvaluationResultableType.Number },
{ label: 'Boolean', value: EvaluationResultableType.Boolean },
]}
onSelect={(value) => {
if (value === EvaluationResultableType.Number) {
setConfiguration({
type: value,
detail: { range: { from: 0, to: 1 } },
})
} else {
setConfiguration({ type: value })
}
}}
selected={configuration.type}
/>
</FormField>
{configuration.type === EvaluationResultableType.Number && (
<FormField label='Range'>
<div className='flex flex-row items-center flex-1 gap-4'>
<Input
type='number'
min={0}
value={configuration.detail?.range.from.toString() || ''}
placeholder='From'
onChange={(e) => {
  // Read the raw value once, outside the state updater, so the updater
  // stays a pure function of the previous state.
  const raw = e.target.value

  setConfiguration((prev) => {
    const prevRange = prev.detail?.range

    // Clearing the field resets the lower bound to 0 and keeps the
    // current upper bound (or 0 when no range exists yet).
    if (raw === '') {
      return {
        ...prev,
        detail: { range: { from: 0, to: prevRange?.to ?? 0 } },
      }
    }

    const from = parseInt(raw, 10)
    // Ignore input that cannot be parsed instead of storing NaN.
    if (Number.isNaN(from)) return prev

    // No range yet: seed a minimal one starting at the typed value.
    if (!prevRange) {
      return { ...prev, detail: { range: { from, to: from + 1 } } }
    }

    // Test for the range's existence rather than the truthiness of
    // `from`: a lower bound of 0 is valid and must not discard the
    // user's upper bound. Only push `to` up when `from` overtakes it.
    const to = from > prevRange.to ? from + 1 : prevRange.to

    // Build a fresh object instead of mutating `prev.detail.range`,
    // which is shared with the previous state.
    return { ...prev, detail: { range: { from, to } } }
  })
}}
/>
<Input
type='number'
min={0}
value={configuration.detail?.range.to.toString() || ''}
placeholder='To'
onChange={(e) => {
  // Read the raw value once, outside the state updater, so the updater
  // stays a pure function of the previous state.
  const raw = e.target.value

  setConfiguration((prev) => {
    // Clearing the field resets the whole range to [0, 0] so that
    // `from` can never end up above `to`.
    if (raw === '') {
      return { ...prev, detail: { range: { from: 0, to: 0 } } }
    }

    const to = parseInt(raw, 10)
    // Ignore input that cannot be parsed instead of storing NaN.
    if (Number.isNaN(to)) return prev

    const prevRange = prev.detail?.range
    // Lower bound to fall back on when it has to move below `to`;
    // clamped at 0 because both inputs declare `min={0}`.
    const loweredFrom = Math.max(0, to - 1)

    // No range yet: seed a minimal one ending at the typed value.
    if (!prevRange) {
      return { ...prev, detail: { range: { from: loweredFrom, to } } }
    }

    // Test for the range's existence rather than the truthiness of
    // `to`: an upper bound of 0 (e.g. right after clearing the field)
    // must not overwrite the user's lower bound. Only pull `from` down
    // when `to` drops below it.
    const from = to < prevRange.from ? loweredFrom : prevRange.from

    // Build a fresh object instead of mutating `prev.detail.range`,
    // which is shared with the previous state.
    return { ...prev, detail: { range: { from, to } } }
  })
}}
/>
</div>
</FormField>
)}
</div>
</ConfirmModal>
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ export function DocumentLogMetadata({
providerLogs?.reduce(
(acc, log) => {
const key = String(log.providerId)
acc[key] = (acc[key] ?? 0) + log.cost_in_millicents
acc[key] = (acc[key] ?? 0) + log.costInMillicents
return acc
},
{} as Record<string, number>,
Expand Down
2 changes: 1 addition & 1 deletion apps/web/src/stores/evaluationTemplates.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import { compact, flatten } from 'lodash-es'

import type { EvaluationTemplateWithCategory } from '@latitude-data/core/browser'
import { EvaluationTemplateWithCategory } from '@latitude-data/core/browser'
import { useToast } from '@latitude-data/web-ui'
import { fetchEvaluationTemplatesAction } from '$/actions/evaluationTemplates/fetch'
import useSWR, { SWRConfiguration } from 'swr'
Expand Down
33 changes: 33 additions & 0 deletions packages/core/drizzle/0051_colorful_lightspeed.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
-- Migration 0051: typed evaluation results.
-- Creates one "resultable" table per result type (number, text, boolean),
-- adds a (resultable_type, resultable_id) pair to evaluation_results that
-- identifies a row in one of those tables, and adds a jsonb "configuration"
-- column to evaluations and evaluations_templates.
--
-- Step 1: enum naming the three resultable tables. The DO block swallows
-- duplicate_object so re-running the migration does not fail if the type
-- already exists.
-- NOTE(review): the type is created in "public" while every table lives in
-- "latitude", and it is referenced unqualified further down; this presumably
-- relies on "public" being on the search_path -- confirm.
DO $$ BEGIN
CREATE TYPE "public"."evaluation_result_types" AS ENUM('evaluation_resultable_booleans', 'evaluation_resultable_texts', 'evaluation_resultable_numbers');
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
--> statement-breakpoint
-- Step 2: one table per result type; each holds only the typed value plus
-- timestamps.
-- "result" is bigint here, so numeric results are stored as integers only.
CREATE TABLE IF NOT EXISTS "latitude"."evaluation_resultable_numbers" (
"id" bigserial PRIMARY KEY NOT NULL,
"result" bigint NOT NULL,
"created_at" timestamp DEFAULT now() NOT NULL,
"updated_at" timestamp DEFAULT now() NOT NULL
);
--> statement-breakpoint
CREATE TABLE IF NOT EXISTS "latitude"."evaluation_resultable_texts" (
"id" bigserial PRIMARY KEY NOT NULL,
"result" text NOT NULL,
"created_at" timestamp DEFAULT now() NOT NULL,
"updated_at" timestamp DEFAULT now() NOT NULL
);
--> statement-breakpoint
CREATE TABLE IF NOT EXISTS "latitude"."evaluation_resultable_booleans" (
"id" bigserial PRIMARY KEY NOT NULL,
"result" boolean NOT NULL,
"created_at" timestamp DEFAULT now() NOT NULL,
"updated_at" timestamp DEFAULT now() NOT NULL
);
--> statement-breakpoint
-- Step 3: new columns on existing tables.
-- NOTE(review): each ADD COLUMN below is NOT NULL with no DEFAULT. PostgreSQL
-- rejects that when the target table already contains rows, so this migration
-- presumably assumes these tables are empty (or will be backfilled by hand)
-- -- confirm before running against a populated database.
ALTER TABLE "latitude"."evaluations" ADD COLUMN "configuration" jsonb NOT NULL;--> statement-breakpoint
-- Discriminator for the polymorphic reference: which resultable table holds
-- the value.
ALTER TABLE "latitude"."evaluation_results" ADD COLUMN "resultable_type" "evaluation_result_types" NOT NULL;--> statement-breakpoint
-- Id of the row in the table named by resultable_type. No foreign key is
-- declared, so the database does not enforce that the referenced row exists.
ALTER TABLE "latitude"."evaluation_results" ADD COLUMN "resultable_id" bigint NOT NULL;--> statement-breakpoint
ALTER TABLE "latitude"."evaluations_templates" ADD COLUMN "configuration" jsonb NOT NULL;--> statement-breakpoint
-- Composite index over the polymorphic reference, led by resultable_id.
CREATE INDEX IF NOT EXISTS "resultable_idx" ON "latitude"."evaluation_results" USING btree ("resultable_id","resultable_type");--> statement-breakpoint
-- Step 4: the old inline "result" column is superseded by the resultable
-- tables.
-- NOTE(review): this drops the column without copying its values anywhere;
-- any existing results are lost -- confirm that is intended.
ALTER TABLE "latitude"."evaluation_results" DROP COLUMN IF EXISTS "result";
Loading

0 comments on commit 71d6c40

Please sign in to comment.