feat(evaluations): add eject functionality for evaluations (#620)
- Implement `ejectEvaluationAction` to handle ejecting an evaluation into advanced mode.
- Create an `ejectEvaluation` service to manage the ejection itself, updating the evaluation's metadata and result configuration (a hedged sketch follows the summary below).
- Add unit tests for `ejectEvaluationAction` covering both authorized and unauthorized scenarios.
- Refactor the evaluation editors to share a common `EvaluationEditorLayout` component for consistent UI and behavior.
- Introduce a `useProviderModel` hook to centralize provider and model selection logic across the evaluation editors.

This change lets users convert simple evaluations into advanced evaluations with custom prompts, giving them finer control over how an evaluation is run.
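The `ejectEvaluation` service itself is in one of the changed files not rendered below, so here is a minimal sketch of what the visible code implies it does. The `buildPromptFromSimpleConfig` and `updateEvaluation` helpers and the `LlmAsJudgeAdvanced` enum member are assumptions, not the real API; the actual implementation lives in `@latitude-data/core/services/evaluations/eject`.

import {
  EvaluationDto,
  EvaluationMetadataType,
} from '@latitude-data/core/browser'
import { Result } from '@latitude-data/core/lib/Result'

// Hypothetical helpers: prompt generation and persistence are not shown in
// this commit view, so both names are stand-ins for the real logic.
declare function buildPromptFromSimpleConfig(metadata: unknown): string
declare function updateEvaluation(
  evaluation: EvaluationDto,
  updates: {
    metadataType: EvaluationMetadataType
    metadata: { prompt: string }
  },
): Promise<EvaluationDto>

export async function ejectEvaluation(evaluation: EvaluationDto) {
  // Only simple LLM-as-judge evaluations have anything to eject from.
  if (evaluation.metadataType !== EvaluationMetadataType.LlmAsJudgeSimple) {
    return Result.error(new Error('Only simple evaluations can be ejected'))
  }

  // Render the simple objective/instructions into a full, editable prompt
  // and persist the evaluation as an advanced (custom-prompt) one.
  const prompt = buildPromptFromSimpleConfig(evaluation.metadata)
  const updated = await updateEvaluation(evaluation, {
    metadataType: EvaluationMetadataType.LlmAsJudgeAdvanced, // assumed enum member
    metadata: { prompt },
  })

  return Result.ok(updated)
}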
geclos authored Nov 15, 2024
1 parent e365e69 commit 72bb75f
Showing 10 changed files with 577 additions and 363 deletions.
127 changes: 127 additions & 0 deletions apps/web/src/actions/evaluations/eject.test.ts
@@ -0,0 +1,127 @@
import {
EvaluationMetadataType,
EvaluationResultableType,
ProviderApiKey,
Providers,
} from '@latitude-data/core/browser'
import * as factories from '@latitude-data/core/factories'
import { Result } from '@latitude-data/core/lib/Result'
import { EvaluationsRepository } from '@latitude-data/core/repositories'
import { beforeEach, describe, expect, it, vi } from 'vitest'

import { ejectEvaluationAction } from './eject'

const mocks = vi.hoisted(() => {
return {
getSession: vi.fn(),
}
})
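
// vi.mock calls are hoisted to the top of the file before imports run, so
// the factory below can only reference `mocks` because vi.hoisted created
// it ahead of that hoisting.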

vi.mock('$/services/auth/getSession', () => ({
getSession: mocks.getSession,
}))

vi.mock('@latitude-data/core/repositories/evaluationsRepository')

describe('ejectEvaluationAction', () => {
describe('unauthorized', () => {
let evaluationId: number

beforeEach(async () => {
const { workspace, userData } = await factories.createWorkspace()
const provider = await factories.createProviderApiKey({
workspace,
type: Providers.OpenAI,
name: 'Test Provider',
user: userData,
})
const evaluation = await factories.createLlmAsJudgeEvaluation({
workspace,
user: userData,
prompt: factories.helpers.createPrompt({ provider }),
})
evaluationId = evaluation.id
})

it('errors when the user is not authenticated', async () => {
mocks.getSession.mockReturnValue(null)

const [_, error] = await ejectEvaluationAction({
id: evaluationId,
})

expect(error!.name).toEqual('UnauthorizedError')
})
})

describe('authorized', () => {
let evaluation: any
let user: any
let workspace: any
let provider: ProviderApiKey

beforeEach(async () => {
const { workspace: createdWorkspace, userData } =
await factories.createWorkspace()

workspace = createdWorkspace
user = userData
provider = await factories.createProviderApiKey({
workspace,
type: Providers.OpenAI,
name: 'Test Provider',
user,
})
evaluation = await factories.createEvaluation({
workspace,
user,
metadataType: EvaluationMetadataType.LlmAsJudgeSimple,
metadata: {
providerApiKeyId: provider.id,
model: 'gpt-4o',
objective: 'Test objective',
additionalInstructions: 'Test additional instructions',
},
resultType: EvaluationResultableType.Boolean,
resultConfiguration: {
trueValueDescription: 'Test true value description',
falseValueDescription: 'Test false value description',
},
})

mocks.getSession.mockReturnValue({
user: userData,
})
})

it('successfully ejects an evaluation', async () => {
// @ts-expect-error - Mocking the repository
vi.mocked(EvaluationsRepository).mockImplementation(() => ({
find: vi.fn().mockResolvedValue(Result.ok(evaluation)),
}))

const [data, error] = await ejectEvaluationAction({
id: evaluation.id,
})

expect(error).toBeNull()
expect(data).toEqual(expect.objectContaining({ id: evaluation.id }))
})

it('throws an error if evaluation is not found', async () => {
// @ts-expect-error - Mocking the repository
vi.mocked(EvaluationsRepository).mockImplementation(() => ({
find: vi
.fn()
.mockResolvedValue(Result.error(new Error('Evaluation not found'))),
}))

const [_, error] = await ejectEvaluationAction({
id: 999,
})

expect(error).not.toBeNull()
expect(error!.message).toEqual('Evaluation not found')
})
})
})
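
For readers outside the codebase: the tests above stub the repository with `Result.ok`/`Result.error`, and the action below chains `.unwrap()`. A minimal sketch of the Result shape those calls assume; the real class in `@latitude-data/core/lib/Result` is richer than this.

// Minimal sketch, not the real implementation.
class Result<T> {
  private constructor(
    readonly value?: T,
    readonly error?: Error,
  ) {}

  static ok<T>(value: T): Result<T> {
    return new Result(value)
  }

  static error(error: Error): Result<never> {
    return new Result<never>(undefined, error)
  }

  // unwrap() returns the value or throws, which is how the action
  // short-circuits when the repository lookup fails.
  unwrap(): T {
    if (this.error) throw this.error
    return this.value as T
  }
}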
18 changes: 18 additions & 0 deletions apps/web/src/actions/evaluations/eject.ts
@@ -0,0 +1,18 @@
'use server'

import { EvaluationsRepository } from '@latitude-data/core/repositories'
import { ejectEvaluation } from '@latitude-data/core/services/evaluations/eject'
import { z } from 'zod'

import { authProcedure } from '../procedures'

export const ejectEvaluationAction = authProcedure
.createServerAction()
.input(z.object({ id: z.coerce.number() }))
.handler(async ({ input, ctx }) => {
const scope = new EvaluationsRepository(ctx.workspace.id)
const evaluation = await scope.find(input.id).then((r) => r.unwrap())
const result = await ejectEvaluation(evaluation)

return result.unwrap()
})
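
The tests above show the action resolving to a `[data, error]` tuple, so a client component would call it roughly like this. The surrounding handler is made up for illustration; only the tuple contract and the import path are taken from the code above.

'use client'

import { ejectEvaluationAction } from '$/actions/evaluations/eject'

// Hypothetical click handler for an "eject" button.
async function onEjectClick(evaluationId: number) {
  const [data, error] = await ejectEvaluationAction({ id: evaluationId })
  if (error) {
    console.error('Eject failed:', error.message)
    return
  }
  // `data` is the updated, now-advanced evaluation.
  console.log('Ejected evaluation', data.id)
}

The remaining diff below refactors `BooleanEvaluationEditor` to consume the new `EvaluationEditorLayout` and `useProviderModel` pieces.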
@@ -1,99 +1,50 @@
'use client'

import { FormEvent, useEffect, useMemo, useState } from 'react'
import { FormEvent } from 'react'

import {
EvaluationConfigurationBoolean,
EvaluationDto,
EvaluationMetadataLlmAsJudgeSimple,
EvaluationMetadataType,
EvaluationResultableType,
findFirstModelForProvider,
} from '@latitude-data/core/browser'
import {
Button,
FormField,
FormFieldGroup,
Input,
Label,
Text,
useToast,
} from '@latitude-data/web-ui'
import {
IProviderByName,
ProviderModelSelector,
} from '$/components/EditorHeader'
import { envClient } from '$/envClient'
import useModelOptions from '$/hooks/useModelOptions'
import { ProviderModelSelector } from '$/components/EditorHeader'
import useEvaluations from '$/stores/evaluations'
import useProviderApiKeys from '$/stores/providerApiKeys'

import { EvaluationEditorLayout } from '../components/EvaluationEditorLayout'
import { useProviderModel } from '../hooks/useProviderModel'

export default function BooleanEvaluationEditor({
evaluation,
}: {
evaluation: EvaluationDto
}) {
const { toast } = useToast()
const { data: providerApiKeys } = useProviderApiKeys()
const metadata = evaluation.metadata as EvaluationMetadataLlmAsJudgeSimple
const resultConfiguration =
evaluation.resultConfiguration as EvaluationConfigurationBoolean
const [selectedProvider, setSelectedProvider] = useState<string | undefined>()
const [selectedModel, setSelectedModel] = useState<
string | undefined | null
>()
useEffect(() => {
const provider = providerApiKeys.find(
(pk) => pk.id === metadata.providerApiKeyId,
)
if (!provider) return

setSelectedProvider(provider.name)
setSelectedModel(metadata.model)
}, [providerApiKeys])

const { update } = useEvaluations()

const providerOptions = useMemo(() => {
return providerApiKeys.map((apiKey) => ({
label: apiKey.name,
value: apiKey.name,
}))
}, [providerApiKeys])
const providersByName = useMemo(() => {
return providerApiKeys.reduce((acc, data) => {
acc[data.name] = data
return acc
}, {} as IProviderByName)
}, [providerApiKeys])
const provider = selectedProvider
? providersByName[selectedProvider]
: undefined
const modelOptions = useModelOptions({
provider: provider?.provider,
name: provider?.name,
})
const onProviderChange = async (value: string) => {
if (!value) return
if (value === selectedProvider) return

const firstModel = findFirstModelForProvider({
provider: providersByName[value],
latitudeProvider: envClient.NEXT_PUBLIC_DEFAULT_PROJECT_ID,
})

setSelectedProvider(value)
setSelectedModel(firstModel)
}
const onModelChange = async (value: string) => {
if (!value) return
if (value === selectedModel) return
const {
provider,
selectedProvider,
selectedModel,
providerOptions,
modelOptions,
onProviderChange,
onModelChange,
} = useProviderModel(metadata)

setSelectedModel(value)
}
const onSubmit = async (e: FormEvent<HTMLFormElement>) => {
e.preventDefault()

const formData = new FormData(e.currentTarget)

const [_, error] = await update({
@@ -123,63 +74,56 @@ export default function BooleanEvaluationEditor({
}

return (
<div className='flex flex-col gap-y-2 h-full'>
<div className='flex flex-row items-center justify-between'>
<Text.H4M>{evaluation.name}</Text.H4M>
<Button fancy form='simple-boolean-evaluation-editor' type='submit'>
Save changes
</Button>
</div>
<form
className='bg-backgroundCode flex flex-grow flex-col gap-y-4 p-4 rounded-lg border'
id='simple-boolean-evaluation-editor'
onSubmit={onSubmit}
>
<FormField>
<ProviderModelSelector
modelDisabled={!modelOptions.length || !selectedProvider}
modelOptions={modelOptions}
onModelChange={onModelChange}
onProviderChange={onProviderChange}
providerDisabled={!providerOptions.length}
providerOptions={providerOptions}
selectedModel={selectedModel}
selectedProvider={selectedProvider}
<EvaluationEditorLayout
name={evaluation.name}
evaluationId={evaluation.id}
formId='simple-boolean-evaluation-editor'
onSubmit={onSubmit}
>
<FormField>
<ProviderModelSelector
modelDisabled={!modelOptions.length || !selectedProvider}
modelOptions={modelOptions}
onModelChange={onModelChange}
onProviderChange={onProviderChange}
providerDisabled={!providerOptions.length}
providerOptions={providerOptions}
selectedModel={selectedModel}
selectedProvider={selectedProvider}
/>
</FormField>
<FormField label='Evaluation objective'>
<Input
name='objective'
defaultValue={metadata.objective}
placeholder='The main objective of the evaluation'
/>
</FormField>
<FormFieldGroup>
<FormField label='True value description'>
<Input
name='trueValueDescription'
defaultValue={resultConfiguration.trueValueDescription ?? ''}
placeholder='Description of the true value'
/>
</FormField>
<FormField label='Evaluation objective'>
<FormField label='False value description'>
<Input
name='objective'
defaultValue={metadata.objective}
placeholder='The main objective of the evaluation'
name='falseValueDescription'
defaultValue={resultConfiguration.falseValueDescription ?? ''}
placeholder='Description of the false value'
/>
</FormField>
<FormFieldGroup>
<FormField label='True value description'>
<Input
name='trueValueDescription'
defaultValue={resultConfiguration.trueValueDescription ?? ''}
placeholder='Description of the true value'
/>
</FormField>
<FormField label='False value description'>
<Input
name='falseValueDescription'
defaultValue={resultConfiguration.falseValueDescription ?? ''}
placeholder='Description of the false value'
/>
</FormField>
</FormFieldGroup>
<div className='flex flex-col gap-2 flex-grow'>
<Label>Additional instructions</Label>
<textarea
name='additionalInstructions'
className='w-full h-full border rounded-lg p-2 text-sm text-foreground'
defaultValue={metadata.additionalInstructions ?? ''}
placeholder='Additional instructions the eval should take into account...'
/>
</div>
</form>
</div>
</FormFieldGroup>
<div className='flex flex-col gap-2 flex-grow'>
<Label>Additional instructions</Label>
<textarea
name='additionalInstructions'
className='w-full h-full border rounded-lg p-2 text-sm text-foreground'
defaultValue={metadata.additionalInstructions ?? ''}
placeholder='Additional instructions the eval should take into account...'
/>
</div>
</EvaluationEditorLayout>
)
}
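
The `useProviderModel` hook's own file is not rendered in this view, but the inline logic it replaces (deleted above) pins down its shape fairly well. A sketch reconstructed from that removed code, not the actual file:

import { useEffect, useMemo, useState } from 'react'

import {
  EvaluationMetadataLlmAsJudgeSimple,
  findFirstModelForProvider,
} from '@latitude-data/core/browser'
import { IProviderByName } from '$/components/EditorHeader'
import { envClient } from '$/envClient'
import useModelOptions from '$/hooks/useModelOptions'
import useProviderApiKeys from '$/stores/providerApiKeys'

export function useProviderModel(metadata: EvaluationMetadataLlmAsJudgeSimple) {
  const { data: providerApiKeys } = useProviderApiKeys()
  const [selectedProvider, setSelectedProvider] = useState<string | undefined>()
  const [selectedModel, setSelectedModel] = useState<
    string | undefined | null
  >()

  // Seed the selection from the evaluation's stored provider/model once the
  // provider API keys load.
  useEffect(() => {
    const provider = providerApiKeys.find(
      (pk) => pk.id === metadata.providerApiKeyId,
    )
    if (!provider) return

    setSelectedProvider(provider.name)
    setSelectedModel(metadata.model)
  }, [providerApiKeys])

  const providerOptions = useMemo(
    () =>
      providerApiKeys.map((apiKey) => ({
        label: apiKey.name,
        value: apiKey.name,
      })),
    [providerApiKeys],
  )
  const providersByName = useMemo(
    () =>
      providerApiKeys.reduce((acc, data) => {
        acc[data.name] = data
        return acc
      }, {} as IProviderByName),
    [providerApiKeys],
  )
  const provider = selectedProvider
    ? providersByName[selectedProvider]
    : undefined
  const modelOptions = useModelOptions({
    provider: provider?.provider,
    name: provider?.name,
  })

  // Changing provider resets the model to that provider's first model, as in
  // the removed inline handlers.
  const onProviderChange = async (value: string) => {
    if (!value || value === selectedProvider) return

    setSelectedProvider(value)
    setSelectedModel(
      findFirstModelForProvider({
        provider: providersByName[value],
        latitudeProvider: envClient.NEXT_PUBLIC_DEFAULT_PROJECT_ID,
      }),
    )
  }
  const onModelChange = async (value: string) => {
    if (!value || value === selectedModel) return

    setSelectedModel(value)
  }

  return {
    provider,
    selectedProvider,
    selectedModel,
    providerOptions,
    modelOptions,
    onProviderChange,
    onModelChange,
  }
}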
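Likewise, `EvaluationEditorLayout` is not rendered above, but the JSX it replaces shows the header-plus-form chrome it must wrap. A sketch reconstructed from that removed markup; the real component presumably also uses `evaluationId` to wire up the new eject action, which this sketch omits.

import { FormEvent, ReactNode } from 'react'

import { Button, Text } from '@latitude-data/web-ui'

export function EvaluationEditorLayout({
  name,
  evaluationId, // presumably feeds the new eject control; unused in this sketch
  formId,
  onSubmit,
  children,
}: {
  name: string
  evaluationId: number
  formId: string
  onSubmit: (e: FormEvent<HTMLFormElement>) => void
  children: ReactNode
}) {
  return (
    <div className='flex flex-col gap-y-2 h-full'>
      <div className='flex flex-row items-center justify-between'>
        <Text.H4M>{name}</Text.H4M>
        <Button fancy form={formId} type='submit'>
          Save changes
        </Button>
      </div>
      <form
        className='bg-backgroundCode flex flex-grow flex-col gap-y-4 p-4 rounded-lg border'
        id={formId}
        onSubmit={onSubmit}
      >
        {children}
      </form>
    </div>
  )
}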