Skip to content

Commit

Permalink
feat(eval): generate simple evaluations (#582)
Browse files Browse the repository at this point in the history
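Changes the copilot evaluation generator so that it produces simple LLM-as-judge evaluations (an objective plus optional additional instructions) instead of advanced, prompt-based evaluations.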
  • Loading branch information
geclos authored Nov 11, 2024
1 parent cf43568 commit 14bbf59
Show file tree
Hide file tree
Showing 10 changed files with 338 additions and 159 deletions.
56 changes: 1 addition & 55 deletions apps/web/src/actions/evaluations/create.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
'use server'

import {
EvaluationMetadataLlmAsJudgeAdvanced,
EvaluationMetadataLlmAsJudgeSimple,
EvaluationMetadataType,
findFirstModelForProvider,
resultConfigurationSchema,
Workspace,
} from '@latitude-data/core/browser'
import { NotFoundError } from '@latitude-data/core/lib/errors'
import { createEvaluation } from '@latitude-data/core/services/evaluations/create'
import { findDefaultProvider } from '@latitude-data/core/services/providerApiKeys/findDefaultProvider'
import { z } from 'zod'

import { authProcedure } from '../procedures'
Expand Down Expand Up @@ -42,64 +36,16 @@ export const createEvaluationAction = authProcedure
{ type: 'json' },
)
.handler(async ({ input, ctx }) => {
const metadata = await enrichWithProvider({
metadata: input.metadata,
workspace: ctx.workspace,
})

const result = await createEvaluation({
workspace: ctx.workspace,
user: ctx.user,
name: input.name,
description: input.description,
metadataType: input.metadata.type,
metadata,
metadata: input.metadata,
resultType: input.resultConfiguration.type,
resultConfiguration: input.resultConfiguration,
})

return result.unwrap()
})

/**
 * Builds the metadata payload for a new evaluation from the validated input.
 *
 * The `type` discriminator is always stripped from the returned object.
 * - Advanced metadata is returned as-is (minus `type`).
 * - Simple metadata that already carries both `providerApiKeyId` and `model`
 *   is returned as-is (minus `type`).
 * - Otherwise the workspace's default provider and the first model found for
 *   that provider are filled in, overwriting any partial values in the input.
 *
 * @param metadata - Parsed evaluation metadata (advanced or simple schema).
 * @param workspace - Workspace used to look up the default provider.
 * @returns The metadata without `type`, with provider and model set when needed.
 * @throws NotFoundError when no default provider exists for the workspace, or
 *   when no model can be found for that provider.
 */
async function enrichWithProvider({
  metadata,
  workspace,
}: {
  metadata: z.infer<
    | typeof advancedEvaluationMetadataSchema
    | typeof simpleEvaluationMetadataSchema
  >
  workspace: Workspace
}): Promise<
  EvaluationMetadataLlmAsJudgeSimple | EvaluationMetadataLlmAsJudgeAdvanced
> {
  // Drop the discriminator; everything else is passed through.
  const { type: _type, ...payload } = metadata

  if (metadata.type === EvaluationMetadataType.LlmAsJudgeAdvanced) {
    return payload as EvaluationMetadataLlmAsJudgeAdvanced
  }

  // A simple evaluation that names both a provider key and a model needs no
  // further enrichment.
  const hasExplicitProviderAndModel =
    metadata.type === EvaluationMetadataType.LlmAsJudgeSimple &&
    Boolean(metadata.providerApiKeyId) &&
    Boolean(metadata.model)
  if (hasExplicitProviderAndModel) {
    return payload as EvaluationMetadataLlmAsJudgeSimple
  }

  const defaultProvider = await findDefaultProvider(workspace)
  if (!defaultProvider) {
    throw new NotFoundError(
      `No default provider found for workspace ${workspace.id}`,
    )
  }

  const defaultModel = findFirstModelForProvider(defaultProvider.provider)
  if (!defaultModel) {
    throw new NotFoundError(
      `No default model found for provider ${defaultProvider.provider}`,
    )
  }

  // Both fields are overwritten together so the pair is always consistent.
  const enriched = {
    ...payload,
    providerApiKeyId: defaultProvider.id,
    model: defaultModel,
  }
  return enriched as EvaluationMetadataLlmAsJudgeSimple
}
33 changes: 20 additions & 13 deletions apps/web/src/actions/evaluations/createFromPrompt.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
'use server'

import { EvaluationResultableType } from '@latitude-data/core/browser'
import { createAdvancedEvaluation } from '@latitude-data/core/services/evaluations/create'
import {
EvaluationMetadataType,
EvaluationResultableType,
} from '@latitude-data/core/browser'
import { createEvaluation } from '@latitude-data/core/services/evaluations/create'
import { z } from 'zod'

import { withDocument } from '../procedures'
Expand All @@ -11,35 +14,39 @@ export const createEvaluationFromPromptAction = withDocument
.input(
z.object({
name: z.string(),
prompt: z.string(),
objective: z.string(),
additionalInstructions: z.string().optional(),
resultType: z.nativeEnum(EvaluationResultableType),
metadata: z
.object({
metadata: z.union([
z.object({
minValue: z.number(),
maxValue: z.number(),
minValueDescription: z.string().optional(),
maxValueDescription: z.string().optional(),
})
.optional(),
}),
z.object({
falseValueDescription: z.string().optional(),
trueValueDescription: z.string().optional(),
}),
]),
}),
{ type: 'json' },
)
.handler(async ({ input, ctx }) => {
const result = await createAdvancedEvaluation({
const result = await createEvaluation({
workspace: ctx.workspace,
name: input.name,
description: 'AI-generated evaluation',
resultType:
input.resultType === EvaluationResultableType.Number
? EvaluationResultableType.Number
: EvaluationResultableType.Boolean,
resultConfiguration:
input.resultType === EvaluationResultableType.Number && input.metadata
? input.metadata
: {},
resultConfiguration: input.metadata,
metadata: {
prompt: input.prompt,
objective: input.objective,
additionalInstructions: input.additionalInstructions ?? null,
},
metadataType: EvaluationMetadataType.LlmAsJudgeSimple,
user: ctx.user,
projectId: ctx.project.id,
documentUuid: ctx.document.documentUuid,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ export const generateSuggestedEvaluationsAction = authProcedure
env.COPILOT_EVALUATION_SUGGESTION_PROMPT_PATH,
{
stream: false,
versionUuid: 'da47b89d-2bde-4c6c-92ee-11a17241eb73', // TODO: remove
parameters: {
user_prompt: input.documentContent,
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,25 +65,37 @@ export default function CreateEvaluationModal({
})

const onConfirm = useCallback(() => {
create({
name: title,
description,
metadata: {
type: EvaluationMetadataType.LlmAsJudgeSimple,
objective: '',
additionalInstructions: '',
},
resultConfiguration:
configuration.type === EvaluationResultableType.Number
? {
type: configuration.type,
minValue: configuration.detail!.range.from,
maxValue: configuration.detail!.range.to,
}
: {
type: configuration.type,
},
})
const resultConfiguration =
configuration.type === EvaluationResultableType.Number
? {
type: configuration.type,
minValue: configuration.detail!.range.from,
maxValue: configuration.detail!.range.to,
}
: { type: configuration.type }

if (prompt) {
create({
name: title,
description,
metadata: {
type: EvaluationMetadataType.LlmAsJudgeAdvanced,
prompt,
},
resultConfiguration,
})
} else {
create({
name: title,
description,
metadata: {
type: EvaluationMetadataType.LlmAsJudgeSimple,
objective: '',
additionalInstructions: '',
},
resultConfiguration,
})
}

onClose(null)
}, [create, onClose, title, description, prompt, configuration])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,14 @@ export default function GenerateEvaluationPage() {
const { execute: createEvaluation } = useLatitudeAction(
createEvaluationFromPromptAction,
)
const [generatedSuggestion, setGeneratedSuggestion] = useState<any>(null)
const [generatedSuggestion, setGeneratedSuggestion] =
useState<SuggestedEvaluation | null>(null)
const validateSuggestion = (suggestion: SuggestedEvaluation) => {
if (
!suggestion.eval_name ||
!suggestion.eval_description ||
!suggestion.eval_prompt
!suggestion.eval_objective ||
!suggestion.metadata
) {
return false
}
Expand Down Expand Up @@ -100,19 +102,14 @@ export default function GenerateEvaluationPage() {
projectId: project.id,
documentUuid: document.documentUuid,
commitUuid: commit.uuid,
prompt: generatedSuggestion.eval_prompt,
objective: generatedSuggestion.eval_objective,
additionalInstructions: generatedSuggestion.eval_additional_instructions,
name: generatedSuggestion.eval_name,
resultType:
generatedSuggestion.eval_type === 'number'
? EvaluationResultableType.Number
: EvaluationResultableType.Boolean,
metadata:
generatedSuggestion.eval_type === 'number'
? {
minValue: generatedSuggestion.metadata.range.from as number,
maxValue: generatedSuggestion.metadata.range.to as number,
}
: undefined,
metadata: generatedSuggestion.metadata,
})

if (newEvaluation) {
Expand Down Expand Up @@ -146,6 +143,69 @@ export default function GenerateEvaluationPage() {
}
}

// Renders the two-column "expected values" summary for a suggestion.
// Numeric suggestions show the min/max values; any other type is treated as
// boolean and shows the literal True/False labels. Each column shows its
// description underneath only when one is present.
const renderMetadata = (suggestion: SuggestedEvaluation) => {
  type ValueColumn = {
    label: string
    value: number | string
    description?: string
  }

  let columns: ValueColumn[]
  if (suggestion.eval_type === 'number') {
    const range = suggestion.metadata as {
      minValue: number
      maxValue: number
      minValueDescription?: string
      maxValueDescription?: string
    }
    columns = [
      {
        label: 'Min Value',
        value: range.minValue,
        description: range.minValueDescription,
      },
      {
        label: 'Max Value',
        value: range.maxValue,
        description: range.maxValueDescription,
      },
    ]
  } else {
    // Boolean type
    const outcomes = suggestion.metadata as {
      falseValueDescription?: string
      trueValueDescription?: string
    }
    columns = [
      {
        label: 'True Value',
        value: 'True',
        description: outcomes.trueValueDescription,
      },
      {
        label: 'False Value',
        value: 'False',
        description: outcomes.falseValueDescription,
      },
    ]
  }

  return (
    <div className='grid grid-cols-2 gap-3'>
      {columns.map(({ label, value, description }) => (
        <div key={label} className='flex flex-col gap-1'>
          <Text.H6M color='foregroundMuted'>{label}</Text.H6M>
          <Text.H5M>{value}</Text.H5M>
          {description && (
            <Text.H6M color='foregroundMuted'>{description}</Text.H6M>
          )}
        </div>
      ))}
    </div>
  )
}

return (
<Modal
open
Expand Down Expand Up @@ -190,7 +250,7 @@ export default function GenerateEvaluationPage() {
<>
<div className='w-full flex flex-col gap-4'>
<div className='w-full flex flex-col gap-2'>
<Text.H6M color='foregroundMuted'>Evaluation Name</Text.H6M>
<Text.H6M color='foregroundMuted'>Name</Text.H6M>
<Text.H5M>
<TypewriterText
text={generatedSuggestion.eval_name}
Expand All @@ -199,9 +259,7 @@ export default function GenerateEvaluationPage() {
</Text.H5M>
</div>
<div className='w-full flex flex-col gap-2'>
<Text.H6M color='foregroundMuted'>
Evaluation Description
</Text.H6M>
<Text.H6M color='foregroundMuted'>Description</Text.H6M>
<Text.H5M>
<TypewriterText
text={generatedSuggestion.eval_description}
Expand All @@ -210,12 +268,45 @@ export default function GenerateEvaluationPage() {
</Text.H5M>
</div>
<div className='w-full flex flex-col gap-2'>
<Text.H6M color='foregroundMuted'>Evaluation Prompt</Text.H6M>
<div className='w-full h-80 p-2 bg-background text-foreground border border-border rounded-md resize-none focus:outline-none focus:ring-2 focus:ring-primary text-sm bg-secondary overflow-auto'>
<Text.H6M color='foregroundMuted'>Objective</Text.H6M>
<Text.H5M>
<TypewriterText
text={generatedSuggestion.eval_prompt}
text={generatedSuggestion.eval_objective}
speed={0}
/>
</Text.H5M>
</div>
{generatedSuggestion.eval_additional_instructions && (
<div className='w-full flex flex-col gap-2'>
<Text.H6M color='foregroundMuted'>
Additional Instructions
</Text.H6M>
<Text.H5M>
<TypewriterText
text={generatedSuggestion.eval_additional_instructions}
speed={0}
/>
</Text.H5M>
</div>
)}
<div className='w-full flex flex-col gap-2'>
<Text.H6M color='foregroundMuted'>Result Type</Text.H6M>
<Text.H5M>
<TypewriterText
text={
generatedSuggestion.eval_type === 'number'
? 'Numeric Score'
: 'Boolean (Pass/Fail)'
}
speed={0}
/>
</Text.H5M>
</div>

<div className='w-full flex flex-col gap-2'>
<Text.H6M color='foregroundMuted'>Expected Values</Text.H6M>
<div className='rounded-lg border bg-card p-4'>
{renderMetadata(generatedSuggestion)}
</div>
</div>
</div>
Expand Down
Loading

0 comments on commit 14bbf59

Please sign in to comment.