Simpler evaluations - part 1 (#489)
csansoon authored Oct 28, 2024
1 parent 3bf4389 commit 2e29dd7
Showing 44 changed files with 3,430 additions and 824 deletions.
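Most of the component changes below follow one pattern: an evaluation's configuration can now live either on the evaluation record itself or on its LLM-as-judge metadata, so every read becomes a fallback of the form (evaluation.configuration ?? evaluation.metadata.configuration)!. A minimal TypeScript sketch of that pattern, with shapes inferred from the diff (the resolveConfiguration helper and the simplified types are illustrative assumptions, not code from this commit):

import { EvaluationResultableType } from '@latitude-data/core/browser'

// Simplified shapes inferred from the diff; the real EvaluationDto carries more fields.
type EvaluationConfiguration = {
  type: EvaluationResultableType
  detail?: { range: { from: number; to: number } }
}

type EvaluationLike = {
  configuration?: EvaluationConfiguration | null
  metadata: { configuration?: EvaluationConfiguration | null }
}

// Hypothetical helper capturing the fallback that the components below repeat
// inline with a non-null assertion.
function resolveConfiguration(evaluation: EvaluationLike): EvaluationConfiguration {
  const configuration =
    evaluation.configuration ?? evaluation.metadata.configuration
  if (!configuration) throw new Error('Evaluation has no configuration')
  return configuration
}

// Usage mirroring ResultCellContent and the chart components: numeric evaluations
// fall back to a 0 to 10 range when no detail is configured.
function numericRange(evaluation: EvaluationLike) {
  const configuration = resolveConfiguration(evaluation)
  if (configuration.type !== EvaluationResultableType.Number) return undefined
  return {
    min: configuration.detail?.range.from ?? 0,
    max: configuration.detail?.range.to ?? 10,
  }
}

Throwing instead of asserting non-null is a choice made for the sketch; the diff itself keeps the inline assertion.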
2 changes: 1 addition & 1 deletion apps/web/src/actions/evaluationTemplates/create.ts
@@ -2,7 +2,7 @@

import { EvaluationResultableType } from '@latitude-data/core/browser'
import { UnauthorizedError } from '@latitude-data/core/lib/errors'
-import { createEvaluationTemplate } from '@latitude-data/core/services/evaluationTemplates/create'
+import { createEvaluationTemplate } from '@latitude-data/core/services/evaluationLegacyTemplates/create'
import { z } from 'zod'

import { authProcedure } from '../procedures'
2 changes: 1 addition & 1 deletion apps/web/src/actions/evaluationTemplates/destroy.ts
@@ -1,7 +1,7 @@
'use server'

import { UnauthorizedError } from '@latitude-data/core/lib/errors'
-import { destroyEvaluationTemplate } from '@latitude-data/core/services/evaluationTemplates/destroy'
+import { destroyEvaluationTemplate } from '@latitude-data/core/services/evaluationLegacyTemplates/destroy'
import { z } from 'zod'

import { authProcedure } from '../procedures'
2 changes: 1 addition & 1 deletion apps/web/src/actions/evaluations/create.ts
@@ -18,7 +18,7 @@ export const createEvaluationAction = authProcedure
type: z
.nativeEnum(EvaluationMetadataType)
.optional()
-.default(EvaluationMetadataType.LlmAsJudge),
+.default(EvaluationMetadataType.LlmAsJudgeLegacy),
configuration: z.object({
type: z.nativeEnum(EvaluationResultableType),
detail: z
2 changes: 1 addition & 1 deletion apps/web/src/actions/evaluations/createFromPrompt.ts
@@ -32,7 +32,7 @@ export const createEvaluationFromPromptAction = withDocument
workspace: ctx.workspace,
name: input.name,
description: 'AI-generated evaluation',
-type: EvaluationMetadataType.LlmAsJudge,
+type: EvaluationMetadataType.LlmAsJudgeLegacy,
configuration: {
type:
input.type === 'number'
@@ -29,17 +29,27 @@ export const ResultCellContent = ({
evaluation: EvaluationDto
value: unknown
}) => {
-if (evaluation.configuration.type === EvaluationResultableType.Boolean) {
+if (
+(evaluation.configuration ?? evaluation.metadata.configuration)!.type ===
+EvaluationResultableType.Boolean
+) {
return (
<Text.H4 color={(value as boolean) ? 'success' : 'destructive'}>
{String(value)}
</Text.H4>
)
}

-if (evaluation.configuration.type === EvaluationResultableType.Number) {
-const minValue = evaluation.configuration.detail?.range.from ?? 0
-const maxValue = evaluation.configuration.detail?.range.to ?? 10
+if (
+(evaluation.configuration ?? evaluation.metadata.configuration)!.type ===
+EvaluationResultableType.Number
+) {
+const minValue =
+(evaluation.configuration ?? evaluation.metadata.configuration)!.detail
+?.range.from ?? 0
+const maxValue =
+(evaluation.configuration ?? evaluation.metadata.configuration)!.detail
+?.range.to ?? 10

return (
<RangeBadge
@@ -39,17 +39,27 @@ export const ResultCellContent = ({
evaluation: EvaluationDto
value: unknown
}) => {
-if (evaluation.configuration.type === EvaluationResultableType.Boolean) {
+if (
+(evaluation.configuration ?? evaluation.metadata.configuration)!.type ===
+EvaluationResultableType.Boolean
+) {
return (
<Badge variant={value === 'true' ? 'success' : 'destructive'}>
{String(value)}
</Badge>
)
}

-if (evaluation.configuration.type === EvaluationResultableType.Number) {
-const minValue = evaluation.configuration.detail?.range.from ?? 0
-const maxValue = evaluation.configuration.detail?.range.to ?? 10
+if (
+(evaluation.configuration ?? evaluation.metadata.configuration)!.type ===
+EvaluationResultableType.Number
+) {
+const minValue =
+(evaluation.configuration ?? evaluation.metadata.configuration)!.detail
+?.range.from ?? 0
+const maxValue =
+(evaluation.configuration ?? evaluation.metadata.configuration)!.detail
+?.range.to ?? 10

return (
<RangeBadge
@@ -41,7 +41,8 @@ export default function MeanValuePanel({
documentUuid,
onStatusChange,
})
-const config = evaluation.configuration.detail!
+const config = (evaluation.configuration ??
+evaluation.metadata.configuration)!.detail!
const defaultMinValue = config.range.from
const defaultMaxValue = config.range.to
return (
@@ -2,7 +2,7 @@

import { useCallback, useMemo } from 'react'

-import { Evaluation } from '@latitude-data/core/browser'
+import { EvaluationDto } from '@latitude-data/core/browser'
import {
Badge,
ScatterChart,
@@ -20,7 +20,7 @@ export function CostOverResultsChart({
evaluation,
documentUuid,
}: {
-evaluation: Evaluation
+evaluation: EvaluationDto
documentUuid: string
}) {
const { project } = useCurrentProject()
@@ -75,8 +75,10 @@ export function CostOverResultsChart({
xAxis: {
label: 'Average result',
type: 'number',
-min: evaluation.configuration.detail!.range.from,
-max: evaluation.configuration.detail!.range.to,
+min: (evaluation.configuration ??
+evaluation.metadata.configuration)!.detail!.range.from,
+max: (evaluation.configuration ??
+evaluation.metadata.configuration)!.detail!.range.to,
},
yAxis: {
label: 'Average cost',
@@ -2,7 +2,7 @@

import { useCallback, useMemo } from 'react'

-import { Evaluation } from '@latitude-data/core/browser'
+import { EvaluationDto } from '@latitude-data/core/browser'
import {
AreaChart,
Text,
@@ -26,7 +26,7 @@ export function ResultOverTimeChart({
evaluation,
documentUuid,
}: {
-evaluation: Evaluation
+evaluation: EvaluationDto
documentUuid: string
}) {
const { project } = useCurrentProject()
@@ -97,8 +97,10 @@ export function ResultOverTimeChart({
yAxis: {
label: 'Average result',
type: 'number',
-min: evaluation.configuration.detail!.range.from,
-max: evaluation.configuration.detail!.range.to,
+min: (evaluation.configuration ??
+evaluation.metadata.configuration)!.detail!.range.from,
+max: (evaluation.configuration ??
+evaluation.metadata.configuration)!.detail!.range.to,
},
data: parsedData,
tooltipLabel: (item) => {
@@ -1,4 +1,4 @@
-import { Evaluation } from '@latitude-data/core/browser'
+import { EvaluationDto } from '@latitude-data/core/browser'

import { CostOverResultsChart } from './CostOverResults'
import { ResultOverTimeChart } from './ResultsOverTime'
@@ -7,7 +7,7 @@ export function NumericalCharts({
evaluation,
documentUuid,
}: {
-evaluation: Evaluation
+evaluation: EvaluationDto
documentUuid: string
}) {
return (
@@ -13,7 +13,8 @@ export function EvaluationResultsCharts({
documentUuid: string
}) {
const isNumerical =
-evaluation.configuration.type === EvaluationResultableType.Number
+(evaluation.configuration ?? evaluation.metadata.configuration)!.type ===
+EvaluationResultableType.Number

if (!isNumerical) return null

@@ -48,7 +48,8 @@ export default async function ConnectedEvaluationLayout({
uuid: params.commitUuid,
})
const isNumeric =
-evaluation.configuration.type == EvaluationResultableType.Number
+(evaluation.configuration ?? evaluation.metadata.configuration)!.type ==
+EvaluationResultableType.Number

let provider
if (evaluation.metadata.prompt) {
@@ -92,7 +93,12 @@ export default async function ConnectedEvaluationLayout({
{evaluation.name}
</Text.H4M>
<Text.H4M color='foregroundMuted'>
-{TYPE_TEXT[evaluation.configuration.type]}
+{
+TYPE_TEXT[
+(evaluation.configuration ??
+evaluation.metadata.configuration)!.type
+]
+}
</Text.H4M>
<Tooltip
asChild
10 changes: 8 additions & 2 deletions apps/web/src/components/EvaluationAggregatedResult/index.tsx
@@ -136,7 +136,10 @@ export default function EvaluationAggregatedResult({
documentUuid: string
commitUuid: string
}) {
-if (evaluation.configuration.type === EvaluationResultableType.Number) {
+if (
+(evaluation.configuration ?? evaluation.metadata.configuration)!.type ===
+EvaluationResultableType.Number
+) {
return (
<EvaluationMeanValue
evaluation={evaluation}
@@ -146,7 +149,10 @@
)
}

-if (evaluation.configuration.type === EvaluationResultableType.Boolean) {
+if (
+(evaluation.configuration ?? evaluation.metadata.configuration)!.type ===
+EvaluationResultableType.Boolean
+) {
return (
<EvaluationBooleanValue
evaluation={evaluation}
3 changes: 3 additions & 0 deletions packages/core/drizzle/0086_legacy_evaluations_part_1.sql
@@ -0,0 +1,3 @@
+ALTER TABLE "latitude"."evaluations" ALTER COLUMN "configuration" DROP NOT NULL;--> statement-breakpoint
+ALTER TABLE "latitude"."llm_as_judge_evaluation_metadatas" ADD COLUMN "configuration" jsonb;--> statement-breakpoint
+ALTER TABLE "latitude"."llm_as_judge_evaluation_metadatas" DROP COLUMN IF EXISTS "metadata_type";
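
For reference, a sketch of what this migration implies at the Drizzle schema level, showing only the affected columns (table and column names come from the SQL above; the snippet assumes drizzle-orm/pg-core and is illustrative, not the repository's actual schema files):

import { jsonb, pgSchema } from 'drizzle-orm/pg-core'

const latitudeSchema = pgSchema('latitude')

// "configuration" was NOT NULL before this migration; dropping the constraint
// corresponds to removing .notNull() from the column builder.
export const evaluations = latitudeSchema.table('evaluations', {
  configuration: jsonb('configuration'),
})

// New nullable jsonb column on the metadata table; the dropped "metadata_type"
// column simply no longer appears in the definition.
export const llmAsJudgeEvaluationMetadatas = latitudeSchema.table(
  'llm_as_judge_evaluation_metadatas',
  {
    configuration: jsonb('configuration'),
  },
)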