Skip to content

Commit

Permalink
Simpler evaluations — part 2.4
Browse files Browse the repository at this point in the history
  • Loading branch information
csansoon committed Nov 4, 2024
1 parent 203455e commit 6c67335
Show file tree
Hide file tree
Showing 29 changed files with 403 additions and 224 deletions.
8 changes: 7 additions & 1 deletion apps/web/src/actions/copilot/refinePrompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
EvaluationsRepository,
} from '@latitude-data/core/repositories'
import { serialize as serializeEvaluationResult } from '@latitude-data/core/services/evaluationResults/serialize'
import { getEvaluationPrompt } from '@latitude-data/core/services/evaluations/index'
import { env } from '@latitude-data/env'
import { createSdk } from '$/app/(private)/_lib/createSdk'
import { z } from 'zod'
Expand Down Expand Up @@ -76,11 +77,16 @@ export const refinePromptAction = authProcedure
projectId: env.COPILOT_PROJECT_ID,
}).then((r) => r.unwrap())

const evaluationPrompt = await getEvaluationPrompt({
workspace: ctx.workspace,
evaluation,
}).then((r) => r.unwrap())

const result = await sdk.run(env.COPILOT_REFINE_PROMPT_PATH, {
stream: false,
parameters: {
prompt: document.content,
evaluation: evaluation.metadata.prompt,
evaluation: evaluationPrompt,
results: serializedEvaluationResults,
},
})
Expand Down
9 changes: 8 additions & 1 deletion apps/web/src/app/(private)/_data-access/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,20 @@ export const getApiKeysCached = cache(async () => {
return result.unwrap()
})

export const getProviderApiKeyCached = cache(async (name: string) => {
export const getProviderApiKeyByNameCached = cache(async (name: string) => {
const { workspace } = await getCurrentUser()
const scope = new ProviderApiKeysRepository(workspace.id)
const result = await scope.findByName(name)
return result.unwrap()
})

/**
 * Looks up a single provider API key by its numeric id, scoped to the
 * workspace of the currently authenticated user.
 *
 * The lookup is wrapped in `cache` (presumably React's per-request cache —
 * confirm against this module's imports), so repeated calls with the same
 * `id` are expected to reuse the first result.
 *
 * @param id - Identifier of the provider API key to fetch.
 * @returns The unwrapped value of the repository's `find` result.
 */
export const getProviderApiKeyByIdCached = cache(async (id: number) => {
  const currentUser = await getCurrentUser()
  const repository = new ProviderApiKeysRepository(currentUser.workspace.id)
  const lookup = await repository.find(id)
  // `unwrap()` hands back the found key; NOTE(review): it presumably throws
  // when the lookup failed — confirm against the repository's Result type.
  return lookup.unwrap()
})

export const getProviderApiKeysCached = cache(async () => {
const { workspace } = await getCurrentUser()
const scope = new ProviderApiKeysRepository(workspace.id)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import { useEffect, useState } from 'react'

import { readMetadata } from '@latitude-data/compiler'
import { EvaluationDto } from '@latitude-data/core/browser'
import {
EvaluationDto,
EvaluationMetadataType,
} from '@latitude-data/core/browser'
import { ConnectedDocumentWithMetadata } from '@latitude-data/core/repositories'
import { Skeleton, Text } from '@latitude-data/web-ui'
import { formatCostInMillicents } from '$/app/_lib/formatUtils'
Expand Down Expand Up @@ -34,10 +37,14 @@ export default function EvaluationStats({
useConnectedDocuments({ evaluation })

useEffect(() => {
readMetadata({ prompt: evaluation.metadata.prompt }).then((metadata) => {
const metadataModel = (metadata.config['model'] as string) ?? 'Unknown'
setModel(metadataModel)
})
if (evaluation.metadataType === EvaluationMetadataType.LlmAsJudgeAdvanced) {
readMetadata({ prompt: evaluation.metadata.prompt }).then((metadata) => {
const metadataModel = (metadata.config['model'] as string) ?? 'Unknown'
setModel(metadataModel)
})
} else {
setModel(evaluation.metadata.model)
}
}, [evaluation.metadata])

return (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
import {
ChainEventTypes,
EvaluationDto,
EvaluationMetadataLlmAsJudgeAdvanced,
StreamEventTypes,
} from '@latitude-data/core/browser'
import {
Expand Down Expand Up @@ -52,6 +53,10 @@ export default function Chat({
const [responseStream, setResponseStream] = useState<string | undefined>()
const [isStreaming, setIsStreaming] = useState(false)

// TODO: Only advanced evaluations are available right now. Next PR will add separate components for each evaluation type
const prompt = (evaluation.metadata as EvaluationMetadataLlmAsJudgeAdvanced)
.prompt

const addMessageToConversation = useCallback(
(message: ConversationMessage) => {
let newConversation: Conversation
Expand All @@ -75,7 +80,7 @@ export default function Chat({
let messagesCount = 0

const [data, error] = await runPromptAction({
prompt: evaluation.metadata.prompt,
prompt,
parameters,
})
if (error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import { Suspense, useCallback, useEffect, useMemo, useState } from 'react'

import {
EvaluationMetadataLlmAsJudgeAdvanced,
ProviderApiKey,
SERIALIZED_DOCUMENT_LOG_FIELDS,
} from '@latitude-data/core/browser'
Expand Down Expand Up @@ -70,6 +71,10 @@ export default function EvaluationEditor({

if (!evaluation) return null

// TODO: Only advanced evaluations are available right now. Next PR will add separate components for each evaluation type
const prompt = (evaluation.metadata as EvaluationMetadataLlmAsJudgeAdvanced)
.prompt

return (
<div className='flex flex-row w-full h-full gap-8'>
<div className='flex flex-col flex-1 flex-grow flex-shrink gap-2 min-w-0'>
Expand All @@ -82,7 +87,7 @@ export default function EvaluationEditor({
onChangePrompt={onChange}
rightActions={
<>
{value !== evaluation.metadata.prompt && (
{value !== prompt && (
<Button
fancy
disabled={isUpdating || isLoading}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

import { ReactNode } from 'react'

import { EvaluationDto, ProviderApiKey } from '@latitude-data/core/browser'
import {
EvaluationDto,
EvaluationMetadataLlmAsJudgeAdvanced,
ProviderApiKey,
} from '@latitude-data/core/browser'
import { Button, Icon } from '@latitude-data/web-ui'
import Link from 'next/link'
import { useSearchParams } from 'next/navigation'
Expand All @@ -28,6 +32,10 @@ export default function EvaluationEditorLayout({
const searchParams = useSearchParams()
const backUrl = searchParams.get('back')

// TODO: Only advanced evaluations are available right now. Next PR will add separate components for each evaluation type
const prompt = (evaluation.metadata as EvaluationMetadataLlmAsJudgeAdvanced)
.prompt

return (
<div className='h-full flex flex-col gap-y-4 p-6'>
{backUrl && (
Expand All @@ -42,7 +50,7 @@ export default function EvaluationEditorLayout({
<EvaluationEditor
providerApiKeys={providerApiKeys}
evaluationUuid={evaluationUuid}
defaultPrompt={evaluation.metadata.prompt}
defaultPrompt={prompt}
freeRunsCount={freeRunsCount ? Number(freeRunsCount) : undefined}
/>
</div>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,17 @@ export const ResultCellContent = ({
evaluation: EvaluationDto
value: unknown
}) => {
if (
evaluation.metadata.configuration.type === EvaluationResultableType.Boolean
) {
if (evaluation.resultType === EvaluationResultableType.Boolean) {
return (
<Text.H4 color={(value as boolean) ? 'success' : 'destructive'}>
{String(value)}
</Text.H4>
)
}

if (
evaluation.metadata.configuration.type === EvaluationResultableType.Number
) {
const minValue = evaluation.metadata.configuration.detail?.range.from ?? 0
const maxValue = evaluation.metadata.configuration.detail?.range.to ?? 10
if (evaluation.resultType === EvaluationResultableType.Number) {
const minValue = evaluation.resultConfiguration.minValue
const maxValue = evaluation.resultConfiguration.maxValue

return (
<RangeBadge
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,21 +39,17 @@ export const ResultCellContent = ({
evaluation: EvaluationDto
value: unknown
}) => {
if (
evaluation.metadata.configuration.type === EvaluationResultableType.Boolean
) {
if (evaluation.resultType === EvaluationResultableType.Boolean) {
return (
<Badge variant={value === 'true' ? 'success' : 'destructive'}>
{String(value)}
</Badge>
)
}

if (
evaluation.metadata.configuration.type === EvaluationResultableType.Number
) {
const minValue = evaluation.metadata.configuration.detail?.range.from ?? 0
const maxValue = evaluation.metadata.configuration.detail?.range.to ?? 10
if (evaluation.resultType === EvaluationResultableType.Number) {
const minValue = evaluation.resultConfiguration.minValue
const maxValue = evaluation.resultConfiguration.maxValue

return (
<RangeBadge
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

import { useCallback } from 'react'

import { EvaluationDto } from '@latitude-data/core/browser'
import {
EvaluationConfigurationNumerical,
EvaluationDto,
} from '@latitude-data/core/browser'
import { RangeBadge } from '@latitude-data/web-ui'
import useEvaluationResultsMeanValue from '$/stores/evaluationResultCharts/evaluationResultsMeanValue'
import { useDebouncedCallback } from 'use-debounce'
Expand Down Expand Up @@ -40,9 +43,10 @@ export default function MeanValuePanel({
documentUuid,
onStatusChange,
})
const config = evaluation.metadata.configuration.detail!
const defaultMinValue = config.range.from
const defaultMaxValue = config.range.to
const config =
evaluation.resultConfiguration as EvaluationConfigurationNumerical
const defaultMinValue = config.minValue
const defaultMaxValue = config.maxValue
return (
<Panel
label='Current average'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import { Commit, EvaluationDto } from '@latitude-data/core/browser'
import {
Commit,
EvaluationDto,
EvaluationResultableType,
} from '@latitude-data/core/browser'

import MeanValuePanel from './MeanValuePanel'
import ModalValuePanel from './ModalValuePanel'
import TotalsPanels from './TotalsPanels'

export function BigNumberPanels<T extends boolean>({
export function BigNumberPanels({
commit,
evaluation,
documentUuid,
isNumeric,
}: {
isNumeric: T
commit: Commit
evaluation: EvaluationDto
documentUuid: string
Expand All @@ -23,15 +25,13 @@ export function BigNumberPanels<T extends boolean>({
evaluationId={evaluation.id}
/>

{isNumeric && (
{evaluation.resultType == EvaluationResultableType.Number ? (
<MeanValuePanel
evaluation={evaluation}
commitUuid={commit.uuid}
documentUuid={documentUuid}
/>
)}

{!isNumeric && (
) : (
<ModalValuePanel
evaluationId={evaluation.id}
commitUuid={commit.uuid}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

import { useCallback, useMemo } from 'react'

import { EvaluationDto } from '@latitude-data/core/browser'
import {
EvaluationConfigurationNumerical,
EvaluationDto,
} from '@latitude-data/core/browser'
import {
Badge,
ScatterChart,
Expand All @@ -23,6 +26,8 @@ export function CostOverResultsChart({
evaluation: EvaluationDto
documentUuid: string
}) {
const evaluationConfiguration =
evaluation.resultConfiguration as EvaluationConfigurationNumerical
const { project } = useCurrentProject()
const { commit } = useCurrentCommit()
const { isLoading, error, data, refetch } =
Expand Down Expand Up @@ -75,8 +80,8 @@ export function CostOverResultsChart({
xAxis: {
label: 'Average result',
type: 'number',
min: evaluation.metadata.configuration.detail!.range.from,
max: evaluation.metadata.configuration.detail!.range.to,
min: evaluationConfiguration.minValue,
max: evaluationConfiguration.maxValue,
},
yAxis: {
label: 'Average cost',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

import { useCallback, useMemo } from 'react'

import { EvaluationDto } from '@latitude-data/core/browser'
import {
EvaluationConfigurationNumerical,
EvaluationDto,
} from '@latitude-data/core/browser'
import {
AreaChart,
Text,
Expand All @@ -29,6 +32,8 @@ export function ResultOverTimeChart({
evaluation: EvaluationDto
documentUuid: string
}) {
const evaluationConfiguration =
evaluation.resultConfiguration as EvaluationConfigurationNumerical
const { project } = useCurrentProject()
const { commit } = useCurrentCommit()
const { isLoading, error, data, refetch } = useAverageResultOverTime(
Expand Down Expand Up @@ -97,8 +102,8 @@ export function ResultOverTimeChart({
yAxis: {
label: 'Average result',
type: 'number',
min: evaluation.metadata.configuration.detail!.range.from,
max: evaluation.metadata.configuration.detail!.range.to,
min: evaluationConfiguration.minValue,
max: evaluationConfiguration.maxValue,
},
data: parsedData,
tooltipLabel: (item) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@ export function EvaluationResultsCharts({
evaluation: EvaluationDto
documentUuid: string
}) {
const isNumerical =
evaluation.metadata.configuration.type === EvaluationResultableType.Number

if (!isNumerical) return null

if (evaluation.resultType != EvaluationResultableType.Number) return null
return <NumericalCharts evaluation={evaluation} documentUuid={documentUuid} />
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@ import { Commit, EvaluationDto } from '@latitude-data/core/browser'
import { BigNumberPanels } from './BigNumberPanels'
import { EvaluationResultsCharts } from './Charts'

export function MetricsSummary<T extends boolean>({
export function MetricsSummary({
commit,
evaluation,
documentUuid,
isNumeric,
}: {
commit: Commit
evaluation: EvaluationDto
documentUuid: string
isNumeric: T
}) {
return (
<div className='flex gap-6 flex-wrap'>
Expand All @@ -27,7 +25,6 @@ export function MetricsSummary<T extends boolean>({
commit={commit}
evaluation={evaluation}
documentUuid={documentUuid}
isNumeric={isNumeric}
/>
</div>
</div>
Expand Down
Loading

0 comments on commit 6c67335

Please sign in to comment.