Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simpler evaluations — part 2.4 #533

Merged
merged 1 commit into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion apps/web/src/actions/copilot/refinePrompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
EvaluationsRepository,
} from '@latitude-data/core/repositories'
import { serialize as serializeEvaluationResult } from '@latitude-data/core/services/evaluationResults/serialize'
import { getEvaluationPrompt } from '@latitude-data/core/services/evaluations/index'
import { env } from '@latitude-data/env'
import { createSdk } from '$/app/(private)/_lib/createSdk'
import { z } from 'zod'
Expand Down Expand Up @@ -76,11 +77,16 @@ export const refinePromptAction = authProcedure
projectId: env.COPILOT_PROJECT_ID,
}).then((r) => r.unwrap())

const evaluationPrompt = await getEvaluationPrompt({
workspace: ctx.workspace,
evaluation,
}).then((r) => r.unwrap())

const result = await sdk.run(env.COPILOT_REFINE_PROMPT_PATH, {
stream: false,
parameters: {
prompt: document.content,
evaluation: evaluation.metadata.prompt,
evaluation: evaluationPrompt,
results: serializedEvaluationResults,
},
})
Expand Down
9 changes: 8 additions & 1 deletion apps/web/src/app/(private)/_data-access/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,20 @@ export const getApiKeysCached = cache(async () => {
return result.unwrap()
})

export const getProviderApiKeyCached = cache(async (name: string) => {
export const getProviderApiKeyByNameCached = cache(async (name: string) => {
const { workspace } = await getCurrentUser()
const scope = new ProviderApiKeysRepository(workspace.id)
const result = await scope.findByName(name)
return result.unwrap()
})

/**
 * Looks up a single provider API key by its numeric id, scoped to the
 * workspace of the current user. The lookup is wrapped in `cache`.
 *
 * @param id - Identifier passed to the repository's `find` call.
 * @returns The unwrapped value of the repository result.
 */
export const getProviderApiKeyByIdCached = cache(async (id: number) => {
  const currentUser = await getCurrentUser()
  // The repository is built from the current workspace id.
  const repository = new ProviderApiKeysRepository(currentUser.workspace.id)
  const lookup = await repository.find(id)
  // NOTE(review): `unwrap()` presumably throws when the lookup failed — confirm.
  return lookup.unwrap()
})

export const getProviderApiKeysCached = cache(async () => {
const { workspace } = await getCurrentUser()
const scope = new ProviderApiKeysRepository(workspace.id)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import { useEffect, useState } from 'react'

import { readMetadata } from '@latitude-data/compiler'
import { EvaluationDto } from '@latitude-data/core/browser'
import {
EvaluationDto,
EvaluationMetadataType,
} from '@latitude-data/core/browser'
import { ConnectedDocumentWithMetadata } from '@latitude-data/core/repositories'
import { Skeleton, Text } from '@latitude-data/web-ui'
import { formatCostInMillicents } from '$/app/_lib/formatUtils'
Expand Down Expand Up @@ -34,10 +37,14 @@ export default function EvaluationStats({
useConnectedDocuments({ evaluation })

useEffect(() => {
readMetadata({ prompt: evaluation.metadata.prompt }).then((metadata) => {
const metadataModel = (metadata.config['model'] as string) ?? 'Unknown'
setModel(metadataModel)
})
if (evaluation.metadataType === EvaluationMetadataType.LlmAsJudgeAdvanced) {
readMetadata({ prompt: evaluation.metadata.prompt }).then((metadata) => {
csansoon marked this conversation as resolved.
Show resolved Hide resolved
const metadataModel = (metadata.config['model'] as string) ?? 'Unknown'
setModel(metadataModel)
})
} else {
setModel(evaluation.metadata.model)
}
}, [evaluation.metadata])

return (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
import {
ChainEventTypes,
EvaluationDto,
EvaluationMetadataLlmAsJudgeAdvanced,
StreamEventTypes,
} from '@latitude-data/core/browser'
import {
Expand Down Expand Up @@ -52,6 +53,10 @@ export default function Chat({
const [responseStream, setResponseStream] = useState<string | undefined>()
const [isStreaming, setIsStreaming] = useState(false)

// TODO: Only advanced evaluations are available right now. Next PR will add separate components for each evaluation type
const prompt = (evaluation.metadata as EvaluationMetadataLlmAsJudgeAdvanced)
.prompt

const addMessageToConversation = useCallback(
(message: ConversationMessage) => {
let newConversation: Conversation
Expand All @@ -75,7 +80,7 @@ export default function Chat({
let messagesCount = 0

const [data, error] = await runPromptAction({
prompt: evaluation.metadata.prompt,
prompt,
parameters,
})
if (error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import { Suspense, useCallback, useEffect, useMemo, useState } from 'react'

import {
EvaluationMetadataLlmAsJudgeAdvanced,
ProviderApiKey,
SERIALIZED_DOCUMENT_LOG_FIELDS,
} from '@latitude-data/core/browser'
Expand Down Expand Up @@ -70,6 +71,10 @@ export default function EvaluationEditor({

if (!evaluation) return null

// TODO: Only advanced evaluations are available right now. Next PR will add separate components for each evaluation type
const prompt = (evaluation.metadata as EvaluationMetadataLlmAsJudgeAdvanced)
.prompt

return (
<div className='flex flex-row w-full h-full gap-8'>
<div className='flex flex-col flex-1 flex-grow flex-shrink gap-2 min-w-0'>
Expand All @@ -82,7 +87,7 @@ export default function EvaluationEditor({
onChangePrompt={onChange}
rightActions={
<>
{value !== evaluation.metadata.prompt && (
{value !== prompt && (
<Button
fancy
disabled={isUpdating || isLoading}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

import { ReactNode } from 'react'

import { EvaluationDto, ProviderApiKey } from '@latitude-data/core/browser'
import {
EvaluationDto,
EvaluationMetadataLlmAsJudgeAdvanced,
ProviderApiKey,
} from '@latitude-data/core/browser'
import { Button, Icon } from '@latitude-data/web-ui'
import Link from 'next/link'
import { useSearchParams } from 'next/navigation'
Expand All @@ -28,6 +32,10 @@ export default function EvaluationEditorLayout({
const searchParams = useSearchParams()
const backUrl = searchParams.get('back')

// TODO: Only advanced evaluations are available right now. Next PR will add separate components for each evaluation type
const prompt = (evaluation.metadata as EvaluationMetadataLlmAsJudgeAdvanced)
.prompt

return (
<div className='h-full flex flex-col gap-y-4 p-6'>
{backUrl && (
Expand All @@ -42,7 +50,7 @@ export default function EvaluationEditorLayout({
<EvaluationEditor
providerApiKeys={providerApiKeys}
evaluationUuid={evaluationUuid}
defaultPrompt={evaluation.metadata.prompt}
defaultPrompt={prompt}
freeRunsCount={freeRunsCount ? Number(freeRunsCount) : undefined}
/>
</div>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,17 @@ export const ResultCellContent = ({
evaluation: EvaluationDto
value: unknown
}) => {
if (
evaluation.metadata.configuration.type === EvaluationResultableType.Boolean
) {
if (evaluation.resultType === EvaluationResultableType.Boolean) {
return (
<Text.H4 color={(value as boolean) ? 'success' : 'destructive'}>
{String(value)}
</Text.H4>
)
}

if (
evaluation.metadata.configuration.type === EvaluationResultableType.Number
) {
const minValue = evaluation.metadata.configuration.detail?.range.from ?? 0
const maxValue = evaluation.metadata.configuration.detail?.range.to ?? 10
if (evaluation.resultType === EvaluationResultableType.Number) {
const minValue = evaluation.resultConfiguration.minValue
const maxValue = evaluation.resultConfiguration.maxValue
csansoon marked this conversation as resolved.
Show resolved Hide resolved

return (
<RangeBadge
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,21 +39,17 @@ export const ResultCellContent = ({
evaluation: EvaluationDto
value: unknown
}) => {
if (
evaluation.metadata.configuration.type === EvaluationResultableType.Boolean
) {
if (evaluation.resultType === EvaluationResultableType.Boolean) {
return (
<Badge variant={value === 'true' ? 'success' : 'destructive'}>
{String(value)}
</Badge>
)
}

if (
evaluation.metadata.configuration.type === EvaluationResultableType.Number
) {
const minValue = evaluation.metadata.configuration.detail?.range.from ?? 0
const maxValue = evaluation.metadata.configuration.detail?.range.to ?? 10
if (evaluation.resultType === EvaluationResultableType.Number) {
const minValue = evaluation.resultConfiguration.minValue
const maxValue = evaluation.resultConfiguration.maxValue

return (
<RangeBadge
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

import { useCallback } from 'react'

import { EvaluationDto } from '@latitude-data/core/browser'
import {
EvaluationConfigurationNumerical,
EvaluationDto,
} from '@latitude-data/core/browser'
import { RangeBadge } from '@latitude-data/web-ui'
import useEvaluationResultsMeanValue from '$/stores/evaluationResultCharts/evaluationResultsMeanValue'
import { useDebouncedCallback } from 'use-debounce'
Expand Down Expand Up @@ -40,9 +43,10 @@ export default function MeanValuePanel({
documentUuid,
onStatusChange,
})
const config = evaluation.metadata.configuration.detail!
const defaultMinValue = config.range.from
const defaultMaxValue = config.range.to
const config =
evaluation.resultConfiguration as EvaluationConfigurationNumerical
const defaultMinValue = config.minValue
const defaultMaxValue = config.maxValue
return (
<Panel
label='Current average'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import { Commit, EvaluationDto } from '@latitude-data/core/browser'
import {
Commit,
EvaluationDto,
EvaluationResultableType,
} from '@latitude-data/core/browser'

import MeanValuePanel from './MeanValuePanel'
import ModalValuePanel from './ModalValuePanel'
import TotalsPanels from './TotalsPanels'

export function BigNumberPanels<T extends boolean>({
export function BigNumberPanels({
commit,
evaluation,
documentUuid,
isNumeric,
}: {
isNumeric: T
commit: Commit
evaluation: EvaluationDto
documentUuid: string
Expand All @@ -23,15 +25,13 @@ export function BigNumberPanels<T extends boolean>({
evaluationId={evaluation.id}
/>

{isNumeric && (
{evaluation.resultType == EvaluationResultableType.Number ? (
<MeanValuePanel
evaluation={evaluation}
commitUuid={commit.uuid}
documentUuid={documentUuid}
/>
)}

{!isNumeric && (
) : (
<ModalValuePanel
evaluationId={evaluation.id}
commitUuid={commit.uuid}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

import { useCallback, useMemo } from 'react'

import { EvaluationDto } from '@latitude-data/core/browser'
import {
EvaluationConfigurationNumerical,
EvaluationDto,
} from '@latitude-data/core/browser'
import {
Badge,
ScatterChart,
Expand All @@ -23,6 +26,8 @@ export function CostOverResultsChart({
evaluation: EvaluationDto
documentUuid: string
}) {
const evaluationConfiguration =
evaluation.resultConfiguration as EvaluationConfigurationNumerical
const { project } = useCurrentProject()
const { commit } = useCurrentCommit()
const { isLoading, error, data, refetch } =
Expand Down Expand Up @@ -75,8 +80,8 @@ export function CostOverResultsChart({
xAxis: {
label: 'Average result',
type: 'number',
min: evaluation.metadata.configuration.detail!.range.from,
max: evaluation.metadata.configuration.detail!.range.to,
min: evaluationConfiguration.minValue,
max: evaluationConfiguration.maxValue,
},
yAxis: {
label: 'Average cost',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

import { useCallback, useMemo } from 'react'

import { EvaluationDto } from '@latitude-data/core/browser'
import {
EvaluationConfigurationNumerical,
EvaluationDto,
} from '@latitude-data/core/browser'
import {
AreaChart,
Text,
Expand All @@ -29,6 +32,8 @@ export function ResultOverTimeChart({
evaluation: EvaluationDto
documentUuid: string
}) {
const evaluationConfiguration =
evaluation.resultConfiguration as EvaluationConfigurationNumerical
const { project } = useCurrentProject()
const { commit } = useCurrentCommit()
const { isLoading, error, data, refetch } = useAverageResultOverTime(
Expand Down Expand Up @@ -97,8 +102,8 @@ export function ResultOverTimeChart({
yAxis: {
label: 'Average result',
type: 'number',
min: evaluation.metadata.configuration.detail!.range.from,
max: evaluation.metadata.configuration.detail!.range.to,
min: evaluationConfiguration.minValue,
max: evaluationConfiguration.maxValue,
},
data: parsedData,
tooltipLabel: (item) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@ export function EvaluationResultsCharts({
evaluation: EvaluationDto
documentUuid: string
}) {
const isNumerical =
evaluation.metadata.configuration.type === EvaluationResultableType.Number

if (!isNumerical) return null

if (evaluation.resultType != EvaluationResultableType.Number) return null
return <NumericalCharts evaluation={evaluation} documentUuid={documentUuid} />
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@ import { Commit, EvaluationDto } from '@latitude-data/core/browser'
import { BigNumberPanels } from './BigNumberPanels'
import { EvaluationResultsCharts } from './Charts'

export function MetricsSummary<T extends boolean>({
export function MetricsSummary({
commit,
evaluation,
documentUuid,
isNumeric,
}: {
commit: Commit
evaluation: EvaluationDto
documentUuid: string
isNumeric: T
}) {
return (
<div className='flex gap-6 flex-wrap'>
Expand All @@ -27,7 +25,6 @@ export function MetricsSummary<T extends boolean>({
commit={commit}
evaluation={evaluation}
documentUuid={documentUuid}
isNumeric={isNumeric}
csansoon marked this conversation as resolved.
Show resolved Hide resolved
/>
</div>
</div>
Expand Down
Loading
Loading