diff --git a/apps/web/src/actions/connectedEvaluations/fetchConnectedDocuments.ts b/apps/web/src/actions/connectedEvaluations/fetchConnectedDocuments.ts new file mode 100644 index 000000000..7da58f5d1 --- /dev/null +++ b/apps/web/src/actions/connectedEvaluations/fetchConnectedDocuments.ts @@ -0,0 +1,25 @@ +'use server' + +import { ConnectedEvaluationsRepository } from '@latitude-data/core/repositories' +import { z } from 'zod' + +import { authProcedure } from '../procedures' + +export const fetchConnectedDocumentsAction = authProcedure + .createServerAction() + .input( + z.object({ + evaluationId: z.number(), + }), + ) + .handler(async ({ input, ctx }) => { + const connectedEvaluationsScope = new ConnectedEvaluationsRepository( + ctx.workspace.id, + ) + const connectedDocuments = + await connectedEvaluationsScope.getConnectedDocumentsWithMetadata( + input.evaluationId, + ) + + return connectedDocuments.unwrap() + }) diff --git a/apps/web/src/actions/evaluations/connect.test.ts b/apps/web/src/actions/evaluations/connect.test.ts index ace2509c5..e7974eb4a 100644 --- a/apps/web/src/actions/evaluations/connect.test.ts +++ b/apps/web/src/actions/evaluations/connect.test.ts @@ -1,7 +1,4 @@ -import { randomUUID } from 'crypto' - import { - Commit, DocumentVersion, Project, ProviderApiKey, @@ -30,7 +27,6 @@ describe('connectEvaluationsAction', () => { const [_, error] = await connectEvaluationsAction({ projectId: 1, documentUuid: 'fake-document-uuid', - commitUuid: 'fake-commit-uuid', templateIds: [1], evaluationUuids: ['fake-evaluation-uuid'], }) @@ -43,7 +39,6 @@ describe('connectEvaluationsAction', () => { let workspace: Workspace, user: User, document: DocumentVersion, - commit: Commit, provider: ProviderApiKey, project: Project @@ -54,7 +49,6 @@ describe('connectEvaluationsAction', () => { workspace = setup.workspace user = setup.user document = setup.documents[0]! - commit = setup.commit project = setup.project provider = await factories.createProviderApiKey({ @@ -71,9 +65,10 @@ describe('connectEvaluationsAction', () => { }) it('connects evaluations and templates to a document', async () => { - const evaluation = await factories.createEvaluation({ - provider, + const evaluation = await factories.createLlmAsJudgeEvaluation({ + workspace, name: 'Test Evaluation', + prompt: factories.helpers.createPrompt({ provider }), }) const template = await factories.createEvaluationTemplate({ @@ -85,7 +80,6 @@ describe('connectEvaluationsAction', () => { const [result, error] = await connectEvaluationsAction({ projectId: project.id, documentUuid: document.documentUuid, - commitUuid: commit.uuid, templateIds: [template.id], evaluationUuids: [evaluation.uuid], }) @@ -110,7 +104,6 @@ describe('connectEvaluationsAction', () => { const [result, error] = await connectEvaluationsAction({ projectId: project.id, documentUuid: document.documentUuid, - commitUuid: commit.uuid, templateIds: [], evaluationUuids: [], }) @@ -123,19 +116,6 @@ describe('connectEvaluationsAction', () => { const [_, error] = await connectEvaluationsAction({ projectId: project.id, documentUuid: 'non-existent-uuid', - commitUuid: randomUUID(), - templateIds: [], - evaluationUuids: [], - }) - - expect(error!.name).toEqual('NotFoundError') - }) - - it('fails when the commit does not exist', async () => { - const [_, error] = await connectEvaluationsAction({ - projectId: project.id, - documentUuid: document.documentUuid, - commitUuid: randomUUID(), templateIds: [], evaluationUuids: [], }) diff --git a/apps/web/src/actions/evaluations/connect.ts b/apps/web/src/actions/evaluations/connect.ts index 0af31f552..61da116b0 100644 --- a/apps/web/src/actions/evaluations/connect.ts +++ b/apps/web/src/actions/evaluations/connect.ts @@ -1,10 +1,5 @@ 'use server' -import { filterEvaluationTemplatesById } from '@latitude-data/core/data-access' -import { - DocumentVersionsRepository, - EvaluationsRepository, -} from '@latitude-data/core/repositories' import { connectEvaluations } from '@latitude-data/core/services/evaluations/connect' import { z } from 'zod' @@ -15,33 +10,16 @@ export const connectEvaluationsAction = withProject .input( z.object({ documentUuid: z.string(), - commitUuid: z.string(), templateIds: z.array(z.number()), evaluationUuids: z.array(z.string()), }), ) .handler(async ({ ctx, input }) => { - const selectedTemplates = await filterEvaluationTemplatesById( - input.templateIds, - ).then((r) => r.unwrap()) - const scope = new EvaluationsRepository(ctx.workspace.id) - const selectedEvaluations = await scope - .filterByUuids(input.evaluationUuids) - .then((r) => r.unwrap()) - - const documentsScope = new DocumentVersionsRepository(ctx.workspace.id) - const document = await documentsScope - .getDocumentAtCommit({ - projectId: ctx.project.id, - commitUuid: input.commitUuid, - documentUuid: input.documentUuid, - }) - .then((r) => r.unwrap()) - const connectedEvaluations = await connectEvaluations({ - document, - templates: selectedTemplates, - evaluations: selectedEvaluations, + workspace: ctx.workspace, + documentUuid: input.documentUuid, + evaluationUuids: input.evaluationUuids, + templateIds: input.templateIds, }).then((r) => r.unwrap()) return connectedEvaluations diff --git a/apps/web/src/actions/evaluations/destroy.test.ts b/apps/web/src/actions/evaluations/destroy.test.ts index 1546f23d7..0c6c010ff 100644 --- a/apps/web/src/actions/evaluations/destroy.test.ts +++ b/apps/web/src/actions/evaluations/destroy.test.ts @@ -32,7 +32,10 @@ describe('destroyEvaluationAction', () => { name: 'Test Provider', user: userData, }) - const evaluation = await factories.createEvaluation({ provider }) + const evaluation = await factories.createLlmAsJudgeEvaluation({ + workspace, + prompt: factories.helpers.createPrompt({ provider }), + }) evaluationId = evaluation.id }) @@ -63,7 +66,10 @@ describe('destroyEvaluationAction', () => { name: 'Test Provider', user, }) - evaluation = await factories.createEvaluation({ provider }) + evaluation = await factories.createLlmAsJudgeEvaluation({ + workspace, + prompt: factories.helpers.createPrompt({ provider }), + }) mocks.getSession.mockReturnValue({ user: userData, diff --git a/apps/web/src/app/(private)/_data-access/index.ts b/apps/web/src/app/(private)/_data-access/index.ts index de2863523..e61b17441 100644 --- a/apps/web/src/app/(private)/_data-access/index.ts +++ b/apps/web/src/app/(private)/_data-access/index.ts @@ -5,6 +5,7 @@ import { findAllEvaluationTemplates } from '@latitude-data/core/data-access' import { NotFoundError } from '@latitude-data/core/lib/errors' import { CommitsRepository, + ConnectedEvaluationsRepository, DocumentLogsRepository, DocumentVersionsRepository, EvaluationsRepository, @@ -186,3 +187,17 @@ export const getEvaluationsByDocumentUuidCached = cache( return result.unwrap() }, ) + +export const getConnectedDocumentsWithMetadataCached = cache( + async (evaluationId: number) => { + const { workspace } = await getCurrentUser() + const connectedEvaluationsScope = new ConnectedEvaluationsRepository( + workspace.id, + ) + const result = + await connectedEvaluationsScope.getConnectedDocumentsWithMetadata( + evaluationId, + ) + return result.unwrap() + }, +) diff --git a/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/_components/EvaluationTabs/index.tsx b/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/_components/EvaluationTabs/index.tsx index 473945e0c..a05f99f08 100644 --- a/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/_components/EvaluationTabs/index.tsx +++ b/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/_components/EvaluationTabs/index.tsx @@ -1,20 +1,23 @@ 'use client' +import { Evaluation } from '@latitude-data/core/browser' import { TabSelector } from '@latitude-data/web-ui' import { useNavigate } from '$/hooks/useNavigate' import { EvaluationRoutes, ROUTES } from '$/services/routes' import { useSelectedLayoutSegment } from 'next/navigation' export function EvaluationTabSelector({ - evaluationUuid, + evaluation, }: { - evaluationUuid: string + evaluation: Evaluation }) { const router = useNavigate() const selectedSegment = useSelectedLayoutSegment() as EvaluationRoutes | null const pathTo = (evaluationRoute: EvaluationRoutes) => { - const evaluationDetail = ROUTES.evaluations.detail({ uuid: evaluationUuid }) + const evaluationDetail = ROUTES.evaluations.detail({ + uuid: evaluation.uuid, + }) const detail = evaluationDetail[evaluationRoute] ?? evaluationDetail return detail.root } @@ -23,7 +26,7 @@ export function EvaluationTabSelector({
{ - return evaluations?.find((evaluation) => evaluation.uuid === evaluationUuid) - }, [evaluations, evaluationUuid]) - - if (!evaluation) return null +export function EvaluationTitle({ evaluation }: { evaluation: Evaluation }) { return (
{evaluation.name} diff --git a/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/dashboard/_components/ConnectedDocumentsTable/index.tsx b/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/dashboard/_components/ConnectedDocumentsTable/index.tsx new file mode 100644 index 000000000..b542daae5 --- /dev/null +++ b/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/dashboard/_components/ConnectedDocumentsTable/index.tsx @@ -0,0 +1,141 @@ +'use client' + +import { useMemo } from 'react' + +import { HEAD_COMMIT } from '@latitude-data/core/browser' +import type { ConnectedDocumentWithMetadata } from '@latitude-data/core/repositories' +import { + Skeleton, + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, + Text, +} from '@latitude-data/web-ui' +import { formatCostInMillicents } from '$/app/_lib/formatCostInMillicents' +import { useNavigate } from '$/hooks/useNavigate' +import { ROUTES } from '$/services/routes' +import useProjects from '$/stores/projects' + +const ConnectedDocumentTableRow = ({ + document, + onSelect, +}: { + document: ConnectedDocumentWithMetadata + onSelect: () => void +}) => { + const { data: projects, isLoading: isProjectsLoading } = useProjects() + const projectName = useMemo(() => { + if (isProjectsLoading) return null + + return projects?.find((project) => project.id === document.projectId)?.name + }, [document.projectId, isProjectsLoading, projects]) + + const promptPath = useMemo(() => { + return document.path.split('/').slice(0, -1).join('/') + }, [document.path]) + + const promptName = useMemo(() => { + return document.path.split('/').pop() + }, [document.path]) + + const modalValuePercentage = useMemo(() => { + return ((100 * document.modalValueCount) / document.evaluationLogs).toFixed( + 2, + ) + }, [document.modalValueCount, document.evaluationLogs]) + + return ( + + + {promptPath && ( + <> + + {promptPath} + + + {'/'} + + + )} + {promptName} + + + {isProjectsLoading ? ( + + ) : ( + {projectName} + )} + + + {document.modalValue} +
+ + ({modalValuePercentage}%) + +
+ + {document.evaluationLogs} + + + {document.totalTokens} + + + + {formatCostInMillicents(document.costInMillicents ?? 0)} + + +
+ ) +} + +export default function ConnectedDocumentsTable({ + connectedDocumentsWithMetadata, +}: { + connectedDocumentsWithMetadata: ConnectedDocumentWithMetadata[] +}) { + const navigate = useNavigate() + + return ( + + + + Prompt name + Project + Modal value + Logs evaluated + Tokens + Cost + + + + {connectedDocumentsWithMetadata.map((document) => ( + + navigate.push( + ROUTES.projects + .detail({ id: document.projectId }) + .commits.detail({ uuid: HEAD_COMMIT }) + .documents.detail({ uuid: document.documentUuid }) + .evaluations.detail({ uuid: document.evaluationUuid }).root, + ) + } + /> + ))} + +
+ ) +} diff --git a/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/dashboard/_components/EvaluationStats.tsx b/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/dashboard/_components/EvaluationStats.tsx new file mode 100644 index 000000000..190b2a52b --- /dev/null +++ b/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/dashboard/_components/EvaluationStats.tsx @@ -0,0 +1,71 @@ +'use client' + +import { useEffect, useState } from 'react' + +import { readMetadata } from '@latitude-data/compiler' +import { EvaluationDto } from '@latitude-data/core/browser' +import { ConnectedDocumentWithMetadata } from '@latitude-data/core/repositories' +import { Skeleton, Text } from '@latitude-data/web-ui' +import { formatCostInMillicents } from '$/app/_lib/formatCostInMillicents' +import useConnectedDocuments from '$/stores/connectedEvaluations' + +export function Stat({ label, value }: { label: string; value?: string }) { + return ( +
+ {label} + {value == undefined ? ( + + ) : ( + {value} + )} +
+ ) +} + +export default function EvaluationStats({ + evaluation, + connectedDocumentsWithMetadata, +}: { + evaluation: EvaluationDto + connectedDocumentsWithMetadata: ConnectedDocumentWithMetadata[] +}) { + const [model, setModel] = useState() + const { data: connectedDocuments, isLoading: connectedDocumentsLoading } = + useConnectedDocuments({ evaluation }) + + useEffect(() => { + readMetadata({ prompt: evaluation.metadata.prompt }).then((metadata) => { + const metadataModel = (metadata.config['model'] as string) ?? 'Unknown' + setModel(metadataModel) + }) + }, [evaluation.metadata]) + + return ( +
+ + + acc + doc.evaluationLogs, 0) + .toString()} + /> + acc + doc.costInMillicents, + 0, + ), + )} + /> +
+ ) +} diff --git a/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/dashboard/layout.tsx b/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/dashboard/layout.tsx deleted file mode 100644 index 11bf527d1..000000000 --- a/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/dashboard/layout.tsx +++ /dev/null @@ -1,14 +0,0 @@ -import { ReactNode } from 'react' - -import { Text } from '@latitude-data/web-ui' - -export default function DashboardLayout({ children }: { children: ReactNode }) { - return ( - <> - {children} -
- (Really cool dashboard) -
- - ) -} diff --git a/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/dashboard/page.tsx b/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/dashboard/page.tsx index 118f37351..348e77a98 100644 --- a/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/dashboard/page.tsx +++ b/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/dashboard/page.tsx @@ -1,3 +1,30 @@ -export default function DashboardPage() { - return null // --> layout.tsx +import { Container } from '@latitude-data/web-ui' +import { + getConnectedDocumentsWithMetadataCached, + getEvaluationByUuidCached, +} from '$/app/(private)/_data-access' + +import ConnectedDocumentsTable from './_components/ConnectedDocumentsTable' +import EvaluationStats from './_components/EvaluationStats' + +export default async function DashboardPage({ + params, +}: { + params: { evaluationUuid: string } +}) { + const evaluation = await getEvaluationByUuidCached(params.evaluationUuid) + const connectedDocumentsWithMetadata = + await getConnectedDocumentsWithMetadataCached(evaluation.id) + + return ( + + + + + ) } diff --git a/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/layout.tsx b/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/layout.tsx index 5a803ff9d..fc08df6f3 100644 --- a/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/layout.tsx +++ b/apps/web/src/app/(private)/evaluations/(evaluation)/[evaluationUuid]/layout.tsx @@ -1,5 +1,7 @@ import { ReactNode } from 'react' +import { getEvaluationByUuidCached } from '$/app/(private)/_data-access' + import { EvaluationTabSelector } from './_components/EvaluationTabs' import { EvaluationTitle } from './_components/EvaluationTitle' @@ -10,10 +12,12 @@ export default async function EvaluationLayout({ params: { evaluationUuid: string } children: ReactNode }) { + const evaluation = await getEvaluationByUuidCached(params.evaluationUuid) + return (
- - + +
{children}
diff --git a/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/evaluations/connect/page.tsx b/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/evaluations/connect/page.tsx index b44af3a40..2d182141b 100644 --- a/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/evaluations/connect/page.tsx +++ b/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/evaluations/connect/page.tsx @@ -64,7 +64,6 @@ export default function ConnectionEvaluationModal({ templateIds, evaluationUuids, documentUuid, - commitUuid, }) if (data) { diff --git a/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/logs/_components/DocumentLogs/DocumentLogInfo/Metadata.tsx b/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/logs/_components/DocumentLogs/DocumentLogInfo/Metadata.tsx index 73c499e9f..881e62519 100644 --- a/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/logs/_components/DocumentLogs/DocumentLogInfo/Metadata.tsx +++ b/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/logs/_components/DocumentLogs/DocumentLogInfo/Metadata.tsx @@ -9,10 +9,11 @@ import { Text, Tooltip, } from '@latitude-data/web-ui' +import { formatCostInMillicents } from '$/app/_lib/formatCostInMillicents' import useProviderApiKeys from '$/stores/providerApiKeys' import { format } from 'date-fns' -import { formatCostInMillicents, formatDuration } from '../utils' +import { formatDuration } from '../utils' function MetadataItem({ label, diff --git a/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/logs/_components/DocumentLogs/DocumentLogsTable.tsx b/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/logs/_components/DocumentLogs/DocumentLogsTable.tsx index 6b6a20ce1..ab5567fa5 100644 --- a/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/logs/_components/DocumentLogs/DocumentLogsTable.tsx +++ b/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/logs/_components/DocumentLogs/DocumentLogsTable.tsx @@ -12,8 +12,9 @@ import { TableRow, Text, } from '@latitude-data/web-ui' +import { formatCostInMillicents } from '$/app/_lib/formatCostInMillicents' -import { formatCostInMillicents, formatDuration, relativeTime } from './utils' +import { formatDuration, relativeTime } from './utils' export const DocumentLogsTable = ({ documentLogs, diff --git a/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/logs/_components/DocumentLogs/utils.ts b/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/logs/_components/DocumentLogs/utils.ts index dc78ce964..6675c8147 100644 --- a/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/logs/_components/DocumentLogs/utils.ts +++ b/apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/logs/_components/DocumentLogs/utils.ts @@ -24,8 +24,4 @@ function formatDuration(duration: number) { return `${hours > 0 ? `${hours}h ` : ''}${minutes > 0 ? `${minutes}m ` : ''}${seconds}s` } -function formatCostInMillicents(cost_in_millicents: number) { - return `$ ${cost_in_millicents / 100_000}` -} - -export { relativeTime, formatDuration, formatCostInMillicents } +export { relativeTime, formatDuration } diff --git a/apps/web/src/app/_lib/formatCostInMillicents.ts b/apps/web/src/app/_lib/formatCostInMillicents.ts new file mode 100644 index 000000000..418212909 --- /dev/null +++ b/apps/web/src/app/_lib/formatCostInMillicents.ts @@ -0,0 +1,3 @@ +export function formatCostInMillicents(cost_in_millicents: number) { + return `$ ${cost_in_millicents / 100_000}` +} diff --git a/apps/web/src/services/routes.ts b/apps/web/src/services/routes.ts index 36906689d..7926fd90f 100644 --- a/apps/web/src/services/routes.ts +++ b/apps/web/src/services/routes.ts @@ -81,13 +81,20 @@ export const ROUTES = { root: rootDocuments, detail: ({ uuid }: { uuid: string }) => { const root = `${rootDocuments}/${uuid}` + const rootEvaluations = `${root}/evaluations` return { root, [DocumentRoutes.editor]: { root }, [DocumentRoutes.evaluations]: { - root: `${root}/evaluations`, + root: rootEvaluations, connect: { - root: `${root}/evaluations/connect`, + root: `${rootEvaluations}/connect`, + }, + detail: ({ uuid }: { uuid: string }) => { + const root = `${rootEvaluations}/${uuid}` + return { + root, + } }, }, [DocumentRoutes.logs]: { diff --git a/apps/web/src/stores/connectedEvaluations.ts b/apps/web/src/stores/connectedEvaluations.ts new file mode 100644 index 000000000..ae704c9fc --- /dev/null +++ b/apps/web/src/stores/connectedEvaluations.ts @@ -0,0 +1,51 @@ +'use client' + +import type { DocumentVersion, Evaluation } from '@latitude-data/core/browser' +import { useSession, useToast } from '@latitude-data/web-ui' +import { fetchConnectedDocumentsAction } from '$/actions/connectedEvaluations/fetchConnectedDocuments' +import useSWR, { SWRConfiguration } from 'swr' + +export default function useConnectedDocuments( + { + evaluation, + }: { + evaluation: Evaluation + }, + opts: SWRConfiguration = {}, +) { + const { workspace } = useSession() + const { toast } = useToast() + + const { + data = [], + isLoading, + error, + } = useSWR( + ['connectedDocuments', workspace.id, evaluation.id], + async () => { + const [data, error] = await fetchConnectedDocumentsAction({ + evaluationId: evaluation.id, + }) + + if (error) { + console.error(error) + + toast({ + title: 'Error fetching evaluation connections', + description: error.formErrors?.[0] || error.message, + variant: 'destructive', + }) + throw error + } + + return data + }, + opts, + ) + + return { + data, + isLoading, + error, + } +} diff --git a/packages/core/src/data-access/evaluationTemplates.ts b/packages/core/src/data-access/evaluationTemplates.ts index f4dc882df..d73bc3954 100644 --- a/packages/core/src/data-access/evaluationTemplates.ts +++ b/packages/core/src/data-access/evaluationTemplates.ts @@ -32,8 +32,9 @@ export async function findAllEvaluationTemplates(): Promise< export async function findEvaluationTemplateById( id: number, + db = database, ): Promise> { - const result = await database.query.evaluationTemplates.findFirst({ + const result = await db.query.evaluationTemplates.findFirst({ where: eq(evaluationTemplates.id, id), }) diff --git a/packages/core/src/repositories/connectedEvaluationsRepository/getConnectedDocumentsWithMetadata.test.ts b/packages/core/src/repositories/connectedEvaluationsRepository/getConnectedDocumentsWithMetadata.test.ts new file mode 100644 index 000000000..699ee1546 --- /dev/null +++ b/packages/core/src/repositories/connectedEvaluationsRepository/getConnectedDocumentsWithMetadata.test.ts @@ -0,0 +1,284 @@ +import { beforeEach, describe, expect, it } from 'vitest' + +import { ConnectedEvaluationsRepository } from '.' +import { + Commit, + DocumentVersion, + EvaluationDto, + Project, + User, + Workspace, +} from '../../browser' +import { Providers } from '../../constants' +import { mergeCommit } from '../../services/commits' +import { createNewDocument, updateDocument } from '../../services/documents' +import { connectEvaluations } from '../../services/evaluations' +import * as factories from '../../tests/factories' + +function documentContent(text: string) { + return ` +--- +provider: openai +model: foo +--- +${text} +` +} + +async function generateDocumentLogs({ + document, + commit, + parameters, + quantity = 1, +}: { + document: DocumentVersion + commit: Commit + parameters?: Record + quantity?: number +}) { + return await Promise.all( + Array.from({ length: quantity }).map(() => { + return factories + .createDocumentLog({ + document, + commit, + parameters, + }) + .then((r) => r.documentLog) + }), + ) +} + +describe('getConnectedDocumentsWithMetadata', () => { + let user: User + let workspace: Workspace + let project: Project + let commit: Commit + let documents: DocumentVersion[] + let evaluation: EvaluationDto + + const connectEvaluationToDocuments = async ({ + documents: documentsArr, + }: { documents?: DocumentVersion[] } = {}) => { + await Promise.all( + (documentsArr ?? documents).map(async (document) => { + await connectEvaluations({ + workspace, + documentUuid: document.documentUuid, + evaluationUuids: [evaluation.uuid], + }) + }), + ) + } + + beforeEach(async () => { + const projectData = await factories.createProject({ + providers: [{ type: Providers.OpenAI, name: 'openai' }], + documents: { + translate: documentContent('Translate the following text: {{text}}'), + summarize: documentContent('Summarize the following text: {{text}}'), + poet: documentContent( + 'Write a poem based in the following text: {{text}}', + ), + }, + evaluations: [ + { + name: 'Instruction', + prompt: documentContent( + 'Did the assistant follow the instructions correctly? ...', + ), + }, + ], + }) + + ;({ user, workspace, project, commit, documents } = projectData) + + evaluation = projectData.evaluations[0]! + }) + + it('returns an empty list when the evaluation is not connected to any document', async () => { + const connectedEvaluationsScope = new ConnectedEvaluationsRepository( + workspace.id, + ) + const result = await connectedEvaluationsScope + .getConnectedDocumentsWithMetadata(evaluation.id) + .then((r) => r.unwrap()) + + expect(result).toEqual([]) + }) + + it('returns a list with all of the documents that are connected to the evaluation', async () => { + await connectEvaluationToDocuments() + + const connectedEvaluationsScope = new ConnectedEvaluationsRepository( + workspace.id, + ) + const result = await connectedEvaluationsScope + .getConnectedDocumentsWithMetadata(evaluation.id) + .then((r) => r.unwrap()) + + expect(result.length).toEqual(documents.length) + const expectedDocumentUuids = documents.map((d) => d.documentUuid).sort() + const resultDocumentUuids = result.map((r) => r.documentUuid).sort() + + expect(resultDocumentUuids).toEqual(expectedDocumentUuids) + }) + + it('returns only one item per document, independently of the number of versions for each document', async () => { + const { commit: draft1 } = await factories.createDraft({ project, user }) + await updateDocument({ + commit: draft1, + document: documents[0]!, + content: documentContent('Version 2'), + }) + await mergeCommit(draft1) + + const { commit: draft2 } = await factories.createDraft({ project, user }) + await updateDocument({ + commit: draft2, + document: documents[0]!, + content: documentContent('Version 3'), + }) + await mergeCommit(draft2) + await connectEvaluationToDocuments() + + const connectedEvaluationsScope = new ConnectedEvaluationsRepository( + workspace.id, + ) + const result = await connectedEvaluationsScope + .getConnectedDocumentsWithMetadata(evaluation.id) + .then((r) => r.unwrap()) + + expect(result.length).toEqual(documents.length) + const expectedDocumentUuids = documents.map((d) => d.documentUuid).sort() + const resultDocumentUuids = result.map((r) => r.documentUuid).sort() + + expect(resultDocumentUuids).toEqual(expectedDocumentUuids) + }) + + it('does not return documents that only exist in a draft, even when its connected to an evaluation', async () => { + const { commit: draft } = await factories.createDraft({ project, user }) + const draftDocument = await createNewDocument({ + commit: draft, + path: 'foo', + content: documentContent('New document'), + }).then((r) => r.unwrap()) + + await connectEvaluationToDocuments({ documents: [draftDocument] }) + + const connectedEvaluationsScope = new ConnectedEvaluationsRepository( + workspace.id, + ) + const result = await connectedEvaluationsScope + .getConnectedDocumentsWithMetadata(evaluation.id) + .then((r) => r.unwrap()) + + expect(result.length).toEqual(0) + + await mergeCommit(draft) + + const result2 = await connectedEvaluationsScope + .getConnectedDocumentsWithMetadata(evaluation.id) + .then((r) => r.unwrap()) + + expect(result2.length).toEqual(1) + }) + + it('returns the correct metadata for each connected document', async () => { + await connectEvaluationToDocuments({ documents: [documents[0]!] }) + + const logs = await generateDocumentLogs({ + document: documents[0]!, + commit, + parameters: { text: 'foo' }, + quantity: 5, + }) + const results = await Promise.all( + logs.map((documentLog) => { + return factories.createEvaluationResult({ + evaluation, + documentLog, + }) + }), + ) + + const totalTokens = results.reduce( + (acc, r) => acc + r.providerLogs.reduce((acc2, l) => acc2 + l.tokens, 0), + 0, + ) + const totalCost = results.reduce( + (acc, r) => + acc + + r.providerLogs.reduce((acc2, l) => acc2 + l.cost_in_millicents, 0), + 0, + ) + + const connectedEvaluationsScope = new ConnectedEvaluationsRepository( + workspace.id, + ) + const result = await connectedEvaluationsScope + .getConnectedDocumentsWithMetadata(evaluation.id) + .then((r) => r.unwrap()) + + expect(result.length).toEqual(1) + const resultItem = result[0]! + expect(resultItem.documentUuid).toEqual(documents[0]!.documentUuid) + expect(resultItem.evaluationLogs).toEqual(logs.length) + expect(resultItem.totalTokens).toEqual(totalTokens) + expect(resultItem.costInMillicents).toEqual(totalCost) + }) + + it('correctly calculates the modal value', async () => { + await connectEvaluationToDocuments({ documents: [documents[0]!] }) + + const logs = await generateDocumentLogs({ + document: documents[0]!, + commit, + parameters: { text: 'foo' }, + quantity: 10, + }) + await Promise.all( + logs.map((documentLog, index) => { + return factories.createEvaluationResult({ + evaluation, + documentLog, + result: index < 6 ? 'yes' : 'no', // yes should appear 6 times, while no should appear 4 times + }) + }), + ) + + const connectedEvaluationsScope = new ConnectedEvaluationsRepository( + workspace.id, + ) + const result = await connectedEvaluationsScope + .getConnectedDocumentsWithMetadata(evaluation.id) + .then((r) => r.unwrap()) + + expect(result.length).toEqual(1) + const resultItem = result[0]! + expect(resultItem.documentUuid).toEqual(documents[0]!.documentUuid) + expect(resultItem.evaluationLogs).toEqual(logs.length) + expect(resultItem.modalValue).toEqual('yes') + expect(resultItem.modalValueCount).toEqual(6) + }) + + it('returns 0 in metadata when there are no evaluation results', async () => { + await connectEvaluationToDocuments({ documents: [documents[0]!] }) + + const connectedEvaluationsScope = new ConnectedEvaluationsRepository( + workspace.id, + ) + const result = await connectedEvaluationsScope + .getConnectedDocumentsWithMetadata(evaluation.id) + .then((r) => r.unwrap()) + + expect(result.length).toEqual(1) + const resultItem = result[0]! + expect(resultItem.documentUuid).toEqual(documents[0]!.documentUuid) + expect(resultItem.evaluationLogs).toEqual(0) + expect(resultItem.totalTokens).toEqual(0) + expect(resultItem.costInMillicents).toEqual(0) + expect(resultItem.modalValue).toEqual(null) + expect(resultItem.modalValueCount).toEqual(0) + }) +}) diff --git a/packages/core/src/repositories/connectedEvaluationsRepository/index.ts b/packages/core/src/repositories/connectedEvaluationsRepository/index.ts new file mode 100644 index 000000000..3ffcdc8aa --- /dev/null +++ b/packages/core/src/repositories/connectedEvaluationsRepository/index.ts @@ -0,0 +1,217 @@ +import { + count, + desc, + eq, + getTableColumns, + isNotNull, + sql, + sum, +} from 'drizzle-orm' + +import { ConnectedEvaluation, DocumentVersion } from '../../browser' +import { LatitudeError, Result, TypedResult } from '../../lib' +import { + connectedEvaluations, + documentLogs, + evaluationResults, + evaluations, + providerLogs, +} from '../../schema' +import { DocumentVersionsRepository } from '../documentVersionsRepository' +import { EvaluationsRepository } from '../evaluationsRepository' +import Repository from '../repository' + +const tt = getTableColumns(connectedEvaluations) + +export type ConnectedDocumentWithMetadata = DocumentVersion & { + projectId: number // This is automatically provided by the DocumentVersionsRepository + evaluationUuid: string + evaluationLogs: number + totalTokens: number + costInMillicents: number + modalValue: string | null + modalValueCount: number +} + +export class ConnectedEvaluationsRepository extends Repository< + typeof tt, + ConnectedEvaluation +> { + get scope() { + return this.db + .select(tt) + .from(connectedEvaluations) + .innerJoin( + evaluations, + eq(connectedEvaluations.evaluationId, evaluations.id), + ) + .where(eq(evaluations.workspaceId, this.workspaceId)) + .as('connectedEvaluationsScope') + } + + async findByEvaluationId(id: number) { + const result = await this.db + .select() + .from(this.scope) + .where(eq(this.scope.evaluationId, id)) + + return Result.ok(result) + } + + async findByDocumentUuid(uuid: string) { + const result = await this.db + .select() + .from(this.scope) + .where(eq(this.scope.documentUuid, uuid)) + + return Result.ok(result[0]!) + } + + async getConnectedDocumentsWithMetadata( + evaluationId: number, + ): Promise> { + const documentVersionsScope = new DocumentVersionsRepository( + this.workspaceId, + this.db, + ) + const evaluationsScope = new EvaluationsRepository( + this.workspaceId, + this.db, + ) + + const documents = this.db // Last version of each (merged) document + .$with('documents') + .as( + this.db + .selectDistinctOn( + [documentVersionsScope.scope.documentUuid], + documentVersionsScope.scope._.selectedFields, + ) + .from(documentVersionsScope.scope) + .where(isNotNull(documentVersionsScope.scope.mergedAt)) + .orderBy( + documentVersionsScope.scope.documentUuid, + desc(documentVersionsScope.scope.mergedAt), + ), + ) + + const selectedDocuments = this.db.$with('selected_documents').as( + this.db + .with(documents) + .select({ + ...documents._.selectedFields, + evaluationUuid: evaluationsScope.scope.uuid, + }) + .from(documents) + .innerJoin( + this.scope, + eq(this.scope.documentUuid, documents.documentUuid), + ) + .innerJoin( + evaluationsScope.scope, + eq(evaluationsScope.scope.id, this.scope.evaluationId), + ) + .where(eq(this.scope.evaluationId, evaluationId)), + ) + + const selectedEvaluationResults = this.db + .$with('selected_evaluation_results') + .as( + this.db + .select({ + ...getTableColumns(evaluationResults), + ...getTableColumns(documentLogs), + ...getTableColumns(providerLogs), + }) + .from(evaluationResults) + .innerJoin( + documentLogs, + eq(documentLogs.id, evaluationResults.documentLogId), + ) + .innerJoin( + providerLogs, + eq(providerLogs.id, evaluationResults.providerLogId), + ) + .where(eq(evaluationResults.evaluationId, evaluationId)), + ) + + const aggregatedResults = this.db + .with(selectedEvaluationResults) + .select({ + documentUuid: selectedEvaluationResults.documentUuid, + evaluationLogs: count(selectedEvaluationResults.id).as( + 'evaluation_logs', + ), + totalTokens: sum(selectedEvaluationResults.tokens).as('total_tokens'), + costInMillicents: sum(selectedEvaluationResults.cost_in_millicents).as( + 'cost_in_millicents', + ), + modalValue: sql< + string | null + >`MODE() WITHIN GROUP (ORDER BY ${selectedEvaluationResults.result})`.as( + 'modal_value', + ), + }) + .from(selectedEvaluationResults) + .groupBy(selectedEvaluationResults.documentUuid) + .as('aggregated_results') + + const modalValueCount = this.db.$with('modal_value_count').as( + this.db + .with(aggregatedResults, selectedEvaluationResults) + .select({ + documentUuid: aggregatedResults.documentUuid, + modalValueCount: count(selectedEvaluationResults.id).as( + 'modal_value_count', + ), + }) + .from(aggregatedResults) + .innerJoin( + selectedEvaluationResults, + eq( + aggregatedResults.documentUuid, + selectedEvaluationResults.documentUuid, + ), + ) + .where( + eq(selectedEvaluationResults.result, aggregatedResults.modalValue), + ) + .groupBy(aggregatedResults.documentUuid), + ) + + const result = await this.db + .with(selectedDocuments, aggregatedResults, modalValueCount) + .select({ + ...selectedDocuments._.selectedFields, + evaluationLogs: + sql`COALESCE(${aggregatedResults.evaluationLogs}, 0)` + .mapWith(Number) + .as('evaluation_logs'), + totalTokens: sql`COALESCE(${aggregatedResults.totalTokens}, 0)` + .mapWith(Number) + .as('total_tokens'), + costInMillicents: + sql`COALESCE(${aggregatedResults.costInMillicents}, 0)` + .mapWith(Number) + .as('cost_in_millicents'), + modalValue: sql< + string | null + >`COALESCE(${aggregatedResults.modalValue}, NULL)`.as('modal_value'), + modalValueCount: + sql`COALESCE(${modalValueCount.modalValueCount}, 0)` + .mapWith(Number) + .as('modal_value_count'), + }) + .from(selectedDocuments) + .leftJoin( + aggregatedResults, + eq(selectedDocuments.documentUuid, aggregatedResults.documentUuid), + ) + .leftJoin( + modalValueCount, + eq(selectedDocuments.documentUuid, modalValueCount.documentUuid), + ) + + return Result.ok(result.filter((r) => r.deletedAt == null)) // Only show non-removed documents + } +} diff --git a/packages/core/src/repositories/documentVersionsRepository/index.ts b/packages/core/src/repositories/documentVersionsRepository/index.ts index 6ef6cdce6..d573d8d42 100644 --- a/packages/core/src/repositories/documentVersionsRepository/index.ts +++ b/packages/core/src/repositories/documentVersionsRepository/index.ts @@ -50,6 +50,25 @@ export class DocumentVersionsRepository extends Repository< .as('documentVersionsScope') } + async existsDocumentWithUuid(documentUuid: string) { + if ( + !documentUuid.match( + /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/, + ) + ) { + // Note: otherwise the comparison fails with "invalid input syntax for type uuid: 'non-existent-uuid'"" + return false + } + + const result = await this.db + .select() + .from(this.scope) + .where(eq(this.scope.documentUuid, documentUuid)) + .limit(1) + + return result.length > 0 + } + async getDocumentById(documentId: number) { const res = await this.db .select() diff --git a/packages/core/src/repositories/evaluationResultsRepository/findByDocumentUuid.test.ts b/packages/core/src/repositories/evaluationResultsRepository/findByDocumentUuid.test.ts index e1bac6e41..3cd64cfe8 100644 --- a/packages/core/src/repositories/evaluationResultsRepository/findByDocumentUuid.test.ts +++ b/packages/core/src/repositories/evaluationResultsRepository/findByDocumentUuid.test.ts @@ -14,7 +14,10 @@ describe('findEvaluationResultsByDocumentUuid', () => { name: 'openai', user, }) - const evaluation = await factories.createEvaluation({ provider }) + const evaluation = await factories.createLlmAsJudgeEvaluation({ + workspace, + prompt: factories.helpers.createPrompt({ provider }), + }) const { commit: draft } = await factories.createDraft({ project, user }) const { documentVersion: doc } = await factories.createDocumentVersion({ @@ -28,7 +31,7 @@ describe('findEvaluationResultsByDocumentUuid', () => { commit, }) - const evaluationResult = await factories.createEvaluationResult({ + const { evaluationResult } = await factories.createEvaluationResult({ documentLog, evaluation, }) diff --git a/packages/core/src/repositories/evaluationResultsRepository/index.ts b/packages/core/src/repositories/evaluationResultsRepository/index.ts index 2dd2c9c7a..a988297a8 100644 --- a/packages/core/src/repositories/evaluationResultsRepository/index.ts +++ b/packages/core/src/repositories/evaluationResultsRepository/index.ts @@ -1,16 +1,10 @@ import { eq, getTableColumns } from 'drizzle-orm' -import { Commit, DocumentLog, EvaluationResult } from '../../browser' +import { EvaluationResult } from '../../browser' import { Result } from '../../lib' import { documentLogs, evaluationResults, evaluations } from '../../schema' import Repository from '../repository' -export type DocumentLogWithMetadata = DocumentLog & { - commit: Commit - tokens: number | null - cost_in_millicents: number | null -} - const tt = getTableColumns(evaluationResults) export class EvaluationResultsRepository extends Repository< diff --git a/packages/core/src/repositories/evaluationsRepository.ts b/packages/core/src/repositories/evaluationsRepository.ts index 42e6ab34d..7846420d9 100644 --- a/packages/core/src/repositories/evaluationsRepository.ts +++ b/packages/core/src/repositories/evaluationsRepository.ts @@ -4,7 +4,7 @@ import { and, eq, getTableColumns, inArray, sql } from 'drizzle-orm' import { EvaluationDto } from '../browser' import { EvaluationMetadataType } from '../constants' -import { NotFoundError, Result } from '../lib' +import { NotFoundError, PromisedResult, Result } from '../lib' import { connectedEvaluations, evaluations, @@ -85,7 +85,7 @@ export class EvaluationsRepository extends Repository< return Result.ok(result as EvaluationDto[]) } - async filterByUuids(uuids: string[]) { + async filterByUuids(uuids: string[]): PromisedResult { const result = await this.db .select() .from(this.scope) diff --git a/packages/core/src/repositories/index.ts b/packages/core/src/repositories/index.ts index 2fc7d35d9..3d27d5c7a 100644 --- a/packages/core/src/repositories/index.ts +++ b/packages/core/src/repositories/index.ts @@ -10,3 +10,4 @@ export * from './documentLogsRepository' export * from './membershipsRepository' export * from './evaluationsRepository' export * from './datasetsRepository' +export * from './connectedEvaluationsRepository' diff --git a/packages/core/src/services/evaluations/connect.ts b/packages/core/src/services/evaluations/connect.ts index d73de1267..e8b5cacf8 100644 --- a/packages/core/src/services/evaluations/connect.ts +++ b/packages/core/src/services/evaluations/connect.ts @@ -1,77 +1,101 @@ import { - DocumentVersion, - EvaluationDto, - EvaluationMetadataType, + ConnectedEvaluation, EvaluationMode, - EvaluationTemplateWithCategory, + SafeWorkspace, + Workspace, } from '../../browser' import { database } from '../../client' -import { findWorkspaceFromDocument } from '../../data-access' -import { ErrorResult, NotFoundError, Result, Transaction } from '../../lib' +import { + NotFoundError, + PromisedResult, + Result, + Transaction, + TypedResult, +} from '../../lib' +import { + DocumentVersionsRepository, + EvaluationsRepository, +} from '../../repositories' import { connectedEvaluations } from '../../schema' -import { createEvaluation } from './create' +import { importLlmAsJudgeEvaluation } from './create' export function connectEvaluations( { - document, - templates, - evaluations: evaluationToImport, + workspace, + documentUuid, + evaluationUuids, + templateIds, evaluationMode = EvaluationMode.Batch, }: { - document: DocumentVersion - templates: EvaluationTemplateWithCategory[] - evaluations: EvaluationDto[] + workspace: Workspace | SafeWorkspace + documentUuid: string + evaluationUuids?: string[] + templateIds?: number[] evaluationMode?: EvaluationMode }, db = database, -) { - return Transaction.call(async (tx) => { - const workspace = await findWorkspaceFromDocument(document, tx) - if (!workspace) { - return Result.error(new NotFoundError('Workspace not found')) - } - - // TODO: Creating an evaluation is kind of a pita because of the - // polymorphic relation with metadata so we use the creation service which - // causes N db operations (not ideal). Implement a bulkCreate of - // evaluations service. - const results = await Promise.all( - templates.map((template) => - createEvaluation( - { - workspace, - name: template.name, - description: template.description, - type: EvaluationMetadataType.LlmAsJudge, - metadata: { - prompt: template.prompt, - }, - }, - tx, - ), - ), - ) +): PromisedResult { + return Transaction.call( + async (tx): PromisedResult => { + const documentVersionsScope = new DocumentVersionsRepository( + workspace.id, + tx, + ) + const documentExists = + await documentVersionsScope.existsDocumentWithUuid(documentUuid) + if (!documentExists) { + return Result.error(new NotFoundError('Document not found')) + } - const error = Result.findError(results) - if (error) return error as ErrorResult + // TODO: Creating an evaluation is kind of a pita because of the + // polymorphic relation with metadata so we use the creation service which + // causes N db operations (not ideal). Implement a bulkCreate of + // evaluations service. + const importedEvaluations = await Promise.all( + templateIds?.map((templateId) => + importLlmAsJudgeEvaluation({ workspace, templateId }, tx), + ) ?? [], + ) - const evaluations = [ - ...evaluationToImport, - ...results.map((r) => r.unwrap()), - ] - if (!evaluations.length) return Result.ok([]) + const error = Result.findError(importedEvaluations) + if (error) return error as TypedResult - const rezults = await tx - .insert(connectedEvaluations) - .values( - evaluations.map((evaluation) => ({ - evaluationMode, - documentUuid: document.documentUuid, - evaluationId: evaluation.id, - })), + const evaluationsScope = new EvaluationsRepository(workspace.id, tx) + const selectedEvaluations = await evaluationsScope.filterByUuids( + evaluationUuids ?? [], ) - .returning() + if (selectedEvaluations.error) return selectedEvaluations + if (selectedEvaluations.value.length !== evaluationUuids?.length) { + const missingEvaluationUuids = evaluationUuids?.filter( + (uuid) => !selectedEvaluations.value.some((r) => r.uuid === uuid), + ) + return Result.error( + new NotFoundError( + `The following evaluations were not found: ${missingEvaluationUuids?.join(', ')}`, + ), + ) + } + + const allEvaluationIds = [ + ...selectedEvaluations.unwrap().map((r) => r.id), + ...importedEvaluations.map((r) => r.unwrap().id), + ] + + if (!allEvaluationIds.length) return Result.ok([]) + + const rezults = await tx + .insert(connectedEvaluations) + .values( + allEvaluationIds.map((evaluationId) => ({ + evaluationMode, + documentUuid, + evaluationId, + })), + ) + .returning() - return Result.ok(rezults) - }, db) + return Result.ok(rezults) + }, + db, + ) } diff --git a/packages/core/src/services/evaluations/create.ts b/packages/core/src/services/evaluations/create.ts index 9a26a17e7..d6187fea2 100644 --- a/packages/core/src/services/evaluations/create.ts +++ b/packages/core/src/services/evaluations/create.ts @@ -1,5 +1,6 @@ import { EvaluationMetadataType, SafeWorkspace, Workspace } from '../../browser' import { database } from '../../client' +import { findEvaluationTemplateById } from '../../data-access' import { Result, Transaction } from '../../lib' import { evaluations, llmAsJudgeEvaluationMetadatas } from '../../schema' @@ -21,7 +22,7 @@ export async function createEvaluation( case EvaluationMetadataType.LlmAsJudge: metadataTable = await tx .insert(llmAsJudgeEvaluationMetadatas) - .values(metadata as { prompt: string; templateId: number }) + .values(metadata as { prompt: string; templateId?: number }) .returning() break @@ -43,3 +44,29 @@ export async function createEvaluation( return Result.ok({ ...result[0]!, metadata: metadataTable[0]! }) }, db) } + +export async function importLlmAsJudgeEvaluation( + { + workspace, + templateId, + }: { workspace: Workspace | SafeWorkspace; templateId: number }, + db = database, +) { + const templateResult = await findEvaluationTemplateById(templateId, db) + if (templateResult.error) return templateResult + const template = templateResult.unwrap() + + return await createEvaluation( + { + workspace, + name: template.name, + description: template.description, + type: EvaluationMetadataType.LlmAsJudge, + metadata: { + prompt: template.prompt, + templateId: template.id, + }, + }, + db, + ) +} diff --git a/packages/core/src/tests/factories/evaluationResults.ts b/packages/core/src/tests/factories/evaluationResults.ts index 9fd1cec01..b0555b468 100644 --- a/packages/core/src/tests/factories/evaluationResults.ts +++ b/packages/core/src/tests/factories/evaluationResults.ts @@ -18,11 +18,13 @@ import { createProviderLog } from '../../services/providerLogs' export type IEvaluationResultData = { documentLog: DocumentLog evaluation: EvaluationDto + result?: string } export async function createEvaluationResult({ documentLog, evaluation, + result, }: IEvaluationResultData) { const commit = await findCommitById({ id: documentLog.commitId }).then((r) => r.unwrap(), @@ -32,7 +34,7 @@ export async function createEvaluationResult({ const chain = createChain({ prompt: evaluation.metadata.prompt, - parameters: {}, + parameters: {}, // TODO: Generate parameters from documentLog }) const providerLogs: ProviderLog[] = [] @@ -45,7 +47,7 @@ export async function createEvaluationResult({ .findByName(config.provider) .then((r) => r.unwrap()) - mockedResponse = String(faker.number.int({ min: 0, max: 10 })) + mockedResponse = result ?? String(faker.number.int({ min: 0, max: 10 })) const promptTokens = conversation.messages.reduce((acc, message) => { let content = message.content @@ -89,5 +91,8 @@ export async function createEvaluationResult({ result: mockedResponse, }) - return evaluationResult.unwrap() + return { + evaluationResult: evaluationResult.unwrap(), + providerLogs: providerLogs, + } } diff --git a/packages/core/src/tests/factories/evaluations.ts b/packages/core/src/tests/factories/evaluations.ts index e7ed5b752..58041c23d 100644 --- a/packages/core/src/tests/factories/evaluations.ts +++ b/packages/core/src/tests/factories/evaluations.ts @@ -1,30 +1,21 @@ import { faker } from '@faker-js/faker' -import { EvaluationMetadataType, ProviderApiKey } from '../../browser' -import { findWorkspaceFromProviderApiKey } from '../../data-access' +import { EvaluationMetadataType, Workspace } from '../../browser' import { createEvaluation as createEvaluationService } from '../../services/evaluations' -import { helpers } from './helpers' -import { createProviderApiKey, ICreateProvider } from './providerApiKeys' export type IEvaluationData = { - provider: ICreateProvider | ProviderApiKey + workspace: Workspace name?: string description?: string + prompt?: string } -export async function createEvaluation({ - provider: providerData, +export async function createLlmAsJudgeEvaluation({ + workspace, name, description, + prompt, }: IEvaluationData) { - const provider = - 'id' in providerData - ? providerData - : await createProviderApiKey(providerData) - - const workspace = (await findWorkspaceFromProviderApiKey(provider))! - const prompt = helpers.createPrompt({ provider }) - const evaluationResult = await createEvaluationService({ workspace, metadata: { prompt }, diff --git a/packages/core/src/tests/factories/projects.ts b/packages/core/src/tests/factories/projects.ts index 153f7381a..46d862ceb 100644 --- a/packages/core/src/tests/factories/projects.ts +++ b/packages/core/src/tests/factories/projects.ts @@ -1,12 +1,19 @@ import { faker } from '@faker-js/faker' -import type { DocumentVersion, SafeUser, Workspace } from '../../browser' +import type { + DocumentVersion, + Providers, + SafeUser, + Workspace, +} from '../../browser' import { unsafelyGetUser } from '../../data-access' import { CommitsRepository } from '../../repositories' import { mergeCommit } from '../../services/commits' import { createNewDocument, updateDocument } from '../../services/documents' import { createProject as createProjectFn } from '../../services/projects/create' import { createDraft } from './commits' +import { createLlmAsJudgeEvaluation, IEvaluationData } from './evaluations' +import { createProviderApiKey } from './providerApiKeys' import { createWorkspace, type ICreateWorkspace } from './workspaces' export type IDocumentStructure = { [key: string]: string | IDocumentStructure } @@ -39,6 +46,8 @@ export async function flattenDocumentStructure({ export type ICreateProject = { name?: string workspace?: Workspace | ICreateWorkspace + providers?: { type: Providers; name: string }[] + evaluations?: Omit[] documents?: IDocumentStructure } export async function createProject(projectData: Partial = {}) { @@ -67,6 +76,23 @@ export async function createProject(projectData: Partial = {}) { const commitsScope = new CommitsRepository(workspace.id) let commit = (await commitsScope.getFirstCommitForProject(project)).unwrap() + const providers = await Promise.all( + projectData.providers?.map(({ type, name }) => + createProviderApiKey({ + workspace, + user, + type, + name, + }), + ) ?? [], + ) + + const evaluations = await Promise.all( + projectData.evaluations?.map((evaluationData) => + createLlmAsJudgeEvaluation({ workspace, ...evaluationData }), + ) ?? [], + ) + const documents: DocumentVersion[] = [] if (projectData.documents) { @@ -88,5 +114,13 @@ export async function createProject(projectData: Partial = {}) { commit = await mergeCommit(draft).then((r) => r.unwrap()) } - return { project, user, workspace, documents, commit: commit! } + return { + project, + user, + workspace, + providers, + documents, + commit: commit!, + evaluations, + } }