feat(api): add evaluation endpoint for conversations (#526)

- Introduced a new POST /evaluate endpoint to handle evaluation requests for conversations.
- Implemented the evaluateHandler to process evaluation requests, including optional evaluation UUIDs.
- Added unit tests to cover various scenarios such as unauthorized access, evaluating all evaluations, evaluating specific evaluations, handling no evaluations, and invalid conversation UUIDs.
- Updated the SDK to include a new eval method for evaluating conversations.
- Modified job definitions to handle optional batchId in runEvaluationJob.
- Enhanced the evaluateDocumentLog service to enqueue evaluation jobs.

This change allows users to evaluate conversations through a dedicated endpoint, providing flexibility to evaluate all or specific evaluations. It also ensures proper handling of different scenarios and integrates the functionality into the SDK for easier usage.
latitude-dev · Oct 30, 2024 · 81404d1 · 81404d1
1 parent 9123988
commit 81404d1
Show file tree

Hide file tree

Showing 18 changed files with 592 additions and 33 deletions.
diff --git a/apps/gateway/src/routes/api/v2/conversations/[conversationUuid]/handlers/evaluate.test.ts b/apps/gateway/src/routes/api/v2/conversations/[conversationUuid]/handlers/evaluate.test.ts
@@ -0,0 +1,208 @@
+import {
+  ApiKey,
+  DocumentLog,
+  Providers,
+  User,
+  Workspace,
+} from '@latitude-data/core/browser'
+import { unsafelyGetFirstApiKeyByWorkspaceId } from '@latitude-data/core/data-access'
+import {
+  createConnectedEvaluation,
+  createDocumentLog,
+  createLlmAsJudgeEvaluation,
+  createProject,
+  helpers,
+} from '@latitude-data/core/factories'
+import app from '$/routes/app'
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+
+// Mock handles are created with vi.hoisted so the vi.mock factories below
+// can reference them.
+const mocks = vi.hoisted(() => ({
+  evaluateDocumentLog: vi.fn(),
+  queues: {
+    defaultQueue: {
+      jobs: {
+        enqueueRunEvaluationJob: vi.fn(),
+      },
+    },
+  },
+}))
+
+// The service is stubbed, so these tests only check what the handler passes
+// to it and what the endpoint responds with.
+vi.mock('@latitude-data/core/services/documentLogs/evaluate', () => ({
+  evaluateDocumentLog: mocks.evaluateDocumentLog,
+}))
+
+vi.mock('$/jobs', () => ({
+  queues: mocks.queues,
+}))
+
+let route: string
+let body: string
+let token: string
+let headers: Record<string, string>
+let workspace: Workspace
+let apiKey: ApiKey
+let documentLog: DocumentLog
+let user: User
+
+describe('POST /evaluate', () => {
+  describe('unauthorized', () => {
+    it('fails', async () => {
+      const res = await app.request(
+        '/api/v2/conversations/fake-document-log-uuid/evaluate',
+        {
+          method: 'POST',
+          body: JSON.stringify({}),
+        },
+      )
+
+      expect(res.status).toBe(401)
+    })
+  })
+
+  describe('authorized', () => {
+    // Before each test: create a project with one prompt, a document log for
+    // that prompt, and use the workspace's API key as the bearer token.
+    beforeEach(async () => {
+      mocks.evaluateDocumentLog.mockClear()
+      mocks.queues.defaultQueue.jobs.enqueueRunEvaluationJob.mockClear()
+
+      const {
+        workspace: wsp,
+        user: u,
+        commit,
+        documents,
+      } = await createProject({
+        providers: [{ type: Providers.OpenAI, name: 'Latitude' }],
+        documents: {
+          foo: helpers.createPrompt({
+            provider: 'Latitude',
+            model: 'gpt-4o',
+          }),
+        },
+      })
+      user = u
+      const document = documents[0]!
+      workspace = wsp
+      const { documentLog: dl } = await createDocumentLog({
+        document,
+        commit,
+      })
+      documentLog = dl
+      const key = await unsafelyGetFirstApiKeyByWorkspaceId({
+        workspaceId: workspace.id,
+      }).then((r) => r.unwrap())
+      apiKey = key!
+      token = apiKey.token
+
+      route = `/api/v2/conversations/${documentLog.uuid}/evaluate`
+      body = JSON.stringify({})
+      headers = {
+        Authorization: `Bearer ${token}`,
+        'Content-Type': 'application/json',
+      }
+    })
+
+    it('evaluates all evaluations when no evaluationUuids provided', async () => {
+      const evaluation = await createLlmAsJudgeEvaluation({
+        workspace,
+        user,
+      })
+
+      await createConnectedEvaluation({
+        workspace,
+        user,
+        documentUuid: documentLog.documentUuid,
+        evaluationUuid: evaluation.uuid,
+      })
+      const res = await app.request(route, {
+        method: 'POST',
+        body,
+        headers,
+      })
+
+      expect(res.status).toBe(200)
+      expect(await res.json()).toEqual({
+        evaluations: [evaluation.uuid],
+      })
+      expect(mocks.evaluateDocumentLog).toHaveBeenCalledWith(
+        documentLog,
+        workspace,
+        { evaluations: [expect.objectContaining({ id: evaluation.id })] },
+      )
+    })
+
+    it('evaluates only specified evaluations when evaluationUuids provided', async () => {
+      const requested = await createLlmAsJudgeEvaluation({
+        workspace,
+        user,
+      })
+      // Connected to the same prompt but not requested: it must be left out.
+      const notRequested = await createLlmAsJudgeEvaluation({
+        workspace,
+        user,
+      })
+
+      for (const evaluation of [requested, notRequested]) {
+        await createConnectedEvaluation({
+          workspace,
+          user,
+          documentUuid: documentLog.documentUuid,
+          evaluationUuid: evaluation.uuid,
+        })
+      }
+      const res = await app.request(route, {
+        method: 'POST',
+        body: JSON.stringify({ evaluationUuids: [requested.uuid] }),
+        headers,
+      })
+
+      expect(res.status).toBe(200)
+      expect(await res.json()).toEqual({
+        evaluations: [requested.uuid],
+      })
+      // Exact list (not arrayContaining) so an extra evaluation fails the test.
+      expect(mocks.evaluateDocumentLog).toHaveBeenCalledWith(
+        expect.objectContaining({ uuid: documentLog.uuid }),
+        workspace,
+        { evaluations: [expect.objectContaining({ uuid: requested.uuid })] },
+      )
+    })
+
+    it('handles case when no evaluations exist', async () => {
+      // No evaluation is connected to the prompt, so the handler resolves an
+      // empty list on its own; the stubbed service's return value is not used.
+      const res = await app.request(route, {
+        method: 'POST',
+        body,
+        headers,
+      })
+
+      expect(res.status).toBe(200)
+      expect(await res.json()).toEqual({
+        evaluations: [],
+      })
+      expect(mocks.evaluateDocumentLog).toHaveBeenCalledWith(
+        expect.any(Object),
+        workspace,
+        { evaluations: [] },
+      )
+    })
+
+    it('handles invalid conversation uuid', async () => {
+      const res = await app.request(
+        '/api/v2/conversations/invalid-uuid/evaluate',
+        {
+          method: 'POST',
+          body,
+          headers,
+        },
+      )
+
+      expect(res.status).toBe(404)
+    })
+  })
+})
diff --git a/apps/gateway/src/routes/api/v2/conversations/[conversationUuid]/handlers/evaluate.ts b/apps/gateway/src/routes/api/v2/conversations/[conversationUuid]/handlers/evaluate.ts
@@ -0,0 +1,68 @@
+import { zValidator } from '@hono/zod-validator'
+import { DocumentLog, EvaluationDto } from '@latitude-data/core/browser'
+import { NotFoundError } from '@latitude-data/core/lib/errors'
+import {
+  DocumentLogsRepository,
+  EvaluationsRepository,
+} from '@latitude-data/core/repositories'
+import { evaluateDocumentLog } from '@latitude-data/core/services/documentLogs/evaluate'
+import { captureException } from '$/common/sentry'
+import { Factory } from 'hono/factory'
+import { z } from 'zod'
+
+const factory = new Factory()
+
+// JSON body accepted by the endpoint. `evaluationUuids` is optional and a
+// missing body is parsed as `{}`.
+const evaluateBodySchema = z
+  .object({
+    evaluationUuids: z.array(z.string()).optional(),
+  })
+  .optional()
+  .default({})
+
+/**
+ * Handlers for the conversation `evaluate` route.
+ *
+ * Loads the document log identified by the `conversationUuid` path param,
+ * picks the evaluations to run (the ones listed in `evaluationUuids`, or the
+ * ones found for the log's document when the list is absent), hands them to
+ * `evaluateDocumentLog`, and responds with `{ evaluations: string[] }`
+ * holding their UUIDs.
+ *
+ * @throws NotFoundError when the document log lookup fails for any reason.
+ */
+export const evaluateHandler = factory.createHandlers(
+  zValidator('json', evaluateBodySchema),
+  async (c) => {
+    const workspace = c.get('workspace')
+    const { conversationUuid } = c.req.param()
+    const requestedUuids = c.req.valid('json').evaluationUuids
+
+    const documentLogs = new DocumentLogsRepository(workspace.id)
+    let documentLog: DocumentLog
+    try {
+      const lookup = await documentLogs.findByUuid(conversationUuid!)
+      documentLog = lookup.unwrap()
+    } catch (error) {
+      // Every failure, not only a missing row, is reported and surfaced as
+      // "not found".
+      captureException(error as Error)
+
+      throw new NotFoundError('Document log not found')
+    }
+
+    const evaluationsRepo = new EvaluationsRepository(workspace.id)
+    const evaluations: EvaluationDto[] | undefined = requestedUuids
+      ? (await evaluationsRepo.filterByUuids(requestedUuids)).unwrap()
+      : (
+          await evaluationsRepo.findByDocumentUuid(documentLog.documentUuid)
+        ).unwrap()
+
+    // The service's return value is not awaited or inspected here.
+    evaluateDocumentLog(documentLog, workspace, { evaluations })
+
+    const uuids = (evaluations ?? []).map((evaluation) => evaluation.uuid)
+    return c.json({ evaluations: uuids })
+  },
+)
diff --git a/apps/gateway/src/routes/api/v2/conversations/[conversationUuid]/index.ts b/apps/gateway/src/routes/api/v2/conversations/[conversationUuid]/index.ts
@@ -1,6 +1,9 @@
 import { chatHandler } from '$/routes/api/v1/conversations/[conversationUuid]/handlers/chat'
 import { Hono } from 'hono'
 
-export const chatsRouter = new Hono()
+import { evaluateHandler } from './handlers/evaluate'
 
-chatsRouter.post('/:conversationUuid/chat', ...chatHandler)
+export const conversationsRouter = new Hono()
+
+conversationsRouter.post('/:conversationUuid/chat', ...chatHandler)
+conversationsRouter.post('/:conversationUuid/evaluate', ...evaluateHandler)
diff --git a/apps/gateway/src/routes/app.ts b/apps/gateway/src/routes/app.ts
@@ -4,9 +4,9 @@ import rateLimitMiddleware from '$/middlewares/rateLimit'
 import { Hono } from 'hono'
 import { logger } from 'hono/logger'
 
-import { chatsRouter as chatsRouterV1 } from './api/v1/conversations/[conversationUuid]'
+import { chatsRouter as conversationsRouterV1 } from './api/v1/conversations/[conversationUuid]'
 import { documentsRouter as documentsRouterV1 } from './api/v1/projects/[projectId]/versions/[versionUuid]/documents'
-import { chatsRouter as chatsRouterV2 } from './api/v2/conversations/[conversationUuid]'
+import { conversationsRouter as conversationsRouterV2 } from './api/v2/conversations/[conversationUuid]'
 import { documentsRouter as documentsRouterV2 } from './api/v2/projects/[projectId]/versions/[versionUuid]/documents'
 
 const app = new Hono()
@@ -29,14 +29,14 @@ app.route(
   '/api/v1/projects/:projectId/versions/:versionUuid/documents',
   documentsRouterV1,
 )
-app.route('/api/v1/conversations', chatsRouterV1)
+app.route('/api/v1/conversations', conversationsRouterV1)
 
 // v2
 app.route(
   '/api/v2/projects/:projectId/versions/:versionUuid/documents',
   documentsRouterV2,
 )
-app.route('/api/v2/conversations', chatsRouterV2)
+app.route('/api/v2/conversations', conversationsRouterV2)
 
 // Must be the last one!
 app.onError(errorHandlerMiddleware)

diff --git a/docs/guides/prompt-manager/api-access.mdx b/docs/guides/prompt-manager/api-access.mdx
@@ -191,7 +191,7 @@ The response contains the document details along with its configuration.
 
 **Response Body:**
 
-````json
+```json
 {
   "id": "document-id",
   "name": "Document Name",
@@ -226,7 +226,7 @@ The API uses standard HTTP status codes. In case of an error, the response body
     "message": "Error description"
   }
 }
-````
+```
 
 3. #### Get a Document
 
@@ -286,3 +286,29 @@ curl -X GET "https://gateway.latitude.so/api/v2/projects/123/versions/live/docum
   }
 }
 ```
+
+#### 4. Evaluate a Conversation
+
+Evaluate a conversation using configured evaluations.
+
+**Endpoint:** `POST /conversations/{conversationUuid}/evaluate`
+
+**Path Parameters:**
+
+- `conversationUuid`: UUID of the conversation to evaluate
+
+**Request Body:** (`evaluationUuids` is optional; when omitted, all evaluations connected to the conversation's prompt are run)
+
+```json
+{
+  "evaluationUuids": ["evaluation-uuid-1", "evaluation-uuid-2"]
+}
+```
+
+**Response:** the UUIDs of the evaluations that will be run.
+
+```json
+{
+  "evaluations": ["evaluation-uuid-1", "evaluation-uuid-2"]
+}
+```