Skip to content

Commit

Permalink
feat(api): add evaluation endpoint for conversations (#526)
Browse files Browse the repository at this point in the history
- Introduced a new POST /evaluate endpoint to handle evaluation requests for conversations.
- Implemented the evaluateHandler to process evaluation requests, including optional evaluation UUIDs.
- Added unit tests to cover various scenarios such as unauthorized access, evaluating all evaluations, evaluating specific evaluations, handling no evaluations, and invalid conversation UUIDs.
- Updated the SDK to include a new eval method for evaluating conversations.
- Modified job definitions to handle optional batchId in runEvaluationJob.
- Enhanced the evaluateDocumentLog service to enqueue evaluation jobs.

This change allows users to evaluate conversations through a dedicated endpoint, providing flexibility to evaluate all or specific evaluations. It also ensures proper handling of different scenarios and integrates the functionality into the SDK for easier usage.
  • Loading branch information
geclos authored Oct 30, 2024
1 parent 9123988 commit 81404d1
Show file tree
Hide file tree
Showing 18 changed files with 592 additions and 33 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
import {
ApiKey,
DocumentLog,
Providers,
User,
Workspace,
} from '@latitude-data/core/browser'
import { unsafelyGetFirstApiKeyByWorkspaceId } from '@latitude-data/core/data-access'
import {
createConnectedEvaluation,
createDocumentLog,
createLlmAsJudgeEvaluation,
createProject,
helpers,
} from '@latitude-data/core/factories'
import { Result } from '@latitude-data/core/lib/Result'
import app from '$/routes/app'
import { beforeEach, describe, expect, it, vi } from 'vitest'

// Mock functions shared by the vi.mock factories below and by the assertions in
// the suite. They are created through vi.hoisted so the factories can reference
// them.
const mocks = vi.hoisted(() => ({
// Stand-in for the evaluateDocumentLog service called by the handler.
evaluateDocumentLog: vi.fn(),
// Stand-in for the `queues` export of '$/jobs'.
queues: {
defaultQueue: {
jobs: {
enqueueRunEvaluationJob: vi.fn(),
},
},
},
}))

// Replace the evaluateDocumentLog export of the service module with the mock.
vi.mock('@latitude-data/core/services/documentLogs/evaluate', () => ({
evaluateDocumentLog: mocks.evaluateDocumentLog,
}))

// Replace the `queues` export of the jobs module with the mocked queue object.
vi.mock('$/jobs', () => ({
queues: mocks.queues,
}))

// Shared fixtures; every one of them is (re)assigned in the `authorized`
// beforeEach before the authorized tests run.
let route: string
let body: string
let token: string
let headers: Record<string, string>
let workspace: Workspace
let apiKey: ApiKey
let documentLog: DocumentLog
let user: User

describe('POST /evaluate', () => {
  describe('unauthorized', () => {
    it('fails', async () => {
      // No Authorization header is sent, so the request must be rejected.
      const response = await app.request(
        '/api/v2/conversations/fake-document-log-uuid/evaluate',
        {
          method: 'POST',
          body: JSON.stringify({}),
        },
      )

      expect(response.status).toBe(401)
    })
  })

  describe('authorized', () => {
    /** Sends an authorized POST with the given JSON payload (defaults to the fixture route). */
    const postEvaluate = (payload: string, path: string = route) =>
      app.request(path, {
        method: 'POST',
        body: payload,
        headers,
      })

    /** Creates an LLM-as-judge evaluation and connects it to the fixture document. */
    const createConnectedJudgeEvaluation = async () => {
      const created = await createLlmAsJudgeEvaluation({
        workspace,
        user,
      })

      await createConnectedEvaluation({
        workspace,
        user,
        documentUuid: documentLog.documentUuid,
        evaluationUuid: created.uuid,
      })

      return created
    }

    beforeEach(async () => {
      mocks.evaluateDocumentLog.mockClear()
      mocks.queues.defaultQueue.jobs.enqueueRunEvaluationJob.mockClear()

      // Fresh project with a single prompt document for every test.
      const project = await createProject({
        providers: [{ type: Providers.OpenAI, name: 'Latitude' }],
        documents: {
          foo: helpers.createPrompt({
            provider: 'Latitude',
            model: 'gpt-4o',
          }),
        },
      })
      user = project.user
      workspace = project.workspace

      const logFixture = await createDocumentLog({
        document: project.documents[0]!,
        commit: project.commit,
      })
      documentLog = logFixture.documentLog

      const keyResult = await unsafelyGetFirstApiKeyByWorkspaceId({
        workspaceId: workspace.id,
      })
      apiKey = keyResult.unwrap()!
      token = apiKey.token

      route = `/api/v2/conversations/${documentLog.uuid}/evaluate`
      body = JSON.stringify({})
      headers = {
        Authorization: `Bearer ${token}`,
        'Content-Type': 'application/json',
      }
    })

    it('evaluates all evaluations when no evaluationUuids provided', async () => {
      const evaluation = await createConnectedJudgeEvaluation()

      const response = await postEvaluate(body)

      expect(response.status).toBe(200)
      expect(await response.json()).toEqual({
        evaluations: [evaluation.uuid],
      })
      expect(mocks.evaluateDocumentLog).toHaveBeenCalledWith(
        documentLog,
        workspace,
        { evaluations: [expect.objectContaining({ id: evaluation.id })] },
      )
    })

    it('evaluates only specified evaluations when evaluationUuids provided', async () => {
      const evaluation = await createConnectedJudgeEvaluation()

      const response = await postEvaluate(
        JSON.stringify({ evaluationUuids: [evaluation.uuid] }),
      )

      expect(response.status).toBe(200)
      expect(await response.json()).toEqual({
        evaluations: [evaluation.uuid],
      })
      expect(mocks.evaluateDocumentLog).toHaveBeenCalledWith(
        expect.any(Object), // documentLog
        workspace,
        {
          evaluations: expect.arrayContaining([
            expect.objectContaining({ uuid: evaluation.uuid }),
          ]),
        },
      )
    })

    it('handles case when no evaluations exist', async () => {
      mocks.evaluateDocumentLog.mockImplementationOnce(() =>
        Result.ok({ evaluations: [] }),
      )

      const response = await postEvaluate(body)

      expect(response.status).toBe(200)
      expect(await response.json()).toEqual({
        evaluations: [],
      })
      expect(mocks.evaluateDocumentLog).toHaveBeenCalledWith(
        expect.any(Object),
        workspace,
        { evaluations: [] },
      )
    })

    it('handles invalid conversation uuid', async () => {
      const response = await postEvaluate(
        body,
        '/api/v2/conversations/invalid-uuid/evaluate',
      )

      expect(response.status).toBe(404)
    })
  })
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import { zValidator } from '@hono/zod-validator'
import { DocumentLog, EvaluationDto } from '@latitude-data/core/browser'
import { NotFoundError } from '@latitude-data/core/lib/errors'
import {
DocumentLogsRepository,
EvaluationsRepository,
} from '@latitude-data/core/repositories'
import { evaluateDocumentLog } from '@latitude-data/core/services/documentLogs/evaluate'
import { captureException } from '$/common/sentry'
import { Factory } from 'hono/factory'
import { z } from 'zod'

const factory = new Factory()

/**
 * Handlers for `POST /:conversationUuid/evaluate`.
 *
 * The JSON body is optional; when present it may carry `evaluationUuids`, an
 * array of strings selecting which evaluations to use. Without it, the
 * evaluations are looked up by the document log's `documentUuid`.
 *
 * Responds with `{ evaluations: string[] }`, the UUIDs of the selected
 * evaluations.
 *
 * @throws NotFoundError when the document log lookup fails for any reason
 *   (the underlying error is forwarded to `captureException` first).
 */
export const evaluateHandler = factory.createHandlers(
  zValidator(
    'json',
    z
      .object({
        evaluationUuids: z.array(z.string()).optional(),
      })
      .optional()
      .default({}),
  ),
  async (c) => {
    const { conversationUuid } = c.req.param()
    const { evaluationUuids } = c.req.valid('json')
    const workspace = c.get('workspace')

    // Resolve the conversation (document log) inside the caller's workspace.
    const documentLogs = new DocumentLogsRepository(workspace.id)
    let documentLog: DocumentLog
    try {
      const lookup = await documentLogs.findByUuid(conversationUuid!)
      documentLog = lookup.unwrap()
    } catch (e) {
      captureException(e as Error)

      throw new NotFoundError('Document log not found')
    }

    // Explicit UUIDs win; otherwise use the evaluations found for the document.
    const evaluationsRepo = new EvaluationsRepository(workspace.id)
    const evaluations: EvaluationDto[] | undefined = evaluationUuids
      ? (await evaluationsRepo.filterByUuids(evaluationUuids)).unwrap()
      : (
          await evaluationsRepo.findByDocumentUuid(documentLog.documentUuid)
        ).unwrap()

    // The service's return value is not awaited or inspected here.
    evaluateDocumentLog(documentLog, workspace, { evaluations })

    const uuids = evaluations?.map((evaluation) => evaluation.uuid) ?? []
    return c.json({ evaluations: uuids })
  },
)
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
// Router for conversation-scoped endpoints; each route is keyed by
// `:conversationUuid` and registers the handler array produced by the
// corresponding handlers module.
// NOTE(review): both `chatsRouter` and `conversationsRouter` are declared in this
// span — this looks like a rename with old and new lines interleaved; confirm
// which declarations are current before relying on either export.
import { chatHandler } from '$/routes/api/v1/conversations/[conversationUuid]/handlers/chat'
import { Hono } from 'hono'

export const chatsRouter = new Hono()
import { evaluateHandler } from './handlers/evaluate'

chatsRouter.post('/:conversationUuid/chat', ...chatHandler)
export const conversationsRouter = new Hono()

// POST /:conversationUuid/chat -> chatHandler
conversationsRouter.post('/:conversationUuid/chat', ...chatHandler)
// POST /:conversationUuid/evaluate -> evaluateHandler
conversationsRouter.post('/:conversationUuid/evaluate', ...evaluateHandler)
8 changes: 4 additions & 4 deletions apps/gateway/src/routes/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ import rateLimitMiddleware from '$/middlewares/rateLimit'
import { Hono } from 'hono'
import { logger } from 'hono/logger'

import { chatsRouter as chatsRouterV1 } from './api/v1/conversations/[conversationUuid]'
import { chatsRouter as conversationsRouterV1 } from './api/v1/conversations/[conversationUuid]'
import { documentsRouter as documentsRouterV1 } from './api/v1/projects/[projectId]/versions/[versionUuid]/documents'
import { chatsRouter as chatsRouterV2 } from './api/v2/conversations/[conversationUuid]'
import { conversationsRouter as conversationsRouterV2 } from './api/v2/conversations/[conversationUuid]'
import { documentsRouter as documentsRouterV2 } from './api/v2/projects/[projectId]/versions/[versionUuid]/documents'

const app = new Hono()
Expand All @@ -29,14 +29,14 @@ app.route(
'/api/v1/projects/:projectId/versions/:versionUuid/documents',
documentsRouterV1,
)
app.route('/api/v1/conversations', chatsRouterV1)
app.route('/api/v1/conversations', conversationsRouterV1)

// v2
app.route(
'/api/v2/projects/:projectId/versions/:versionUuid/documents',
documentsRouterV2,
)
app.route('/api/v2/conversations', chatsRouterV2)
app.route('/api/v2/conversations', conversationsRouterV2)

// Must be the last one!
app.onError(errorHandlerMiddleware)
Expand Down
30 changes: 28 additions & 2 deletions docs/guides/prompt-manager/api-access.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ The response contains the document details along with its configuration.

**Response Body:**

````json
```json
{
"id": "document-id",
"name": "Document Name",
Expand Down Expand Up @@ -226,7 +226,7 @@ The API uses standard HTTP status codes. In case of an error, the response body
"message": "Error description"
}
}
````
```

#### 3. Get a Document

Expand Down Expand Up @@ -286,3 +286,29 @@ curl -X GET "https://gateway.latitude.so/api/v2/projects/123/versions/live/docum
}
}
```

#### 4. Evaluate a Conversation

Evaluate a conversation using configured evaluations.

**Endpoint:** `POST /conversations/{conversationUuid}/evaluate`

**Path Parameters:**

- `conversationUuid`: UUID of the conversation to evaluate

**Request Body:**

```json
{
"evaluationUuids": ["evaluation-uuid-1", "evaluation-uuid-2"] // optional; when omitted, all evaluations connected to the conversation's prompt are used
}
```

**Response:**

```json
{
"evaluations": ["evaluation-uuid-1", "evaluation-uuid-2"] // array of evaluation UUIDs that will be run
}
```
Loading

0 comments on commit 81404d1

Please sign in to comment.