-
Notifications
You must be signed in to change notification settings - Fork 63
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(api): add evaluation endpoint for conversations (#526)
- Introduced a new POST /evaluate endpoint to handle evaluation requests for conversations.
- Implemented the evaluateHandler to process evaluation requests, including optional evaluation UUIDs.
- Added unit tests to cover various scenarios such as unauthorized access, evaluating all evaluations, evaluating specific evaluations, handling no evaluations, and invalid conversation UUIDs.
- Updated the SDK to include a new eval method for evaluating conversations.
- Modified job definitions to handle optional batchId in runEvaluationJob.
- Enhanced the evaluateDocumentLog service to enqueue evaluation jobs.

This change allows users to evaluate conversations through a dedicated endpoint, providing flexibility to evaluate all or specific evaluations. It also ensures proper handling of different scenarios and integrates the functionality into the SDK for easier usage.
- Loading branch information
Showing
18 changed files
with
592 additions
and
33 deletions.
There are no files selected for viewing
201 changes: 201 additions & 0 deletions
201
apps/gateway/src/routes/api/v2/conversations/[conversationUuid]/handlers/evaluate.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
import { | ||
ApiKey, | ||
DocumentLog, | ||
Providers, | ||
User, | ||
Workspace, | ||
} from '@latitude-data/core/browser' | ||
import { unsafelyGetFirstApiKeyByWorkspaceId } from '@latitude-data/core/data-access' | ||
import { | ||
createConnectedEvaluation, | ||
createDocumentLog, | ||
createLlmAsJudgeEvaluation, | ||
createProject, | ||
helpers, | ||
} from '@latitude-data/core/factories' | ||
import { Result } from '@latitude-data/core/lib/Result' | ||
import app from '$/routes/app' | ||
import { beforeEach, describe, expect, it, vi } from 'vitest' | ||
|
||
// Mock handles are built inside `vi.hoisted` so that the `vi.mock` factories
// below can reference them.
const mocks = vi.hoisted(() => {
  const evaluateDocumentLog = vi.fn()
  const enqueueRunEvaluationJob = vi.fn()

  return {
    evaluateDocumentLog,
    queues: {
      defaultQueue: {
        jobs: { enqueueRunEvaluationJob },
      },
    },
  }
})

// Replace the evaluate service with the spy above.
vi.mock('@latitude-data/core/services/documentLogs/evaluate', () => {
  return { evaluateDocumentLog: mocks.evaluateDocumentLog }
})

// Replace the job queues with the stub above.
vi.mock('$/jobs', () => {
  return { queues: mocks.queues }
})

// Shared fixtures; (re)assigned in the `authorized` suite's `beforeEach`.
let workspace: Workspace
let user: User
let apiKey: ApiKey
let documentLog: DocumentLog
let token: string
let route: string
let body: string
let headers: Record<string, string>
|
||
describe('POST /evaluate', () => {
  describe('unauthorized', () => {
    it('fails', async () => {
      // The request carries no Authorization header.
      const response = await app.request(
        '/api/v2/conversations/fake-document-log-uuid/evaluate',
        { method: 'POST', body: JSON.stringify({}) },
      )

      expect(response.status).toBe(401)
    })
  })

  describe('authorized', () => {
    // POSTs `payload` to `path` using the shared authenticated headers.
    const post = (path: string, payload: string) =>
      app.request(path, { method: 'POST', body: payload, headers })

    // Creates an LLM-as-judge evaluation and connects it to the document
    // that the shared `documentLog` belongs to.
    const createEvaluationConnectedToDocument = async () => {
      const evaluation = await createLlmAsJudgeEvaluation({ workspace, user })

      await createConnectedEvaluation({
        workspace,
        user,
        documentUuid: documentLog.documentUuid,
        evaluationUuid: evaluation.uuid,
      })

      return evaluation
    }

    beforeEach(async () => {
      mocks.evaluateDocumentLog.mockClear()
      mocks.queues.defaultQueue.jobs.enqueueRunEvaluationJob.mockClear()

      const project = await createProject({
        providers: [{ type: Providers.OpenAI, name: 'Latitude' }],
        documents: {
          foo: helpers.createPrompt({
            provider: 'Latitude',
            model: 'gpt-4o',
          }),
        },
      })
      user = project.user
      workspace = project.workspace

      const createdLog = await createDocumentLog({
        document: project.documents[0]!,
        commit: project.commit,
      })
      documentLog = createdLog.documentLog

      const keyResult = await unsafelyGetFirstApiKeyByWorkspaceId({
        workspaceId: workspace.id,
      })
      apiKey = keyResult.unwrap()!
      token = apiKey.token

      route = `/api/v2/conversations/${documentLog.uuid}/evaluate`
      body = JSON.stringify({})
      headers = {
        Authorization: `Bearer ${token}`,
        'Content-Type': 'application/json',
      }
    })

    it('evaluates all evaluations when no evaluationUuids provided', async () => {
      const evaluation = await createEvaluationConnectedToDocument()

      const response = await post(route, body)

      expect(response.status).toBe(200)
      expect(await response.json()).toEqual({
        evaluations: [evaluation.uuid],
      })
      expect(mocks.evaluateDocumentLog).toHaveBeenCalledWith(
        documentLog,
        workspace,
        { evaluations: [expect.objectContaining({ id: evaluation.id })] },
      )
    })

    it('evaluates only specified evaluations when evaluationUuids provided', async () => {
      const evaluation = await createEvaluationConnectedToDocument()
      const payload = JSON.stringify({ evaluationUuids: [evaluation.uuid] })

      const response = await post(route, payload)

      expect(response.status).toBe(200)
      expect(await response.json()).toEqual({
        evaluations: [evaluation.uuid],
      })
      expect(mocks.evaluateDocumentLog).toHaveBeenCalledWith(
        expect.any(Object), // documentLog
        workspace,
        {
          evaluations: expect.arrayContaining([
            expect.objectContaining({ uuid: evaluation.uuid }),
          ]),
        },
      )
    })

    it('handles case when no evaluations exist', async () => {
      mocks.evaluateDocumentLog.mockImplementationOnce(() =>
        Result.ok({ evaluations: [] }),
      )

      const response = await post(route, body)

      expect(response.status).toBe(200)
      expect(await response.json()).toEqual({
        evaluations: [],
      })
      expect(mocks.evaluateDocumentLog).toHaveBeenCalledWith(
        expect.any(Object),
        workspace,
        { evaluations: [] },
      )
    })

    it('handles invalid conversation uuid', async () => {
      const response = await post(
        '/api/v2/conversations/invalid-uuid/evaluate',
        body,
      )

      expect(response.status).toBe(404)
    })
  })
})
58 changes: 58 additions & 0 deletions
58
apps/gateway/src/routes/api/v2/conversations/[conversationUuid]/handlers/evaluate.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import { zValidator } from '@hono/zod-validator' | ||
import { DocumentLog, EvaluationDto } from '@latitude-data/core/browser' | ||
import { NotFoundError } from '@latitude-data/core/lib/errors' | ||
import { | ||
DocumentLogsRepository, | ||
EvaluationsRepository, | ||
} from '@latitude-data/core/repositories' | ||
import { evaluateDocumentLog } from '@latitude-data/core/services/documentLogs/evaluate' | ||
import { captureException } from '$/common/sentry' | ||
import { Factory } from 'hono/factory' | ||
import { z } from 'zod' | ||
|
||
const factory = new Factory()

/**
 * Handlers for `POST /:conversationUuid/evaluate`.
 *
 * Request body (JSON, optional): `{ evaluationUuids?: string[] }`.
 * - When `evaluationUuids` is present, the evaluations are looked up by
 *   those UUIDs (an empty array selects nothing).
 * - When it is absent, the evaluations returned by `findByDocumentUuid`
 *   for the log's document are used.
 *
 * The selected evaluations are handed to `evaluateDocumentLog`, and the
 * response is `{ evaluations: string[] }` with their UUIDs.
 *
 * @throws NotFoundError when the document log cannot be loaded.
 */
export const evaluateHandler = factory.createHandlers(
  zValidator(
    'json',
    z
      .object({
        evaluationUuids: z.array(z.string()).optional(),
      })
      .optional()
      .default({}),
  ),
  async (c) => {
    const { conversationUuid } = c.req.param()
    const { evaluationUuids } = c.req.valid('json')
    const workspace = c.get('workspace')

    if (!conversationUuid) {
      throw new NotFoundError('Document log not found')
    }

    const documentLogsRepo = new DocumentLogsRepository(workspace.id)
    let documentLog: DocumentLog
    try {
      const lookup = await documentLogsRepo.findByUuid(conversationUuid)
      documentLog = lookup.unwrap()
    } catch (e) {
      // Any lookup failure is reported and surfaced to the client as a 404.
      captureException(e as Error)

      throw new NotFoundError('Document log not found')
    }

    const evaluationsRepo = new EvaluationsRepository(workspace.id)
    let evaluations: EvaluationDto[]
    if (evaluationUuids === undefined) {
      const connected = await evaluationsRepo.findByDocumentUuid(
        documentLog.documentUuid,
      )
      evaluations = connected.unwrap() ?? []
    } else if (evaluationUuids.length === 0) {
      // An explicit empty selection matches nothing; skip the lookup.
      evaluations = []
    } else {
      const selected = await evaluationsRepo.filterByUuids(evaluationUuids)
      evaluations = selected.unwrap() ?? []
    }

    // NOTE(review): the return value of `evaluateDocumentLog` is not
    // inspected here — confirm that failures do not need to reach the caller.
    evaluateDocumentLog(documentLog, workspace, { evaluations })

    return c.json({ evaluations: evaluations.map((e) => e.uuid) })
  },
)
7 changes: 5 additions & 2 deletions
7
apps/gateway/src/routes/api/v2/conversations/[conversationUuid]/index.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
import { chatHandler } from '$/routes/api/v1/conversations/[conversationUuid]/handlers/chat' | ||
import { Hono } from 'hono' | ||
|
||
// NOTE(review): this span is a rendered diff without +/- markers, so removed and added lines are interleaved. | ||
// Presumably the two `chatsRouter` lines are the deleted side and the `conversationsRouter` lines replace them - confirm against the commit. | ||
export const chatsRouter = new Hono() | ||
import { evaluateHandler } from './handlers/evaluate' | ||
|
||
chatsRouter.post('/:conversationUuid/chat', ...chatHandler) | ||
export const conversationsRouter = new Hono() | ||
|
||
// Registers the chat and evaluate POST routes on the conversations router. | ||
conversationsRouter.post('/:conversationUuid/chat', ...chatHandler) | ||
conversationsRouter.post('/:conversationUuid/evaluate', ...evaluateHandler) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.