From aaa5ad6b39e23e7aa29c73333154761767729f46 Mon Sep 17 00:00:00 2001 From: Mike Donnalley Date: Mon, 2 Dec 2024 10:36:56 -0700 Subject: [PATCH] refactor: comply with latest api spec --- src/agentTester.ts | 181 +++++++----------- test/agentTester.test.ts | 7 +- test/mocks/einstein_ai-evaluations_runs.json | 3 +- ...tions_runs_4KBSM000000003F4AQ_details.json | 103 +++++++--- 4 files changed, 145 insertions(+), 149 deletions(-) diff --git a/src/agentTester.ts b/src/agentTester.ts index 8b8dc17..5062ebb 100644 --- a/src/agentTester.ts +++ b/src/agentTester.ts @@ -8,47 +8,77 @@ import { Connection, Lifecycle, PollingClient, StatusResult } from '@salesforce/ import { Duration } from '@salesforce/kit'; import { MaybeMock } from './maybe-mock'; -type Format = 'human' | 'tap' | 'junit' | 'json'; +type Format = 'human' | 'json'; + +type TestStatus = 'NEW' | 'IN_PROGRESS' | 'COMPLETED' | 'ERROR'; type AgentTestStartResponse = { - id: string; + aiEvaluationId: string; + status: TestStatus; }; type AgentTestStatusResponse = { - status: 'NEW' | 'IN_PROGRESS' | 'COMPLETED' | 'ERROR'; + status: TestStatus; startTime: string; endTime?: string; errorMessage?: string; }; -type AgentTestDetailsResponse = { - AiEvaluationSuiteDefinition: string; - tests: Array<{ - AiEvaluationDefinition: string; - results: Array<{ - test_number: number; - results: Array<{ - name: string; - actual: string[]; - is_pass: boolean; - execution_time_ms: number; - error?: string; - }>; - }>; +type TestCaseResult = { + status: TestStatus; + number: string; + startTime: string; + endTime?: string; + generatedData: { + type: 'AGENT'; + actionsSequence: string[]; + outcome: 'Success' | 'Failure'; + topic: string; + inputTokensCount: string; + outputTokensCount: string; + }; + expectationResults: Array<{ + name: string; + actualValue: string; + expectedValue: string; + score: number; + result: 'Passed' | 'Failed'; + metricLabel: 'Accuracy' | 'Precision'; + metricExplainability: string; + status: TestStatus; + startTime: string; + endTime?: string; + errorCode?: string; + errorMessage?: string; }>; }; +type AgentTestDetailsResponse = { + status: TestStatus; + startTime: string; + endTime?: string; + errorMessage?: string; + testCases: TestCaseResult[]; +}; + export class AgentTester { private maybeMock: MaybeMock; public constructor(connection: Connection) { this.maybeMock = new MaybeMock(connection); } - public async start(suiteId: string): Promise<{ id: string }> { + /** + * Starts an AI evaluation run based on the provided name or ID. + * + * @param nameOrId - The name or ID of the AI evaluation definition. + * @param type - Specifies whether the provided identifier is a 'name' or 'id'. Defaults to 'name'. If 'name' is provided, nameOrId is treated as the name of the AiEvaluationDefinition. If 'id' is provided, nameOrId is treated as the unique ID of the AiEvaluationDefinition. + * @returns A promise that resolves to an object containing the ID of the started AI evaluation run. + */ + public async start(nameOrId: string, type: 'name' | 'id' = 'name'): Promise<{ aiEvaluationId: string }> { const url = '/einstein/ai-evaluations/runs'; return this.maybeMock.request('POST', url, { - aiEvaluationSuiteDefinition: suiteId, + [type === 'name' ? 'aiEvaluationDefinitionName' : 'aiEvaluationDefinitionVersionId']: nameOrId, }); } @@ -100,14 +130,7 @@ export class AgentTester { const response = await this.maybeMock.request('GET', url); return { response, - formatted: - format === 'human' - ? await humanFormat(response) - : format === 'tap' - ? await tapFormat(response) - : format === 'junit' - ? await junitFormat(response) - : await jsonFormat(response), + formatted: format === 'human' ? await humanFormat(jobId, response) : await jsonFormat(response), }; } @@ -118,100 +141,30 @@ export class AgentTester { } } -export async function humanFormat(details: AgentTestDetailsResponse): Promise { - // TODO: the api response isn't finalized so this is just a POC +export async function humanFormat(name: string, details: AgentTestDetailsResponse): Promise { const { Ux } = await import('@salesforce/sf-plugins-core'); const ux = new Ux(); + const tables: string[] = []; - for (const aiEvalDef of details.tests) { - for (const result of aiEvalDef.results) { - const table = ux.makeTable({ - title: `Test Results for ${aiEvalDef.AiEvaluationDefinition} (#${result.test_number})`, - data: result.results.map((r) => ({ - 'TEST NAME': r.name, - OUTCOME: r.is_pass ? 'Pass' : 'Fail', - MESSAGE: r.error ?? '', - 'RUNTIME (MS)': r.execution_time_ms, - })), - }); - tables.push(table); - } + for (const testCase of details.testCases) { + const table = ux.makeTable({ + title: `Test Case #${testCase.number}`, + data: testCase.expectationResults.map((r) => ({ + name: r.name, + outcome: r.result === 'Passed' ? 'Pass' : 'Fail', + actualValue: r.actualValue, + expectedValue: r.expectedValue, + score: r.score, + 'metric label': r.metricLabel, + message: r.errorMessage ?? '', + 'runtime (MS)': r.endTime ? new Date(r.endTime).getTime() - new Date(r.startTime).getTime() : 0, + })), + }); + tables.push(table); } - return tables.join('\n'); } -export async function junitFormat(details: AgentTestDetailsResponse): Promise { - // APEX EXAMPLE - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - // - await Promise.reject(new Error('Not implemented')); - return JSON.stringify(details, null, 2); -} - -export async function tapFormat(details: AgentTestDetailsResponse): Promise { - // APEX EXAMPLE (these are streamed in chunks) - // 1..11 - // ok 1 TestPropertyController.testGetPagedPropertyList - // ok 2 TestPropertyController.testGetPicturesNoResults - // ok 3 TestPropertyController.testGetPicturesWithResults - // ok 4 FileUtilitiesTest.createFileFailsWhenIncorrectBase64Data - // ok 5 FileUtilitiesTest.createFileFailsWhenIncorrectFilename - // ok 6 FileUtilitiesTest.createFileFailsWhenIncorrectRecordId - // ok 7 FileUtilitiesTest.createFileSucceedsWhenCorrectInput - // ok 8 TestSampleDataController.importSampleData - // ok 9 GeocodingServiceTest.blankAddress - // ok 10 GeocodingServiceTest.errorResponse - // ok 11 GeocodingServiceTest.successResponse - // # Run "sf apex get test -i 707Ei00000dUJry -o test-mgoe8ogsltwe@example.com --result-format " to retrieve test results in a different format. - await Promise.reject(new Error('Not implemented')); - return JSON.stringify(details, null, 2); -} - export async function jsonFormat(details: AgentTestDetailsResponse): Promise { return Promise.resolve(JSON.stringify(details, null, 2)); } diff --git a/test/agentTester.test.ts b/test/agentTester.test.ts index 1281163..7f55a73 100644 --- a/test/agentTester.test.ts +++ b/test/agentTester.test.ts @@ -57,8 +57,9 @@ describe('AgentTester', () => { const output = await tester.poll('4KBSM000000003F4AQ'); expect(output).to.be.ok; // TODO: make these assertions more meaningful - expect(output.formatted).to.include('Test Results for my first test'); - expect(output.response.tests[0].results[0].results[0].is_pass).to.be.true; + expect(output.formatted).to.include('Test Case #1'); + expect(output.formatted).to.include('Test Case #2'); + expect(output.response.testCases[0].status).to.equal('Completed'); }); it('should poll until test run is complete (json format)', async () => { @@ -68,7 +69,7 @@ describe('AgentTester', () => { expect(output).to.be.ok; // TODO: make these assertions more meaningful expect(JSON.parse(output.formatted)).to.deep.equal(output.response); - expect(output.response.tests[0].results[0].results[0].is_pass).to.be.true; + expect(output.response.testCases[0].status).to.equal('Completed'); }); }); diff --git a/test/mocks/einstein_ai-evaluations_runs.json b/test/mocks/einstein_ai-evaluations_runs.json index 3c37def..87f063c 100644 --- a/test/mocks/einstein_ai-evaluations_runs.json +++ b/test/mocks/einstein_ai-evaluations_runs.json @@ -1,3 +1,4 @@ { - "id": "4KBSM000000003F4AQ" + "aiEvaluationId": "4KBSM000000003F4AQ", + "status": "NEW" } diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json index d0a8a06..b7dd417 100644 --- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json +++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json @@ -1,39 +1,80 @@ { - "AiEvaluationSuiteDefinition": "", - "tests": [ + "status": "Completed", + "startTime": "2024-11-28T12:00:00Z", + "endTime": "2024-11-28T12:05:00Z", + "errorMessage": null, + "testCases": [ { - "AiEvaluationDefinition": "my first test", - "results": [ + "status": "Completed", + "number": 1, + "startTime": "2024-11-28T12:00:10Z", + "endTime": "2024-11-28T12:00:20Z", + "generatedData": { + "type": "AGENT", + "actionsSequence": ["Action1", "Action2"], + "outcome": "Success", + "topic": "Mathematics", + "inputTokensCount": 50, + "outputTokensCount": 55 + }, + "expectationResults": [ { - "test_number": 1, - "results": [ - { - "name": "action_assertion", - "actual": ["Identify Record by Name", "Get Record Details"], - "is_pass": true, - "execution_time_ms": 3000, - "error": "" - }, - { - "name": "action_assertion", - "actual": ["Identify Record by Name", "Get Record Details"], - "is_pass": false, - "execution_time_ms": 3000, - "error": "assertion failed" - } - ] + "name": "topic_sequence_match", + "actualValue": "Result A", + "expectedValue": "Result A", + "score": 1.0, + "result": "Passed", + "metricLabel": "Accuracy", + "metricExplainability": "Measures the correctness of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:12Z", + "endTime": "2024-11-28T12:00:13Z", + "errorCode": null, + "errorMessage": null }, { - "test_number": 2, - "results": [ - { - "name": "action_assertion", - "actual": ["Identify Record by Name", "Get Record Details"], - "is_pass": true, - "execution_time_ms": 3000, - "error": "" - } - ] + "name": "action_sequence_match", + "actualValue": "Result B", + "expectedValue": "Result B", + "score": 0.9, + "result": "Passed", + "metricLabel": "Precision", + "metricExplainability": "Measures the precision of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:14Z", + "endTime": "2024-11-28T12:00:15Z", + "errorCode": null, + "errorMessage": null + } + ] + }, + { + "status": "Failed", + "number": 2, + "startTime": "2024-11-28T12:00:30Z", + "endTime": "2024-11-28T12:00:40Z", + "generatedData": { + "type": "AGENT", + "actionsSequence": ["Action3", "Action4"], + "outcome": "Failure", + "topic": "Physics", + "inputTokensCount": 60, + "outputTokensCount": 50 + }, + "expectationResults": [ + { + "name": "topic_sequence_match", + "actualValue": "Result C", + "expectedValue": "Result D", + "score": 0.5, + "result": "Failed", + "metricLabel": "Accuracy", + "metricExplainability": "Measures the correctness of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:32Z", + "endTime": "2024-11-28T12:00:33Z", + "errorCode": null, + "errorMessage": null } ] }