diff --git a/package.json b/package.json index 2bf9395..b8b58b1 100644 --- a/package.json +++ b/package.json @@ -10,7 +10,7 @@ "@inquirer/select": "^4.0.1", "@oclif/core": "^4", "@oclif/multi-stage-output": "^0.7.12", - "@salesforce/agents": "^0.5.0", + "@salesforce/agents": "^0.5.1", "@salesforce/core": "^8.8.0", "@salesforce/kit": "^3.2.1", "@salesforce/sf-plugins-core": "^12.1.0", diff --git a/schemas/agent-test-results.json b/schemas/agent-test-results.json index e67de4d..ae4e91f 100644 --- a/schemas/agent-test-results.json +++ b/schemas/agent-test-results.json @@ -23,17 +23,24 @@ "subjectName": { "type": "string" }, - "testSetName": { - "type": "string" - }, - "testCases": { - "type": "array", - "items": { - "$ref": "#/definitions/TestCaseResult" - } + "testSet": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "testCases": { + "type": "array", + "items": { + "$ref": "#/definitions/TestCaseResult" + } + } + }, + "required": ["name", "testCases"], + "additionalProperties": false } }, - "required": ["status", "startTime", "subjectName", "testSetName", "testCases"], + "required": ["status", "startTime", "subjectName", "testSet"], "additionalProperties": false }, "TestStatus": { diff --git a/test/commands/agent/test/results.nut.ts b/test/commands/agent/test/results.nut.ts index 5004e91..e59eaf3 100644 --- a/test/commands/agent/test/results.nut.ts +++ b/test/commands/agent/test/results.nut.ts @@ -47,7 +47,7 @@ describe('agent test results NUTs', () => { ).jsonOutput; expect(output?.result.status).to.equal('COMPLETED'); - expect(output?.result.testCases.length).to.equal(2); + expect(output?.result.testSet.testCases.length).to.equal(2); // check that cache does not have an entry const cache = await AgentTestCache.create(); diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json index 5309372..c154dbd 100644 --- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json +++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json @@ -4,81 +4,97 @@ "endTime": "2024-11-28T12:05:00Z", "errorMessage": null, "subjectName": "Copilot_for_Salesforce", - "testSetName": "CRM_Sanity_v1", - "testCases": [ - { - "status": "COMPLETED", - "number": 1, - "startTime": "2024-11-28T12:00:10Z", - "endTime": "2024-11-28T12:00:20Z", - "generatedData": { - "type": "AGENT", - "actionsSequence": ["Action1", "Action2"], - "outcome": "Success", - "topic": "Mathematics", - "inputTokensCount": 50, - "outputTokensCount": 55 - }, - "expectationResults": [ - { - "name": "topic_sequence_match", - "actualValue": "Result A", - "expectedValue": "Result A", - "score": 1.0, - "result": "Passed", - "metricLabel": "Accuracy", - "metricExplainability": "Measures the correctness of the result.", - "status": "Completed", - "startTime": "2024-11-28T12:00:12Z", - "endTime": "2024-11-28T12:00:13Z", - "errorCode": null, - "errorMessage": null + "testSet": { + "name": "CRM_Sanity_v1", + "testCases": [ + { + "status": "COMPLETED", + "number": 1, + "startTime": "2024-11-28T12:00:10Z", + "endTime": "2024-11-28T12:00:20Z", + "generatedData": { + "type": "AGENT", + "actionsSequence": ["Action1", "Action2"], + "outcome": "Success", + "topic": "Mathematics", + "inputTokensCount": 50, + "outputTokensCount": 55 }, - { - "name": "action_sequence_match", - "actualValue": "Result B", - "expectedValue": "Result B", - "score": 0.9, - "result": "Passed", - "metricLabel": "Precision", - "metricExplainability": "Measures the precision of the result.", - "status": "Completed", - "startTime": "2024-11-28T12:00:14Z", - "endTime": "2024-11-28T12:00:15Z", - "errorCode": null, - "errorMessage": null - } - ] - }, - { - "status": "ERROR", - "number": 2, - "startTime": "2024-11-28T12:00:30Z", - "endTime": "2024-11-28T12:00:40Z", - "generatedData": { - "type": "AGENT", - "actionsSequence": ["Action3", "Action4"], - "outcome": "Failure", - "topic": "Physics", - "inputTokensCount": 60, - "outputTokensCount": 50 + "expectationResults": [ + { + "name": "topic_sequence_match", + "actualValue": "Result A", + "expectedValue": "Result A", + "score": 1.0, + "result": "Passed", + "metricLabel": "Accuracy", + "metricExplainability": "Measures the correctness of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:12Z", + "endTime": "2024-11-28T12:00:13Z", + "errorCode": null, + "errorMessage": null + }, + { + "name": "action_sequence_match", + "actualValue": "Result B", + "expectedValue": "Result B", + "score": 0.9, + "result": "Passed", + "metricLabel": "Precision", + "metricExplainability": "Measures the precision of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:14Z", + "endTime": "2024-11-28T12:00:15Z", + "errorCode": null, + "errorMessage": null + } + ] }, - "expectationResults": [ - { - "name": "topic_sequence_match", - "actualValue": "Result C", - "expectedValue": "Result D", - "score": 0.5, - "result": "Failed", - "metricLabel": "Accuracy", - "metricExplainability": "Measures the correctness of the result.", - "status": "Completed", - "startTime": "2024-11-28T12:00:32Z", - "endTime": "2024-11-28T12:00:33Z", - "errorCode": null, - "errorMessage": null - } - ] - } - ] + { + "status": "ERROR", + "number": 2, + "startTime": "2024-11-28T12:00:30Z", + "endTime": "2024-11-28T12:00:40Z", + "generatedData": { + "type": "AGENT", + "actionsSequence": ["Action3", "Action4"], + "outcome": "Failure", + "topic": "Physics", + "inputTokensCount": 60, + "outputTokensCount": 50 + }, + "expectationResults": [ + { + "name": "topic_sequence_match", + "actualValue": "Result C", + "expectedValue": "Result D", + "score": 0.5, + "result": "Failed", + "metricLabel": "Accuracy", + "metricExplainability": "Measures the correctness of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:32Z", + "endTime": "2024-11-28T12:00:33Z", + "errorCode": null, + "errorMessage": "Expected \"Result D\" but got \"Result C\"." + }, + { + "name": "topic_sequence_match", + "actualValue": "Result C", + "expectedValue": "Result D", + "score": 0.5, + "result": "Failed", + "metricLabel": "Accuracy", + "metricExplainability": "Measures the correctness of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:32Z", + "endTime": "2024-11-28T12:00:33Z", + "errorCode": null, + "errorMessage": "Expected \"Result D\" but got \"Result C\"." + } + ] + } + ] + } } diff --git a/yarn.lock b/yarn.lock index d64cc72..1d9e383 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1389,10 +1389,10 @@ resolved "https://registry.yarnpkg.com/@pkgjs/parseargs/-/parseargs-0.11.0.tgz#a77ea742fab25775145434eb1d2328cf5013ac33" integrity sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg== -"@salesforce/agents@^0.5.0": - version "0.5.0" - resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.5.0.tgz#f6e4106e4796dde6bc1c5d4045511fd7d27a4640" - integrity sha512-xmPCC3yOXFgsG0Mrt+BDRBVibJzHzBHlKws7szEeNY5it9g2rt0Knl/KzZZTDz9hGAkNCd94T4luCt653l7Pbg== +"@salesforce/agents@^0.5.1": + version "0.5.1" + resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.5.1.tgz#b6de16004505432c226c02f612c6b0b7b6227f6f" + integrity sha512-FGpCQ3PVzZunoaQVPAJG05eqafOvf2P7fx2w5aZYVg9yqwM/UnBpTBKVvkmdZDsBRTUYaExr6tvboaMc5Hsfzw== dependencies: "@oclif/table" "^0.3.5" "@salesforce/core" "^8.8.0"