forked from elastic/kibana
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Security Solution] [Attack discovery] Updates default Attack discovery max alerts for users still using legacy models (elastic#196939)

### [Security Solution] [Attack discovery] Updates default Attack discovery max alerts for users still using legacy models

In consideration of users still using legacy models (e.g. GPT-4 instead of GPT-4o), this PR updates `DEFAULT_ATTACK_DISCOVERY_MAX_ALERTS` from its previous value `200` in <elastic#195669> to `100`.

This PR also includes additional tests.

## Desk testing

1) Navigate to Security > Attack discovery
2) Click the settings gear
3) Select any value above or below `100` in the Alerts range slider
4) Click `Reset`

**Expected result**

- The range slider resets to `100`

(cherry picked from commit 96585a5)
- Loading branch information
1 parent
19590f9
commit 46bcf06
Showing 22 changed files with 1,256 additions and 1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
26 changes: 26 additions & 0 deletions
26
...stic_assistant/server/lib/attack_discovery/evaluation/__mocks__/mock_anonymized_alerts.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import type { Document } from '@langchain/core/documents'; | ||
|
||
// Mock anonymized alerts: two Document entries, each with an empty `metadata` object. | ||
// Each `pageContent` is a `\n`-delimited list of `field,value[,value...]` rows. | ||
// The `host.name` and `user.name` rows carry UUID-style placeholders instead of readable names. | ||
export const mockAnonymizedAlerts: Document[] = [ | ||
{ | ||
// NOTE(review): the literal below continues onto a second row in this view (the break falls after `/n`); presumably a rendering wrap rather than a real newline inside the string - confirm against the source file. | ||
pageContent: | ||
'@timestamp,2024-10-16T02:40:08.837Z\n_id,87c42d26897490ee02ba42ec4e872910b29f3c69bda357b8faf197b533b8528a\nevent.category,malware,intrusion_detection\nevent.dataset,endpoint.alerts\nevent.module,endpoint\nevent.outcome,success\nhost.name,f5b69281-3e7e-4b52-9225-e5c30dc29c78\nhost.os.name,Windows\nhost.os.version,21H2 (10.0.20348.1607)\nkibana.alert.original_time,2023-04-01T22:03:26.909Z\nkibana.alert.risk_score,99\nkibana.alert.rule.description,Generates a detection alert each time an Elastic Endpoint Security alert is received. Enabling this rule allows you to immediately begin investigating your Endpoint alerts.\nkibana.alert.rule.name,Malicious Behavior Detection Alert: Execution of a Windows Script File Written by a Suspicious Process\nkibana.alert.severity,critical\nkibana.alert.workflow_status,open\nmessage,Malicious Behavior Detection Alert: Execution of a Windows Script File Written by a Suspicious Process\nprocess.Ext.token.integrity_level_name,high\nprocess.args,wscript,C:\\ProgramData\\WindowsAppPool\\AppPool.vbs\nprocess.code_signature.exists,true\nprocess.code_signature.status,trusted\nprocess.code_signature.subject_name,Microsoft Windows\nprocess.code_signature.trusted,true\nprocess.command_line,wscript C:\\ProgramData\\WindowsAppPool\\AppPool.vbs\nprocess.executable,C:\\Windows\\System32\\wscript.exe\nprocess.hash.md5,3412340ca1bf2f4118cbfe98961ceeda\nprocess.hash.sha1,bcb0568cbf0af0c09b53829ce9ee8ba30db77c56\nprocess.hash.sha256,02c731754bcc8f063a8c7aa53c7b7d5773f389e17582ffaa6eaaa692da183fd7\nprocess.name,wscript.exe\nprocess.parent.args,C:\\Program Files\\Microsoft Office\\Root\\Office16\\WINWORD.EXE,/n,C:\\Users\\Administrator\\Desktop\\9828375091\\7cbad6b3f505a199d6766a86b41ed23786bbb99dab9cae6c18936afdc2512f00.doc,/o,\nprocess.parent.args_count,5\nprocess.parent.command_line,"C:\\Program Files\\Microsoft Office\\Root\\Office16\\WINWORD.EXE" /n 
"C:\\Users\\Administrator\\Desktop\\9828375091\\7cbad6b3f505a199d6766a86b41ed23786bbb99dab9cae6c18936afdc2512f00.doc" /o ""\nprocess.parent.executable,C:\\Program Files\\Microsoft Office\\root\\Office16\\WINWORD.EXE\nprocess.parent.name,WINWORD.EXE\nprocess.pe.original_file_name,wscript.exe\nprocess.pid,13024\nprocess.working_directory,C:\\Users\\Administrator\\Desktop\\9828375091\\\nrule.name,Execution of a Windows Script File Written by a Suspicious Process\nthreat.framework,MITRE ATT&CK,MITRE ATT&CK\nthreat.tactic.id,TA0002,TA0005\nthreat.tactic.name,Execution,Defense Evasion\nthreat.tactic.reference,https://attack.mitre.org/tactics/TA0002/,https://attack.mitre.org/tactics/TA0005/\nthreat.technique.id,T1059,T1218\nthreat.technique.name,Command and Scripting Interpreter,System Binary Proxy Execution\nthreat.technique.reference,https://attack.mitre.org/techniques/T1059/,https://attack.mitre.org/techniques/T1218/\nthreat.technique.subtechnique.id,T1059.005,T1059.007,T1059.001,T1218.005\nthreat.technique.subtechnique.name,Visual Basic,JavaScript,PowerShell,Mshta\nthreat.technique.subtechnique.reference,https://attack.mitre.org/techniques/T1059/005/,https://attack.mitre.org/techniques/T1059/007/,https://attack.mitre.org/techniques/T1059/001/,https://attack.mitre.org/techniques/T1218/005/\nuser.domain,OMM-WIN-DETECT\nuser.name,42c4e419-c859-47a5-b1cb-f069d48fa509', | ||
metadata: {}, | ||
}, | ||
{ | ||
// NOTE(review): this literal is also split across two rows in this view (between `Visual` and `Basic`) - same caveat as above. | ||
pageContent: | ||
'@timestamp,2024-10-16T02:40:08.836Z\n_id,be6d293f9a71ba209adbcacc3ba04adfd8e9456260f6af342b7cb0478a7a144a\nevent.category,malware,intrusion_detection\nevent.dataset,endpoint.alerts\nevent.module,endpoint\nevent.outcome,success\nfile.name,AppPool.vbs\nfile.path,C:\\ProgramData\\WindowsAppPool\\AppPool.vbs\nhost.name,f5b69281-3e7e-4b52-9225-e5c30dc29c78\nhost.os.name,Windows\nhost.os.version,21H2 (10.0.20348.1607)\nkibana.alert.original_time,2023-04-01T22:03:26.747Z\nkibana.alert.risk_score,99\nkibana.alert.rule.description,Generates a detection alert each time an Elastic Endpoint Security alert is received. Enabling this rule allows you to immediately begin investigating your Endpoint alerts.\nkibana.alert.rule.name,Malicious Behavior Detection Alert: Suspicious Executable File Creation\nkibana.alert.severity,critical\nkibana.alert.workflow_status,open\nmessage,Malicious Behavior Detection Alert: Suspicious Executable File Creation\nprocess.code_signature.exists,true\nprocess.code_signature.status,trusted\nprocess.code_signature.subject_name,Microsoft Corporation\nprocess.code_signature.trusted,true\nprocess.executable,C:\\Program Files\\Microsoft Office\\root\\Office16\\WINWORD.EXE\nprocess.name,WINWORD.EXE\nprocess.pid,13036\nrule.name,Suspicious Executable File Creation\nthreat.framework,MITRE ATT&CK,MITRE ATT&CK\nthreat.tactic.id,TA0011,TA0002\nthreat.tactic.name,Command and Control,Execution\nthreat.tactic.reference,https://attack.mitre.org/tactics/TA0011/,https://attack.mitre.org/tactics/TA0002/\nthreat.technique.id,T1105,T1059\nthreat.technique.name,Ingress Tool Transfer,Command and Scripting Interpreter\nthreat.technique.reference,https://attack.mitre.org/techniques/T1105/,https://attack.mitre.org/techniques/T1059/\nthreat.technique.subtechnique.id,T1059.005,T1059.007\nthreat.technique.subtechnique.name,Visual 
Basic,JavaScript\nthreat.technique.subtechnique.reference,https://attack.mitre.org/techniques/T1059/005/,https://attack.mitre.org/techniques/T1059/007/\nuser.domain,OMM-WIN-DETECT\nuser.name,42c4e419-c859-47a5-b1cb-f069d48fa509', | ||
metadata: {}, | ||
}, | ||
]; | ||
|
||
// Replacements map keyed by the UUID-style placeholders that appear in the `user.name` and | ||
// `host.name` rows of mockAnonymizedAlerts; the values are readable names | ||
// (presumably the original, pre-anonymization values - confirm). | ||
export const mockAnonymizedAlertsReplacements: Record<string, string> = { | ||
'42c4e419-c859-47a5-b1cb-f069d48fa509': 'Administrator', | ||
'f5b69281-3e7e-4b52-9225-e5c30dc29c78': 'SRVWIN07', | ||
}; |
32 changes: 32 additions & 0 deletions
32
...tic_assistant/server/lib/attack_discovery/evaluation/__mocks__/mock_attack_discoveries.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import type { AttackDiscovery } from '@kbn/elastic-assistant-common'; | ||
|
||
// Mock attack discoveries: a single AttackDiscovery with a title, eight alert ids, a timestamp, | ||
// three markdown fields and two MITRE ATT&CK tactic names. | ||
// The markdown fields embed values using the `{{ field.name value }}` form, and the host / user | ||
// values in them are UUID-style placeholders. | ||
export const mockAttackDiscoveries: AttackDiscovery[] = [ | ||
{ | ||
title: 'Critical Malware and Phishing Alerts on host e1cb3cf0-30f3-4f99-a9c8-518b955c6f90', | ||
alertIds: [ | ||
'4af5689eb58c2420efc0f7fad53c5bf9b8b6797e516d6ea87d6044ce25d54e16', | ||
'c675d7eb6ee181d788b474117bae8d3ed4bdc2168605c330a93dd342534fb02b', | ||
'021b27d6bee0650a843be1d511119a3b5c7c8fdaeff922471ce0248ad27bd26c', | ||
'6cc8d5f0e1c2b6c75219b001858f1be64194a97334be7a1e3572f8cfe6bae608', | ||
'f39a4013ed9609584a8a22dca902e896aa5b24d2da03e0eaab5556608fa682ac', | ||
'909968e926e08a974c7df1613d98ebf1e2422afcb58e4e994beb47b063e85080', | ||
'2c25a4dc31cd1ec254c2b19ea663fd0b09a16e239caa1218b4598801fb330da6', | ||
'3bf907becb3a4f8e39a3b673e0d50fc954a7febef30c12891744c603760e4998', | ||
], | ||
timestamp: '2024-10-10T22:59:52.749Z', | ||
// Bulleted markdown narrative of the discovery (one `- ` item per `\n`-separated line). | ||
detailsMarkdown: | ||
'- On `2023-06-19T00:28:38.061Z` a critical malware detection alert was triggered on host {{ host.name e1cb3cf0-30f3-4f99-a9c8-518b955c6f90 }} running {{ host.os.name macOS }} version {{ host.os.version 13.4 }}.\n- The malware was identified as {{ file.name unix1 }} with SHA256 hash {{ file.hash.sha256 0b18d6880dc9670ab2b955914598c96fc3d0097dc40ea61157b8c79e75edf231 }}.\n- The process {{ process.name My Go Application.app }} was executed with command line {{ process.command_line /private/var/folders/_b/rmcpc65j6nv11ygrs50ctcjr0000gn/T/AppTranslocation/6D63F08A-011C-4511-8556-EAEF9AFD6340/d/Setup.app/Contents/MacOS/My Go Application.app }}.\n- The process was not trusted as its code signature failed to satisfy specified code requirements.\n- The user involved was {{ user.name 039c15c5-3964-43e7-a891-42fe2ceeb9ff }}.\n- Another critical alert was triggered for potential credentials phishing via {{ process.name osascript }} on the same host.\n- The phishing attempt involved displaying a dialog to capture the user\'s password.\n- The process {{ process.name osascript }} was executed with command line {{ process.command_line osascript -e display dialog "MacOS wants to access System Preferences\\n\\nPlease enter your password." with title "System Preferences" with icon file "System:Library:CoreServices:CoreTypes.bundle:Contents:Resources:ToolbarAdvanced.icns" default answer "" giving up after 30 with hidden answer ¬ }}.\n- The MITRE ATT&CK tactics involved include Credential Access and Input Capture.', | ||
summaryMarkdown: | ||
'Critical malware and phishing alerts detected on {{ host.name e1cb3cf0-30f3-4f99-a9c8-518b955c6f90 }} involving user {{ user.name 039c15c5-3964-43e7-a891-42fe2ceeb9ff }}. Malware identified as {{ file.name unix1 }} and phishing attempt via {{ process.name osascript }}.', | ||
mitreAttackTactics: ['Credential Access', 'Input Capture'], | ||
entitySummaryMarkdown: | ||
'Critical malware and phishing alerts detected on {{ host.name e1cb3cf0-30f3-4f99-a9c8-518b955c6f90 }} involving user {{ user.name 039c15c5-3964-43e7-a891-42fe2ceeb9ff }}.', | ||
}, | ||
]; |
26 changes: 26 additions & 0 deletions
26
...c_assistant/server/lib/attack_discovery/evaluation/__mocks__/mock_experiment_connector.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import type { Connector } from '@kbn/actions-plugin/server/application/connector/types'; | ||
|
||
// Mock connector used as the "experiment" connector in the evaluation tests: a preconfigured | ||
// `.gemini` connector named 'Gemini 1.5 Pro 002' with placeholder config and empty credentials JSON. | ||
// NOTE(review): the object is both annotated as `Connector` and asserted `as Connector`; the | ||
// assertion presumably relaxes checking of the literal against the Connector type - confirm it is needed. | ||
export const mockExperimentConnector: Connector = { | ||
name: 'Gemini 1.5 Pro 002', | ||
actionTypeId: '.gemini', | ||
config: { | ||
apiUrl: 'https://example.com', | ||
defaultModel: 'gemini-1.5-pro-002', | ||
gcpRegion: 'test-region', | ||
gcpProjectID: 'test-project-id', | ||
}, | ||
secrets: { | ||
credentialsJson: '{}', | ||
}, | ||
id: 'gemini-1-5-pro-002', | ||
isPreconfigured: true, | ||
isSystemAction: false, | ||
isDeprecated: false, | ||
} as Connector; |
143 changes: 143 additions & 0 deletions
143
x-pack/plugins/elastic_assistant/server/lib/attack_discovery/evaluation/index.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import type { ActionsClient } from '@kbn/actions-plugin/server'; | ||
import type { Connector } from '@kbn/actions-plugin/server/application/connector/types'; | ||
import { elasticsearchServiceMock } from '@kbn/core-elasticsearch-server-mocks'; | ||
import type { ActionsClientLlm } from '@kbn/langchain/server'; | ||
import { getLangSmithTracer } from '@kbn/langchain/server/tracers/langsmith'; | ||
import { loggerMock } from '@kbn/logging-mocks'; | ||
import type { LangChainTracer } from '@langchain/core/tracers/tracer_langchain'; | ||
|
||
import { evaluateAttackDiscovery } from '.'; | ||
import { DefaultAttackDiscoveryGraph } from '../graphs/default_attack_discovery_graph'; | ||
import { AttackDiscoveryGraphMetadata } from '../../langchain/graphs'; | ||
import { mockExperimentConnector } from './__mocks__/mock_experiment_connector'; | ||
import { getLlmType } from '../../../routes/utils'; | ||
|
||
// Keep the real '@kbn/langchain/server' exports, but replace ActionsClientLlm with a bare jest.fn(). | ||
jest.mock('@kbn/langchain/server', () => ({ | ||
...jest.requireActual('@kbn/langchain/server'), | ||
|
||
ActionsClientLlm: jest.fn(), | ||
})); | ||
|
||
// Stub langsmith's evaluate(): it calls its first argument (`predict`) once with an input whose | ||
// `overrides` are { errors: ['test-error'] } and resolves with whatever `predict` returns. | ||
// NOTE(review): `Function` is a very loose type for `predict`; a specific call signature would be safer. | ||
jest.mock('langsmith/evaluation', () => ({ | ||
evaluate: jest.fn(async (predict: Function) => | ||
predict({ | ||
overrides: { | ||
errors: ['test-error'], | ||
}, | ||
}) | ||
), | ||
})); | ||
|
||
// Replace getCustomEvaluator with a jest.fn() that has no implementation (returns undefined). | ||
jest.mock('./helpers/get_custom_evaluator', () => ({ | ||
getCustomEvaluator: jest.fn(), | ||
})); | ||
|
||
// Replace getEvaluatorLlm with a mock that resolves to a jest.fn() cast to ActionsClientLlm. | ||
jest.mock('./helpers/get_evaluator_llm', () => { | ||
const mockLlm = jest.fn() as unknown as ActionsClientLlm; | ||
|
||
return { | ||
getEvaluatorLlm: jest.fn().mockResolvedValue(mockLlm), | ||
}; | ||
}); | ||
|
||
// Shared fixtures passed to evaluateAttackDiscovery() by the test in this file. | ||
// Minimal ActionsClient stand-in: only `get` is provided, as a jest.fn(). | ||
const actionsClient = { | ||
get: jest.fn(), | ||
} as unknown as ActionsClient; | ||
const alertsIndexPattern = 'test-alerts-index-pattern'; | ||
const connectorTimeout = 1000; | ||
const datasetName = 'test-dataset'; | ||
const evaluationId = 'test-evaluation-id'; | ||
const evaluatorConnectorId = 'test-evaluator-connector-id'; | ||
const langSmithApiKey = 'test-api-key'; | ||
const langSmithProject = 'test-lang-smith-project'; | ||
const logger = loggerMock.create(); | ||
const mockEsClient = elasticsearchServiceMock.createElasticsearchClient(); | ||
const runName = 'test-run-name'; | ||
|
||
const connectors = [mockExperimentConnector]; | ||
|
||
// Same value as `langSmithProject`; used when building the trace options for `graphs`. | ||
const projectName = 'test-lang-smith-project'; | ||
|
||
// Graph metadata built once per connector. Each entry pairs the connector with a mocked graph, | ||
// its LLM type, a run name and LangSmith trace options; the test reads these back to build the | ||
// arguments it expects `graph.invoke` to receive. | ||
const graphs: Array<{ | ||
connector: Connector; | ||
graph: DefaultAttackDiscoveryGraph; | ||
llmType: string | undefined; | ||
name: string; | ||
traceOptions: { | ||
projectName: string | undefined; | ||
tracers: LangChainTracer[]; | ||
}; | ||
}> = connectors.map((connector) => { | ||
const llmType = getLlmType(connector.actionTypeId); | ||
|
||
const traceOptions = { | ||
projectName, | ||
tracers: [ | ||
...getLangSmithTracer({ | ||
apiKey: langSmithApiKey, | ||
projectName, | ||
logger, | ||
}), | ||
], | ||
}; | ||
|
||
// Only `invoke` is mocked; it resolves to an empty object. | ||
const graph = { | ||
invoke: jest.fn().mockResolvedValue({}), | ||
} as unknown as DefaultAttackDiscoveryGraph; | ||
|
||
return { | ||
connector, | ||
graph, | ||
llmType, | ||
// presumably mirrors the run name built by the code under test - confirm against evaluateAttackDiscovery | ||
name: `${runName} - ${connector.name} - ${evaluationId} - Attack discovery`, | ||
traceOptions, | ||
}; | ||
}); | ||
|
||
// Graph metadata handed to evaluateAttackDiscovery(): a single 'attack-discovery' entry whose | ||
// factory is mocked to return the mocked graph from `graphs[0]`, so the test can assert on its `invoke`. | ||
const attackDiscoveryGraphs: AttackDiscoveryGraphMetadata[] = [ | ||
{ | ||
getDefaultAttackDiscoveryGraph: jest.fn().mockReturnValue(graphs[0].graph), | ||
graphType: 'attack-discovery', | ||
}, | ||
]; | ||
|
||
// Tests for evaluateAttackDiscovery(). langsmith's evaluate() is mocked in this file to call | ||
// `predict` immediately with overrides { errors: ['test-error'] }. | ||
describe('evaluateAttackDiscovery', () => { | ||
beforeEach(() => jest.clearAllMocks()); | ||
|
||
it('evaluates the attack discovery graphs', async () => { | ||
await evaluateAttackDiscovery({ | ||
actionsClient, | ||
attackDiscoveryGraphs, | ||
alertsIndexPattern, | ||
connectors, | ||
connectorTimeout, | ||
datasetName, | ||
esClient: mockEsClient, | ||
evaluationId, | ||
evaluatorConnectorId, | ||
langSmithApiKey, | ||
langSmithProject, | ||
logger, | ||
runName, | ||
size: 20, | ||
}); | ||
|
||
// The mocked graph must be invoked with the overrides supplied by the evaluate() stub, plus | ||
// the tracers as callbacks, the expected run name, and the 'evaluation' / LLM-type tags. | ||
expect(graphs[0].graph.invoke).toHaveBeenCalledWith( | ||
{ | ||
errors: ['test-error'], | ||
}, | ||
{ | ||
callbacks: [...graphs[0].traceOptions.tracers], | ||
runName: graphs[0].name, | ||
tags: ['evaluation', graphs[0].llmType ?? ''], | ||
} | ||
); | ||
}); | ||
}); |
141 changes: 141 additions & 0 deletions
141
...ns/elastic_assistant/server/lib/attack_discovery/evaluation/run_evaluations/index.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import type { ActionsClient } from '@kbn/actions-plugin/server'; | ||
import type { Connector } from '@kbn/actions-plugin/server/application/connector/types'; | ||
import type { ActionsClientLlm } from '@kbn/langchain/server'; | ||
import { getLangSmithTracer } from '@kbn/langchain/server/tracers/langsmith'; | ||
import { loggerMock } from '@kbn/logging-mocks'; | ||
import type { LangChainTracer } from '@langchain/core/tracers/tracer_langchain'; | ||
|
||
import { runEvaluations } from '.'; | ||
import { type DefaultAttackDiscoveryGraph } from '../../graphs/default_attack_discovery_graph'; | ||
import { mockExperimentConnector } from '../__mocks__/mock_experiment_connector'; | ||
import { getLlmType } from '../../../../routes/utils'; | ||
|
||
// Keep the real '@kbn/langchain/server' exports, but replace ActionsClientLlm with a bare jest.fn(). | ||
jest.mock('@kbn/langchain/server', () => ({ | ||
...jest.requireActual('@kbn/langchain/server'), | ||
|
||
ActionsClientLlm: jest.fn(), | ||
})); | ||
|
||
// Stub langsmith's evaluate(): it calls its first argument (`predict`) once with an input whose | ||
// `overrides` are { errors: ['test-error'] } and resolves (or rejects) with `predict`'s outcome. | ||
// NOTE(review): `Function` is a very loose type for `predict`; a specific call signature would be safer. | ||
jest.mock('langsmith/evaluation', () => ({ | ||
evaluate: jest.fn(async (predict: Function) => | ||
predict({ | ||
overrides: { | ||
errors: ['test-error'], | ||
}, | ||
}) | ||
), | ||
})); | ||
|
||
// Replace getCustomEvaluator with a jest.fn() that has no implementation (returns undefined). | ||
jest.mock('../helpers/get_custom_evaluator', () => ({ | ||
getCustomEvaluator: jest.fn(), | ||
})); | ||
|
||
// Replace getEvaluatorLlm with a mock that resolves to a jest.fn() cast to ActionsClientLlm. | ||
jest.mock('../helpers/get_evaluator_llm', () => { | ||
const mockLlm = jest.fn() as unknown as ActionsClientLlm; | ||
|
||
return { | ||
getEvaluatorLlm: jest.fn().mockResolvedValue(mockLlm), | ||
}; | ||
}); | ||
|
||
// Shared fixtures passed to runEvaluations() by the tests in this file. | ||
// Minimal ActionsClient stand-in: only `get` is provided, as a jest.fn(). | ||
const actionsClient = { | ||
get: jest.fn(), | ||
} as unknown as ActionsClient; | ||
const connectorTimeout = 1000; | ||
const datasetName = 'test-dataset'; | ||
const evaluatorConnectorId = 'test-evaluator-connector-id'; | ||
const langSmithApiKey = 'test-api-key'; | ||
const logger = loggerMock.create(); | ||
const connectors = [mockExperimentConnector]; | ||
|
||
// LangSmith project name used when building the trace options for `graphs`. | ||
const projectName = 'test-lang-smith-project'; | ||
|
||
// Graph metadata built once per connector and passed directly to runEvaluations(). Each entry | ||
// pairs the connector with a mocked graph, its LLM type, a fixed run name and LangSmith trace options. | ||
const graphs: Array<{ | ||
connector: Connector; | ||
graph: DefaultAttackDiscoveryGraph; | ||
llmType: string | undefined; | ||
name: string; | ||
traceOptions: { | ||
projectName: string | undefined; | ||
tracers: LangChainTracer[]; | ||
}; | ||
}> = connectors.map((connector) => { | ||
const llmType = getLlmType(connector.actionTypeId); | ||
|
||
const traceOptions = { | ||
projectName, | ||
tracers: [ | ||
...getLangSmithTracer({ | ||
apiKey: langSmithApiKey, | ||
projectName, | ||
logger, | ||
}), | ||
], | ||
}; | ||
|
||
// Only `invoke` is mocked; it resolves to an empty object unless a test overrides it. | ||
const graph = { | ||
invoke: jest.fn().mockResolvedValue({}), | ||
} as unknown as DefaultAttackDiscoveryGraph; | ||
|
||
return { | ||
connector, | ||
graph, | ||
llmType, | ||
// Hard-coded run / evaluation ids; the same string appears in the expected error log message below. | ||
name: `testRunName - ${connector.name} - testEvaluationId - Attack discovery`, | ||
traceOptions, | ||
}; | ||
}); | ||
|
||
// Tests for runEvaluations(). langsmith's evaluate() is mocked in this file to call `predict` | ||
// immediately with overrides { errors: ['test-error'] }. | ||
describe('runEvaluations', () => { | ||
beforeEach(() => jest.clearAllMocks()); | ||
|
||
it('predict() invokes the graph with the expected overrides', async () => { | ||
await runEvaluations({ | ||
actionsClient, | ||
connectorTimeout, | ||
datasetName, | ||
evaluatorConnectorId, | ||
graphs, | ||
langSmithApiKey, | ||
logger, | ||
}); | ||
|
||
// The mocked graph must receive the overrides from the evaluate() stub, plus the tracers as | ||
// callbacks, the graph's run name, and the 'evaluation' / LLM-type tags. | ||
expect(graphs[0].graph.invoke).toHaveBeenCalledWith( | ||
{ | ||
errors: ['test-error'], | ||
}, | ||
{ | ||
callbacks: [...graphs[0].traceOptions.tracers], | ||
runName: graphs[0].name, | ||
tags: ['evaluation', graphs[0].llmType ?? ''], | ||
} | ||
); | ||
}); | ||
|
||
it('catches and logs errors that occur during evaluation', async () => { | ||
const error = new Error('Test error'); | ||
|
||
// Make the graph fail so the predict() call made by the evaluate() stub rejects. | ||
// NOTE(review): jest.clearAllMocks() does not reset mock implementations, so this rejection | ||
// would presumably leak into any test added after this one - confirm against the Jest config. | ||
(graphs[0].graph.invoke as jest.Mock).mockRejectedValue(error); | ||
|
||
await runEvaluations({ | ||
actionsClient, | ||
connectorTimeout, | ||
datasetName, | ||
evaluatorConnectorId, | ||
graphs, | ||
langSmithApiKey, | ||
logger, | ||
}); | ||
|
||
// runEvaluations() is expected to resolve (not throw) and to log the connector name, the LLM | ||
// type, the experiment name and the stringified error. | ||
expect(logger.error).toHaveBeenCalledWith( | ||
'Error evaluating connector "Gemini 1.5 Pro 002" (gemini), running experiment "testRunName - Gemini 1.5 Pro 002 - testEvaluationId - Attack discovery": Error: Test error' | ||
); | ||
}); | ||
}); |
Oops, something went wrong.