-
Notifications
You must be signed in to change notification settings - Fork 8.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[8.15] [Automatic Import] Add support for handling unstructured syslo…
…g samples (#192817) (#193158) # Backport This will backport the following commits from `main` to `8.15`: - [[Automatic Import] Add support for handling unstructured syslog samples (#192817)](#192817) <!--- Backport version: 9.4.3 --> ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport) <!--BACKPORT [{"author":{"name":"Bharat Pasupula","email":"[email protected]"},"sourceCommit":{"committedDate":"2024-09-17T12:28:01Z","message":"[Automatic Import] Add support for handling unstructured syslog samples (#192817)\n\n## Summary\r\n\r\nThis PR handles the `unstructured` syslog samples in Automatic Import.\r\n\r\nExamples of unstructured samples would be:\r\n\r\n```\r\n<34>Oct 11 00:14:05 mymachine su: 'su root' failed for user on /dev/pts/8\r\n<34>Dec 11 00:14:43 yourmachine su: 'su root' failed for someone on /dev/pts/5\r\n<34>Apr 11 00:14:05 mymachine su: 'su root' failed for otheruser on /dev/pts/3\r\n```\r\n\r\n\r\nhttps://github.com/user-attachments/assets/d1381ac9-4889-42cf-b3c1-d1b7a88def02\r\n\r\n\r\n### Checklist\r\n\r\n- [x] [Unit or functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere updated or added to match the most common scenarios\r\n\r\n### For maintainers\r\n\r\n- [ ] This was checked for breaking API changes and was [labeled\r\nappropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)","sha":"77fe423f7b621b2ece51ca44544c430256437802","branchLabelMapping":{"^v9.0.0$":"main","^v8.16.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:enhancement","enhancement","v9.0.0","backport:prev-major","Team:Security-Scalability","Feature:AutomaticImport"],"title":"[Automatic Import] Add support for handling unstructured syslog samples","number":192817,"url":"https://github.com/elastic/kibana/pull/192817","mergeCommit":{"message":"[Automatic Import] Add support for handling unstructured 
syslog samples (#192817)\n\n## Summary\r\n\r\nThis PR handles the `unstructured` syslog samples in Automatic Import.\r\n\r\nExamples of unstructured samples would be:\r\n\r\n```\r\n<34>Oct 11 00:14:05 mymachine su: 'su root' failed for user on /dev/pts/8\r\n<34>Dec 11 00:14:43 yourmachine su: 'su root' failed for someone on /dev/pts/5\r\n<34>Apr 11 00:14:05 mymachine su: 'su root' failed for otheruser on /dev/pts/3\r\n```\r\n\r\n\r\nhttps://github.com/user-attachments/assets/d1381ac9-4889-42cf-b3c1-d1b7a88def02\r\n\r\n\r\n### Checklist\r\n\r\n- [x] [Unit or functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere updated or added to match the most common scenarios\r\n\r\n### For maintainers\r\n\r\n- [ ] This was checked for breaking API changes and was [labeled\r\nappropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)","sha":"77fe423f7b621b2ece51ca44544c430256437802"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/192817","number":192817,"mergeCommit":{"message":"[Automatic Import] Add support for handling unstructured syslog samples (#192817)\n\n## Summary\r\n\r\nThis PR handles the `unstructured` syslog samples in Automatic Import.\r\n\r\nExamples of unstructured samples would be:\r\n\r\n```\r\n<34>Oct 11 00:14:05 mymachine su: 'su root' failed for user on /dev/pts/8\r\n<34>Dec 11 00:14:43 yourmachine su: 'su root' failed for someone on /dev/pts/5\r\n<34>Apr 11 00:14:05 mymachine su: 'su root' failed for otheruser on /dev/pts/3\r\n```\r\n\r\n\r\nhttps://github.com/user-attachments/assets/d1381ac9-4889-42cf-b3c1-d1b7a88def02\r\n\r\n\r\n### Checklist\r\n\r\n- [x] [Unit or functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere updated or added to 
match the most common scenarios\r\n\r\n### For maintainers\r\n\r\n- [ ] This was checked for breaking API changes and was [labeled\r\nappropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)","sha":"77fe423f7b621b2ece51ca44544c430256437802"}}]}] BACKPORT--> Co-authored-by: Bharat Pasupula <[email protected]>
- Loading branch information
1 parent
47f8303
commit 74d5d30
Showing
22 changed files
with
633 additions
and
15 deletions.
There are no files selected for viewing
25 changes: 25 additions & 0 deletions
25
x-pack/plugins/integration_assistant/__jest__/fixtures/unstructured.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
// Placeholder graph-state fixture: errors.test.ts assigns it to an UnstructuredLogState | ||
// and passes it as `state` to handleUnstructuredError. All values are dummy test data. | ||
export const unstructuredLogState = { | ||
lastExecutedChain: 'testchain', | ||
packageName: 'testPackage', | ||
dataStreamName: 'testDatastream', | ||
grokPatterns: ['%{GREEDYDATA:message}'], | ||
logSamples: ['dummy data'], | ||
jsonSamples: ['{"message":"dummy data"}'], | ||
finalized: false, | ||
ecsVersion: 'testVersion', | ||
// Non-empty errors object (a single key). | ||
errors: { test: 'testerror' }, | ||
additionalProcessors: [], | ||
}; | ||
|
||
// Canned model answer: errors.test.ts JSON-stringifies this object as the FakeLLM response | ||
// and compares its `grok_patterns` with the patterns returned by the node under test. | ||
export const unstructuredLogResponse = { | ||
grok_patterns: [ | ||
'####<%{MONTH} %{MONTHDAY}, %{YEAR} %{TIME} (?:AM|PM) %{WORD:timezone}> <%{WORD:log_level}> <%{WORD:component}> <%{DATA:hostname}> <%{DATA:server_name}> <%{DATA:thread_info}> <%{DATA:user}> <%{DATA:empty_field}> <%{DATA:empty_field2}> <%{NUMBER:timestamp}> <%{DATA:message_id}> <%{GREEDYDATA:message}>', | ||
], | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
27 changes: 27 additions & 0 deletions
27
x-pack/plugins/integration_assistant/server/graphs/unstructured/constants.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
/**
 * Example of the answer shape for an unstructured syslog sample: the RFC the
 * sample follows, a regex for its header, and the grok patterns to apply.
 *
 * NOTE(review): the consumer is not visible in this file; presumably the object
 * is serialized into the grok-generation prompt as the example answer — confirm.
 */
export const GROK_EXAMPLE_ANSWER = {
  // RFC 5424 is the syslog protocol RFC; its ISO 8601 timestamp is what the regex below matches.
  rfc: 'RFC5424',
  // Backslashes are doubled so the string carries real regex escapes (\d, \s, \w, \.);
  // without them the pattern would look for the literal letters "d", "s" and "w".
  regex:
    '/(?:(\\d{4}[-]\\d{2}[-]\\d{2}[T]\\d{2}[:]\\d{2}[:]\\d{2}(?:\\.\\d{1,6})?(?:[+-]\\d{2}[:]\\d{2}|Z)?)|-)\\s(?:([\\w][\\w\\d.@-]*)|-)\\s(.*)$/',
  grok_patterns: ['%{WORD:key1}:%{WORD:value1};%{WORD:key2}:%{WORD:value2}:%{GREEDYDATA:message}'],
};
|
||
// Example answer that handleUnstructuredError (error.ts) JSON-stringifies into the | ||
// `ex_answer` input of GROK_ERROR_PROMPT. | ||
// NOTE(review): `TIMESTAMP` does not look like a stock grok pattern name | ||
// (cf. TIMESTAMP_ISO8601, SYSLOGTIMESTAMP) — confirm it is intentional in this example. | ||
export const GROK_ERROR_EXAMPLE_ANSWER = { | ||
grok_patterns: [ | ||
'%{TIMESTAMP:timestamp}:%{WORD:value1};%{WORD:key2}:%{WORD:value2}:%{GREEDYDATA:message}', | ||
], | ||
}; | ||
|
||
// `append` processor definition that adds a failure description to `error.message`, | ||
// built from the `_ingest.on_failure_*` template fields. | ||
// NOTE(review): the value is wrapped in `{% raw %}` / `{% endraw %}` template markers and the | ||
// consumer of this object is not visible here — confirm the markers are rendered or stripped | ||
// before the processor reaches a pipeline, otherwise they end up in the message text. | ||
export const onFailure = { | ||
append: { | ||
field: 'error.message', | ||
value: | ||
'{% raw %}Processor {{{_ingest.on_failure_processor_type}}} with tag {{{_ingest.on_failure_processor_tag}}} in pipeline {{{_ingest.on_failure_pipeline}}} failed with message: {{{_ingest.on_failure_message}}}{% endraw %}', | ||
}, | ||
}; |
32 changes: 32 additions & 0 deletions
32
x-pack/plugins/integration_assistant/server/graphs/unstructured/error.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { JsonOutputParser } from '@langchain/core/output_parsers'; | ||
import type { UnstructuredLogState } from '../../types'; | ||
import type { HandleUnstructuredNodeParams } from './types'; | ||
import { GROK_ERROR_PROMPT } from './prompts'; | ||
import { GROK_ERROR_EXAMPLE_ANSWER } from './constants'; | ||
|
||
/**
 * Graph node that asks the model for replacement grok patterns.
 *
 * The current patterns, the recorded errors and an example answer are each
 * pretty-printed as JSON and fed to GROK_ERROR_PROMPT; the model output is
 * parsed as JSON and its `grok_patterns` field becomes the new `grokPatterns`.
 *
 * @param state current graph state; `grokPatterns` and `errors` are read
 * @param model chat model the prompt is piped into
 * @returns partial state with the new patterns and `lastExecutedChain` set to 'unstructuredError'
 */
export async function handleUnstructuredError({
  state,
  model,
}: HandleUnstructuredNodeParams): Promise<Partial<UnstructuredLogState>> {
  // prompt -> model -> JSON parser
  const repairChain = GROK_ERROR_PROMPT.pipe(model).pipe(new JsonOutputParser());

  const promptInputs = {
    current_pattern: JSON.stringify(state.grokPatterns, null, 2),
    errors: JSON.stringify(state.errors, null, 2),
    ex_answer: JSON.stringify(GROK_ERROR_EXAMPLE_ANSWER, null, 2),
  };
  const { grok_patterns: grokPatterns } = await repairChain.invoke(promptInputs);

  return { grokPatterns, lastExecutedChain: 'unstructuredError' };
}
40 changes: 40 additions & 0 deletions
40
x-pack/plugins/integration_assistant/server/graphs/unstructured/errors.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { FakeLLM } from '@langchain/core/utils/testing'; | ||
import { handleUnstructuredError } from './error'; | ||
import type { UnstructuredLogState } from '../../types'; | ||
import { | ||
unstructuredLogState, | ||
unstructuredLogResponse, | ||
} from '../../../__jest__/fixtures/unstructured'; | ||
import { | ||
ActionsClientChatOpenAI, | ||
ActionsClientSimpleChatModel, | ||
} from '@kbn/langchain/server/language_models'; | ||
import { IScopedClusterClient } from '@kbn/core-elasticsearch-server'; | ||
|
||
// Fake LLM that always replies with the serialized grok-pattern fixture.
const model = new FakeLLM({
  response: JSON.stringify(unstructuredLogResponse, null, 2),
}) as unknown as ActionsClientChatOpenAI | ActionsClientSimpleChatModel;

const state: UnstructuredLogState = unstructuredLogState;

describe('Testing unstructured error handling node', () => {
  // Minimal cluster-client stub exposing only a mocked ingest.simulate.
  const simulate = jest.fn();
  const client = {
    asCurrentUser: { ingest: { simulate } },
  } as unknown as IScopedClusterClient;

  it('handleUnstructuredError()', async () => {
    const { grokPatterns, lastExecutedChain } = await handleUnstructuredError({
      state,
      model,
      client,
    });

    expect(grokPatterns).toStrictEqual(unstructuredLogResponse.grok_patterns);
    expect(lastExecutedChain).toBe('unstructuredError');
  });
});
39 changes: 39 additions & 0 deletions
39
x-pack/plugins/integration_assistant/server/graphs/unstructured/graph.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { | ||
ActionsClientChatOpenAI, | ||
ActionsClientSimpleChatModel, | ||
} from '@kbn/langchain/server/language_models'; | ||
import { FakeLLM } from '@langchain/core/utils/testing'; | ||
import { getUnstructuredGraph } from './graph'; | ||
import { IScopedClusterClient } from '@kbn/core-elasticsearch-server'; | ||
|
||
// Fake LLM with a fixed reply; this suite only builds the graph and never invokes it.
const model = new FakeLLM({
  response: '{"log_type": "structured"}',
}) as unknown as ActionsClientChatOpenAI | ActionsClientSimpleChatModel;

describe('UnstructuredGraph', () => {
  // Minimal cluster-client stub exposing only a mocked ingest.simulate.
  const client = {
    asCurrentUser: {
      ingest: {
        simulate: jest.fn(),
      },
    },
  } as unknown as IScopedClusterClient;

  describe('Compiling and Running', () => {
    it('Ensures that the graph compiles', async () => {
      // getUnstructuredGraph compiles the langgraph workflow, which errors if the graph is
      // malformed (for example, a node with no next step).
      // `.resolves` makes a rejection fail the test with the original error, so the test does
      // not depend on a global `fail()`, which the jest-circus runner does not define.
      await expect(getUnstructuredGraph({ model, client })).resolves.toBeDefined();
    });
  });
});
112 changes: 112 additions & 0 deletions
112
x-pack/plugins/integration_assistant/server/graphs/unstructured/graph.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import type { StateGraphArgs } from '@langchain/langgraph'; | ||
import { StateGraph, END, START } from '@langchain/langgraph'; | ||
import type { UnstructuredLogState } from '../../types'; | ||
import { handleUnstructured } from './unstructured'; | ||
import type { UnstructuredGraphParams, UnstructuredBaseNodeParams } from './types'; | ||
import { handleUnstructuredError } from './error'; | ||
import { handleUnstructuredValidate } from './validate'; | ||
|
||
/**
 * Reducer shared by every channel: use the incoming update when one is given
 * (anything other than null/undefined), otherwise keep the stored value.
 */
function keepLatest<T>(current: T, update?: T): T {
  return update ?? current;
}

/**
 * Channel definitions for the unstructured-log graph state. Every channel uses
 * the same "latest value wins" reducer and only differs in its default.
 */
const graphState: StateGraphArgs<UnstructuredLogState>['channels'] = {
  lastExecutedChain: { value: keepLatest, default: () => '' },
  packageName: { value: keepLatest, default: () => '' },
  dataStreamName: { value: keepLatest, default: () => '' },
  logSamples: { value: keepLatest, default: () => [] },
  grokPatterns: { value: keepLatest, default: () => [] },
  jsonSamples: { value: keepLatest, default: () => [] },
  finalized: { value: keepLatest, default: () => false },
  errors: { value: keepLatest, default: () => [] },
  additionalProcessors: { value: keepLatest, default: () => [] },
  ecsVersion: { value: keepLatest, default: () => '' },
};
|
||
/**
 * Entry node: flags the run as not finalized and records itself as the last
 * executed chain. The incoming state is not read.
 */
function modelInput({ state }: UnstructuredBaseNodeParams): Partial<UnstructuredLogState> {
  const update: Partial<UnstructuredLogState> = {
    finalized: false,
    lastExecutedChain: 'modelInput',
  };
  return update;
}
|
||
/**
 * Exit node: flags the run as finalized, carries over the additional
 * processors already in the state and records itself as the last executed chain.
 */
function modelOutput({ state }: UnstructuredBaseNodeParams): Partial<UnstructuredLogState> {
  const { additionalProcessors } = state;
  return { finalized: true, additionalProcessors, lastExecutedChain: 'modelOutput' };
}
|
||
/**
 * Picks the node that follows validation: 'handleUnstructuredError' when
 * `state.errors` has at least one key, 'modelOutput' when it has none.
 */
function validationRouter({ state }: UnstructuredBaseNodeParams): string {
  const hasErrors = Object.keys(state.errors).length !== 0;
  return hasErrors ? 'handleUnstructuredError' : 'modelOutput';
}
|
||
// Builds and compiles the langgraph StateGraph for the unstructured-log flow. | ||
// Wiring: START -> modelInput -> handleUnstructured -> handleUnstructuredValidate; after | ||
// validation, validationRouter sends the run either to modelOutput (-> END) or to | ||
// handleUnstructuredError, which feeds back into handleUnstructuredValidate. | ||
// `model` and `client` are forwarded to the three handle* nodes. Returns the compiled graph. | ||
// NOTE(review): no retry cap for the error -> validate loop is visible in this function — | ||
// confirm what bounds it when the model keeps producing failing patterns. | ||
export async function getUnstructuredGraph({ model, client }: UnstructuredGraphParams) { | ||
const workflow = new StateGraph({ | ||
channels: graphState, | ||
}) | ||
.addNode('modelInput', (state: UnstructuredLogState) => modelInput({ state })) | ||
.addNode('modelOutput', (state: UnstructuredLogState) => modelOutput({ state })) | ||
.addNode('handleUnstructuredError', (state: UnstructuredLogState) => | ||
handleUnstructuredError({ state, model, client }) | ||
) | ||
.addNode('handleUnstructured', (state: UnstructuredLogState) => | ||
handleUnstructured({ state, model, client }) | ||
) | ||
.addNode('handleUnstructuredValidate', (state: UnstructuredLogState) => | ||
handleUnstructuredValidate({ state, model, client }) | ||
) | ||
.addEdge(START, 'modelInput') | ||
.addEdge('modelInput', 'handleUnstructured') | ||
.addEdge('handleUnstructured', 'handleUnstructuredValidate') | ||
// The router's return value is looked up in the mapping below to pick the next node. | ||
.addConditionalEdges( | ||
'handleUnstructuredValidate', | ||
(state: UnstructuredLogState) => validationRouter({ state }), | ||
{ | ||
handleUnstructuredError: 'handleUnstructuredError', | ||
modelOutput: 'modelOutput', | ||
} | ||
) | ||
// Regenerated patterns go back through validation. | ||
.addEdge('handleUnstructuredError', 'handleUnstructuredValidate') | ||
.addEdge('modelOutput', END); | ||
|
||
const compiledUnstructuredGraph = workflow.compile(); | ||
return compiledUnstructuredGraph; | ||
} |
7 changes: 7 additions & 0 deletions
7
x-pack/plugins/integration_assistant/server/graphs/unstructured/index.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
export { getUnstructuredGraph } from './graph'; |
Oops, something went wrong.