From ebaa751ad101d73ece1695bfbdcfdd421eef3485 Mon Sep 17 00:00:00 2001 From: Ilya Nikokoshev Date: Thu, 8 Aug 2024 12:00:09 +0000 Subject: [PATCH] [Automatic Import] Try parsing samples as both NDJSON and JSON (#190046) ## Summary Changes the logic for parsing log samples in the Automatic Import plugin. ## Explanation Previously we relied on the `fileType` as reported by the browser to differentiate between JSON and newline-delimited JSON (NDJSON) format. This required the user to save the file with a correct extension and was broken for some combinations of browsers + operating systems. The fix consists in optimistically attempting to parse the sample as NDJSON; if that fails we attempt JSON. We consider separately a case of file that consists of one JSON line, since it's both a valid JSON and a valid NDJSON. This makes `fileType` parameter unused, so we also remove it. We also use the new error message (in the spotlight of the image) when parsing fails: https://github.com/user-attachments/assets/334a6424-21dd-4fb4-8ce9-4536894c396f Note that the "logs sample" is consistent with the existing texts: defaultMessage: 'The logs sample file is not an array', ... defaultMessage: 'The logs sample file is empty', ... defaultMessage: 'The logs sample file contains non-object entries', This will also make it easier to upload multiple files at once, shall we decide to implement such a feature. ### Risk Matrix There are performance considerations, but they are fairly minimal: - Where the file was previously expected to have NDJSON format, we continue to parse it as such, without spending any additional time. - Where the file was previously expected to have JSON format, we now first try to parse it as NDJSON. This requires additional time to fail, but it should be fast (after the first line). - Where the file is actually malformed, previously we tried to parse it once, but now will try to parse it twice. This increases the time to fail, which can be significant if the file is a JSON that is malformed towards the end. However, we expect this to be a rare scenario. | Risk | Probability | Severity | Mitigation/Notes | |---------------------------|-------------|----------|-------------------------| | Longer time to process the log in the UI. | Low | Low | As explained, only likely to be significant for malformed files. | --------- Co-authored-by: Elastic Machine Co-authored-by: Marius Iversen --- .../sample_logs_input.test.tsx | 15 +++++--- .../data_stream_step/sample_logs_input.tsx | 36 +++++++++++-------- .../steps/data_stream_step/translations.ts | 11 +++--- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.test.tsx b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.test.tsx index af21526b7f14d..a137933afed3f 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.test.tsx +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.test.tsx @@ -40,7 +40,7 @@ describe('SampleLogsInput', () => { describe('when uploading a json logs sample', () => { const type = 'application/json'; - describe('when the file is valid', () => { + describe('when the file is valid json', () => { const logsSampleRaw = `{"message":"test message 1"},{"message":"test message 2"}`; beforeEach(async () => { await changeFile(input, new File([`[${logsSampleRaw}]`], 'test.json', { type })); @@ -73,9 +73,11 @@ describe('SampleLogsInput', () => { describe('when the file is invalid', () => { describe.each([ - ['[{"message":"test message 1"}', `The logs sample file has not a valid ${type} format`], + [ + '[{"message":"test message 1"}', + 'Cannot parse the logs sample file as either a JSON or NDJSON file', + ], ['["test message 1"]', 'The logs sample file contains non-object entries'], - ['{"message":"test message 1"}', 'The logs sample file is not an array'], ['[]', 'The logs sample file is empty'], ])('with logs content %s', (logsSample, errorMessage) => { beforeEach(async () => { @@ -98,7 +100,7 @@ describe('SampleLogsInput', () => { describe('when setting a ndjson logs sample', () => { const type = 'application/x-ndjson'; - describe('when the file is valid', () => { + describe('when the file is valid ndjson', () => { const logsSampleRaw = `{"message":"test message 1"}\n{"message":"test message 2"}`; beforeEach(async () => { await changeFile(input, new File([logsSampleRaw], 'test.json', { type })); @@ -131,7 +133,10 @@ describe('SampleLogsInput', () => { describe('when the file is invalid', () => { describe.each([ - ['{"message":"test message 1"]', `The logs sample file has not a valid ${type} format`], + [ + '{"message":"test message 1"}\n{"message": }', + 'Cannot parse the logs sample file as either a JSON or NDJSON file', + ], ['"test message 1"', 'The logs sample file contains non-object entries'], ['', 'The logs sample file is empty'], ])('with logs content %s', (logsSample, errorMessage) => { diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.tsx b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.tsx index 072669a6bdd1d..cb4f735cc707c 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.tsx +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.tsx @@ -19,24 +19,34 @@ const MaxLogsSampleRows = 10; * Parse the logs sample file content (json or ndjson) and return the parsed logs sample */ const parseLogsContent = ( - fileContent: string | undefined, - fileType: string + fileContent: string | undefined ): { error?: string; isTruncated?: boolean; logsSampleParsed?: string[] } => { if (fileContent == null) { return { error: i18n.LOGS_SAMPLE_ERROR.CAN_NOT_READ }; } let parsedContent; try { - if (fileType === 'application/json') { + parsedContent = fileContent + .split('\n') + .filter((line) => line.trim() !== '') + .map((line) => JSON.parse(line)); + + // Special case for files that can be parsed as both JSON and NDJSON: + // for a one-line array [] -> extract its contents + // for a one-line object {} -> do nothing + if ( + Array.isArray(parsedContent) && + parsedContent.length === 1 && + Array.isArray(parsedContent[0]) + ) { + parsedContent = parsedContent[0]; + } + } catch (parseNDJSONError) { + try { parsedContent = JSON.parse(fileContent); - } else if (fileType === 'application/x-ndjson') { - parsedContent = fileContent - .split('\n') - .filter((line) => line.trim() !== '') - .map((line) => JSON.parse(line)); + } catch (parseJSONError) { + return { error: i18n.LOGS_SAMPLE_ERROR.CAN_NOT_PARSE }; } - } catch (_) { - return { error: i18n.LOGS_SAMPLE_ERROR.FORMAT(fileType) }; } if (!Array.isArray(parsedContent)) { @@ -81,10 +91,7 @@ export const SampleLogsInput = React.memo(({ integrationSe const reader = new FileReader(); reader.onload = function (e) { const fileContent = e.target?.result as string | undefined; // We can safely cast to string since we call `readAsText` to load the file. - const { error, isTruncated, logsSampleParsed } = parseLogsContent( - fileContent, - logsSampleFile.type - ); + const { error, isTruncated, logsSampleParsed } = parseLogsContent(fileContent); setIsParsing(false); setSampleFileError(error); if (error) { @@ -137,7 +144,6 @@ export const SampleLogsInput = React.memo(({ integrationSe onChange={onChangeLogsSample} display="large" aria-label="Upload logs sample file" - accept="application/json,application/x-ndjson" isLoading={isParsing} data-test-subj="logsSampleFilePicker" data-loading={isParsing} diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/translations.ts b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/translations.ts index e4cc004e2673b..0af9f803f71fc 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/translations.ts +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/translations.ts @@ -126,11 +126,12 @@ export const LOGS_SAMPLE_ERROR = { defaultMessage: 'Failed to read the logs sample file', } ), - FORMAT: (fileType: string) => - i18n.translate('xpack.integrationAssistant.step.dataStream.logsSample.errorFormat', { - values: { fileType }, - defaultMessage: 'The logs sample file has not a valid {fileType} format', - }), + CAN_NOT_PARSE: i18n.translate( + 'xpack.integrationAssistant.step.dataStream.logsSample.errorCanNotParse', + { + defaultMessage: 'Cannot parse the logs sample file as either a JSON or NDJSON file', + } + ), NOT_ARRAY: i18n.translate('xpack.integrationAssistant.step.dataStream.logsSample.errorNotArray', { defaultMessage: 'The logs sample file is not an array', }),