Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Lens] Quote csv values when contain separator char #155905

Merged
merged 5 commits into from
Apr 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion packages/kbn-generate-csv/src/get_export_settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,11 @@ export const getExportSettings = async (
]);

const escapeFormulaValues = config.escapeFormulaValues;
const escapeValue = createEscapeValue(quoteValues, escapeFormulaValues);
const escapeValue = createEscapeValue({
separator,
quoteValues,
escapeFormulaValues,
});
const bom = config.useByteOrderMarkEncoding ? CSV_BOM_CHARS : '';

return {
Expand Down
2 changes: 2 additions & 0 deletions src/plugins/data/common/exports/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@
export const CSV_FORMULA_CHARS = ['=', '+', '-', '@'];
export const nonAlphaNumRE = /[^a-zA-Z0-9]/;
export const allDoubleQuoteRE = /"/g;
// Non-exhaustive list of common delimiters; a value containing one of them has to be quoted
export const commonQuotedDelimiters = new Set([',', ';', '\t', ' ', '|']);
86 changes: 81 additions & 5 deletions src/plugins/data/common/exports/escape_value.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@ describe('escapeValue', function () {
describe('quoteValues is true', function () {
let escapeValue: (val: string) => string;
beforeEach(function () {
escapeValue = createEscapeValue(true, false);
escapeValue = createEscapeValue({
separator: ',',
quoteValues: true,
escapeFormulaValues: false,
});
});

it('should escape value with spaces', function () {
Expand Down Expand Up @@ -48,7 +52,11 @@ describe('escapeValue', function () {
describe('quoteValues is false', function () {
let escapeValue: (val: string) => string;
beforeEach(function () {
escapeValue = createEscapeValue(false, false);
escapeValue = createEscapeValue({
separator: ',',
quoteValues: false,
escapeFormulaValues: false,
});
});

it('should return the value unescaped', function () {
Expand All @@ -57,11 +65,15 @@ describe('escapeValue', function () {
});
});

describe('escapeValues', () => {
describe('escapeFormulaValues', () => {
describe('when true', () => {
let escapeValue: (val: string) => string;
beforeEach(function () {
escapeValue = createEscapeValue(true, true);
escapeValue = createEscapeValue({
separator: ',',
quoteValues: true,
escapeFormulaValues: true,
});
});

['@', '+', '-', '='].forEach((badChar) => {
Expand All @@ -76,7 +88,11 @@ describe('escapeValue', function () {
describe('when false', () => {
let escapeValue: (val: string) => string;
beforeEach(function () {
escapeValue = createEscapeValue(true, false);
escapeValue = createEscapeValue({
separator: ',',
quoteValues: true,
escapeFormulaValues: false,
});
});

['@', '+', '-', '='].forEach((badChar) => {
Expand All @@ -86,4 +102,64 @@ describe('escapeValue', function () {
});
});
});

// Separator-aware quoting: checks how values that contain the configured CSV separator
// are rendered, for plain strings as well as for values that are stringified via toString().
describe('csvSeparator', () => {
// A plain string containing the separator is wrapped in double quotes when quoteValues is on.
it('should escape when text contains the separator char with quotes enabled', () => {
const escapeValue = createEscapeValue({
separator: ';',
quoteValues: true,
escapeFormulaValues: false,
});
expect(escapeValue('a;b')).to.be('"a;b"');
});

// With quoteValues off the same string is expected back untouched.
it('should not escape when text contains the separator char if quotes are disabled', () => {
const escapeValue = createEscapeValue({
separator: ';',
quoteValues: false,
escapeFormulaValues: false,
});
expect(escapeValue('a;b')).to.be('a;b');
});

// Each separator here is a comma padded with whitespace on one or both sides.
// The value is an object exposing toString(), so it is not a string at call time.
it.each([', ', ' , ', ' ,'])(
'should handle also delimiters that contains white spaces "%p"',
(separator) => {
const escapeValue = createEscapeValue({
separator,
quoteValues: true,
escapeFormulaValues: false,
});
const nonStringValue = {
toString() {
return `a${separator}b`;
},
};
expect(escapeValue(nonStringValue)).to.be(`"a${separator}b"`);
}
);

// An array stringifies to its items joined by ",", which collides with the "," separator.
it('should handle also non-string values (array)', () => {
const escapeValue = createEscapeValue({
separator: ',',
quoteValues: true,
escapeFormulaValues: true,
});
expect(escapeValue(['a', 'b'])).to.be('"a,b"');
});

// ":" is the separator here and the stringified value contains it, yet the expected
// output is unquoted.
it('should not quote non-string values, even if escapable, when separator is not in the quoted delimiters list', () => {
const escapeValue = createEscapeValue({
separator: ':',
quoteValues: true,
escapeFormulaValues: true,
});
const nonStringValue = {
toString() {
return 'a:b';
},
};
expect(escapeValue(nonStringValue)).to.be('a:b');
});
});
});
35 changes: 26 additions & 9 deletions src/plugins/data/common/exports/escape_value.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,18 @@
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
import { allDoubleQuoteRE, nonAlphaNumRE } from './constants';
import { allDoubleQuoteRE, commonQuotedDelimiters, nonAlphaNumRE } from './constants';
import { cellHasFormulas } from './formula_checks';

type RawValue = string | object | null | undefined;

/**
 * Decide whether an already-stringified value has to be wrapped in quotes because it
 * contains the CSV delimiter, e.g. free text or number formatting (1143 => 1,143).
 *
 * Whitespace around the delimiter is ignored when matching, so ", " is treated as ",".
 * A delimiter made only of whitespace (tab or a single space) is kept as-is: trimming it
 * would leave "", which is not one of the `commonQuotedDelimiters`, and values containing
 * such a delimiter would never be quoted.
 *
 * @param value the stringified cell value
 * @param delimiter the configured CSV separator
 * @returns true when `value` contains the delimiter and the delimiter is one of the
 * `commonQuotedDelimiters`
 */
function shouldBeQuoted(value: string, delimiter: string): boolean {
  // `||` is intentional: fall back to the raw delimiter only when trimming leaves ""
  const effectiveDelimiter = delimiter.trim() || delimiter;
  return commonQuotedDelimiters.has(effectiveDelimiter) && value.includes(effectiveDelimiter);
}

/**
* Create a function that will escape CSV values like "=", "@" and "+" with a
* "'". This will also place CSV values in "" if contain non-alphanumeric chars.
Expand All @@ -21,17 +28,27 @@ type RawValue = string | object | null | undefined;
*
* See OWASP: https://www.owasp.org/index.php/CSV_Injection.
*/
export function createEscapeValue(
quoteValues: boolean,
escapeFormulas: boolean
): (val: RawValue) => string {
export function createEscapeValue({
separator,
quoteValues,
escapeFormulaValues,
}: {
separator: string;
quoteValues: boolean;
escapeFormulaValues: boolean;
}): (val: RawValue) => string {
return function escapeValue(val: RawValue) {
if (val && typeof val === 'string') {
const formulasEscaped = escapeFormulas && cellHasFormulas(val) ? "'" + val : val;
if (quoteValues && nonAlphaNumRE.test(formulasEscaped)) {
return `"${formulasEscaped.replace(allDoubleQuoteRE, '""')}"`;
const formulasEscaped = escapeFormulaValues && cellHasFormulas(val) ? "'" + val : val;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR fixes an escaping problem for the `,` separator. But here we allow any string to be passed as the separator, even the empty string "".
I am not sure we want to wrap values in quotes if the separator is empty or the letter a; that would be quite greedy 😊

Can we have a list of chars that would require wrapping? Something like

const doWrapWithQuotes = [',', ';'].includes(separator);
// Only wrap if flag is true

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. Another possibility is to enforce it via TypeScript with something like:

function createEscapeValue({
  separator,
  quoteValues,
  escapeFormulaValues,
}: {
  separator: ',' | ';';
  quoteValues: boolean;
  escapeFormulaValues: boolean;
})

Copy link
Contributor Author

@dej611 dej611 Apr 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From a quick search the common delimiters for CSV and similar formats are:

const delimiters = [',', ';', '\t',' ', '|'];

I can change the PR to wrap the value only if these delimiters are used. Would it work for you @sebelga ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've checked where the separator value comes from. It comes from the `csv:separator` setting and there is no validation of that value, so relying on TypeScript alone will not work here.
I think checking for the most common separators is safe and doesn't break sensible configurations.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think we don't want to rely only on TS for this kind of validation, which modifies values.

I can change the PR to wrap the value only if these delimiters are used.

That'd be great 👍

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had to refactor the code a bit after adding some more unit tests: strings are always quoted when the quote flag is enabled AND they contain any char that is not a letter or a number.
So the fix applies only to non-string values.
The only thing I'm not 100% sure about is the case where the quote flag is off BUT the string would pass the shouldBeQuotable check: would it be OK to quote the string value in this case, ignoring the user's choice?

I would argue that it would be safer to quote it anyway, but I see that some users might not like it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also guess it is the safest bet 👍

if (quoteValues) {
if (nonAlphaNumRE.test(formulasEscaped)) {
return `"${formulasEscaped.replace(allDoubleQuoteRE, '""')}"`;
}
}
}
return val == null ? '' : val.toString();
// raw multi-terms are stringified as T1,T2,T3 so check if the final value contains the
// csv separator before returning (usually for raw values)
const stringVal = val == null ? '' : val.toString();
return quoteValues && shouldBeQuoted(stringVal, separator) ? `"${stringVal}"` : stringVal;
};
}
12 changes: 12 additions & 0 deletions src/plugins/data/common/exports/export_csv.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,16 @@ describe('CSV exporter', () => {
})
).toMatch('columnOne\r\n"\'=1"\r\n');
});

test('should escape text with csvSeparator char in it', () => {
const datatable = getDataTable();
datatable.rows[0].col1 = 'a,b';
expect(
datatableToCSV(datatable, {
...getDefaultOptions(),
escapeFormulaValues: true,
formatFactory: () => ({ convert: (v: unknown) => v } as FieldFormat),
})
).toMatch('columnOne\r\n"a,b"\r\n');
});
});
6 changes: 5 additions & 1 deletion src/plugins/data/common/exports/export_csv.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@ export function datatableToCSV(
{ columns, rows }: Datatable,
{ csvSeparator, quoteValues, formatFactory, raw, escapeFormulaValues }: CSVOptions
) {
const escapeValues = createEscapeValue(quoteValues, escapeFormulaValues);
const escapeValues = createEscapeValue({
separator: csvSeparator,
quoteValues,
escapeFormulaValues,
});
// Build the header row by its names
const header = columns.map((col) => escapeValues(col.name));

Expand Down
10 changes: 8 additions & 2 deletions src/plugins/discover/public/utils/convert_value_to_string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ interface ConvertedResult {
withFormula: boolean;
}

const separator = ',';

export const convertValueToString = ({
rowIndex,
rows,
Expand Down Expand Up @@ -77,7 +79,7 @@ export const convertValueToString = ({

return stringify(formattedValue, disableMultiline) || '';
})
.join(', ');
.join(`${separator} `);

return {
formattedString: formatted,
Expand All @@ -97,7 +99,11 @@ const stringify = (val: object | string, disableMultiline: boolean) => {
return disableMultiline ? JSON.stringify(val) : JSON.stringify(val, null, 2);
};

const escapeValueFn = createEscapeValue(true, true);
const escapeValueFn = createEscapeValue({
separator,
quoteValues: true,
escapeFormulaValues: true,
});

const escapeFormattedValue = (formattedValue: string): string => {
return escapeValueFn(formattedValue);
Expand Down