From a1cac97538a42f36bd9134dabb94c198745e878a Mon Sep 17 00:00:00 2001 From: ashitsalesforce Date: Thu, 12 Dec 2024 19:04:47 -0800 Subject: [PATCH] output CSV with BOM if output format is UTF-8 or UTF-16 output CSV with BOM if output format is UTF-8 or UTF-16 --- .../dataloader/config/AppConfig.java | 5 ++- .../dataloader/dao/csv/CSVFileWriter.java | 31 ++++++++++++++----- src/main/resources/labels.properties | 1 + 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/salesforce/dataloader/config/AppConfig.java b/src/main/java/com/salesforce/dataloader/config/AppConfig.java index 1579c9b0..211501e0 100644 --- a/src/main/java/com/salesforce/dataloader/config/AppConfig.java +++ b/src/main/java/com/salesforce/dataloader/config/AppConfig.java @@ -334,6 +334,7 @@ public class AppConfig { public static final String PROP_EXTRACT_SOQL = "sfdc.extractionSOQL"; //$NON-NLS-1$ public static final String PROP_SORT_EXTRACT_FIELDS = "sfdc.sortExtractionFields"; //$NON-NLS-1$ public static final String PROP_EXTRACT_ALL_CAPS_HEADERS="sfdc.extraction.allCapsHeaders"; + public static final String PROP_EXTRACT_CSV_OUTPUT_BOM="sfdc.extraction.outputByteOrderMark"; public static final String PROP_LOAD_PRESERVE_WHITESPACE_IN_RICH_TEXT = "sfdc.load.preserveWhitespaceInRichText"; // @@ -780,6 +781,8 @@ private void setDefaults(Map cliOptionsMap) { setDefaultValue(PROP_GMT_FOR_DATE_FIELD_VALUE, false); setDefaultValue(PROP_SAVE_ALL_PROPS, false); setDefaultValue(PROP_EXTRACT_ALL_CAPS_HEADERS, false); + setDefaultValue(PROP_EXTRACT_CSV_OUTPUT_BOM, true); + } /** @@ -1749,7 +1752,7 @@ public String getCsvEncoding(boolean isWrite) { return charset; } - private static String defaultCharsetForCsvReadWrite = null; + private static String defaultCharsetForCsvReadWrite = Charset.defaultCharset().name(); private synchronized static String getDefaultCharsetForCsvReadWrite() { if (defaultCharsetForCsvReadWrite != null) { return defaultCharsetForCsvReadWrite; diff --git 
a/src/main/java/com/salesforce/dataloader/dao/csv/CSVFileWriter.java b/src/main/java/com/salesforce/dataloader/dao/csv/CSVFileWriter.java index 5926f5e7..0bcf1fca 100644 --- a/src/main/java/com/salesforce/dataloader/dao/csv/CSVFileWriter.java +++ b/src/main/java/com/salesforce/dataloader/dao/csv/CSVFileWriter.java @@ -28,9 +28,10 @@ import java.io.BufferedWriter; import java.io.FileOutputStream; -import java.io.FileWriter; import java.io.IOException; import java.io.OutputStreamWriter; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -70,18 +71,24 @@ public class CSVFileWriter implements DataWriter { /** * encoding contains a value for output character encoding, blank indicates "use default" */ - private final String encoding; + private String encoding; /** * If capitalizedHeadings is true, output header row in caps */ private boolean capitalizedHeadings = false; private final char columnDelimiter; + private AppConfig appConfig; public CSVFileWriter(String fileName, AppConfig appConfig, String columnDelimiterStr) { this.fileName = fileName; + this.appConfig = appConfig; encoding = appConfig.getCsvEncoding(true); + logger.debug("CSV encoding is set to " + Charset.forName(encoding)); + if (encoding == null) { + encoding = Charset.defaultCharset().name(); + } logger.debug(this.getClass().getName(), "encoding used to write to CSV file is " + encoding); if (columnDelimiterStr.length() == 0) { columnDelimiterStr = AppUtil.COMMA; @@ -107,12 +114,13 @@ public void checkConnection() throws DataAccessObjectInitializationException { @Override public void open() throws DataAccessObjectInitializationException { try { - if (this.encoding != null) { - fileOut = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(this.fileName), this.encoding)); - } else { - fileOut = new BufferedWriter(new FileWriter(this.fileName)); - } + FileOutputStream os = new FileOutputStream(this.fileName); + 
OutputStreamWriter osw = new OutputStreamWriter(os, this.encoding); + fileOut = new BufferedWriter(osw); currentRowNumber = 0; + if (appConfig.getBoolean(AppConfig.PROP_EXTRACT_CSV_OUTPUT_BOM)) { + os.write(getBOM()); + } setOpen(true); } catch (IOException e) { String errMsg = Messages.getFormattedString("CSVWriter.errorOpening", this.fileName); @@ -120,6 +128,15 @@ public void open() throws DataAccessObjectInitializationException { throw new DataAccessObjectInitializationException(errMsg, e); } } + + private byte[] getBOM() { + if (StandardCharsets.UTF_8.equals(Charset.forName(this.encoding))) { + return new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF}; + } else if (StandardCharsets.UTF_16.equals(Charset.forName(this.encoding))) { + return new byte[]{(byte) 0xFE, (byte) 0xFF}; + } + return new byte[0]; + } /* * (non-Javadoc) diff --git a/src/main/resources/labels.properties b/src/main/resources/labels.properties index 70200425..0deaacdc 100644 --- a/src/main/resources/labels.properties +++ b/src/main/resources/labels.properties @@ -479,6 +479,7 @@ AppConfig.property.description.sfdc.oauth.Sandbox.clientsecret=Specify encrypted AppConfig.property.description.sfdc.proxyNtlmDomain=Details documented at https://developer.salesforce.com/docs/atlas.en-us.dataLoader.meta/dataLoader/configuring_the_data_loader.htm AppConfig.property.description.sfdc.extractionRequestSize=Details documented at https://developer.salesforce.com/docs/atlas.en-us.dataLoader.meta/dataLoader/loader_params.htm AppConfig.property.description.sfdc.extraction.allCapsHeaders=set it to "true" to output extraction field headers in capital letters (uppercase characters). +AppConfig.property.description.sfdc.extraction.outputByteOrderMark=set to "true" by default. When set to "true", a Byte Order Mark (BOM) is written at the start of the CSV file if it is created in UTF-8 or UTF-16 format. 
AppConfig.property.description.config.properties.readonly=Do not modify config.properties file even if the user makes changes through Settings dialog. AppConfig.property.description.dataAccess.readCharset=Override system default charset by specifying charset to use for import operations. Set it to UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, or UTF-32LE to handle import CSVs with Byte Order Mark (BOM) character. AppConfig.property.description.loader.cacheSObjectNamesAndField=Cache object names and fields metadata across multiple operations. Applicable in the UI mode because batch mode executes one operation and stops.