From a0526ca19903b86f4dcfe78dfba473110068fad4 Mon Sep 17 00:00:00 2001 From: ashitsalesforce Date: Thu, 12 Dec 2024 23:24:07 -0800 Subject: [PATCH] config property to output csv files in a specific charset config property "dataAccess.writeCharset" to output csv files in a specific charset and CSV BOM (Byte Order Mark) if charset supports CSV BOM (UTF-8, UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE). --- .../dataloader/config/AppConfig.java | 31 ++++++++++----- .../dataloader/dao/csv/CSVFileWriter.java | 2 +- src/main/resources/labels.properties | 1 + .../salesforce/dataloader/dao/CsvTest.java | 39 ++++++++++++++++++- 4 files changed, 62 insertions(+), 11 deletions(-) diff --git a/src/main/java/com/salesforce/dataloader/config/AppConfig.java b/src/main/java/com/salesforce/dataloader/config/AppConfig.java index 211501e0..c4c4a177 100644 --- a/src/main/java/com/salesforce/dataloader/config/AppConfig.java +++ b/src/main/java/com/salesforce/dataloader/config/AppConfig.java @@ -376,7 +376,8 @@ public class AppConfig { public static final String PROP_READ_UTF8 = "dataAccess.readUTF8"; //$NON-NLS-1$ public static final String PROP_WRITE_UTF8 = "dataAccess.writeUTF8"; //$NON-NLS-1$ public static final String PROP_READ_CHARSET = "dataAccess.readCharset"; - + public static final String PROP_WRITE_CHARSET = "dataAccess.writeCharset"; + public static final String PROP_API_VERSION="salesforce.api.version"; public static final String PROP_OAUTH_INSTANCE_URL="salesforce.oauth.instanceURL"; public static final String PROP_USE_LEGACY_HTTP_GET="sfdc.useLegacyHttpGet"; @@ -778,6 +779,7 @@ private void setDefaults(Map cliOptionsMap) { setDefaultValue(PROP_RICH_TEXT_FIELD_REGEX, DEFAULT_RICHTEXT_REGEX); setDefaultValue(PROP_DAO_SKIP_TOTAL_COUNT, true); setDefaultValue(PROP_READ_CHARSET ,getDefaultCharsetForCsvReadWrite()); + setDefaultValue(PROP_WRITE_CHARSET ,getDefaultCharsetForCsvReadWrite()); setDefaultValue(PROP_GMT_FOR_DATE_FIELD_VALUE, false); setDefaultValue(PROP_SAVE_ALL_PROPS, 
false); setDefaultValue(PROP_EXTRACT_ALL_CAPS_HEADERS, false); @@ -1735,24 +1737,35 @@ public String getCsvEncoding(boolean isWrite) { } else { logger.debug("Getting charset for reading from CSV"); } + String charset = getDefaultCharsetForCsvReadWrite(); if (getBoolean(configProperty)) { logger.debug("Using UTF8 charset because '" + configProperty +"' is set to true"); - return StandardCharsets.UTF_8.name(); - } - if (!isWrite) { - String charset = getString(PROP_READ_CHARSET); - if (charset != null && !charset.isEmpty()) { - return charset; + charset = StandardCharsets.UTF_8.name(); + } else { + if (isWrite) { + charset = getString(PROP_WRITE_CHARSET); + } else { + charset = getString(PROP_READ_CHARSET); + } + boolean validCharset = false; + for (String charsetName : Charset.availableCharsets().keySet()) { + if (charset.equalsIgnoreCase(charsetName)) { + validCharset = true; + break; + } + } + if (!validCharset) { + logger.warn("configured charset" + charset + " is not supported"); + charset = getDefaultCharsetForCsvReadWrite(); } } - String charset = getDefaultCharsetForCsvReadWrite(); logger.debug("Using charset " + charset); return charset; } - private static String defaultCharsetForCsvReadWrite = Charset.defaultCharset().name(); + private static String defaultCharsetForCsvReadWrite = null; private synchronized static String getDefaultCharsetForCsvReadWrite() { if (defaultCharsetForCsvReadWrite != null) { return defaultCharsetForCsvReadWrite; diff --git a/src/main/java/com/salesforce/dataloader/dao/csv/CSVFileWriter.java b/src/main/java/com/salesforce/dataloader/dao/csv/CSVFileWriter.java index 0bcf1fca..5b2e1e21 100644 --- a/src/main/java/com/salesforce/dataloader/dao/csv/CSVFileWriter.java +++ b/src/main/java/com/salesforce/dataloader/dao/csv/CSVFileWriter.java @@ -132,7 +132,7 @@ public void open() throws DataAccessObjectInitializationException { private byte[] getBOM() { if (StandardCharsets.UTF_8.equals(Charset.forName(this.encoding))) { return new 
byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF}; - } else if (StandardCharsets.UTF_16.equals(Charset.forName(this.encoding))) { + } else if (this.encoding.startsWith(StandardCharsets.UTF_16.name())) { return new byte[]{(byte) 0xFE, (byte) 0xFF}; } return new byte[0]; diff --git a/src/main/resources/labels.properties b/src/main/resources/labels.properties index 0deaacdc..e70732dd 100644 --- a/src/main/resources/labels.properties +++ b/src/main/resources/labels.properties @@ -482,6 +482,7 @@ AppConfig.property.description.sfdc.extraction.allCapsHeaders=set it to "true" t AppConfig.property.description.sfdc.extraction.outputByteOrderMark=set to "true" by default. When set to "true", it writes Byte Order Mark (BOM) character if the CSV file is created in UTF-8 format. AppConfig.property.description.config.properties.readonly=Do not modify config.properties file even if the user makes changes through Settings dialog. AppConfig.property.description.dataAccess.readCharset=Override system default charset by specifying charset to use for import operations. Set it to UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, or UTF-32LE to handle import CSVs with Byte Order Mark (BOM) character. +AppConfig.property.description.dataAccess.writeCharset=Override system default charset by specifying charset to use for export operations. Set it to UTF-8 or UTF-16 to write export CSVs with Byte Order Mark (BOM) character. AppConfig.property.description.loader.cacheSObjectNamesAndField=Cache object names and fields metadata across multiple operations. Applicable in the UI mode because batch mode executes one operation and stops. 
AppConfig.property.description.sfdc.timezone=Details documented at https://developer.salesforce.com/docs/atlas.en-us.dataLoader.meta/dataLoader/configuring_the_data_loader.htm AppConfig.property.description.process.outputSuccess=Details documented at https://developer.salesforce.com/docs/atlas.en-us.dataLoader.meta/dataLoader/loader_params.htm diff --git a/src/test/java/com/salesforce/dataloader/dao/CsvTest.java b/src/test/java/com/salesforce/dataloader/dao/CsvTest.java index 767d3803..ad8c49a9 100644 --- a/src/test/java/com/salesforce/dataloader/dao/CsvTest.java +++ b/src/test/java/com/salesforce/dataloader/dao/CsvTest.java @@ -30,6 +30,7 @@ import java.util.List; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; import com.salesforce.dataloader.ConfigTestBase; @@ -130,6 +131,41 @@ public void testCSVWriteBasic() throws Exception { doTestCSVWriteBasic(AppUtil.COMMA); } + @Test + public void testCSVWriteUTF8BOMBasic() throws Exception{ + getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, "UTF-8"); + getController().getAppConfig().setValue(AppConfig.PROP_WRITE_CHARSET, "UTF-8"); + doTestCSVWriteBasic(AppUtil.COMMA); + } + + @Test + public void testCSVWriteUTF16LEBOMBasic() throws Exception{ + getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, "UTF-16LE"); + getController().getAppConfig().setValue(AppConfig.PROP_WRITE_CHARSET, "UTF-16LE"); + doTestCSVWriteBasic(AppUtil.COMMA); + } + + @Test + public void testCSVWriteUTF16BEBOMBasic() throws Exception{ + getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, "UTF-16BE"); + getController().getAppConfig().setValue(AppConfig.PROP_WRITE_CHARSET, "UTF-16BE"); + doTestCSVWriteBasic(AppUtil.COMMA); + } + + @Test + public void testCSVWriteUTF32LEBOMBasic() throws Exception{ + getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, "UTF-32LE"); + getController().getAppConfig().setValue(AppConfig.PROP_WRITE_CHARSET, "UTF-32LE"); + 
doTestCSVWriteBasic(AppUtil.COMMA); + } + + @Test + public void testCSVWriteUTF32BEBOMBasic() throws Exception{ + getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, "UTF-32BE"); + getController().getAppConfig().setValue(AppConfig.PROP_WRITE_CHARSET, "UTF-32BE"); + doTestCSVWriteBasic(AppUtil.COMMA); + } + @Test public void testCSVWriteBasicWithDashDelimiter() throws Exception { doTestCSVWriteBasic("-"); @@ -145,8 +181,9 @@ public void testCSVWriteBasicWithTabDelimiter() throws Exception { doTestCSVWriteBasic(AppUtil.TAB); } + private String writeCSVFilename = getTestDataDir() + "/csvtestTemp.csv"; private void doTestCSVWriteBasic(String delimiter) throws Exception { - File f = new File(getTestDataDir(), "csvtestTemp.csv"); + File f = new File(writeCSVFilename); String path = f.getAbsolutePath(); CSVFileWriter writer = new CSVFileWriter(path, getController().getAppConfig(), delimiter); List rowList = new ArrayList();