Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

config property to output csv files in a specific charset #1408

Merged: 1 commit merged on Dec 13, 2024
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions src/main/java/com/salesforce/dataloader/config/AppConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,8 @@ public class AppConfig {
public static final String PROP_READ_UTF8 = "dataAccess.readUTF8"; //$NON-NLS-1$
public static final String PROP_WRITE_UTF8 = "dataAccess.writeUTF8"; //$NON-NLS-1$
public static final String PROP_READ_CHARSET = "dataAccess.readCharset";

public static final String PROP_WRITE_CHARSET = "dataAccess.writeCharset";

public static final String PROP_API_VERSION="salesforce.api.version";
public static final String PROP_OAUTH_INSTANCE_URL="salesforce.oauth.instanceURL";
public static final String PROP_USE_LEGACY_HTTP_GET="sfdc.useLegacyHttpGet";
Expand Down Expand Up @@ -778,6 +779,7 @@ private void setDefaults(Map<String, String> cliOptionsMap) {
setDefaultValue(PROP_RICH_TEXT_FIELD_REGEX, DEFAULT_RICHTEXT_REGEX);
setDefaultValue(PROP_DAO_SKIP_TOTAL_COUNT, true);
setDefaultValue(PROP_READ_CHARSET ,getDefaultCharsetForCsvReadWrite());
setDefaultValue(PROP_WRITE_CHARSET ,getDefaultCharsetForCsvReadWrite());
setDefaultValue(PROP_GMT_FOR_DATE_FIELD_VALUE, false);
setDefaultValue(PROP_SAVE_ALL_PROPS, false);
setDefaultValue(PROP_EXTRACT_ALL_CAPS_HEADERS, false);
Expand Down Expand Up @@ -1735,24 +1737,35 @@ public String getCsvEncoding(boolean isWrite) {
} else {
logger.debug("Getting charset for reading from CSV");
}
String charset = getDefaultCharsetForCsvReadWrite();
if (getBoolean(configProperty)) {
logger.debug("Using UTF8 charset because '"
+ configProperty
+"' is set to true");
return StandardCharsets.UTF_8.name();
}
if (!isWrite) {
String charset = getString(PROP_READ_CHARSET);
if (charset != null && !charset.isEmpty()) {
return charset;
charset = StandardCharsets.UTF_8.name();
} else {
if (isWrite) {
charset = getString(PROP_WRITE_CHARSET);
} else {
charset = getString(PROP_READ_CHARSET);
}
boolean validCharset = false;
for (String charsetName : Charset.availableCharsets().keySet()) {
if (charset.equalsIgnoreCase(charsetName)) {
validCharset = true;
break;
}
}
if (!validCharset) {
logger.warn("configured charset" + charset + " is not supported");
charset = getDefaultCharsetForCsvReadWrite();
}
}
String charset = getDefaultCharsetForCsvReadWrite();
logger.debug("Using charset " + charset);
return charset;
}

private static String defaultCharsetForCsvReadWrite = Charset.defaultCharset().name();
private static String defaultCharsetForCsvReadWrite = null;
private synchronized static String getDefaultCharsetForCsvReadWrite() {
if (defaultCharsetForCsvReadWrite != null) {
return defaultCharsetForCsvReadWrite;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ public void open() throws DataAccessObjectInitializationException {
private byte[] getBOM() {
if (StandardCharsets.UTF_8.equals(Charset.forName(this.encoding))) {
return new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
} else if (StandardCharsets.UTF_16.equals(Charset.forName(this.encoding))) {
} else if (this.encoding.startsWith(StandardCharsets.UTF_16.name())) {
return new byte[]{(byte) 0xFE, (byte) 0xFF};
}
return new byte[0];
Expand Down
1 change: 1 addition & 0 deletions src/main/resources/labels.properties
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,7 @@ AppConfig.property.description.sfdc.extraction.allCapsHeaders=set it to "true" t
AppConfig.property.description.sfdc.extraction.outputByteOrderMark=set to "true" by default. When set to "true", it writes Byte Order Mark (BOM) character if the CSV file is created in UTF-8 format.
AppConfig.property.description.config.properties.readonly=Do not modify config.properties file even if the user makes changes through Settings dialog.
AppConfig.property.description.dataAccess.readCharset=Override system default charset by specifying charset to use for import operations. Set it to UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, or UTF-32LE to handle import CSVs with Byte Order Mark (BOM) character.
AppConfig.property.description.dataAccess.writeCharset=Override system default charset by specifying charset to use for export operations. Set it to UTF-8 or UTF-16 to write export CSVs with Byte Order Mark (BOM) character.
AppConfig.property.description.loader.cacheSObjectNamesAndField=Cache object names and fields metadata across multiple operations. Applicable in the UI mode because batch mode executes one operation and stops.
AppConfig.property.description.sfdc.timezone=Details documented at https://developer.salesforce.com/docs/atlas.en-us.dataLoader.meta/dataLoader/configuring_the_data_loader.htm
AppConfig.property.description.process.outputSuccess=Details documented at https://developer.salesforce.com/docs/atlas.en-us.dataLoader.meta/dataLoader/loader_params.htm
Expand Down
39 changes: 38 additions & 1 deletion src/test/java/com/salesforce/dataloader/dao/CsvTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.List;

import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import com.salesforce.dataloader.ConfigTestBase;
Expand Down Expand Up @@ -130,6 +131,41 @@ public void testCSVWriteBasic() throws Exception {
doTestCSVWriteBasic(AppUtil.COMMA);
}

/**
 * Runs the basic CSV write test after setting both the read and the write
 * charset properties to UTF-8.
 */
@Test
public void testCSVWriteUTF8BOMBasic() throws Exception {
    final String charsetName = "UTF-8";
    // Apply the charset to the read property first, then to the write property.
    for (String charsetProperty : new String[] {AppConfig.PROP_READ_CHARSET, AppConfig.PROP_WRITE_CHARSET}) {
        getController().getAppConfig().setValue(charsetProperty, charsetName);
    }
    doTestCSVWriteBasic(AppUtil.COMMA);
}

/**
 * Runs the basic CSV write test after setting both the read and the write
 * charset properties to UTF-16LE.
 */
@Test
public void testCSVWriteUTF16LEBOMBasic() throws Exception {
    final String charsetName = "UTF-16LE";
    // Apply the charset to the read property first, then to the write property.
    for (String charsetProperty : new String[] {AppConfig.PROP_READ_CHARSET, AppConfig.PROP_WRITE_CHARSET}) {
        getController().getAppConfig().setValue(charsetProperty, charsetName);
    }
    doTestCSVWriteBasic(AppUtil.COMMA);
}

/**
 * Runs the basic CSV write test after setting both the read and the write
 * charset properties to UTF-16BE.
 */
@Test
public void testCSVWriteUTF16BEBOMBasic() throws Exception {
    final String charsetName = "UTF-16BE";
    // Apply the charset to the read property first, then to the write property.
    for (String charsetProperty : new String[] {AppConfig.PROP_READ_CHARSET, AppConfig.PROP_WRITE_CHARSET}) {
        getController().getAppConfig().setValue(charsetProperty, charsetName);
    }
    doTestCSVWriteBasic(AppUtil.COMMA);
}

/**
 * Runs the basic CSV write test after setting both the read and the write
 * charset properties to UTF-32LE.
 */
@Test
public void testCSVWriteUTF32LEBOMBasic() throws Exception {
    final String charsetName = "UTF-32LE";
    // Apply the charset to the read property first, then to the write property.
    for (String charsetProperty : new String[] {AppConfig.PROP_READ_CHARSET, AppConfig.PROP_WRITE_CHARSET}) {
        getController().getAppConfig().setValue(charsetProperty, charsetName);
    }
    doTestCSVWriteBasic(AppUtil.COMMA);
}

/**
 * Runs the basic CSV write test after setting both the read and the write
 * charset properties to UTF-32BE.
 */
@Test
public void testCSVWriteUTF32BEBOMBasic() throws Exception {
    final String charsetName = "UTF-32BE";
    // Apply the charset to the read property first, then to the write property.
    for (String charsetProperty : new String[] {AppConfig.PROP_READ_CHARSET, AppConfig.PROP_WRITE_CHARSET}) {
        getController().getAppConfig().setValue(charsetProperty, charsetName);
    }
    doTestCSVWriteBasic(AppUtil.COMMA);
}

@Test
public void testCSVWriteBasicWithDashDelimiter() throws Exception {
doTestCSVWriteBasic("-");
Expand All @@ -145,8 +181,9 @@ public void testCSVWriteBasicWithTabDelimiter() throws Exception {
doTestCSVWriteBasic(AppUtil.TAB);
}

private String writeCSVFilename = getTestDataDir() + "/csvtestTemp.csv";
private void doTestCSVWriteBasic(String delimiter) throws Exception {
File f = new File(getTestDataDir(), "csvtestTemp.csv");
File f = new File(writeCSVFilename);
String path = f.getAbsolutePath();
CSVFileWriter writer = new CSVFileWriter(path, getController().getAppConfig(), delimiter);
List<RowInterface> rowList = new ArrayList<RowInterface>();
Expand Down
Loading