Skip to content

Commit

Permalink
Merge pull request #49 from data-integrations/revert-48-bug/column-name-issue
Browse files (browse the repository at this point in the history)

Revert "[PLUGIN-1785]Column name cleansing done as per other file plugins."
  • Loading branch information
vikasrathee-cs authored May 22, 2024
2 parents 41e40a0 + 7c0bc68 commit b923f45
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 71 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ public class GoogleSheetsSourceConfig extends GoogleFilteringSourceConfig {
public static final String CONFIGURATION_PARSE_PROPERTY_NAME = "properties";
private static final Logger LOG = LoggerFactory.getLogger(GoogleSheetsSourceConfig.class);
private static final Pattern CELL_ADDRESS = Pattern.compile("^([A-Z]+)([0-9]+)$");
private static final Pattern NOT_VALID_PATTERN = Pattern.compile("[^A-Za-z0-9_]+");
private static final Pattern COLUMN_NAME = Pattern.compile("^[A-Za-z_][A-Za-z0-9_-]*$");
private static LinkedHashMap<Integer, ColumnComplexSchemaInfo> dataSchemaInfo = new LinkedHashMap<>();

@Name(SHEETS_TO_PULL)
Expand Down Expand Up @@ -593,7 +593,7 @@ private LinkedHashMap<Integer, ColumnComplexSchemaInfo> processColumns(List<Cell
int lastDataColumn,
FailureCollector collector) {
LinkedHashMap<Integer, ColumnComplexSchemaInfo> columnHeaders = new LinkedHashMap<>();
final Map<String, Integer> seenFieldNames = new HashMap<>();

List<String> headerTitles = new ArrayList<>();
for (int i = 0; i < Math.min(columnsRow.size(), lastDataColumn); i++) {
CellData columnHeaderCell = columnsRow.get(i);
Expand All @@ -609,7 +609,7 @@ private LinkedHashMap<Integer, ColumnComplexSchemaInfo> processColumns(List<Cell
}
String title = columnHeaderCell.getFormattedValue();
if (StringUtils.isNotEmpty(title)) {
title = checkTitleFormat(title, seenFieldNames);
title = checkTitleFormat(title, i);

// for merge we should analyse sub headers for data schemas
if (isMergeHead) {
Expand All @@ -634,7 +634,6 @@ private LinkedHashMap<Integer, ColumnComplexSchemaInfo> processColumns(List<Cell
private List<ColumnComplexSchemaInfo> processSubHeaders(int startIndex, int length, List<CellData> subColumnsRow,
List<CellData> dataRow, FailureCollector collector) {
List<ColumnComplexSchemaInfo> subHeaders = new ArrayList<>();
final Map<String, Integer> seenFieldNames = new HashMap<>();
List<String> titles = new ArrayList<>();
for (int i = startIndex; i < startIndex + length; i++) {
String subHeaderTitle;
Expand All @@ -643,7 +642,7 @@ private List<ColumnComplexSchemaInfo> processSubHeaders(int startIndex, int leng
if (StringUtils.isEmpty(subHeaderTitle)) {
subHeaderTitle = ColumnAddressConverter.getColumnName(i + 1);
}
subHeaderTitle = checkTitleFormat(subHeaderTitle, seenFieldNames);
subHeaderTitle = checkTitleFormat(subHeaderTitle, i);
} else {
subHeaderTitle = ColumnAddressConverter.getColumnName(i + 1);
}
Expand All @@ -662,34 +661,14 @@ private List<ColumnComplexSchemaInfo> processSubHeaders(int startIndex, int leng
return subHeaders;
}

// NOTE(review): this span comes from a diff rendered without +/- markers, so two bodies of
// checkTitleFormat appear back to back and the text is not compilable as shown. Presumably the
// Map-based variant directly below is the one removed by the revert and the int-based variant
// further down is the one restored -- confirm against the repository before relying on either.
//
// Map-based variant: trims the title, substitutes "BLANK" for an empty title, prefixes "col_"
// when the first character is a digit, replaces every run of characters matching
// NOT_VALID_PATTERN with "_", and appends "_<count>" when the lower-cased cleaned name has
// already been recorded in seenFieldNames.
private String checkTitleFormat(String title, Map<String, Integer> seenFieldNames) {
final String replacementChar = "_";

StringBuilder cleanFieldNameBuilder = new StringBuilder();

// Remove any spaces at the end of the strings
title = title.trim();

// If it's an empty string replace it with BLANK
if (title.isEmpty()) {
cleanFieldNameBuilder.append("BLANK");
} else if (Character.isDigit(title.charAt(0))) {
// Prepend a col_ if the first character is a number
cleanFieldNameBuilder.append("col_");
}

// Replace all invalid characters with the replacement char
cleanFieldNameBuilder.append(NOT_VALID_PATTERN.matcher(title).replaceAll(replacementChar));

String cleanFieldName = cleanFieldNameBuilder.toString();
// Duplicates are tracked case-insensitively: the map key is the lower-cased cleaned name.
String lowerCaseCleanFieldName = cleanFieldName.toLowerCase();
int count = seenFieldNames.getOrDefault(lowerCaseCleanFieldName, 0) + 1;
seenFieldNames.put(lowerCaseCleanFieldName, count);
// In case column already exists in seenFieldNames map, append the count with column name.
if (count > 1) {
cleanFieldNameBuilder.append(replacementChar).append(count);
// int-based variant: returns the title unchanged when it fully matches COLUMN_NAME; otherwise
// logs a warning and returns ColumnAddressConverter.getColumnName(columnIndex + 1) instead.
private String checkTitleFormat(String title, int columnIndex) {
if (!COLUMN_NAME.matcher(title).matches()) {
// columnIndex is passed through as columnIndex + 1 -- presumably the converter is 1-based; verify.
String defaultColumnName = ColumnAddressConverter.getColumnName(columnIndex + 1);
LOG.warn(String.format("Original column name '%s' doesn't satisfy column name requirements '%s', " +
"the default column name '%s' will be used.", title, COLUMN_NAME.pattern(), defaultColumnName));
return defaultColumnName;
}
// NOTE(review): the next line looks like the final return of the Map-based variant (diff residue).
return cleanFieldNameBuilder.toString();
// Final return of the int-based variant: the title already satisfies COLUMN_NAME.
return title;
}

private Schema getDataCellSchema(List<CellData> dataRow, int index, String headerName) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,13 +359,13 @@ public void testProcessColumnsInvalidTitles()
Assert.assertTrue(columns.get(0).getSubColumns().isEmpty());

// check complex columns, top header should have column name as name
Assert.assertEquals("title_with_space", columns.get(1).getHeaderTitle());
Assert.assertEquals("B", columns.get(1).getHeaderTitle());
List<ColumnComplexSchemaInfo> subColumns = columns.get(1).getSubColumns();
Assert.assertFalse(subColumns.isEmpty());

// check sub-columns
Assert.assertEquals(2, subColumns.size());
Assert.assertEquals("col_9titleWithFirstNumber", subColumns.get(0).getHeaderTitle());
Assert.assertEquals("B", subColumns.get(0).getHeaderTitle());
Assert.assertTrue(subColumns.get(0).getSubColumns().isEmpty());
Assert.assertEquals("d", subColumns.get(1).getHeaderTitle());
Assert.assertTrue(subColumns.get(0).getSubColumns().isEmpty());
Expand All @@ -376,41 +376,4 @@ private void setFieldValue(String fieldName, Object fieldValue) throws NoSuchFie
metadataKeyCellsField.setAccessible(true);
metadataKeyCellsField.set(config, fieldValue);
}

/**
 * Feeds three headers that differ only by letter case or by a non-alphanumeric separator into the
 * private {@code processColumns} method and checks the header titles it reports: the first keeps
 * its cleansed name, the second and third carry the {@code _2} and {@code _3} suffixes.
 */
@Test
public void testProcessColumnsSameCaseSensitiveTitles()
  throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
  // Header row: same words, varying only in case and separator character.
  List<CellData> columnsRow = new ArrayList<>();
  for (String header : new String[] {"title with space", "Title with space", "Title%with%space"}) {
    columnsRow.add(new CellData().setFormattedValue(header));
  }

  // First data row: one string, one number and one boolean cell.
  List<CellData> dataRow = new ArrayList<>();
  dataRow.add(new CellData().setUserEnteredValue(new ExtendedValue().setStringValue("aa")));
  dataRow.add(new CellData().setUserEnteredValue(new ExtendedValue().setNumberValue(13d)));
  dataRow.add(new CellData().setUserEnteredValue(new ExtendedValue().setBoolValue(true)));

  // No merged header cells, so every column is a plain top-level column.
  List<GridRange> columnMerges = new ArrayList<>();
  FailureCollector collector = new DefaultFailureCollector("", Collections.EMPTY_MAP);
  int lastDataColumn = 3;

  // processColumns is private, so it is reached through reflection.
  Method target = config.getClass().getDeclaredMethod(
    "processColumns", List.class, List.class, List.class, List.class, int.class, FailureCollector.class);
  target.setAccessible(true);

  Object result = target.invoke(config, columnsRow, null, dataRow, columnMerges, lastDataColumn, collector);
  LinkedHashMap<Integer, ColumnComplexSchemaInfo> columns =
    (LinkedHashMap<Integer, ColumnComplexSchemaInfo>) result;

  Assert.assertEquals(3, columns.size());
  Assert.assertTrue(columns.keySet().containsAll(Arrays.asList(0, 1, 2)));

  String[] expectedTitles = {"title_with_space", "Title_with_space_2", "Title_with_space_3"};
  for (int position = 0; position < expectedTitles.length; position++) {
    Assert.assertEquals(expectedTitles[position], columns.get(position).getHeaderTitle());
  }
}
}

0 comments on commit b923f45

Please sign in to comment.