Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PLUGIN-1785] Google sheet upgrade issue fix #52

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/GoogleSheets-batchsource.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ Names Row contains less number of columns.

**Read Buffer Size:** Number of rows the source reads with a single API request. Default value is 100.

**Enable Cleansing Column Names:** Toggle that specifies whether to cleanse column names containing special characters.
Special characters will be replaced by underscores.

### Steps to Generate OAuth2 Credentials
1. Create credentials for the Client ID and Client Secret properties [here](https://console.cloud.google.com/apis/credentials).
2. On the Create OAuth client ID page, under Authorized redirect URIs, specify a URI of `http://localhost:8080`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ public class GoogleSheetsSourceConfig extends GoogleFilteringSourceConfig {
private static final Logger LOG = LoggerFactory.getLogger(GoogleSheetsSourceConfig.class);
private static final Pattern CELL_ADDRESS = Pattern.compile("^([A-Z]+)([0-9]+)$");
private static final Pattern NOT_VALID_PATTERN = Pattern.compile("[^A-Za-z0-9_]+");
private static final Pattern COLUMN_NAME = Pattern.compile("^[A-Za-z_][A-Za-z0-9_-]*$");
public static final String CLEANSE_COLUMN_NAMES = "columnNameCleansingEnabled";
private static LinkedHashMap<Integer, ColumnComplexSchemaInfo> dataSchemaInfo = new LinkedHashMap<>();

@Name(SHEETS_TO_PULL)
Expand Down Expand Up @@ -234,6 +236,13 @@ public class GoogleSheetsSourceConfig extends GoogleFilteringSourceConfig {
@Macro
private String sheetFieldName;

@Nullable
@Name(CLEANSE_COLUMN_NAMES)
@Description("Toggle that specifies whether to cleanse column names containing special characters. " +
"Special characters will be replaced by underscores.")
@Macro
private Boolean columnNameCleansingEnabled;

public GoogleSheetsSourceConfig(String referenceName, @Nullable String sheetsIdentifiers, String formatting,
Boolean skipEmptyData, String columnNamesSelection,
@Nullable Integer customColumnNamesRow, String metadataFieldName,
Expand Down Expand Up @@ -609,7 +618,7 @@ private LinkedHashMap<Integer, ColumnComplexSchemaInfo> processColumns(List<Cell
}
String title = columnHeaderCell.getFormattedValue();
if (StringUtils.isNotEmpty(title)) {
title = checkTitleFormat(title, seenFieldNames);
title = checkTitleFormat(title, seenFieldNames, i);

// for merge we should analyse sub headers for data schemas
if (isMergeHead) {
Expand Down Expand Up @@ -643,7 +652,7 @@ private List<ColumnComplexSchemaInfo> processSubHeaders(int startIndex, int leng
if (StringUtils.isEmpty(subHeaderTitle)) {
subHeaderTitle = ColumnAddressConverter.getColumnName(i + 1);
}
subHeaderTitle = checkTitleFormat(subHeaderTitle, seenFieldNames);
subHeaderTitle = checkTitleFormat(subHeaderTitle, seenFieldNames, i);
} else {
subHeaderTitle = ColumnAddressConverter.getColumnName(i + 1);
}
Expand All @@ -662,7 +671,14 @@ private List<ColumnComplexSchemaInfo> processSubHeaders(int startIndex, int leng
return subHeaders;
}

private String checkTitleFormat(String title, Map<String, Integer> seenFieldNames) {
private String checkTitleFormat(String title, Map<String, Integer> seenFieldNames, int columnIndex) {
if (getColumnNameCleansingEnabled()) {
return applyFileTitleConvention(title, seenFieldNames);
}
return applySheetTitleConvention(title, columnIndex);
}

private String applyFileTitleConvention(String title, Map<String, Integer> seenFieldNames) {
final String replacementChar = "_";

StringBuilder cleanFieldNameBuilder = new StringBuilder();
Expand Down Expand Up @@ -692,6 +708,17 @@ private String checkTitleFormat(String title, Map<String, Integer> seenFieldName
return cleanFieldNameBuilder.toString();
}

private String applySheetTitleConvention(String title, int columnIndex) {
if (!COLUMN_NAME.matcher(title).matches()) {
String defaultColumnName = ColumnAddressConverter.getColumnName(columnIndex + 1);
LOG.warn(String.format("Original column name '%s' doesn't satisfy column name requirements '%s', " +
"the default column name '%s' will be used.", title, COLUMN_NAME.pattern(),
defaultColumnName));
return defaultColumnName;
}
return title;
}

private Schema getDataCellSchema(List<CellData> dataRow, int index, String headerName) {
Schema dataSchema = Schema.of(Schema.Type.STRING);
if (dataRow != null && dataRow.size() > index) {
Expand Down Expand Up @@ -950,6 +977,11 @@ public boolean getAutoDetectRowsAndColumns() {
return Boolean.TRUE.equals(autoDetectRowsAndColumns);
}

@Nullable
public boolean getColumnNameCleansingEnabled() {
return Boolean.TRUE.equals(columnNameCleansingEnabled);
}

public Integer getLastDataColumn() {
return lastDataColumn == null ? 0 : Integer.parseInt(lastDataColumn);
}
Expand Down Expand Up @@ -1143,6 +1175,10 @@ public void setEndDate(String endDate) {
this.endDate = endDate;
}

public void setColumnNameCleansingEnabled(@Nullable Boolean columnNameCleansingEnabled) {
this.columnNameCleansingEnabled = columnNameCleansingEnabled;
}

public static GoogleSheetsSourceConfig of(JsonObject properties) throws IOException {

GoogleSheetsSourceConfig googleSheetsSourceConfig = GoogleSheetsSourceConfig
Expand Down Expand Up @@ -1326,6 +1362,11 @@ public static GoogleSheetsSourceConfig of(JsonObject properties) throws IOExcept
properties.get(GoogleSheetsSourceConfig.AUTO_DETECT_ROWS_AND_COLUMNS).getAsBoolean());
}

if (properties.has(GoogleSheetsSourceConfig.CLEANSE_COLUMN_NAMES)) {
googleSheetsSourceConfig.setColumnNameCleansingEnabled(
properties.get(GoogleSheetsSourceConfig.CLEANSE_COLUMN_NAMES).getAsBoolean());
}

return googleSheetsSourceConfig;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ public void testProcessColumnsWithLastDataColumnLessThanColumnsRowSize()
@Test
public void testProcessColumnsInvalidTitles()
throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
config.setColumnNameCleansingEnabled(true);
Method processColumnsMethod = config.getClass().getDeclaredMethod("processColumns", List.class,
List.class, List.class, List.class, int.class, FailureCollector.class);
processColumnsMethod.setAccessible(true);
Expand Down Expand Up @@ -380,6 +381,7 @@ private void setFieldValue(String fieldName, Object fieldValue) throws NoSuchFie
@Test
public void testProcessColumnsSameCaseSensitiveTitles()
throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
config.setColumnNameCleansingEnabled(true);
Method processColumnsMethod = config.getClass().getDeclaredMethod("processColumns", List.class,
List.class, List.class, List.class, int.class,
FailureCollector.class);
Expand Down Expand Up @@ -413,4 +415,60 @@ public void testProcessColumnsSameCaseSensitiveTitles()
Assert.assertEquals("Title_with_space_2", columns.get(1).getHeaderTitle());
Assert.assertEquals("Title_with_space_3", columns.get(2).getHeaderTitle());
}

@Test
public void testProcessColumnsInvalidTitlesOldSchema()
throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
Method processColumnsMethod = config.getClass().getDeclaredMethod("processColumns", List.class,
List.class, List.class, List.class, int.class,
FailureCollector.class);
processColumnsMethod.setAccessible(true);

List<CellData> columnsRow = new ArrayList<>();
columnsRow.add(new CellData().setFormattedValue("a"));
columnsRow.add(new CellData().setFormattedValue("title with space"));
columnsRow.add(new CellData());

List<CellData> subColumnsRow = new ArrayList<>();
subColumnsRow.add(new CellData().setFormattedValue("no header value"));
subColumnsRow.add(new CellData().setFormattedValue("9titleWithFirstNumber"));
subColumnsRow.add(new CellData().setFormattedValue("d"));

List<CellData> dataRow = new ArrayList<>();
dataRow.add(new CellData().setUserEnteredValue(new ExtendedValue().setStringValue("aa")));
dataRow.add(new CellData().setUserEnteredValue(new ExtendedValue().setNumberValue(13d)));
dataRow.add(new CellData().setUserEnteredValue(new ExtendedValue().setBoolValue(true)));

List<GridRange> columnMerges = new ArrayList<>();
columnMerges.add(new GridRange().setStartRowIndex(0).setEndRowIndex(1).setStartColumnIndex(1).setEndColumnIndex(3));

FailureCollector collector = new DefaultFailureCollector("", Collections.EMPTY_MAP);

int lastDataColumn = 3;

LinkedHashMap<Integer, ColumnComplexSchemaInfo> columns =
(LinkedHashMap<Integer, ColumnComplexSchemaInfo>) processColumnsMethod.invoke(config, columnsRow,
subColumnsRow, dataRow,
columnMerges, lastDataColumn,
collector);

Assert.assertEquals(2, columns.size());
Assert.assertTrue(columns.keySet().containsAll(Arrays.asList(0, 1)));

// check simple column
Assert.assertEquals("a", columns.get(0).getHeaderTitle());
Assert.assertTrue(columns.get(0).getSubColumns().isEmpty());

// check complex columns, top header should have column name as name
Assert.assertEquals("B", columns.get(1).getHeaderTitle());
List<ColumnComplexSchemaInfo> subColumns = columns.get(1).getSubColumns();
Assert.assertFalse(subColumns.isEmpty());

// check sub-columns
Assert.assertEquals(2, subColumns.size());
Assert.assertEquals("B", subColumns.get(0).getHeaderTitle());
Assert.assertTrue(subColumns.get(0).getSubColumns().isEmpty());
Assert.assertEquals("d", subColumns.get(1).getHeaderTitle());
Assert.assertTrue(subColumns.get(0).getSubColumns().isEmpty());
}
}
16 changes: 16 additions & 0 deletions widgets/GoogleSheets-batchsource.json
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,22 @@
"default": "100",
"min": "1"
}
},
{
"widget-type": "toggle",
"label": "Enable Cleansing Column Names",
"name": "columnNameCleansingEnabled",
"widget-attributes": {
"on": {
"value": "true",
"label": "Yes"
},
"off": {
"value": "false",
"label": "No"
},
"default": "true"
vikasrathee-cs marked this conversation as resolved.
Show resolved Hide resolved
}
}
]
}
Expand Down