diff --git a/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelInputFormat.java b/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelInputFormat.java index b22ae8369..1b755539d 100644 --- a/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelInputFormat.java +++ b/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelInputFormat.java @@ -16,6 +16,7 @@ package io.cdap.plugin.batch.source; +import com.github.pjfanning.xlsx.StreamingReader; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import org.apache.hadoop.conf.Configuration; @@ -146,8 +147,10 @@ public void initialize(InputSplit genericSplit, TaskAttemptContext context) thro String sheetValue = job.get(SHEET_VALUE); Sheet workSheet; // sheet can be used as common for XSSF and HSSF workbook + // match regex pattern *.xls or *.xlsx try { - Workbook workbook = WorkbookFactory.create(fileIn); + // Workbook workbook = WorkbookFactory.create(fileIn); + Workbook workbook = StreamingReader.builder().rowCacheSize(10).open(fileIn); if (sheet.equalsIgnoreCase(SHEET_NAME)) { workSheet = workbook.getSheet(sheetValue); } else { @@ -157,7 +160,8 @@ public void initialize(InputSplit genericSplit, TaskAttemptContext context) thro throw new IllegalArgumentException("Exception while reading excel sheet. " + e.getMessage(), e); } - rowCount = job.getInt(ROWS_LIMIT, workSheet.getPhysicalNumberOfRows()); +// rowCount = job.getInt(ROWS_LIMIT, workSheet.getPhysicalNumberOfRows()); + rowCount = job.getInt(ROWS_LIMIT, 10000); rows = workSheet.iterator(); lastRowNum = workSheet.getLastRowNum(); rowIdx = 0;