Skip to content

Commit

Permalink
Streaming ?
Browse files Browse the repository at this point in the history
  • Loading branch information
psainics committed Mar 3, 2024
1 parent 99336b5 commit 71536ff
Showing 1 changed file with 6 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package io.cdap.plugin.batch.source;

import com.github.pjfanning.xlsx.StreamingReader;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import org.apache.hadoop.conf.Configuration;
Expand Down Expand Up @@ -146,8 +147,10 @@ public void initialize(InputSplit genericSplit, TaskAttemptContext context) thro
String sheetValue = job.get(SHEET_VALUE);

Sheet workSheet; // Sheet is the common interface for both XSSF and HSSF workbooks
// match regex pattern *.xls or *.xlsx
try {
Workbook workbook = WorkbookFactory.create(fileIn);
// Workbook workbook = WorkbookFactory.create(fileIn);
Workbook workbook = StreamingReader.builder().rowCacheSize(10).open(fileIn);
if (sheet.equalsIgnoreCase(SHEET_NAME)) {
workSheet = workbook.getSheet(sheetValue);
} else {
Expand All @@ -157,7 +160,8 @@ public void initialize(InputSplit genericSplit, TaskAttemptContext context) thro
throw new IllegalArgumentException("Exception while reading excel sheet. " + e.getMessage(), e);
}

rowCount = job.getInt(ROWS_LIMIT, workSheet.getPhysicalNumberOfRows());
// rowCount = job.getInt(ROWS_LIMIT, workSheet.getPhysicalNumberOfRows());
rowCount = job.getInt(ROWS_LIMIT, 10000);
rows = workSheet.iterator();
lastRowNum = workSheet.getLastRowNum();
rowIdx = 0;
Expand Down

0 comments on commit 71536ff

Please sign in to comment.