diff --git a/CHANGELOG.md b/CHANGELOG.md index dd98d79..5ac9fd3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## 3.5.0 - 8 August 2022 + +- Enhancements + - \#291 Speed improvement for getAllSheetFormulas() + - \#293 Add includeHiddenRows option to read() + - \#296 Add readLargeFile() + ## 3.4.4 - 25 March 2022 - \#289 Prevent one-off OSGi bundle errors when the bundle version changes diff --git a/ModuleConfig.cfc b/ModuleConfig.cfc index b11a3fc..d6824a8 100644 --- a/ModuleConfig.cfc +++ b/ModuleConfig.cfc @@ -4,7 +4,7 @@ component{ this.author = "Julian Halliwell"; this.webURL = "https://github.com/cfsimplicity/spreadsheet-cfml"; this.description = "CFML Spreadsheet Library"; - this.version = "3.4.4"; + this.version = "3.5.0"; this.autoMapModels = false; function configure(){ diff --git a/README.md b/README.md index 9c212f9..9c03ada 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,7 @@ You may wish to place the spreadsheet library files in a central location with a * [isRowHidden](https://github.com/cfsimplicity/spreadsheet-cfml/wiki/isRowHidden) * [isStreamingXmlFormat](https://github.com/cfsimplicity/spreadsheet-cfml/wiki/isStreamingXmlFormat) * [isXmlFormat](https://github.com/cfsimplicity/spreadsheet-cfml/wiki/isXmlFormat) +* [readLargeFile](https://github.com/cfsimplicity/spreadsheet-cfml/wiki/readLargeFile) * [removePrintGridlines](https://github.com/cfsimplicity/spreadsheet-cfml/wiki/removePrintGridlines) * [renameSheet](https://github.com/cfsimplicity/spreadsheet-cfml/wiki/renameSheet) * [removeSheetNumber](https://github.com/cfsimplicity/spreadsheet-cfml/wiki/removeSheetNumber) diff --git a/Spreadsheet.cfc b/Spreadsheet.cfc index 7cf22c4..abe52ce 100644 --- a/Spreadsheet.cfc +++ b/Spreadsheet.cfc @@ -1,8 +1,8 @@ component accessors="true"{ //"static" - property name="version" default="3.4.4" setter="false"; - property name="osgiLibBundleVersion" default="5.2.2.0" setter="false"; //first 3 octets = POI version; increment 4th with 
other jar updates + property name="version" default="3.5.0" setter="false"; + property name="osgiLibBundleVersion" default="5.2.2.1" setter="false"; //first 3 octets = POI version; increment 4th with other jar updates property name="osgiLibBundleSymbolicName" default="spreadsheet-cfml" setter="false"; property name="exceptionType" default="cfsimplicity.spreadsheet" setter="false"; //commonly invoked POI class names @@ -39,6 +39,7 @@ component accessors="true"{ property name="rangeHelper"; property name="rowHelper"; property name="sheetHelper"; + property name="streamingReaderHelper"; property name="stringHelper"; property name="workbookHelper"; @@ -80,12 +81,13 @@ component accessors="true"{ setRangeHelper( New helpers.range( this ) ); setRowHelper( New helpers.row( this ) ); setSheetHelper( New helpers.sheet( this ) ); + setStreamingReaderHelper( New helpers.streamingReader( this ) ); setStringHelper( New helpers.string( this ) ); setWorkbookHelper( New helpers.workbook( this ) ); } /* Meta utilities */ - + private void function detectEngineProperties(){ this.setIsACF( ( server.coldfusion.productname == "ColdFusion Server" ) ); } @@ -196,7 +198,7 @@ component accessors="true"{ Throw( type=this.getExceptionType(), message="Invalid argument 'queryColumnTypes'.", detail="When specifying 'queryColumnTypes' as a struct you must also set the 'firstRowIsHeader' argument to true OR provide 'queryColumnNames'" ); if( arguments.trim ) csvString = csvString.Trim(); - var format = arguments.KeyExists( "delimiter" )? + var format = arguments.KeyExists( "delimiter" )? 
getCsvHelper().getCsvFormatForDelimiter( arguments.delimiter ) : getClassHelper().loadClass( "org.apache.commons.csv.CSVFormat" )[ JavaCast( "string", "RFC4180" ) ].withIgnoreSurroundingSpaces(); var parsed = getClassHelper().loadClass( "org.apache.commons.csv.CSVParser" ).parse( csvString, format ); @@ -385,7 +387,7 @@ component accessors="true"{ public Spreadsheet function addAutofilter( required workbook, string cellRange="", numeric row=1 ){ arguments.cellRange = arguments.cellRange.Trim(); if( arguments.cellRange.IsEmpty() ){ - //default to all columns in the first (default) or specified row + //default to all columns in the first (default) or specified row var rowIndex = ( Max( 0, arguments.row -1 ) ); var indices = { startRow: rowIndex @@ -692,7 +694,7 @@ component accessors="true"{ public Spreadsheet function cleanUpStreamingXml( required workbook ){ // SXSSF uses temporary files which MUST be cleaned up, see http://poi.apache.org/components/spreadsheet/how-to.html#sxssf if( isStreamingXmlFormat( arguments.workbook ) ) - arguments.workbook.dispose(); + arguments.workbook.dispose(); return this; } @@ -784,7 +786,7 @@ component accessors="true"{ Throw( type=this.getExceptionType(), message="Invalid row value", detail="The value for row must be greater than or equal to 1." 
); var sheet = getSheetHelper().getActiveSheet( arguments.workbook ); var rowIndex = ( arguments.row -1 ); - if( + if( ( rowIndex < getSheetHelper().getFirstRowIndex( sheet ) ) || ( rowIndex > getSheetHelper().getLastRowIndex( sheet ) ) @@ -1064,9 +1066,8 @@ component accessors="true"{ public numeric function getColumnCount( required workbook, sheetNameOrNumber ){ if( arguments.KeyExists( "sheetNameOrNumber" ) ) getSheetHelper().setActiveSheetNameOrNumber( argumentCollection=arguments ); - var sheet = getSheetHelper().getActiveSheet( arguments.workbook ); - var rowIterator = sheet.rowIterator(); var result = 0; + var rowIterator = getSheetHelper().getActiveSheet( arguments.workbook ).rowIterator(); while( rowIterator.hasNext() ){ var row = rowIterator.next(); result = Max( result, row.getLastCellNum() ); @@ -1159,7 +1160,7 @@ component accessors="true"{ } public boolean function isRowHidden( required workbook, required numeric row ){ - return getRowHelper().getRowFromActiveSheet( arguments.workbook, arguments.row ).getZeroHeight(); + return getRowHelper().isRowHidden( arguments.workbook, arguments.row ); } public boolean function isSpreadsheetFile( required string path ){ @@ -1256,7 +1257,7 @@ component accessors="true"{ return new( sheetName=arguments.sheetName, xmlFormat=true ); } - public string function queryToCsv( required query query, boolean includeHeaderRow=false, string delimiter="," ){ + public string function queryToCsv( required query query, boolean includeHeaderRow=false, string delimiter="," ){ var data = []; var columns = getQueryHelper()._QueryColumnArray( arguments.query ); if( arguments.includeHeaderRow ) @@ -1274,7 +1275,7 @@ component accessors="true"{ data.Append( rowValues ); } var builder = getStringHelper().newJavaStringBuilder(); - var csvFormat = getCsvHelper().delimiterIsTab( arguments.delimiter )? + var csvFormat = getCsvHelper().delimiterIsTab( arguments.delimiter )? 
getClassHelper().loadClass( "org.apache.commons.csv.CSVFormat" )[ JavaCast( "string", "TDF" ) ] : getClassHelper().loadClass( "org.apache.commons.csv.CSVFormat" )[ JavaCast( "string", "EXCEL" ) ] .withDelimiter( JavaCast( "char", arguments.delimiter ) ); @@ -1297,6 +1298,7 @@ component accessors="true"{ ,boolean includeBlankRows=false ,boolean fillMergedCellsWithVisibleValue=false ,boolean includeHiddenColumns=true + ,boolean includeHiddenRows=true ,boolean includeRichTextFormatting=false ,string password ,string csvDelimiter="," @@ -1305,10 +1307,8 @@ component accessors="true"{ ){ if( arguments.KeyExists( "query" ) ) Throw( type=this.getExceptionType(), message="Invalid argument 'query'.", detail="Just use format='query' to return a query object" ); - if( arguments.KeyExists( "format" ) && !ListFindNoCase( "query,html,csv", arguments.format ) ) - Throw( type=this.getExceptionType(), message="Invalid format", detail="Supported formats are: 'query', 'html' and 'csv'" ); - if( arguments.KeyExists( "sheetName" ) && arguments.KeyExists( "sheetNumber" ) ) - Throw( type=this.getExceptionType(), message="Cannot provide both sheetNumber and sheetName arguments", detail="Only one of either 'sheetNumber' or 'sheetName' arguments may be provided." ); + getExceptionHelper().throwExceptionIFreadFormatIsInvalid( argumentCollection=arguments ); + getSheetHelper().throwErrorIFSheetNameAndNumberArgumentsBothPassed( argumentCollection=arguments ); getFileHelper().throwErrorIFfileNotExists( arguments.src ); var passwordProtected = ( arguments.KeyExists( "password") && !arguments.password.Trim().IsEmpty() ); var workbook = passwordProtected? 
getWorkbookHelper().workbookFromFile( arguments.src, arguments.password ): getWorkbookHelper().workbookFromFile( arguments.src ); @@ -1334,12 +1334,13 @@ component accessors="true"{ else if( arguments.KeyExists( "queryColumnNames" ) ) args.columnNames = arguments.queryColumnNames;// accept better alias `queryColumnNames` to match csvToQuery if( ( arguments.format == "query" ) && arguments.KeyExists( "queryColumnTypes" ) ){ - getQueryHelper().throwErrorIFinvalidQueryColumnTypesArgument( argumentCollection=arguments ); args.queryColumnTypes = arguments.queryColumnTypes; + getQueryHelper().throwErrorIFinvalidQueryColumnTypesArgument( argumentCollection=args ); } args.includeBlankRows = arguments.includeBlankRows; args.fillMergedCellsWithVisibleValue = arguments.fillMergedCellsWithVisibleValue; args.includeHiddenColumns = arguments.includeHiddenColumns; + args.includeHiddenRows = arguments.includeHiddenRows; args.includeRichTextFormatting = arguments.includeRichTextFormatting; args.makeColumnNamesSafe = arguments.makeColumnNamesSafe; var generatedQuery = getSheetHelper().sheetToQuery( argumentCollection=args ); @@ -1365,6 +1366,68 @@ component accessors="true"{ return baos.toByteArray(); } + public any function readLargeFile( + required string src + ,string format="query" + ,string sheetName + ,numeric sheetNumber // 1-based + ,numeric headerRow + ,boolean includeHeaderRow=false + ,boolean includeBlankRows=false + ,boolean includeHiddenColumns=true + ,boolean includeHiddenRows=true + ,any queryColumnNames //list or array + ,any queryColumnTypes //'auto', list of types, or struct of column names/types mapping. Null means no types are specified. + ,boolean makeColumnNamesSafe=false + ,string password + ,string csvDelimiter="," + ,struct streamingReaderOptions + ){ + if( this.getIsACF() ){ + Throw( type="#this.getExceptionType()#.methodNotSupported", message="'readLargeFile()' is not supported with ColdFusion", detail="'readLargeFile()' currently only works with Lucee." 
); + } + getFileHelper().throwErrorIFfileNotExists( arguments.src ); + getExceptionHelper().throwExceptionIFreadFormatIsInvalid( argumentCollection=arguments ); + getSheetHelper().throwErrorIFSheetNameAndNumberArgumentsBothPassed( argumentCollection=arguments ); + var builderOptions = arguments.streamingReaderOptions?:{}; + if( arguments.KeyExists( "password" ) ) + builderOptions.password = arguments.password; + var sheetToQueryArgs = { + includeBlankRows: arguments.includeBlankRows + ,includeHiddenColumns: arguments.includeHiddenColumns + ,includeHiddenRows: arguments.includeHiddenRows + ,makeColumnNamesSafe: arguments.makeColumnNamesSafe + }; + if( arguments.KeyExists( "sheetName" ) ) + sheetToQueryArgs.sheetName = arguments.sheetName; + if( arguments.KeyExists( "sheetNumber" ) ) + sheetToQueryArgs.sheetNumber = arguments.sheetNumber; + if( arguments.KeyExists( "headerRow" ) ){ + sheetToQueryArgs.headerRow = arguments.headerRow; + sheetToQueryArgs.includeHeaderRow = arguments.includeHeaderRow; + } + if( arguments.KeyExists( "queryColumnNames" ) ) + sheetToQueryArgs.columnNames = arguments.queryColumnNames; + if( ( arguments.format == "query" ) && arguments.KeyExists( "queryColumnTypes" ) ){ + sheetToQueryArgs.queryColumnTypes = arguments.queryColumnTypes; + getQueryHelper().throwErrorIFinvalidQueryColumnTypesArgument( argumentCollection=sheetToQueryArgs ); + } + var generatedQuery = getStreamingReaderHelper().readFileIntoQuery( arguments.src, builderOptions, sheetToQueryArgs ); + if( arguments.format == "query" ) + return generatedQuery; + var exportArgs = { query: generatedQuery }; + if( arguments.KeyExists( "headerRow" ) ){ + exportArgs.headerRow = arguments.headerRow; + exportArgs.includeHeaderRow = arguments.includeHeaderRow; + } + if( arguments.format == "csv" ){ + exportArgs.delimiter = arguments.csvDelimiter; + return queryToCsv( argumentCollection=exportArgs ); + } + // format = html + return getQueryHelper().queryToHtml( argumentCollection=exportArgs ); 
+ } + public Spreadsheet function removePrintGridlines( required workbook ){ getSheetHelper().getActiveSheet( arguments.workbook ).setPrintGridlines( JavaCast( "boolean", false ) ); return this; @@ -1477,7 +1540,7 @@ component accessors="true"{ cell.setCellFormula( JavaCast( "string", arguments.formula ) ); return this; } - + public Spreadsheet function setCellHyperlink( required workbook ,required string link diff --git a/box.json b/box.json index 5249c9a..80884e5 100644 --- a/box.json +++ b/box.json @@ -1,10 +1,10 @@ { "name" : "Spreadsheet CFML", "slug" : "spreadsheet-cfml", - "version" : "3.4.4", + "version" : "3.5.0", "shortDescription" : "CFML spreadsheet library", "author" : "Julian Halliwell", - "location" : "https://github.com/cfsimplicity/spreadsheet-cfml/archive/v3.4.4.zip", + "location" : "https://github.com/cfsimplicity/spreadsheet-cfml/archive/v3.5.0.zip", "homepage" : "https://github.com/cfsimplicity/spreadsheet-cfml", "projectURL" : "https://github.com/cfsimplicity/spreadsheet-cfml", "documentation" : "https://github.com/cfsimplicity/spreadsheet-cfml/blob/main/README.md", diff --git a/build/lib-osgi.mf b/build/lib-osgi.mf index ca388ea..c5bbb47 100644 --- a/build/lib-osgi.mf +++ b/build/lib-osgi.mf @@ -2,17 +2,19 @@ Manifest-Version: 1.0 Bundle-ManifestVersion: 2 Bundle-Name: Spreadsheet CFML Bundle-SymbolicName: spreadsheet-cfml -Bundle-Version: 5.2.2.0 +Bundle-Version: 5.2.2.1 Bundle-ClassPath: commons-codec-1.15.jar, commons-collections4-4.4.jar, commons-compress-1.21.jar, commons-csv-1.9.0.jar, commons-io-2.11.0.jar, commons-math3-3.6.1.jar, + excel-streaming-reader-4.0.1.jar, log4j-api-2.17.2.jar, poi-5.2.2.jar, poi-ooxml-5.2.2.jar, poi-ooxml-full-5.2.2.jar, + slf4j-api-1.7.36.jar, SparseBitSet-1.2.jar, spreadsheet-cfml.jar, xmlbeans-5.0.3.jar diff --git a/helpers/base.cfc b/helpers/base.cfc index 704b148..74799bf 100644 --- a/helpers/base.cfc +++ b/helpers/base.cfc @@ -89,6 +89,10 @@ component accessors="true"{ return 
library().getSheetHelper(); } + any function getStreamingReaderHelper(){ + return library().getStreamingReaderHelper(); + } + any function getStringHelper(){ return library().getStringHelper(); } diff --git a/helpers/cell.cfc b/helpers/cell.cfc index e992fae..628a281 100644 --- a/helpers/cell.cfc +++ b/helpers/cell.cfc @@ -37,6 +37,9 @@ component extends="base" accessors="true"{ } any function getCellFormulaValue( required workbook, required cell ){ + // streaming reader cannot calculate formulas: return cached value + if( getStreamingReaderHelper().isStreamingReaderFormat( arguments.workbook ) ) + return arguments.cell.getStringCellValue(); var formulaEvaluator = arguments.workbook.getCreationHelper().createFormulaEvaluator(); try{ return getFormatHelper().getDataFormatter().formatCellValue( arguments.cell, formulaEvaluator ); diff --git a/helpers/exception.cfc b/helpers/exception.cfc index 7afbc77..837b88d 100644 --- a/helpers/exception.cfc +++ b/helpers/exception.cfc @@ -17,4 +17,13 @@ component extends="base" accessors="true"{ Throw( type=library().getExceptionType(), message="Could not determine image type", detail="An image type could not be determined from the image provided" ); } + void function throwExceptionIFreadFormatIsInvalid(){ + if( arguments.KeyExists( "format" ) && !ListFindNoCase( "query,html,csv", arguments.format ) ) + Throw( type=library().getExceptionType() & ".invalidReadFormat", message="Invalid format", detail="Supported formats are: 'query', 'html' and 'csv'" ); + } + + void function throwInvalidFileForReadLargeFileException(){ + Throw( type=library().getExceptionType() & ".invalidFile", message="Invalid spreadsheet file", detail="readLargeFile() can only be used with XLSX files. The file you are trying to read does not appear to be an XLSX file." 
); + } + } \ No newline at end of file diff --git a/helpers/row.cfc b/helpers/row.cfc index 4ae811c..bcfb69a 100644 --- a/helpers/row.cfc +++ b/helpers/row.cfc @@ -5,14 +5,15 @@ component extends="base" accessors="true"{ ,required struct sheet ,required numeric rowIndex ,boolean includeRichTextFormatting=false + ,any rowObject ){ if( ( arguments.rowIndex == arguments.sheet.headerRowIndex ) && !arguments.sheet.includeHeaderRow ){ - var row = arguments.sheet.object.getRow( JavaCast( "int", arguments.rowIndex ) ); + var row = arguments.rowObject?: arguments.sheet.object.getRow( JavaCast( "int", arguments.rowIndex ) ); setSheetColumnCountFromRow( row, arguments.sheet ); return this; } var rowData = []; - var row = arguments.sheet.object.getRow( JavaCast( "int", arguments.rowIndex ) ); + var row = arguments.rowObject?: arguments.sheet.object.getRow( JavaCast( "int", arguments.rowIndex ) ); if( IsNull( row ) ){ if( arguments.sheet.includeBlankRows ) arguments.sheet.data.Append( rowData ); @@ -20,6 +21,8 @@ component extends="base" accessors="true"{ } if( rowIsEmpty( row ) && !arguments.sheet.includeBlankRows ) return this; + if( rowIsHidden( row ) && !arguments.sheet.includeHiddenRows ) + return this; rowData = getRowData( arguments.workbook, row, arguments.sheet.columnRanges, arguments.includeRichTextFormatting ); arguments.sheet.data.Append( rowData ); setSheetColumnCountFromRow( row, arguments.sheet ); @@ -86,6 +89,18 @@ component extends="base" accessors="true"{ return getSheetHelper().getActiveSheet( arguments.workbook ).getRow( JavaCast( "int", rowIndex ) ); } + any function getRowFromSheet( required workbook, required sheet, required numeric rowIndex ){ + if( !getStreamingReaderHelper().isStreamingReaderFormat( arguments.workbook ) ) + return arguments.sheet.getRow( JavaCast( "int", arguments.rowIndex ) ); + //streaming reader sheet, no random access so iterate + var rowIterator = arguments.sheet.rowIterator(); + while( rowIterator.hasNext() ){ + var rowObject = 
rowIterator.next(); + if( rowObject.getRowNum() == arguments.rowIndex ) + return rowObject; + } + } + array function parseListDataToArray( required string line, required string delimiter, boolean handleEmbeddedCommas=true ){ var elements = ListToArray( arguments.line, arguments.delimiter ); var potentialQuotes = 0; @@ -210,8 +225,12 @@ component extends="base" accessors="true"{ return this; } + boolean function isRowHidden( required workbook, required numeric row ){ + return rowIsHidden( getRowFromActiveSheet( arguments.workbook, arguments.row ) ); + } + any function toggleRowHidden( required workbook, required numeric rowNumber, required boolean state ){ - getRowHelper().getRowFromActiveSheet( arguments.workbook, arguments.rowNumber ).setZeroHeight( JavaCast( "boolean", arguments.state ) ); + getRowFromActiveSheet( arguments.workbook, arguments.rowNumber ).setZeroHeight( JavaCast( "boolean", arguments.state ) ); return this; } @@ -226,6 +245,10 @@ component extends="base" accessors="true"{ return true; } + private boolean function rowIsHidden( required row ){ + return arguments.row.getZeroHeight() || arguments.row.getHeight() == 0; + } + private void function setSheetColumnCountFromRow( required any row, required struct sheet ){ if( arguments.sheet.columnRanges.Len() )//don't change the column count if specific columns have been specified return; diff --git a/helpers/sheet.cfc b/helpers/sheet.cfc index aee948b..142770b 100644 --- a/helpers/sheet.cfc +++ b/helpers/sheet.cfc @@ -46,18 +46,14 @@ component extends="base" accessors="true"{ var cellIterator = rowIterator.next().cellIterator(); while( cellIterator.hasNext() ){ var cell = cellIterator.next(); - var formulaStruct = { - row: ( cell.getRowIndex() + 1 ) - ,column: ( cell.getColumnIndex() + 1 ) - }; - try{ - formulaStruct.formula = cell.getCellFormula(); + var cellFormula = getCellFormula( cell ); + if( cellFormula.Len() ) { + formulas.Append( { + row: ( cell.getRowIndex() + 1 ) + ,column: ( 
cell.getColumnIndex() + 1 ) + ,formula: cellFormula + } ); } - catch( any exception ){ - formulaStruct.formula = ""; - } - if( formulaStruct.formula.Len() ) - formulas.Append( formulaStruct ); } } return formulas; @@ -165,6 +161,7 @@ component extends="base" accessors="true"{ ,boolean includeHeaderRow=false ,boolean includeBlankRows=false ,boolean includeHiddenColumns=false + ,boolean includeHiddenRows=false ,boolean fillMergedCellsWithVisibleValue=false ,boolean includeRichTextFormatting=false ,string rows //range @@ -177,6 +174,7 @@ component extends="base" accessors="true"{ includeHeaderRow: arguments.includeHeaderRow ,hasHeaderRow: ( arguments.KeyExists( "headerRow" ) && Val( arguments.headerRow ) ) ,includeBlankRows: arguments.includeBlankRows + ,includeHiddenRows: arguments.includeHiddenRows ,columnNames: [] ,columnRanges: [] ,totalColumnCount: 0 @@ -198,24 +196,17 @@ component extends="base" accessors="true"{ if( arguments.sheetNumber == 0 ) return QueryNew( "" );//no visible sheets sheet.object = getSheetByNumber( arguments.workbook, arguments.sheetNumber ); - var sheetHasRows = !sheetIsEmpty( sheet.object ); + var sheetHasRows = !sheetIsEmpty( sheet.object, arguments.workbook ); if( sheetHasRows ){ - if( arguments.fillMergedCellsWithVisibleValue ) - doFillMergedCellsWithVisibleValue( arguments.workbook, sheet.object ); - if( arguments.KeyExists( "rows" ) ){ - var allRanges = getRangeHelper().extractRanges( arguments.rows, arguments.workbook ); - for( var thisRange in allRanges ){ - for( var rowNumber = thisRange.startAt; rowNumber <= thisRange.endAt; rowNumber++ ){ - var rowIndex = ( rowNumber -1 ); - getRowHelper().addRowToSheetData( arguments.workbook, sheet, rowIndex, arguments.includeRichTextFormatting ); - } - } - } - else{ - var lastRowIndex = sheet.object.getLastRowNum();// zero based - for( var rowIndex = 0; rowIndex <= lastRowIndex; rowIndex++ ) - getRowHelper().addRowToSheetData( arguments.workbook, sheet, rowIndex, 
arguments.includeRichTextFormatting ); - } + var populateDataArgs = { + workbook: arguments.workbook + ,sheet: sheet + ,fillMergedCellsWithVisibleValue: arguments.fillMergedCellsWithVisibleValue + ,includeRichTextFormatting: arguments.includeRichTextFormatting + }; + if( arguments.KeyExists( "rows" ) ) + populateDataArgs.rows = arguments.rows; + populateSheetData( argumentCollection= populateDataArgs ); } generateQueryColumnNames( arguments.workbook, sheet ); arguments.queryColumnTypes = getQueryHelper().parseQueryColumnTypesArgument( arguments.queryColumnTypes, sheet.columnNames, sheet.totalColumnCount, sheet.data ); @@ -265,18 +256,30 @@ component extends="base" accessors="true"{ throwErrorIFSheetNameAndNumberArgumentsBothPassed( argumentCollection=arguments ); } + any function throwErrorIFSheetNameAndNumberArgumentsBothPassed(){ + if( sheetNameArgumentWasProvided( argumentCollection=arguments ) && sheetNumberArgumentWasProvided( argumentCollection=arguments ) ) + Throw( type=library().getExceptionType() & ".invalidArguments", message="Invalid arguments", detail="Specify either a sheetName or sheetNumber, not both" ); + return this; + } + /* Private */ + private string function getCellFormula( required cell ) { + if( getCellHelper().cellIsOfType( cell, "FORMULA" ) ) + return cell.getCellFormula(); + return ""; + } + private any function generateQueryColumnNames( required workbook, required struct sheet ){ if( arguments.sheet.columnNames.Len() ){ forceQueryColumnsToMatchSpecifiedColumns( arguments.sheet ); return this; // already generated } - if( sheetIsEmpty( arguments.sheet.object ) ) + if( sheetIsEmpty( arguments.sheet.object, arguments.workbook ) ) return this; if( arguments.sheet.hasHeaderRow ){ // use specified header row values as column names - var headerRowObject = arguments.sheet.object.getRow( JavaCast( "int", arguments.sheet.headerRowIndex ) ); + var headerRowObject = getRowHelper().getRowFromSheet( arguments.workbook, arguments.sheet.object, 
arguments.sheet.headerRowIndex ); var headerRowData = getRowHelper().getRowData( arguments.workbook, headerRowObject, arguments.sheet.columnRanges ); // adds default column names if header row column count is less than total data column count cfloop( from=1, to=arguments.sheet.totalColumnCount, index="local.i" ){ @@ -339,6 +342,8 @@ component extends="base" accessors="true"{ } private string function getVisibility( required workbook, required numeric sheetNumber ){ + if( getStreamingReaderHelper().isStreamingReaderFormat( arguments.workbook ) ) // getSheetVisibility() not supported + return "VISIBLE"; validateSheetNumber( arguments.workbook, arguments.sheetNumber ); var sheetIndex = ( arguments.sheetNumber -1 ); return arguments.workbook.getSheetVisibility( sheetIndex ).toString(); @@ -356,8 +361,8 @@ component extends="base" accessors="true"{ return ( sheetIndex == workbook.getActiveSheetIndex() ); } - private boolean function sheetIsEmpty( required sheet ){ - return ( getLastRowIndex( arguments.sheet ) == -1 ); + private boolean function sheetIsEmpty( required sheet, required workbook ){ + return !arguments.sheet.rowIterator().hasNext(); } private boolean function sheetNameArgumentWasProvided(){ @@ -374,14 +379,8 @@ component extends="base" accessors="true"{ return this; } - private any function throwErrorIFSheetNameAndNumberArgumentsBothPassed(){ - if( sheetNameArgumentWasProvided( argumentCollection=arguments ) && sheetNumberArgumentWasProvided( argumentCollection=arguments ) ) - Throw( type=library().getExceptionType(), message="Invalid arguments", detail="Only one argument is allowed. 
Specify either a sheetName or sheetNumber, not both" ); - return this; - } - private void function doFillMergedCellsWithVisibleValue( required workbook, required sheet ){ - if( !getSheetHelper().hasMergedRegions( arguments.sheet ) ) + if( !hasMergedRegions( arguments.sheet ) ) return this; for( var regionIndex = 0; regionIndex < arguments.sheet.getNumMergedRegions(); regionIndex++ ){ var region = arguments.sheet.getMergedRegion( regionIndex ); @@ -394,4 +393,37 @@ component extends="base" accessors="true"{ } } -} \ No newline at end of file + private void function populateSheetData( + required workbook + ,required sheet + ,required boolean fillMergedCellsWithVisibleValue + ,required boolean includeRichTextFormatting + ){ + if( getStreamingReaderHelper().isStreamingReaderFormat( arguments.workbook ) ){ + var rowIterator = arguments.sheet.object.rowIterator(); + while( rowIterator.hasNext() ){ + var rowObject = rowIterator.next(); + var rowIndex = rowObject.getRowNum(); + getRowHelper().addRowToSheetData( arguments.workbook, arguments.sheet, rowIndex, arguments.includeRichTextFormatting, rowObject ); + } + return; + } + if( arguments.fillMergedCellsWithVisibleValue ) + doFillMergedCellsWithVisibleValue( arguments.workbook, arguments.sheet.object ); + if( arguments.KeyExists( "rows" ) ){ + var allRanges = getRangeHelper().extractRanges( arguments.rows, arguments.workbook ); + for( var thisRange in allRanges ){ + for( var rowNumber = thisRange.startAt; rowNumber <= thisRange.endAt; rowNumber++ ){ + var rowIndex = ( rowNumber -1 ); + getRowHelper().addRowToSheetData( arguments.workbook, arguments.sheet, rowIndex, arguments.includeRichTextFormatting ); + } + } + } + else{ + var lastRowIndex = arguments.sheet.object.getLastRowNum();// zero based + for( var rowIndex = 0; rowIndex <= lastRowIndex; rowIndex++ ) + getRowHelper().addRowToSheetData( arguments.workbook, arguments.sheet, rowIndex, arguments.includeRichTextFormatting ); + } + } + +} diff --git 
a/helpers/streamingReader.cfc b/helpers/streamingReader.cfc
new file mode 100644
index 0000000..3a16145
--- /dev/null
+++ b/helpers/streamingReader.cfc
@@ -0,0 +1,50 @@
+component extends="base" accessors="true"{
+
+	// Reports whether the workbook was opened by the streaming reader, by exact match on its canonical Java class name.
+	boolean function isStreamingReaderFormat( required workbook ){
+		return arguments.workbook.getClass().getCanonicalName() == "com.github.pjfanning.xlsx.impl.StreamingWorkbook";
+	}
+
+	// Opens the file at `path` with a streaming reader built from `builderOptions` and returns the result of sheetHelper.sheetToQuery().
+	// The opened workbook is added to `sheetToQueryArgs` as "workbook". The whole read runs inside a lock named after the path (timeout 5).
+	// POI's "not an Office XML file" exceptions are replaced by the exception helper's invalid file exception.
+	query function readFileIntoQuery( required string path, required struct builderOptions, required struct sheetToQueryArgs ){
+		lock name="#arguments.path#" timeout=5 {
+			try{
+				var file = CreateObject( "java", "java.io.FileInputStream" ).init( arguments.path );
+				// Keep a function-local reference: the finally block looks up "workbook" in the local scope.
+				// Storing it only in the arguments struct meant local.workbook never existed, so the workbook was never closed.
+				var workbook = getBuilder( arguments.builderOptions ).open( file );
+				arguments.sheetToQueryArgs.workbook = workbook;
+				return getSheetHelper().sheetToQuery( argumentCollection=arguments.sheetToQueryArgs );
+			}
+			catch( org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException exception ){
+				getExceptionHelper().throwInvalidFileForReadLargeFileException();
+			}
+			catch( org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException exception ){
+				getExceptionHelper().throwInvalidFileForReadLargeFileException();
+			}
+			finally{
+				// NOTE(review): presumably closeLocalFileOrStream() skips keys that are not defined in the scope (e.g. if open() throws) - confirm in the file helper
+				getFileHelper().closeLocalFileOrStream( local, "file" );
+				getFileHelper().closeLocalFileOrStream( local, "workbook" );
+			}
+		}
+	}
+
+	// Builds a StreamingReader builder from the options struct: password (used only if not blank), bufferSize and rowCacheSize are applied when present.
+	// NB: called from tests
+	any function getBuilder( required struct options ){
+		var passwordProtected = ( arguments.options.KeyExists( "password") && arguments.options.password.Trim().Len() );
+		var builder = getClassHelper().loadClass( "com.github.pjfanning.xlsx.StreamingReader" ).builder()
+			.setFullFormatRichText( JavaCast( "boolean", true ) ); //some sheet methods e.g. getLastRowNum() may error if not set to true!
+		if( passwordProtected )
+			builder.password( JavaCast( "string", arguments.options.password ) );
+		if( arguments.options.KeyExists( "bufferSize" ) )
+			builder.bufferSize( JavaCast( "int", arguments.options.bufferSize ) );
+		if( arguments.options.KeyExists( "rowCacheSize" ) )
+			builder.rowCacheSize( JavaCast( "int", arguments.options.rowCacheSize ) );
+		return builder;
+	}
+
+}
\ No newline at end of file
diff --git a/lib-osgi.jar b/lib-osgi.jar
index 06f6343..ea81711 100644
Binary files a/lib-osgi.jar and b/lib-osgi.jar differ
diff --git a/lib/excel-streaming-reader-4.0.1.jar b/lib/excel-streaming-reader-4.0.1.jar
new file mode 100644
index 0000000..8a3a0a2
Binary files /dev/null and b/lib/excel-streaming-reader-4.0.1.jar differ
diff --git a/lib/slf4j-api-1.7.36.jar b/lib/slf4j-api-1.7.36.jar
new file mode 100644
index 0000000..7d3ce68
Binary files /dev/null and b/lib/slf4j-api-1.7.36.jar differ
diff --git a/lib/spreadsheet-cfml.jar b/lib/spreadsheet-cfml.jar
index 03d3187..1379b33 100644
Binary files a/lib/spreadsheet-cfml.jar and b/lib/spreadsheet-cfml.jar differ
diff --git a/test/files/commaAndSpaceInColumnHeaders.xlsx b/test/files/commaAndSpaceInColumnHeaders.xlsx
new file mode 100644
index 0000000..9bf11f8
Binary files /dev/null and b/test/files/commaAndSpaceInColumnHeaders.xlsx differ
diff --git a/test/files/formula.xlsx b/test/files/formula.xlsx
new file mode 100644
index 0000000..c5fcc07
Binary files /dev/null and b/test/files/formula.xlsx differ
diff --git a/test/files/large.xlsx b/test/files/large.xlsx
new file mode 100644
index 0000000..7c6f604
Binary files /dev/null and b/test/files/large.xlsx differ
diff --git a/test/files/nullCell.xlsx b/test/files/nullCell.xlsx
new file mode 100644
index 0000000..a192334
Binary files /dev/null and b/test/files/nullCell.xlsx differ
diff --git a/test/specs/read.cfm b/test/specs/read.cfm
index a8e1e19..33857eb 100644
--- a/test/specs/read.cfm
+++ b/test/specs/read.cfm
@@ -104,7 +104,7 @@ describe( "read", function(){
expect( actual ).toBe( expected ); }); - it( "Uses header row for column names if specified", function(){ + it( "Uses the specified header row for column names", function(){ var path = getTestFilePath( "test.xls" ); var expected = querySim( "a,b @@ -189,7 +189,7 @@ describe( "read", function(){ var dateValue = CreateDate( 2015, 04, 12 ); var data = QueryNew( "column1,column2,column3,column4,column5", "Integer,Integer,Bit,Date,VarChar", [ [ 2, 0, true, dateValue, "01" ] ] ); var workbook = s.new(); - s.addRows( workbook,data ) + s.addRows( workbook, data ) .write( workbook, tempXlsPath, true ); var expected = data; var actual = s.getSheetHelper().sheetToQuery( workbook ); @@ -329,7 +329,7 @@ describe( "read", function(){ it( "Can return a CSV string from an Excel file", function(){ var path = getTestFilePath( "test.xls" ); var expected = 'a,b#crlf#1,2015-04-01 00:00:00#crlf#2015-04-01 01:01:01,2'; - var actual = s.read( src=path,format="csv" ); + var actual = s.read( src=path, format="csv" ); expect( actual ).toBe( expected ); expected = 'a,b#crlf#a,b#crlf#1,2015-04-01 00:00:00#crlf#2015-04-01 01:01:01,2'; actual = s.read( src=path, format="csv", headerRow=1, includeHeaderRow=true ); @@ -353,6 +353,20 @@ describe( "read", function(){ expect( actual ).toBe( expected ); }); + it( "Includes columns formatted as 'hidden' by default", function(){ + spreadsheetTypes.Each( function( type ){ + var path = variables[ "temp" & type & "Path" ]; + s.newChainable( type ) + .addColumn( "a1" ) + .addColumn( "b1" ) + .hideColumn( 1 ) + .write( path, true ); + var actual = s.read( src=path, format="query" ); + var expected = QueryNew( "column1,column2", "VarChar,VarChar", [ [ "a1", "b1" ] ] ); + expect( actual ).toBe( expected ); + }); + }); + it( "Can exclude columns formatted as 'hidden'", function(){ var workbook = s.new(); s.addColumn( workbook, "a1" ) @@ -365,6 +379,32 @@ describe( "read", function(){ expect( actual ).toBe( expected ); }); + it( "Includes rows formatted as 
'hidden' by default", function(){ + var data = QueryNew( "column1", "VarChar", [ [ "Apple" ], [ "Banana" ], [ "Carrot" ], [ "Doughnut" ] ] ); + spreadsheetTypes.Each( function( type ){ + var path = variables[ "temp" & type & "Path" ]; + s.newChainable( type ) + .addRows( data ) + .hideRow( 1 ) + .write( path, true ); + var actual = s.read( src=path, format="query" ); + var expected = data; + expect( actual ).toBe( expected ); + }); + }); + + it( "Can exclude rows formatted as 'hidden'", function(){ + var data = QueryNew( "column1", "VarChar", [ [ "Apple" ], [ "Banana" ], [ "Carrot" ], [ "Doughnut" ] ] ); + var workbook = s.new(); + s.addRows( workbook, data ); + s.hideRow( workbook, 1 ); + s.hideRow( workbook, 3 ); + s.write( workbook, tempXlsPath, true ); + var actual = s.read( src=tempXlsPath, format="query", includeHiddenRows=false ); + var expected = QueryNew( "column1", "VarChar", [ [ "Banana" ], [ "Doughnut" ] ] ); + expect( actual ).toBe( expected ); + }); + it( "Returns an empty query if the spreadsheet is empty even if headerRow is specified", function(){ var workbooks = [ s.newXls(), s.newXlsx() ]; workbooks.Each( function( wb ){ @@ -412,10 +452,10 @@ describe( "read", function(){ var actual = s.read( src=path, format="query", password="pass" ); expect( actual ).toBe( expected ); }); - + it( "Can read a spreadsheet containing a formula", function(){ var workbook = s.new(); - s.addColumn( workbook,"1,1" ); + s.addColumn( workbook, "1,1" ); var theFormula = "SUM(A1:A2)"; s.setCellFormula( workbook, theFormula, 3, 1 ) .write( workbook=workbook, filepath=tempXlsPath, overwrite=true ); @@ -509,7 +549,7 @@ describe( "read", function(){ var actual = s.read( src=tempXlsPath, format="query", columnNames=columnNames ); expect( actual ).toBe( expected ); }); - + }); describe( "query column type setting", function(){ @@ -613,7 +653,7 @@ describe( "read", function(){ expect( actual ).toBe( expected ); SetTimeZone( currentTZ ); }); - + }); }, @@ -676,6 +716,13 @@ 
describe( "read", function(){ }).toThrow( regex="Invalid sheet|out of range" ); }); + it( "both sheetName and sheetNumber arguments are specified", function(){ + expect( function(){ + var path = getTestFilePath( "test.xls" ); + s.read( src=path, sheetName="sheet1", sheetNumber=2 ); + }).toThrow( type="cfsimplicity.spreadsheet.invalidArguments" ); + }); + it( "the password for an encrypted XML file is incorrect", function(){ expect( function(){ var tempXlsxPath = getTestFilePath( "passworded.xlsx" ); @@ -722,5 +769,5 @@ describe( "read", function(){ if( FileExists( variables.tempXlsxPath ) ) FileDelete( variables.tempXlsxPath ); }); -}); - \ No newline at end of file +}); + diff --git a/test/specs/readLargeFile.cfm b/test/specs/readLargeFile.cfm new file mode 100644 index 0000000..3668bbc --- /dev/null +++ b/test/specs/readLargeFile.cfm @@ -0,0 +1,465 @@ + +describe( + title="readLargeFile (Lucee only)" + ,body=function(){ + + it( "Can read an XLSX file into a query", function(){ + var path = getTestFilePath( "large.xlsx" ); + var expected = querySim( + "column1,column2 + FirstSheet A1|FirstSheet B1"); + var actual = s.readLargeFile( src=path ); + expect( actual ).toBe( expected ); + }); + + it( "Reads from the specified sheet name", function(){ + var path = getTestFilePath( "large.xlsx" );// has 2 sheets + var expected = querySim( + "column1,column2 + SecondSheet A1|SecondSheet B1"); + var actual = s.readLargeFile( src=path, sheetName="SecondSheet" ); + expect( actual ).toBe( expected ); + }); + + it( "Reads from the specified sheet number", function(){ + var path = getTestFilePath( "large.xlsx" );// has 2 sheets + var expected = querySim( + "column1,column2 + SecondSheet A1|SecondSheet B1"); + var actual = s.readLargeFile( src=path, sheetNumber=2 ); + expect( actual ).toBe( expected ); + }); + + it( "Uses the specified header row for column names", function(){ + var path = getTestFilePath( "large.xlsx" ); + var expected = querySim( + "heading1,heading2 + A2 value|B2 value"); + var actual = 
s.readLargeFile( src=path, headerRow=1, sheetName="HeaderRow" ); + expect( actual ).toBe( expected ); + }); + + it( "Generates default column names if the data has more columns than the specified header row", function(){ + var headerRow = [ "firstColumn" ]; + var dataRow1 = [ "row 1 col 1 value" ]; + var dataRow2 = [ "row 2 col 1 value", "row 2 col 2 value" ]; + var expected = querySim( + "firstColumn,column2 + row 1 col 1 value| + row 2 col 1 value|row 2 col 2 value" + ); + s.newChainable( "xlsx" ) + .addRow( headerRow ) + .addRow( dataRow1 ) + .addRow( dataRow2 ) + .write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath, headerRow=1 ); + expect( actual ).toBe( expected ); + }); + + it( "Includes the specified header row in query if includeHeaderRow is true", function(){ + var headerRow = [ "a", "b" ]; + var dataRow = [ "c", "d" ]; + s.newChainable( "xlsx" ) + .addRow( headerRow ) + .addRow( dataRow ) + .write( tempXlsxPath, true ); + var expected = querySim( + "a,b + a|b + c|d"); + var actual = s.readLargeFile( src=tempXlsxPath, headerRow=1, includeHeaderRow=true ); + expect( actual ).toBe( expected ); + }); + + it( "Excludes null and blank rows in query by default", function(){ + var data = QueryNew( "column1,column2", "VarChar,VarChar", [ [ "", "" ], [ "a", "b" ] ] ); + var workbook = s.newXlsx(); + s.addRows( workbook, data ) + .write( workbook, tempXlsxPath, true ); + var expected = QueryNew( "column1,column2", "VarChar,VarChar", [ [ "a", "b" ] ] ); + var actual = s.readLargeFile( src=tempXlsxPath ); + expect( actual ).toBe( expected ); + }); + + it( "Includes null and blank rows in query if includeBlankRows is true", function(){ + var data = QueryNew( "column1,column2", "VarChar,VarChar", [ [ "", "" ], [ "a", "b" ] ] ); + var workbook = s.newXlsx(); + s.addRows( workbook, data ) + .write( workbook, tempXlsxPath, true ); + var expected = data; + var actual = s.readLargeFile( src=tempXlsxPath, includeBlankRows=true ); + expect( actual 
).toBe( expected ); + }); + + it( "Can handle null/empty cells", function(){ + var path = getTestFilePath( "nullCell.xlsx" ); + var actual = s.readLargeFile( src=path, headerRow=1 ); + var expected = QueryNew( "column1,column2", "VarChar,VarChar", [ [ "", "a" ] ] ); + expect( actual ).toBe( expected ); + }); + + it( "Includes trailing empty columns when using a header row", function(){ + var expected = QuerySim( "col1,col2,emptyCol + value|value|"); + var workbook = s.newChainable( "xlsx" ) + .addRow( "col1,col2,emptyCol" ) + .addRow( "value,value" ) + .write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath, headerRow=1 ); + expect( actual ).toBe( expected ); + }); + + it( "Can return HTML table rows from an Excel file", function(){ + var headerRow = [ "header1", "header2" ]; + var dataRow = [ "a", CreateDate( 2015, 04, 01 ) ]; + s.newChainable( "xlsx" ) + .addRow( headerRow ) + .addRow( dataRow ) + .write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath, format="html" ); + var expected = "header1header2a2015-04-01 00:00:00"; + expect( actual ).toBe( expected ); + actual = s.readLargeFile( src=tempXlsxPath, format="html", headerRow=1 ); + expected = "a2015-04-01 00:00:00"; + expect( actual ).toBe( expected ); + actual = s.readLargeFile( src=tempXlsxPath, format="html", headerRow=1, includeHeaderRow=true ); + expected = "header1header2header1header2a2015-04-01 00:00:00"; + expect( actual ).toBe( expected ); + }); + + it( "Can return a CSV string from an Excel file", function(){ + var headerRow = [ "header1", "header2" ]; + var dataRow = [ "a", CreateDate( 2015, 04, 01 ) ]; + s.newChainable( "xlsx" ) + .addRow( headerRow ) + .addRow( dataRow ) + .write( tempXlsxPath, true ); + var expected = 'header1,header2#crlf#a,2015-04-01 00:00:00'; + var actual = s.readLargeFile( src=tempXlsxPath, format="csv" ); + expect( actual ).toBe( expected ); + expected = 'header1,header2#crlf#header1,header2#crlf#a,2015-04-01 00:00:00'; + actual = 
s.readLargeFile( src=tempXlsxPath, format="csv", headerRow=1, includeHeaderRow=true ); + expect( actual ).toBe( expected ); + }); + + it( "Accepts a custom delimiter when generating CSV", function(){ + var dataRow = [ "a", CreateDate( 2015, 04, 01 ) ]; + s.newChainable( "xlsx" ) + .addRow( dataRow ) + .write( tempXlsxPath, true ); + var expected = 'a|2015-04-01 00:00:00'; + var actual = s.readLargeFile( src=tempXlsxPath, format="csv", csvDelimiter="|" ); + expect( actual ).toBe( expected ); + }); + + it( "Includes columns formatted as 'hidden' by default", function(){ + s.newChainable( "xlsx" ) + .addColumn( "a1" ) + .addColumn( "b1" ) + .hideColumn( 1 ) + .write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath ); + var expected = QueryNew( "column1,column2", "VarChar,VarChar", [ [ "a1", "b1" ] ] ); + expect( actual ).toBe( expected ); + }); + + it( "Can exclude columns formatted as 'hidden'", function(){ + s.newChainable( "xlsx" ) + .addColumn( "a1" ) + .addColumn( "b1" ) + .hideColumn( 1 ) + .write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath, includeHiddenColumns=false ); + var expected = QueryNew( "column2", "VarChar", [ [ "b1" ] ] ); + expect( actual ).toBe( expected ); + }); + + it( "Includes rows formatted as 'hidden' by default", function(){ + var data = QueryNew( "column1", "VarChar", [ [ "Apple" ], [ "Banana" ], [ "Carrot" ], [ "Doughnut" ] ] ); + s.newChainable( "xlsx" ) + .addRows( data ) + .hideRow( 1 ) + .write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath ); + var expected = data; + expect( actual ).toBe( expected ); + }); + + it( "Can exclude rows formatted as 'hidden'", function(){ + var data = QueryNew( "column1", "VarChar", [ [ "Apple" ], [ "Banana" ], [ "Carrot" ], [ "Doughnut" ] ] ); + s.newChainable( "xlsx" ) + .addRows( data ) + .hideRow( 1 ) + .write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath, includeHiddenRows=false ); + var expected = QueryNew( 
"column1", "VarChar", [ [ "Banana" ], [ "Carrot" ], [ "Doughnut" ] ] ); + expect( actual ).toBe( expected ); + }); + + it( "Returns an empty query if the spreadsheet is empty even if headerRow is specified", function(){ + s.newChainable( "xlsx" ) + .write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath, headerRow=1 ); + var expected = QueryNew( "" ); + expect( actual ).toBe( expected ); + }); + + it( "Returns an empty query if excluding hidden columns and ALL columns are hidden", function(){ + s.newChainable( "xlsx" ) + .addColumn( "a1" ) + .addColumn( "b1" ) + .hideColumn( 1 ) + .hideColumn( 2 ) + .write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath, includeHiddenColumns=false ); + var expected = QueryNew( "" ); + expect( actual ).toBe( expected ); + }); + + it( "Returns a query with column names but no rows if column names are specified but spreadsheet is empty", function(){ + s.newChainable( "xlsx" ).write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath, queryColumnNames="One,Two" ); + var expected = QueryNew( "One,Two","Varchar,Varchar", [] ); + expect( actual ).toBe( expected ); + }); + + it( "Can read an encrypted XLSX file", function(){ + var path = getTestFilePath( "passworded.xlsx" ); + var expected = QueryNew( "column1", "VarChar", [ [ "secret" ] ] ); + var actual = s.readLargeFile( src=path, password="pass" ); + expect( actual ).toBe( expected ); + }); + + it( "Can read a spreadsheet containing a CACHED (i.e. pre-evaluated) formula", function(){ + /* NB: Setting a formula with POI does not cache its value. 
The Streaming Reader cannot evaluate formulas */ + var path = getTestFilePath( "formula.xlsx" ); + var expected = QueryNew( "column1","Integer", [ [ 1 ], [ 1 ], [ 2 ] ] ); + var actual = s.readLargeFile( path ); + expect( actual ).toBe( expected ); + }); + + describe( "query column name setting", function() { + + it( "Allows column names to be specified as a list when reading a sheet into a query", function(){ + s.newChainable( "xlsx" ).addRow( "a,b" ).write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath, queryColumnNames="One,Two" ); + var expected = QueryNew( "One,Two","Varchar,Varchar", [ "a", "b" ] ); + expect( actual ).toBe( expected ); + }); + + it( "Allows column names to be specified as an array when reading a sheet into a query", function(){ + s.newChainable( "xlsx" ).addRow( "a,b" ).write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath, queryColumnNames=[ "One", "Two" ] ); + var expected = QueryNew( "One,Two","Varchar,Varchar", [ "a", "b" ] ); + expect( actual ).toBe( expected ); + }); + + it( "ColumnNames list overrides headerRow: none of the header row values will be used", function(){ + s.newChainable( "xlsx" ).addRow( "a,b" ).addRow( "c,d" ).write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath, queryColumnNames="One,Two", headerRow=1 ); + var expected = QueryNew( "One,Two","Varchar,Varchar", [ "c", "d" ] ); + expect( actual ).toBe( expected ); + }); + + it( "can handle column names containing commas or spaces", function(){ + var path = getTestFilePath( "commaAndSpaceInColumnHeaders.xlsx" ); + var actual = s.readLargeFile( src=path, headerRow=1 ); + var columnNames = [ "first name", "surname,comma" ];// these are the file column headers + expect( actual.getColumnNames()[ 1 ] ).toBe( columnNames[ 1 ] ); + expect( actual.getColumnNames()[ 2 ] ).toBe( columnNames[ 2 ] ); + }); + + it( "Allows header names to be made safe for query column names", function(){ + var data = [ [ 
"id","id","A B","x/?y","(a)"," A","##1","1a" ], [ 1,2,3,4,5,6,7,8 ] ]; + s.newChainable( "xlsx" ).addRows( data ).write( tempXlsxPath, true ); + var q = s.readLargeFile( src=tempXlsxPath, headerRow=1, makeColumnNamesSafe=true ); + var expected = [ "id", "id2", "A_B", "x_y", "_a_", "A", "Number1", "_a" ]; + cfloop( from=1, to=expected.Len(), index="i" ){ + expect( q.getColumnNames()[ i ] ).toBe( expected[ i ] ); + } + }); + + it( "Generates default column names if the data has more columns than the specified column names", function(){ + var columnNames = [ "firstColumn" ]; + var dataRow1 = [ "row 1 col 1 value" ]; + var dataRow2 = [ "row 2 col 1 value", "row 2 col 2 value" ]; + var expected = querySim( + "firstColumn,column2 + row 1 col 1 value| + row 2 col 1 value|row 2 col 2 value" + ); + s.newChainable( "xlsx" ).addRow( dataRow1 ).addRow( dataRow2 ).write( tempXlsxPath, true ); + var actual = s.readLargeFile( src=tempXlsxPath, queryColumnNames=columnNames ); + expect( actual ).toBe( expected ); + }); + + }); + + describe( "query column type setting", function(){ + + it( "allows the query column types to be manually set using list", function(){ + s.newChainable( "xlsx" ).addRow( [ 1, 1.1, "string", CreateTime( 1, 0, 0 ) ] ).write( tempXlsxPath, true ); + var q = s.readLargeFile( src=tempXlsxPath, queryColumnTypes="Integer,Double,VarChar,Time" ); + var columns = GetMetaData( q ); + expect( columns[ 1 ].typeName ).toBe( "INTEGER" ); + expect( columns[ 2 ].typeName ).toBe( "DOUBLE" ); + expect( columns[ 3 ].typeName ).toBe( "VARCHAR" ); + expect( columns[ 4 ].typeName ).toBe( "TIME" ); + }); + + it( "allows the query column types to be manually set where the column order isn't known, but the header row values are", function(){ + s.newChainable( "xlsx" ) + .addRows( [ [ "integer", "double", "string column", "time" ], [ 1, 1.1, "text", CreateTime( 1, 0, 0 ) ] ] ) + .write( tempXlsxPath, true ); + var columnTypes = { "string column": "VARCHAR", "integer": "INTEGER", 
"time": "TIME", "double": "DOUBLE" };//not in order + var q = s.readLargeFile( src=tempXlsxPath, format="query", queryColumnTypes=columnTypes, headerRow=1 ); + var columns = GetMetaData( q ); + expect( columns[ 1 ].typeName ).toBe( "INTEGER" ); + expect( columns[ 2 ].typeName ).toBe( "DOUBLE" ); + expect( columns[ 3 ].typeName ).toBe( "VARCHAR" ); + expect( columns[ 4 ].typeName ).toBe( "TIME" ); + }); + + it( "allows the query column types to be manually set where the column order isn't known, but the column names are", function(){ + s.newChainable( "xlsx" ).addRows( [ [ 1, 1.1, "text", CreateTime( 1, 0, 0 ) ] ] ).write( tempXlsxPath, true ); + var columnNames = "integer,double,string column,time"; + var columnTypes = { "string column": "VARCHAR", "integer": "INTEGER", "time": "TIME", "double": "DOUBLE" };//not in order + var q = s.readLargeFile( src=tempXlsxPath, queryColumnTypes=columnTypes, queryColumnNames=columnNames ); + var columns = GetMetaData( q ); + expect( columns[ 1 ].typeName ).toBe( "INTEGER" ); + expect( columns[ 2 ].typeName ).toBe( "DOUBLE" ); + expect( columns[ 3 ].typeName ).toBe( "VARCHAR" ); + expect( columns[ 4 ].typeName ).toBe( "TIME" ); + }); + + it( "allows the query column types to be automatically set", function(){ + s.newChainable( "xlsx" ).addRow( [ 1, 1.1, "string", Now() ] ).write( tempXlsxPath, true ); + var q = s.readLargeFile( src=tempXlsxPath, queryColumnTypes="auto" ); + var columns = GetMetaData( q ); + expect( columns[ 1 ].typeName ).toBe( "DOUBLE" ); + expect( columns[ 2 ].typeName ).toBe( "DOUBLE" ); + expect( columns[ 3 ].typeName ).toBe( "VARCHAR" ); + expect( columns[ 4 ].typeName ).toBe( "TIMESTAMP" ); + }); + + it( "automatic detecting of query column types ignores blank cells", function(){ + var data = [ + [ "", "", "", "" ], + [ "", 2, "test", Now() ], + [ 1, 1.1, "string", Now() ], + [ 1, "", "", "" ] + ]; + s.newChainable( "xlsx" ).addRows( data ).write( tempXlsxPath, true ); + var q = s.readLargeFile( src=tempXlsxPath, 
queryColumnTypes="auto" ); + var columns = GetMetaData( q ); + expect( columns[ 1 ].typeName ).toBe( "DOUBLE" ); + expect( columns[ 2 ].typeName ).toBe( "DOUBLE" ); + expect( columns[ 3 ].typeName ).toBe( "VARCHAR" ); + expect( columns[ 4 ].typeName ).toBe( "TIMESTAMP" ); + }); + + it( "allows a default type to be set for all query columns", function(){ + s.newChainable( "xlsx" ).addRow( [ 1, 1.1, "string", Now() ] ).write( tempXlsxPath, true ); + var q = s.readLargeFile( src=tempXlsxPath, queryColumnTypes="VARCHAR" ); + var columns = GetMetaData( q ); + expect( columns[ 1 ].typeName ).toBe( "VARCHAR" ); + expect( columns[ 2 ].typeName ).toBe( "VARCHAR" ); + expect( columns[ 3 ].typeName ).toBe( "VARCHAR" ); + expect( columns[ 4 ].typeName ).toBe( "VARCHAR" ); + }); + + }); + + describe( "readLargeFile throws an exception if", function(){ + + it( "the file doesn't exist", function(){ + expect( function(){ + var path = getTestFilePath( "nonexistent.xls" ); + s.readLargeFile( src=path ); + }).toThrow( regex="Non-existent file" ); + }); + + it( "the file to be read is not an XLSX type", function(){ + expect( function(){ + var path = getTestFilePath( "test.xls" ); + s.readLargeFile( src=path ); + }).toThrow( type="cfsimplicity.spreadsheet.invalidFile" ); + }); + + it( "both sheetName and sheetNumber arguments are specified", function(){ + expect( function(){ + var path = getTestFilePath( "large.xlsx" ); + s.readLargeFile( src=path, sheetName="sheet1", sheetNumber=2 ); + }).toThrow( type="cfsimplicity.spreadsheet.invalidArguments" ); + }); + + it( "the format argument is invalid", function(){ + expect( function(){ + s.readLargeFile( src=getTestFilePath( "large.xlsx" ), format="wrong" ); + }).toThrow( regex="Invalid format" ); + }); + + it( "the sheet name doesn't exist", function(){ + expect( function(){ + s.readLargeFile( src=getTestFilePath( "large.xlsx" ), sheetName="nonexistent" ); + }).toThrow( regex="Invalid sheet" ); + }); + + it( "the sheet number doesn't 
exist", function(){ + expect( function(){ + s.readLargeFile( src=getTestFilePath( "large.xlsx" ), sheetNumber=20 ); + }).toThrow( regex="Invalid sheet|out of range" ); + }); + + it( "the source file is not a spreadsheet", function(){ + expect( function(){ + s.readLargeFile( src=getTestFilePath( "notaspreadsheet.txt" ) ); + }).toThrow( type="cfsimplicity.spreadsheet.invalidFile" ); + }); + + }); + + describe( "the streaming reader", function(){ + + it( "allows options to be passed", function(){ + var options = { + bufferSize: 512 + ,rowCacheSize: 5 + }; + var builder = s.getStreamingReaderHelper().getBuilder( options ); + expect( builder.getBufferSize() ).toBe( options.bufferSize ); + expect( builder.getRowCacheSize() ).toBe( options.rowCacheSize ); + }); + + }); + + afterEach( function(){ + if( FileExists( variables.tempXlsxPath ) ) + FileDelete( variables.tempXlsxPath ); + }); + + } + ,skip=function(){ + return ( s.getIsACF() ); + } +); + +describe( + title="readLargeFile (when run on ACF)" + ,body=function(){ + + it( "throws a methodNotSupported exception", function(){ + expect( function(){ + var path = getTestFilePath( "large.xlsx" ); + s.readLargeFile( src=path ); + }).toThrow( type="cfsimplicity.spreadsheet.methodNotSupported" ); + }); + + } + + ,skip=function(){ + return ( !s.getIsACF() ); + } +); + \ No newline at end of file