diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index e54dbc9..e299cde 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -1,5 +1,8 @@ # Sylvan.Data.Excel Release Notes +_0.4.21_ +- Fixes a bug that could cause incorrect behavior when reading certain .xls files. + _0.4.20_ - Fixes a bug where advancing to the next worksheet can result in the incorrect number of columns being reported when reading xlsx files. #160 - Calling field accessor methods (`GetString`, `GetInt32()`, etc) now throw an exception when called when the reader is in an invalid state. diff --git a/source/Sylvan.Data.Excel.Tests/DataExtensions.cs b/source/Sylvan.Data.Excel.Tests/DataExtensions.cs index 571f429..6abf03a 100644 --- a/source/Sylvan.Data.Excel.Tests/DataExtensions.cs +++ b/source/Sylvan.Data.Excel.Tests/DataExtensions.cs @@ -4,115 +4,114 @@ using System.Linq; using System.Threading.Tasks; -namespace Sylvan.Data +namespace Sylvan.Data; + +static class DataExtensions { - static class DataExtensions + public static void ProcessStrings(this IDataReader reader) { - public static void ProcessStrings(this IDataReader reader) + while (reader.Read()) { - while (reader.Read()) + for (int i = 0; i < reader.FieldCount; i++) { - for (int i = 0; i < reader.FieldCount; i++) - { - reader.GetString(i); - } + reader.GetString(i); } } + } - public static void ProcessValues(this IDataReader reader) + public static void ProcessValues(this IDataReader reader) + { + while (reader.Read()) { - while (reader.Read()) + for (int i = 0; i < reader.FieldCount; i++) { - for (int i = 0; i < reader.FieldCount; i++) - { - reader.GetValue(i); - } + reader.GetValue(i); } } + } - public static void Process(this IDataReader reader) + public static void Process(this IDataReader reader) + { + TypeCode[] types = new TypeCode[reader.FieldCount]; + for (int i = 0; i < reader.FieldCount; i++) { - TypeCode[] types = new TypeCode[reader.FieldCount]; - for (int i = 0; i < reader.FieldCount; i++) - { - var t = reader.GetFieldType(i); - t = Nullable.GetUnderlyingType(t) ?? t; - types[i] = Type.GetTypeCode(t); - } + var t = reader.GetFieldType(i); + t = Nullable.GetUnderlyingType(t) ?? t; + types[i] = Type.GetTypeCode(t); + } - while (reader.Read()) + while (reader.Read()) + { + for (int i = 0; i < reader.FieldCount; i++) { - for (int i = 0; i < reader.FieldCount; i++) - { - if (reader.IsDBNull(i)) - continue; - ProcessField(reader, i, types[i]); - } + if (reader.IsDBNull(i)) + continue; + ProcessField(reader, i, types[i]); } } + } - public static void Process(this DbDataReader reader) + public static void Process(this DbDataReader reader) + { + var cols = reader.GetColumnSchema(); + bool[] allowDbNull = cols.Select(c => c.AllowDBNull != false).ToArray(); + TypeCode[] types = cols.Select(c => Type.GetTypeCode(c.DataType)).ToArray(); + while (reader.Read()) { - var cols = reader.GetColumnSchema(); - bool[] allowDbNull = cols.Select(c => c.AllowDBNull != false).ToArray(); - TypeCode[] types = cols.Select(c => Type.GetTypeCode(c.DataType)).ToArray(); - while (reader.Read()) + for (int i = 0; i < reader.FieldCount; i++) { - for (int i = 0; i < reader.FieldCount; i++) - { - if (allowDbNull[i] && reader.IsDBNull(i)) - continue; - ProcessField(reader, i, types[i]); - } + if (allowDbNull[i] && reader.IsDBNull(i)) + continue; + ProcessField(reader, i, types[i]); } } + } - public static async Task ProcessAsync(this DbDataReader reader) + public static async Task ProcessAsync(this DbDataReader reader) + { + var cols = reader.GetColumnSchema(); + bool[] allowDbNull = cols.Select(c => c.AllowDBNull != false).ToArray(); + TypeCode[] types = cols.Select(c => Type.GetTypeCode(c.DataType)).ToArray(); + while (await reader.ReadAsync()) { - var cols = reader.GetColumnSchema(); - bool[] allowDbNull = cols.Select(c => c.AllowDBNull != false).ToArray(); - TypeCode[] types = cols.Select(c => Type.GetTypeCode(c.DataType)).ToArray(); - while (await reader.ReadAsync()) + for (int i = 0; i < reader.FieldCount; i++) { - for (int i = 0; i < reader.FieldCount; i++) - { - if (allowDbNull[i] && await reader.IsDBNullAsync(i)) - continue; + if (allowDbNull[i] && await reader.IsDBNullAsync(i)) + continue; - ProcessField(reader, i, types[i]); - } + ProcessField(reader, i, types[i]); } } + } - static void ProcessField(this IDataReader reader, int i, TypeCode typeCode) + static void ProcessField(this IDataReader reader, int i, TypeCode typeCode) + { + switch (typeCode) { - switch (typeCode) - { - case TypeCode.Boolean: - reader.GetBoolean(i); - break; - case TypeCode.Int32: - reader.GetInt32(i); - break; - case TypeCode.DateTime: - reader.GetDateTime(i); - break; - case TypeCode.Single: - reader.GetFloat(i); - break; - case TypeCode.Double: - reader.GetDouble(i); - break; - case TypeCode.Decimal: - reader.GetDecimal(i); - break; - case TypeCode.String: - reader.GetString(i); - break; - default: - // no cheating - throw new NotSupportedException("" + typeCode); - } + case TypeCode.Boolean: + reader.GetBoolean(i); + break; + case TypeCode.Int32: + reader.GetInt32(i); + break; + case TypeCode.DateTime: + reader.GetDateTime(i); + break; + case TypeCode.Single: + reader.GetFloat(i); + break; + case TypeCode.Double: + reader.GetDouble(i); + break; + case TypeCode.Decimal: + reader.GetDecimal(i); + break; + case TypeCode.String: + reader.GetString(i); + break; + default: + // no cheating + throw new NotSupportedException("" + typeCode); } } } diff --git a/source/Sylvan.Data.Excel.Tests/ExcelDataReaderTests.cs b/source/Sylvan.Data.Excel.Tests/ExcelDataReaderTests.cs index 71afefc..a85b4b4 100644 --- a/source/Sylvan.Data.Excel.Tests/ExcelDataReaderTests.cs +++ b/source/Sylvan.Data.Excel.Tests/ExcelDataReaderTests.cs @@ -11,7 +11,7 @@ namespace Sylvan.Data.Excel; // the tests defined here will be run against .xls, .xlsx, and .xlsb file -// containing the same content. The expectation is the behavior of the two +// containing the same content. The expectation is the behavior of the three // implementations is the same, so the same test code can validate the // behavior of the three formats. public class XlsxTests @@ -124,7 +124,7 @@ public void DateTime() var epoch = new DateTime(1900, 1, 1); using var edr = ExcelDataReader.Create(file); for (int i = 0; i < 22; i++) - { + { Assert.True(edr.Read()); var value = edr.GetDouble(0); var vs = value.ToString("G15"); diff --git a/source/Sylvan.Data.Excel.Tests/TestStream.cs b/source/Sylvan.Data.Excel.Tests/TestStream.cs index 4d2eef3..77523e9 100644 --- a/source/Sylvan.Data.Excel.Tests/TestStream.cs +++ b/source/Sylvan.Data.Excel.Tests/TestStream.cs @@ -1,65 +1,60 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +using System.IO; -namespace Sylvan.Testing +namespace Sylvan.Testing; + +sealed class TestStream : Stream { - class TestStream : Stream + readonly Stream stream; + + public TestStream(Stream stream) { - Stream stream; - public TestStream(Stream stream) - { - this.stream = stream; - } + this.stream = stream; + } - public override bool CanRead => this.stream.CanRead; + public override bool CanRead => this.stream.CanRead; - public override bool CanSeek => this.stream.CanSeek; + public override bool CanSeek => this.stream.CanSeek; - public override bool CanWrite => this.stream.CanWrite; + public override bool CanWrite => this.stream.CanWrite; - public override long Length => this.stream.Length; + public override long Length => this.stream.Length; - public bool IsClosed { get; private set; } + public bool IsClosed { get; private set; } - public override void Close() - { - this.IsClosed = true; - stream.Close(); - base.Close(); - } + public override void Close() + { + this.IsClosed = true; + stream.Close(); + base.Close(); + } - public override long Position { - get => this.stream.Position; - set => this.stream.Position = value; - } + public override long Position { + get => this.stream.Position; + set => this.stream.Position = value; + } - public override void Flush() - { - this.stream.Flush(); - } + public override void Flush() + { + this.stream.Flush(); + } - public override int Read(byte[] buffer, int offset, int count) - { - return this.stream.Read(buffer, offset, count); - } + public override int Read(byte[] buffer, int offset, int count) + { + return this.stream.Read(buffer, offset, count); + } - public override long Seek(long offset, SeekOrigin origin) - { - return this.stream.Seek(offset, origin); - } + public override long Seek(long offset, SeekOrigin origin) + { + return this.stream.Seek(offset, origin); + } - public override void SetLength(long value) - { - this.stream.SetLength(value); - } + public override void SetLength(long value) + { + this.stream.SetLength(value); + } - public override void Write(byte[] buffer, int offset, int count) - { - this.stream.Write(buffer, offset, count); - } + public override void Write(byte[] buffer, int offset, int count) + { + this.stream.Write(buffer, offset, count); } } \ No newline at end of file diff --git a/source/Sylvan.Data.Excel/Packaging/Ole2Package+Ole2Entry.cs b/source/Sylvan.Data.Excel/Packaging/Ole2Package+Ole2Entry.cs index 090f401..5394463 100644 --- a/source/Sylvan.Data.Excel/Packaging/Ole2Package+Ole2Entry.cs +++ b/source/Sylvan.Data.Excel/Packaging/Ole2Package+Ole2Entry.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; @@ -102,15 +103,16 @@ public override string ToString() public Stream Open() { - if (this.StreamSize < Package.miniSectorCutoff) + var pkg = this.Package; + if (this.StreamSize < pkg.miniSectorCutoff) { - var sectors = this.Package.GetMiniStreamSectors(this.StartSector).ToArray(); - return new Ole2MiniStream(this.Package, this.Package.miniStream, sectors, StreamSize); + var sectors = pkg.GetMiniStreamSectors(this.StartSector).ToArray(); + return new Ole2Stream(pkg.miniStream, sectors, MiniSectorSize, 0, StreamSize); } else { - var sectors = this.Package.GetStreamSectors(this.StartSector).ToArray(); - return new Ole2Stream(this.Package, sectors, StreamSize); + var sectors = pkg.GetStreamSectors(this.StartSector).ToArray(); + return new Ole2Stream(pkg.stream, sectors, pkg.sectorSize, 1, StreamSize); } } } diff --git a/source/Sylvan.Data.Excel/Packaging/Ole2Package+Ole2MiniStream.cs b/source/Sylvan.Data.Excel/Packaging/Ole2Package+Ole2MiniStream.cs deleted file mode 100644 index 227b7b9..0000000 --- a/source/Sylvan.Data.Excel/Packaging/Ole2Package+Ole2MiniStream.cs +++ /dev/null @@ -1,178 +0,0 @@ -using System; -using System.Diagnostics; -using System.IO; - -namespace Sylvan.Data.Excel; - -partial class Ole2Package -{ - public sealed class Ole2MiniStream : Stream - { - Ole2Package package; - - readonly long length; - long position; - - int sectorOff; - readonly int sectorLen; - - readonly uint[] sectors; - int sectorIdx; - uint sector; - - long streamPos; - Stream miniStream; - - public Ole2MiniStream(Ole2Package package, Stream miniStream, uint[] sectors, long length) - { - this.miniStream = miniStream; - this.package = package; - this.sectors = sectors; - this.sectorIdx = 0; - this.sector = sectors[sectorIdx]; - this.length = length; - this.position = 0; - this.sectorLen = this.package.sectorSize; - this.sectorOff = 0; - this.streamPos = -1; - } - - public override long Position - { - get - { - return this.position; - } - set - { - Seek(value - this.position, SeekOrigin.Current); - } - } - - public override long Seek(long offset, SeekOrigin origin) - { - long pos = 0; - switch (origin) - { - case SeekOrigin.Begin: - pos = offset; - break; - case SeekOrigin.Current: - pos = this.position + offset; - break; - case SeekOrigin.End: - pos = this.length + offset; - break; - } - if (pos < 0) - { - throw new ArgumentOutOfRangeException(nameof(offset)); - } - - this.position = pos; - var idx = pos / MiniSectorSize; - - this.sectorIdx = (int)idx; - this.sectorOff = (int) (pos - (idx * MiniSectorSize)); - this.sector = this.sectors[sectorIdx]; - return this.position; - } - - public override int Read(byte[] buffer, int offset, int count) - { - if (offset + count > buffer.Length) - throw new ArgumentOutOfRangeException(); - - var sectors = this.sectors; - - int bytesRead = 0; - var c = count; - var z = (int) Math.Min(count, this.length - position); - - while (bytesRead < count && position < length) - { - var readLen = 0; - var readStart = sector * MiniSectorSize + sectorOff; - var curSector = sector; - - while (readLen < z) - { - if (this.sectorOff >= MiniSectorSize) - { - sectorOff = 0; - sectorIdx++; - if (sectorIdx >= sectors.Length) - { - break; - } - var nextSector = sectors[sectorIdx]; - if (nextSector != curSector + 1) - { - // next sector is not coniguious, so read - // the current contig block - sector = nextSector; - if (readLen > 0) - { - break; - } - } - sector = curSector = nextSector; - } - - var sectorAvail = MiniSectorSize - this.sectorOff; - Debug.WriteLine("SA: " + sectorAvail); - var sectorRead = Math.Min(sectorAvail, z - readLen); - - readLen += sectorRead; - this.sectorOff += sectorRead; - } - - // avoid seek if we are already positioned. - if (streamPos != readStart) - { - package.miniStream.Seek(readStart, SeekOrigin.Begin); - streamPos = readStart; - } - - if (readLen == 0) - break; - int len = 0; - while (len < readLen) - { - int l = package.miniStream.Read(buffer, offset, readLen); - if (l == 0) - throw new IOException();//"Unexpectedly encountered end of Ole2Package Stream" - len += l; - offset += l; - c -= l; - this.position += l; - this.streamPos += l; - } - bytesRead += len; - } - return bytesRead; - } - - public override void Flush() - { - } - - public override void SetLength(long value) - { - throw new NotSupportedException(); - } - - public override void Write(byte[] buffer, int offset, int count) - { - throw new NotSupportedException(); - } - - public override bool CanRead => true; - - public override bool CanSeek => true; - - public override bool CanWrite => false; - - public override long Length => length; - } -} diff --git a/source/Sylvan.Data.Excel/Packaging/Ole2Package+Ole2Stream.cs b/source/Sylvan.Data.Excel/Packaging/Ole2Package+Ole2Stream.cs index 267e8ad..b001262 100644 --- a/source/Sylvan.Data.Excel/Packaging/Ole2Package+Ole2Stream.cs +++ b/source/Sylvan.Data.Excel/Packaging/Ole2Package+Ole2Stream.cs @@ -1,8 +1,5 @@ using System; -using System.Diagnostics; using System.IO; -using System.Threading.Tasks; -using System.Threading; namespace Sylvan.Data.Excel; @@ -10,31 +7,30 @@ partial class Ole2Package { public sealed class Ole2Stream : Stream { - Ole2Package package; + readonly Stream stream; readonly long length; + readonly int sectorLen; + readonly int startSector; + readonly uint[] sectors; + long position; - int sectorOff; - readonly int sectorLen; - readonly uint[] sectors; int sectorIdx; uint sector; - long streamPos; - - public Ole2Stream(Ole2Package package, uint[] sectors, long length) + public Ole2Stream(Stream stream, uint[] sectors, int sectorLen, int startSector, long length) { - this.package = package; + this.stream = stream; this.sectors = sectors; this.sectorIdx = 0; this.sector = sectors[sectorIdx]; + this.startSector = startSector; this.length = length; this.position = 0; - this.sectorLen = this.package.sectorSize; + this.sectorLen = sectorLen; this.sectorOff = 0; - this.streamPos = -1; } public override long Position @@ -73,30 +69,36 @@ public override long Seek(long offset, SeekOrigin origin) var idx = pos / this.sectorLen; this.sectorIdx = (int)idx; - this.sectorOff = (int) (pos - (idx * sectorLen)); + this.sectorOff = (int)(pos - (idx * sectorLen)); this.sector = this.sectors[sectorIdx]; return this.position; } - public override async Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) + public override int Read(byte[] buffer, int offset, int count) { if (offset + count > buffer.Length) throw new ArgumentOutOfRangeException(); - //Debug.WriteLine($"{offset} {count} {this.position}"); + var stream = this.stream; var sectors = this.sectors; int bytesRead = 0; var c = count; + var streamAvail = this.length - this.position; + + // the amount to read is the lesser of the user requested count + // and what remains in the stream. + var readAvail = (int)Math.Min(count, streamAvail); + while (bytesRead < count && position < length) { var readLen = 0; - var readStart = (sector + 1) * sectorLen + sectorOff; + var readStart = (sector + startSector) * sectorLen + sectorOff; var curSector = sector; - while (readLen < c) + while (readLen < readAvail) { if (this.sectorOff >= this.sectorLen) { @@ -121,17 +123,16 @@ public override async Task ReadAsync(byte[] buffer, int offset, int count, } var sectorAvail = this.sectorLen - this.sectorOff; - var sectorRead = Math.Min(sectorAvail, c - readLen); + var sectorRead = Math.Min(sectorAvail, readAvail - readLen); readLen += sectorRead; this.sectorOff += sectorRead; } // avoid seek if we are already positioned. - if (streamPos != readStart) + if (stream.Position != readStart) { - package.stream.Seek(readStart, SeekOrigin.Begin); - streamPos = readStart; + stream.Seek(readStart, SeekOrigin.Begin); } if (readLen == 0) @@ -139,25 +140,21 @@ public override async Task ReadAsync(byte[] buffer, int offset, int count, int len = 0; while (len < readLen) { - int l = await package.stream.ReadAsync(buffer, offset, readLen).ConfigureAwait(false); + int l = stream.Read(buffer, offset, readLen); if (l == 0) - throw new IOException();//"Unexpectedly encountered end of Ole2Package Stream" + { + throw new IOException(); + } len += l; offset += l; c -= l; this.position += l; - this.streamPos += l; } bytesRead += len; } return bytesRead; } - public override int Read(byte[] buffer, int offset, int count) - { - return ReadAsync(buffer, offset, count, default).GetAwaiter().GetResult(); - } - public override void Flush() { } diff --git a/source/Sylvan.Data.Excel/Packaging/Ole2Package.cs b/source/Sylvan.Data.Excel/Packaging/Ole2Package.cs index 721651d..dde26c3 100644 --- a/source/Sylvan.Data.Excel/Packaging/Ole2Package.cs +++ b/source/Sylvan.Data.Excel/Packaging/Ole2Package.cs @@ -75,7 +75,7 @@ public Ole2Package(Stream iStream) { var miniStreamSectors = GetStreamSectors(RootEntry.StartSector).ToArray(); - this.miniStream = new Ole2Stream(this, miniStreamSectors, RootEntry.StreamSize); + this.miniStream = new Ole2Stream(this.stream, miniStreamSectors, this.sectorSize, 1, RootEntry.StreamSize); } else { @@ -164,7 +164,7 @@ void LoadDirectoryEntries() int entryCount = sectorSize * directorySectorCount / DirectoryEntrySize; this.entryList = new Ole2Entry[entryCount]; var sectors = GetStreamSectors(directorySectorStart).ToArray(); - Ole2Stream dirStream = new Ole2Stream(this, sectors, sectorSize * directorySectorCount); + Ole2Stream dirStream = new Ole2Stream(this.stream, sectors, this.sectorSize, 1, sectorSize * directorySectorCount); for (int i = 0; i < entryCount; i++) { this.entryList[i] = new Ole2Entry(this, dirStream, i); diff --git a/source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj b/source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj index a19e2a7..233377a 100644 --- a/source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj +++ b/source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj @@ -3,7 +3,7 @@ net6.0;netstandard2.1;netstandard2.0 latest - 0.4.20 + 0.4.21 A cross-platform .NET library for reading Excel data files. excel;xls;xlsx;xlsb;datareader enable diff --git a/source/Sylvan.Data.Excel/Xls/XlsWorkbookReader+RecordReader.cs b/source/Sylvan.Data.Excel/Xls/XlsWorkbookReader+RecordReader.cs index 35b216e..58a9f28 100644 --- a/source/Sylvan.Data.Excel/Xls/XlsWorkbookReader+RecordReader.cs +++ b/source/Sylvan.Data.Excel/Xls/XlsWorkbookReader+RecordReader.cs @@ -287,7 +287,7 @@ public bool NextRecord() { bufferPos = recordOff + recordLen; - if (bufferPos + 4 >= bufferLen) + if (bufferPos + 4 > bufferLen) { if (!FillBuffer(4)) {