diff --git a/Project.toml b/Project.toml
index 7a728628..69dc0557 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "TiffImages"
 uuid = "731e570b-9d59-4bfa-96dc-6df516fadf69"
 authors = ["Tamas Nagy "]
-version = "0.6.8"
+version = "0.7.0"
 
 [deps]
 ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
diff --git a/src/TiffImages.jl b/src/TiffImages.jl
index 2d1c7235..6eeabf3c 100644
--- a/src/TiffImages.jl
+++ b/src/TiffImages.jl
@@ -23,9 +23,9 @@ const PKGVERSION = @PkgVersion.Version 0
 include("enum.jl")
 include("utils.jl")
 include("files.jl")
-include("compression.jl")
 include("tags.jl")
 include("ifds.jl")
+include("compression.jl")
 include("layout.jl")
 include(joinpath("types", "common.jl"))
 include(joinpath("types", "dense.jl"))
diff --git a/src/compression.jl b/src/compression.jl
index fb086f2f..4557b319 100644
--- a/src/compression.jl
+++ b/src/compression.jl
@@ -1,26 +1,31 @@
 """
-    read!(tf, arr, comp)
+    read!(io, arr, comp)
 
-Read in an array `arr` from the [`TiffFile`](@ref) stream `tf` inflating the
-data using compression method `comp`. `read!` will dispatch on the value of
-compression and use the correct compression technique to read the data.
+Read in an array `arr` from the [`TiffFile`](@ref) or [`TiffFileStrip`](@ref)
+stream `io`, inflating the data using compression method `comp`. `read!` will
+dispatch on the value of compression and use the correct compression technique
+to read the data.
 """
-Base.read!(tf::TiffFile, arr::AbstractArray, comp::CompressionType) = read!(tf, arr, Val(comp))
+Base.read!(io::Union{TiffFile, TiffFileStrip}, arr::AbstractArray, comp::CompressionType) = read!(io, arr, Val(comp))
+
+function memcpy(dest::Ptr{T}, src::Ptr{T}, n::Int) where T
+    ccall(:memcpy, Ptr{T}, (Ptr{T}, Ptr{T}, Int), dest, src, n) # copy `n` bytes from `src` to `dest` through the C library
+end
 
-Base.read!(tf::TiffFile, arr::AbstractArray, ::Val{COMPRESSION_NONE}) = read!(tf, arr)
+Base.read!(io::Union{TiffFile, TiffFileStrip}, arr::AbstractArray, ::Val{COMPRESSION_NONE}) = read!(io, arr)
 
-function Base.read!(tf::TiffFile, arr::AbstractArray{T, N}, ::Val{COMPRESSION_PACKBITS}) where {T, N}
+function Base.read!(tfs::TiffFileStrip, arr::AbstractArray{T, N}, ::Val{COMPRESSION_PACKBITS}) where {T, N}
     pos = 1
     nbit = Array{Int8}(undef, 1)
     nxt = Array{T}(undef, 1)
     while pos < length(arr)
-        read!(tf, nbit)
+        read!(tfs.tf, nbit)
         n = nbit[1]
         if 0 <= n <= 127
-            read!(tf, view(arr, pos:pos+n))
+            read!(tfs.tf, view(arr, pos:pos+n))
             pos += n
         elseif -127 <= n <= -1
-            read!(tf, nxt)
+            read!(tfs.tf, nxt)
             arr[pos:(pos-n)] .= nxt[1]
             pos += -n
         end
@@ -28,12 +33,183 @@ function Base.read!(tf::TiffFile, arr::AbstractArray{T, N}, ::Val{COMPRESSION_PA
     end
 end
 
-function Base.read!(tf::TiffFile, arr::AbstractArray, ::Val{COMPRESSION_DEFLATE})
-    readbytes!(InflateZlibStream(tf.io.io), reinterpret(UInt8, vec(arr)))
+function Base.read!(tfs::TiffFileStrip, arr::AbstractArray, ::Val{COMPRESSION_DEFLATE})
+    readbytes!(InflateZlibStream(tfs.tf.io.io), reinterpret(UInt8, vec(arr)))
 end
 
-function Base.read!(tf::TiffFile, arr::AbstractArray, ::Val{COMPRESSION_ADOBE_DEFLATE})
-    readbytes!(InflateZlibStream(tf.io.io), reinterpret(UInt8, vec(arr)))
+function Base.read!(tfs::TiffFileStrip, arr::AbstractArray, ::Val{COMPRESSION_ADOBE_DEFLATE})
+    readbytes!(InflateZlibStream(tfs.tf.io.io), reinterpret(UInt8, vec(arr)))
+end
+
+function lzw_decode!(io, arr::AbstractArray)
+    CLEAR_CODE::Int = 256 + 1
+    EOI_CODE::Int = 257 + 1
+    TABLE_ENTRY_LENGTH_BITS::Int = 16
+
+    out_pointer::Ptr{UInt8} = reinterpret(Ptr{UInt8}, pointer(arr))
+    output_size::Int = sizeof(arr)
+    out_position::Int = 0 # current position in out
+
+    table_size::Int = output_size * 2 + 258
+    table_pointer::Ptr{UInt8} = reinterpret(Ptr{UInt8}, Libc.malloc(table_size)) # table of strings
+    table_offsets_pointer::Ptr{Int} = reinterpret(Ptr{Int}, Libc.malloc(sizeof(Int) * 4097)) # offsets into table
+
+    @inline create_table_entry(length, offset) = Base.shl_int(length, (64 - TABLE_ENTRY_LENGTH_BITS)) | offset
+    @inline table_entry_length(table_entry) = Base.lshr_int(table_entry, 64 - TABLE_ENTRY_LENGTH_BITS)
+    @inline table_entry_offset(table_entry) = table_entry & (Base.shl_int(1, 64 - TABLE_ENTRY_LENGTH_BITS) - 1)
+
+    try
+        # InitializeTable();
+        foreach(i -> unsafe_store!(table_pointer + i, UInt8(i)), 0:255)
+        foreach(i -> unsafe_store!(table_offsets_pointer, create_table_entry(1, i), i+1), 0:259) # length is stored in upper 16 bits
+
+        code = -1
+
+        buffer::Int=0 # buffer for reading in codes
+        bitcount::Int=0 # number of valid bits in buffer
+        codesize::Int=9 # current number of bits per code
+        input::Vector{UInt8} = Vector{UInt8}(undef, bytesavailable(io))
+        read!(io, input)
+        function getcode(buffer, code, bitcount, codesize, i)
+            old_code::Int = code
+
+            # make sure we have enough bits in the buffer
+            if bitcount < codesize
+                buffer = Base.shl_int(buffer, 8) | input[i+=1]
+                bitcount += 8
+            end
+
+            # one more time (since the max code size is 12 bits, only need to check twice)
+            if bitcount < codesize
+                buffer = Base.shl_int(buffer, 8) | input[i+=1]
+                bitcount += 8
+            end
+
+            code = Base.lshr_int(buffer, bitcount - codesize) & (Base.shl_int(1, codesize) - 1)
+            bitcount -= codesize
+            # code + 1 because this is Julia
+            (buffer, code + 1, old_code, bitcount, codesize, i)
+        end
+
+        @inline check_table_overflow(start, length) = start + length > table_size && error("table buffer overflow") # must throw: logging alone would let the following write run past the table
+        @inline check_output_overflow(start, length) = start + length > output_size && error("output buffer overflow") # must throw: logging alone would let the following write run past `arr`
+
+        # annotated with excerpts from the LZW pseudocode in the TIFF 6.0 spec
+        # https://developer.adobe.com/content/dam/udp/en/open/standards/tiff/TIFF6.pdf
+        table_count::Int = 258 # number of (valid) table entries; 256 one-byte codes + CLEAR_CODE + EOI_CODE
+        next_table_offset::Int = 258
+        input_pos::Int = 0 # current position in input
+        while true
+            # GetNextCode()
+            (buffer, code, old_code, bitcount, codesize, input_pos) = getcode(buffer, code, bitcount, codesize, input_pos)
+            if code == EOI_CODE || out_position >= output_size
+                break
+            elseif code == CLEAR_CODE # reset table
+                # InitializeTable();
+                table_count = 258
+                next_table_offset = 258
+                codesize = 9
+                # Code = GetNextCode();
+                (buffer, code, old_code, bitcount, codesize, input_pos) = getcode(buffer, code, bitcount, codesize, input_pos)
+                if code == EOI_CODE
+                    break
+                end
+                # WriteString(StringFromCode(Code))
+                r = unsafe_load(table_offsets_pointer, code)
+                len = table_entry_length(r)
+
+                check_output_overflow(out_position, len)
+
+                memcpy(out_pointer + out_position, table_pointer + table_entry_offset(r), len)
+                out_position += len
+            else
+                if code <= table_count
+                    # WriteString(StringFromCode(Code));
+                    if code <= 256
+                        # this is redundant with the check above, but it makes
+                        # the code easier to reason about and less bug prone
+                        check_output_overflow(out_position, 1)
+
+                        unsafe_store!(out_pointer + out_position, code - 1)
+                        out_position += 1
+                    else
+                        r = unsafe_load(table_offsets_pointer, code)
+                        len = table_entry_length(r)
+
+                        check_output_overflow(out_position, len)
+
+                        memcpy(out_pointer + out_position, table_pointer + table_entry_offset(r), len)
+                        out_position += len
+                    end
+
+                    # AddStringToTable(StringFromCode(OldCode) + FirstChar(StringFromCode(Code)));
+                    table_count += 1
+                    len = 1
+                    if old_code <= 256
+                        check_table_overflow(next_table_offset, 2) # this byte + the next one
+
+                        unsafe_store!(table_pointer + next_table_offset, UInt8(old_code - 1))
+                    else
+                        r = unsafe_load(table_offsets_pointer, old_code)
+                        len = table_entry_length(r)
+
+                        check_table_overflow(next_table_offset, len + 1) # these bytes + the next one
+
+                        memcpy(table_pointer + next_table_offset, table_pointer + table_entry_offset(r), len)
+                    end
+
+                    if code <= 256
+                        unsafe_store!(table_pointer + next_table_offset + len, UInt8(code - 1))
+                    else
+                        r = unsafe_load(table_offsets_pointer, code)
+                        memcpy(table_pointer + next_table_offset + len, table_pointer + table_entry_offset(r), 1)
+                    end
+                    unsafe_store!(table_offsets_pointer, create_table_entry(len + 1, next_table_offset), table_count)
+                    next_table_offset += len + 1
+                else
+                    # WriteString(StringFromCode(OldCode) + FirstChar(StringFromCode(OldCode)));
+                    r = unsafe_load(table_offsets_pointer, old_code)
+                    len = table_entry_length(r)
+
+                    check_output_overflow(out_position, len + 1)
+
+                    memcpy(out_pointer + out_position, table_pointer + table_entry_offset(r), len)
+                    unsafe_store!(out_pointer + out_position + len, unsafe_load(table_pointer + table_entry_offset(r)))
+                    out_position += len + 1
+
+                    check_table_overflow(next_table_offset, len + 1)
+
+                    # AddStringToTable(StringFromCode(OldCode) + FirstChar(StringFromCode(OldCode)));
+                    table_count += 1
+                    memcpy(table_pointer + next_table_offset, table_pointer + table_entry_offset(r), len)
+                    memcpy(table_pointer + next_table_offset + len, table_pointer + table_entry_offset(r), 1)
+                    unsafe_store!(table_offsets_pointer, create_table_entry(len + 1, next_table_offset), table_count)
+                    next_table_offset += len + 1
+                end
+            end
+
+            if table_count == 511
+                codesize = 10
+            elseif table_count == 1023
+                codesize = 11
+            elseif table_count == 2047
+                codesize = 12
+            end
+        end
+
+        out_position != output_size && @warn "LZW: expected $output_size bytes, got $out_position bytes"
+        out_position == output_size && code != EOI_CODE && @warn "LZW: missing EOI code"
+    catch e
+        # wrap every failure in an "LZW: "-prefixed error; `error` throws, so nothing after it would run
+        error("LZW: $e")
+    finally
+        Libc.free(table_pointer)
+        Libc.free(table_offsets_pointer)
+    end
+end
+
+function Base.read!(tfs::TiffFileStrip{S}, arr::AbstractArray{T, N}, ::Val{COMPRESSION_LZW}) where {T, N, S}
+    lzw_decode!(tfs, arr)
 end
 
 """
@@ -46,15 +222,16 @@ julia> TiffImages.get_inflator(first(methods(read!, [TiffImages.TiffFile, Abstra
 COMPRESSION_NONE::CompressionType = 1
 ```
 """
-get_inflator(::Type{Tuple{typeof(read!), TiffFile, AbstractArray{T, N} where {T, N}, Val{C}}}) where C = C
+get_inflator(::Type{Tuple{typeof(read!), TiffFileStrip, AbstractArray{T, N} where {T, N}, Val{C}}}) where C = C
+get_inflator(::Type{Tuple{typeof(read!), Union{TiffFile, TiffFileStrip{S} where S}, AbstractArray{T, N} where {T, N}, Val{C}}}) where C = C
 
 # autogenerate nice error messages for all non-implemented inflation methods
-implemented = map(x->get_inflator(x.sig), methods(read!, [TiffFile, AbstractArray, Val], ))
+implemented = map(x->get_inflator(x.sig), methods(read!, [Union{TiffFile, TiffFileStrip}, AbstractArray, Val], ))
 comps = Set(instances(CompressionType))
 setdiff!(comps, implemented)
 
 for comp in comps
     eval(quote
-        Base.read!(tf::TiffFile, arr::AbstractArray, ::Val{$comp}) = error("Compression ", $comp, " is not implemented. Please open an issue against TiffImages.jl.")
+        Base.read!(io::Union{TiffFile, TiffFileStrip}, arr::AbstractArray, ::Val{$comp}) = error("Compression ", $comp, " is not implemented. Please open an issue against TiffImages.jl.")
     end)
-end
\ No newline at end of file
+end
diff --git a/src/ifds.jl b/src/ifds.jl
index dc8619e5..67bb948d 100644
--- a/src/ifds.jl
+++ b/src/ifds.jl
@@ -203,7 +203,28 @@ function Base.iterate(file::TiffFile, state::Tuple{Union{IFD{O}, Nothing}, Int})
     return (curr_ifd, (next_ifd, next_ifd_offset))
 end
 
-function Base.read!(target::AbstractArray{T, N}, tf::TiffFile, ifd::IFD) where {T, N}
+"""
+    $(TYPEDEF)
+
+A strip is a contiguous block of separately-encoded image data. A TIFF
+file will typically have multiple strips, each representing multiple rows of
+pixels in the image.
+
+$(FIELDS)
+"""
+struct TiffFileStrip{O, S, P}
+    """The file stream"""
+    tf::TiffFile{O, S}
+    """The IFD corresponding to this strip"""
+    ifd::IFD{O}
+    """The number of bytes in this strip"""
+    bytes::Int
+end
+
+Base.read!(tfs::TiffFileStrip, arr::AbstractArray) = read!(tfs.tf, arr)
+Base.bytesavailable(tfs::TiffFileStrip) = tfs.bytes
+
+function Base.read!(target::AbstractArray{T, N}, tf::TiffFile{O, S}, ifd::IFD{O}) where {T, N, O, S}
     strip_offsets = ifd[STRIPOFFSETS].data
 
     if PLANARCONFIG in ifd
@@ -229,11 +250,18 @@ function Base.read!(target::AbstractArray{T, N}, tf::TiffFile, ifd::IFD) where {
             strip_nbytes[end] = (rows - (rowsperstrip * (nstrips-1))) * cols * sizeof(T)
         end
 
+        bytes = ifd[STRIPBYTECOUNTS].data
+
         startbyte = 1
+        comp = Val(compression)
+        rtype = rawtype(ifd)
         for i in 1:nstrips
             seek(tf, strip_offsets[i]::Core.BuiltinInts)
             nbytes = Int(strip_nbytes[i]::Core.BuiltinInts / sizeof(T))
-            read!(tf, view(target, startbyte:(startbyte+nbytes-1)), compression)
+            tfs = TiffFileStrip{O, S, rtype}(tf, ifd, bytes[i])
+            arr = view(target, startbyte:(startbyte+nbytes-1))
+            read!(tfs, arr, comp)
+            reverse_prediction!(tfs, arr)
             startbyte += nbytes
         end
     else
@@ -297,3 +325,26 @@ function Base.write(tf::TiffFile{O}, ifd::IFD{O}) where {O <: Unsigned}
 
     return ifd_end_pos
 end
+
+function reverse_prediction!(tfs::TiffFileStrip{O, S, P}, arr::AbstractArray{T, N}) where {O, S, P, T, N}
+    predictor::Int = Int(getdata(tfs.ifd, PREDICTOR, 0))
+    spp::Int = Int(getdata(tfs.ifd, SAMPLESPERPIXEL, 1)) # TIFF default is 1; a default of 0 would make the plane loop below empty
+    if predictor == 2
+        columns = Int(ncols(tfs.ifd))
+        rows = cld(length(arr), columns) # number of rows in this strip
+
+        # horizontal differencing
+        temp::Ptr{P} = reinterpret(Ptr{P}, pointer(arr))
+        for row in 1:rows
+            start = (row - 1) * columns * spp
+            for plane in 1:spp
+                previous::P = unsafe_load(temp, start + plane)
+                for i in (spp + plane):spp:(columns - 1) * spp + plane
+                    current = unsafe_load(temp, start + i) + previous
+                    unsafe_store!(temp, current, start + i)
+                    previous = current
+                end
+            end
+        end
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index c695e538..6a2d0d41 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -215,4 +215,10 @@ end
     ifd[TiffImages.ROWSPERSTRIP] = 256
 
     @test TiffImages.iscontiguous(ifd)
-end
\ No newline at end of file
+end
+
+@testset "LZW" begin
+    uncompressed = get_example("shapes_uncompressed.tif")
+    compressed = get_example("shapes_lzw.tif")
+    @test TiffImages.load(uncompressed) == TiffImages.load(compressed)
+end