Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for LZW compression #116

Merged
merged 21 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "TiffImages"
uuid = "731e570b-9d59-4bfa-96dc-6df516fadf69"
authors = ["Tamas Nagy <[email protected]>"]
version = "0.6.7"
version = "0.7.0"

[deps]
ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
Expand Down
2 changes: 1 addition & 1 deletion src/TiffImages.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ const PKGVERSION = @PkgVersion.Version 0
include("enum.jl")
include("utils.jl")
include("files.jl")
include("compression.jl")
include("tags.jl")
include("ifds.jl")
include("compression.jl")
include("layout.jl")
include(joinpath("types", "common.jl"))
include(joinpath("types", "dense.jl"))
Expand Down
213 changes: 195 additions & 18 deletions src/compression.jl
Original file line number Diff line number Diff line change
@@ -1,39 +1,215 @@
"""
read!(tf, arr, comp)
read!(io, arr, comp)

Read in an array `arr` from the [`TiffFile`](@ref) stream `tf` inflating the
data using compression method `comp`. `read!` will dispatch on the value of
compression and use the correct compression technique to read the data.
Read in an array `arr` from the [`TiffFile`](@ref) or [`TiffFileStrip`](@ref)
stream `io`, inflating the data using compression method `comp`. `read!` will
dispatch on the value of compression and use the correct compression technique
to read the data.
"""
Base.read!(tf::TiffFile, arr::AbstractArray, comp::CompressionType) = read!(tf, arr, Val(comp))
# Thin wrapper around the C library's `memcpy`: copy `n` bytes from `src` to
# `dest` and hand back whatever `memcpy` returns, typed as `Ptr{T}`.
# As with C `memcpy`, the two regions must not overlap and both pointers must
# stay valid (e.g. via `GC.@preserve`) for the duration of the call.
memcpy(dest::Ptr{T}, src::Ptr{T}, n::Int) where {T} =
    ccall(:memcpy, Ptr{T}, (Ptr{T}, Ptr{T}, Int), dest, src, n)

# Lift the runtime compression value into the type domain with `Val` so that
# the `read!` method specialized for that compression scheme is selected.
function Base.read!(io::Union{TiffFile, TiffFileStrip}, arr::AbstractArray, comp::CompressionType)
    return read!(io, arr, Val(comp))
end

Base.read!(tf::TiffFile, arr::AbstractArray, ::Val{COMPRESSION_NONE}) = read!(tf, arr)
# Uncompressed data: forward to the two-argument `read!` without any inflation.
function Base.read!(io::Union{TiffFile, TiffFileStrip}, arr::AbstractArray, ::Val{COMPRESSION_NONE})
    return read!(io, arr)
end

function Base.read!(tf::TiffFile, arr::AbstractArray{T, N}, ::Val{COMPRESSION_PACKBITS}) where {T, N}
function Base.read!(tfs::TiffFileStrip, arr::AbstractArray{T, N}, ::Val{COMPRESSION_PACKBITS}) where {T, N}
pos = 1
nbit = Array{Int8}(undef, 1)
nxt = Array{T}(undef, 1)
while pos < length(arr)
read!(tf, nbit)
read!(tfs.tf, nbit)
n = nbit[1]
if 0 <= n <= 127
read!(tf, view(arr, pos:pos+n))
read!(tfs.tf, view(arr, pos:pos+n))
pos += n
elseif -127 <= n <= -1
read!(tf, nxt)
read!(tfs.tf, nxt)
arr[pos:(pos-n)] .= nxt[1]
pos += -n
end
pos += 1
end
end

function Base.read!(tf::TiffFile, arr::AbstractArray, ::Val{COMPRESSION_DEFLATE})
readbytes!(InflateZlibStream(tf.io.io), reinterpret(UInt8, vec(arr)))
# Deflate-compressed strip: wrap the underlying raw stream of the strip's file
# in an inflating stream and read the decompressed bytes straight into the
# byte-level view of `arr`.
function Base.read!(tfs::TiffFileStrip, arr::AbstractArray, ::Val{COMPRESSION_DEFLATE})
    inflater = InflateZlibStream(tfs.tf.io.io)
    raw_bytes = reinterpret(UInt8, vec(arr))
    return readbytes!(inflater, raw_bytes)
end

function Base.read!(tf::TiffFile, arr::AbstractArray, ::Val{COMPRESSION_ADOBE_DEFLATE})
readbytes!(InflateZlibStream(tf.io.io), reinterpret(UInt8, vec(arr)))
# Adobe-deflate strip: decoded exactly like the plain deflate case, i.e. the
# file's underlying raw stream is inflated into the byte-level view of `arr`.
function Base.read!(tfs::TiffFileStrip, arr::AbstractArray, ::Val{COMPRESSION_ADOBE_DEFLATE})
    inflater = InflateZlibStream(tfs.tf.io.io)
    raw_bytes = reinterpret(UInt8, vec(arr))
    return readbytes!(inflater, raw_bytes)
end

"""
    lzw_decode!(io, arr)

Decode one TIFF LZW-compressed strip from `io` into `arr`, in place.

All `bytesavailable(io)` bytes are read from `io` up front and then decoded as
a stream of 9- to 12-bit codes (most significant bit first), following the LZW
pseudocode of the TIFF 6.0 specification. Decoded bytes are written directly
into the memory behind `pointer(arr)`, so `arr` must support `pointer` and
`sizeof`; at most `sizeof(arr)` bytes are produced.

Codes are kept 1-based internally (stream code + 1): codes `1:256` are the
single-byte strings, `CLEAR_CODE` is 257 and `EOI_CODE` is 258.

A warning is logged if the strip does not decode to exactly `sizeof(arr)`
bytes, or if the output is full but the last code read was not the
end-of-information code.

# Throws
- `ErrorException` (message prefixed with `"LZW: "`) if decoding would write
  past the output buffer, the string table or the code table, or if any other
  exception is raised while decoding.
"""
function lzw_decode!(io, arr::AbstractArray)
    CLEAR_CODE::Int = 256 + 1
    EOI_CODE::Int = 257 + 1
    TABLE_ENTRY_LENGTH_BITS::Int = 16
    MAX_TABLE_ENTRIES::Int = 4097 # capacity of the code -> string lookup table

    out_pointer::Ptr{UInt8} = reinterpret(Ptr{UInt8}, pointer(arr))
    output_size::Int = sizeof(arr)
    out_position::Int = 0 # current position in out

    table_size::Int = output_size * 2 + 258
    table_pointer::Ptr{UInt8} = reinterpret(Ptr{UInt8}, Libc.malloc(table_size)) # table of strings
    table_offsets_pointer::Ptr{Int} = reinterpret(Ptr{Int}, Libc.malloc(sizeof(Int) * MAX_TABLE_ENTRIES)) # offsets into table

    # A table entry packs the string length into the upper 16 bits of an Int
    # and the string's byte offset into `table_pointer` into the lower 48 bits.
    @inline create_table_entry(length, offset) = Base.shl_int(length, (64 - TABLE_ENTRY_LENGTH_BITS)) | offset
    @inline table_entry_length(table_entry) = Base.lshr_int(table_entry, 64 - TABLE_ENTRY_LENGTH_BITS)
    @inline table_entry_offset(table_entry) = table_entry & (Base.shl_int(1, 64 - TABLE_ENTRY_LENGTH_BITS) - 1)

    try
        # InitializeTable();
        foreach(i -> unsafe_store!(table_pointer + i, UInt8(i)), 0:255)
        foreach(i -> unsafe_store!(table_offsets_pointer, create_table_entry(1, i), i+1), 0:259) # length is stored in upper 16 bits

        code = -1

        buffer::Int=0 # buffer for reading in codes
        bitcount::Int=0 # number of valid bits in buffer
        codesize::Int=9 # current number of bits per code
        input::Vector{UInt8} = Vector{UInt8}(undef, bytesavailable(io))
        read!(io, input)

        # Pull the next `codesize`-bit code out of `input`. All decoder state is
        # passed in and returned explicitly; the previous code comes back as
        # `old_code`.
        function getcode(buffer, code, bitcount, codesize, i)
            old_code::Int = code

            # make sure we have enough bits in the buffer
            if bitcount < codesize
                buffer = Base.shl_int(buffer, 8) | input[i+=1]
                bitcount += 8
            end

            # one more time (since the max code size is 12 bits, only need to check twice)
            if bitcount < codesize
                buffer = Base.shl_int(buffer, 8) | input[i+=1]
                bitcount += 8
            end

            code = Base.lshr_int(buffer, bitcount - codesize) & (Base.shl_int(1, codesize) - 1)
            bitcount -= codesize
            # code + 1 because this is Julia
            (buffer, code + 1, old_code, bitcount, codesize, i)
        end

        # These guards must *throw*: merely logging would let the raw-pointer
        # writes below run past the end of the malloc'd/output buffers.
        @inline check_table_overflow(start, length) = start + length > table_size && error("LZW: table buffer overflow")
        @inline check_output_overflow(start, length) = start + length > output_size && error("LZW: output buffer overflow")
        @inline check_table_count(count) = count > MAX_TABLE_ENTRIES && error("LZW: code table overflow")

        # annotated with excerpts from the LZW pseudocode in the TIFF 6.0 spec
        # https://developer.adobe.com/content/dam/udp/en/open/standards/tiff/TIFF6.pdf
        table_count::Int = 258 # number of (valid) table entries; 256 one-byte codes + CLEAR_CODE + EOI_CODE
        next_table_offset::Int = 258
        input_pos::Int = 0 # current position in input
        while true
            # GetNextCode()
            (buffer, code, old_code, bitcount, codesize, input_pos) = getcode(buffer, code, bitcount, codesize, input_pos)
            if code == EOI_CODE || out_position >= output_size
                break
            elseif code == CLEAR_CODE # reset table
                # InitializeTable();
                table_count = 258
                next_table_offset = 258
                codesize = 9
                # Code = GetNextCode();
                (buffer, code, old_code, bitcount, codesize, input_pos) = getcode(buffer, code, bitcount, codesize, input_pos)
                if code == EOI_CODE
                    break
                end
                # WriteString(StringFromCode(Code))
                r = unsafe_load(table_offsets_pointer, code)
                len = table_entry_length(r)

                check_output_overflow(out_position, len)

                memcpy(out_pointer + out_position, table_pointer + table_entry_offset(r), len)
                out_position += len
            else
                if code <= table_count
                    # WriteString(StringFromCode(Code));
                    if code <= 256
                        # this is redundant with the check above, but it makes
                        # the code easier to reason about and less bug prone
                        check_output_overflow(out_position, 1)

                        unsafe_store!(out_pointer + out_position, code - 1)
                        out_position += 1
                    else
                        r = unsafe_load(table_offsets_pointer, code)
                        len = table_entry_length(r)

                        check_output_overflow(out_position, len)

                        memcpy(out_pointer + out_position, table_pointer + table_entry_offset(r), len)
                        out_position += len
                    end

                    # AddStringToTable(StringFromCode(OldCode) + FirstChar(StringFromCode(Code)));
                    table_count += 1
                    check_table_count(table_count)
                    len = 1
                    if old_code <= 256
                        check_table_overflow(next_table_offset, 2) # this byte + the next one

                        unsafe_store!(table_pointer + next_table_offset, UInt8(old_code - 1))
                    else
                        r = unsafe_load(table_offsets_pointer, old_code)
                        len = table_entry_length(r)

                        check_table_overflow(next_table_offset, len + 1) # these bytes + the next one

                        memcpy(table_pointer + next_table_offset, table_pointer + table_entry_offset(r), len)
                    end

                    if code <= 256
                        unsafe_store!(table_pointer + next_table_offset + len, UInt8(code - 1))
                    else
                        r = unsafe_load(table_offsets_pointer, code)
                        memcpy(table_pointer + next_table_offset + len, table_pointer + table_entry_offset(r), 1)
                    end
                    unsafe_store!(table_offsets_pointer, create_table_entry(len + 1, next_table_offset), table_count)
                    next_table_offset += len + 1
                else
                    # WriteString(StringFromCode(OldCode) + FirstChar(StringFromCode(OldCode)));
                    r = unsafe_load(table_offsets_pointer, old_code)
                    len = table_entry_length(r)

                    check_output_overflow(out_position, len + 1)

                    memcpy(out_pointer + out_position, table_pointer + table_entry_offset(r), len)
                    unsafe_store!(out_pointer + out_position + len, unsafe_load(table_pointer + table_entry_offset(r)))
                    out_position += len + 1

                    check_table_overflow(next_table_offset, len + 1)

                    # AddStringToTable(StringFromCode(OldCode) + FirstChar(StringFromCode(OldCode)));
                    table_count += 1
                    check_table_count(table_count)
                    memcpy(table_pointer + next_table_offset, table_pointer + table_entry_offset(r), len)
                    memcpy(table_pointer + next_table_offset + len, table_pointer + table_entry_offset(r), 1)
                    unsafe_store!(table_offsets_pointer, create_table_entry(len + 1, next_table_offset), table_count)
                    next_table_offset += len + 1
                end
            end

            # Codes grow by one bit just before the table outgrows the current width.
            if table_count == 511
                codesize = 10
            elseif table_count == 1023
                codesize = 11
            elseif table_count == 2047
                codesize = 12
            end
        end

        out_position != output_size && @warn "LZW: expected $output_size bytes, got $out_position bytes"
        out_position == output_size && code != EOI_CODE && @warn "LZW: missing EOI code"
    catch e
        # Errors raised by the guards above already carry an "LZW: " message;
        # pass them (and any other ErrorException) through untouched.
        e isa ErrorException && rethrow()
        # Everything else is reported as an ErrorException tagged with "LZW: ".
        error("LZW: $e")
    finally
        # The scratch tables are malloc'd, so they must be released on every path.
        Libc.free(table_pointer)
        Libc.free(table_offsets_pointer)
    end
end

# LZW-compressed strip: hand the strip and the destination array to the decoder.
function Base.read!(tfs::TiffFileStrip{S}, arr::AbstractArray{T, N}, ::Val{COMPRESSION_LZW}) where {T, N, S}
    return lzw_decode!(tfs, arr)
end

"""
Expand All @@ -46,15 +222,16 @@ julia> TiffImages.get_inflator(first(methods(read!, [TiffImages.TiffFile, Abstra
COMPRESSION_NONE::CompressionType = 1
```
"""
get_inflator(::Type{Tuple{typeof(read!), TiffFile, AbstractArray{T, N} where {T, N}, Val{C}}}) where C = C
# Extract the compression constant `C` from the signature of a `read!` method
# whose stream argument is a `TiffFileStrip`.
function get_inflator(::Type{Tuple{typeof(read!), TiffFileStrip, AbstractArray{T, N} where {T, N}, Val{C}}}) where C
    return C
end

# Same extraction for `read!` methods whose stream argument is the
# `TiffFile`/`TiffFileStrip` union.
function get_inflator(::Type{Tuple{typeof(read!), Union{TiffFile, TiffFileStrip{S} where S}, AbstractArray{T, N} where {T, N}, Val{C}}}) where C
    return C
end

# autogenerate nice error messages for all non-implemented inflation methods
implemented = map(x->get_inflator(x.sig), methods(read!, [TiffFile, AbstractArray, Val], ))
implemented = map(x->get_inflator(x.sig), methods(read!, [Union{TiffFile, TiffFileStrip}, AbstractArray, Val], ))
comps = Set(instances(CompressionType))
setdiff!(comps, implemented)

for comp in comps
eval(quote
Base.read!(tf::TiffFile, arr::AbstractArray, ::Val{$comp}) = error("Compression ", $comp, " is not implemented. Please open an issue against TiffImages.jl.")
Base.read!(io::Union{TiffFile, TiffFileStrip}, arr::AbstractArray, ::Val{$comp}) = error("Compression ", $comp, " is not implemented. Please open an issue against TiffImages.jl.")
end)
end
end
53 changes: 52 additions & 1 deletion src/ifds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,27 @@ function Base.iterate(file::TiffFile, state::Tuple{Union{IFD{O}, Nothing}, Int})
return (curr_ifd, (next_ifd, next_ifd_offset))
end

"""
$(TYPEDEF)

A strip is a contiguous block of separately-encoded image data. A TIFF
file will typically have multiple strips, each representing multiple rows of
pixels in the image.

$(FIELDS)
"""
struct TiffFileStrip{T}
tlnagy marked this conversation as resolved.
Show resolved Hide resolved
"""The file stream"""
tf::TiffFile
"""The IFD corresponding to this strip"""
ifd::IFD
"""The number of bytes in this strip"""
bytes::Int
end
tlnagy marked this conversation as resolved.
Show resolved Hide resolved

# Plain reads on a strip are forwarded to the `TiffFile` it wraps.
function Base.read!(tfs::TiffFileStrip, arr::AbstractArray)
    return read!(tfs.tf, arr)
end
# A strip reports the byte count stored in its `bytes` field as available.
function Base.bytesavailable(tfs::TiffFileStrip)
    return tfs.bytes
end

function Base.read!(target::AbstractArray{T, N}, tf::TiffFile, ifd::IFD) where {T, N}
strip_offsets = ifd[STRIPOFFSETS].data

Expand All @@ -229,11 +250,18 @@ function Base.read!(target::AbstractArray{T, N}, tf::TiffFile, ifd::IFD) where {
strip_nbytes[end] = (rows - (rowsperstrip * (nstrips-1))) * cols * sizeof(T)
end

bytes = ifd[STRIPBYTECOUNTS].data

startbyte = 1
comp = Val(compression)
rtype = rawtype(ifd)
for i in 1:nstrips
seek(tf, strip_offsets[i]::Core.BuiltinInts)
nbytes = Int(strip_nbytes[i]::Core.BuiltinInts / sizeof(T))
read!(tf, view(target, startbyte:(startbyte+nbytes-1)), compression)
tfs = TiffFileStrip{rtype}(tf, ifd, bytes[i])
arr = view(target, startbyte:(startbyte+nbytes-1))
read!(tfs, arr, comp)
reverse_prediction!(tfs, arr)
startbyte += nbytes
end
else
Expand Down Expand Up @@ -297,3 +325,26 @@ function Base.write(tf::TiffFile{O}, ifd::IFD{O}) where {O <: Unsigned}

return ifd_end_pos
end

"""
    reverse_prediction!(tfs, arr)

Undo the TIFF horizontal-differencing predictor on the decoded strip `arr`, in
place. Nothing is done unless the strip's IFD carries `PREDICTOR == 2`.

The memory behind `arr` is reinterpreted as samples of type `S` (the type
parameter of `tfs`). Within each row, every sample from the second pixel
onwards is replaced by the running sum of itself and the sample `spp`
positions earlier, where `spp` is the number of samples per pixel, so each
interleaved channel is accumulated separately.

`arr` must support `pointer`. The row count is derived as
`cld(length(arr), ncols(tfs.ifd))`.
NOTE(review): this assumes `length(arr)` counts pixels rather than individual
samples — confirm against the caller.

Returns `nothing`.
"""
function reverse_prediction!(tfs::TiffFileStrip{S}, arr::AbstractArray{T,N}) where {T, N, S}
    predictor::Int = Int(getdata(tfs.ifd, PREDICTOR, 0))
    predictor == 2 || return nothing

    # TIFF 6.0 defines SamplesPerPixel = 1 when the tag is absent. Falling back
    # to 0 here would make the per-sample loop below empty and silently skip
    # the reversal. (Presumably `getdata` returns its third argument when the
    # tag is missing — verify against its definition.)
    spp::Int = Int(getdata(tfs.ifd, SAMPLESPERPIXEL, 1))
    columns = Int(ncols(tfs.ifd))
    rows = cld(length(arr), columns) # number of rows in this strip

    # Keep `arr` rooted while it is accessed through a raw pointer.
    GC.@preserve arr begin
        # horizontal differencing
        temp::Ptr{S} = reinterpret(Ptr{S}, pointer(arr))
        for row in 1:rows
            start = (row - 1) * columns * spp
            for plane in 1:spp
                # the first sample of each channel in a row is stored verbatim
                previous::S = unsafe_load(temp, start + plane)
                for i in (spp + plane):spp:(columns - 1) * spp + plane
                    current = unsafe_load(temp, start + i) + previous
                    unsafe_store!(temp, current, start + i)
                    previous = current
                end
            end
        end
    end
    return nothing
end
8 changes: 7 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -215,4 +215,10 @@ end
ifd[TiffImages.ROWSPERSTRIP] = 256

@test TiffImages.iscontiguous(ifd)
end
end

@testset "LZW" begin
    # The same image stored uncompressed and LZW-compressed must load to
    # identical data.
    reference_path = get_example("shapes_uncompressed.tif")
    lzw_path = get_example("shapes_lzw.tif")
    reference_img = TiffImages.load(reference_path)
    lzw_img = TiffImages.load(lzw_path)
    @test reference_img == lzw_img
end
Loading