diff --git a/Project.toml b/Project.toml index 857afe7..0ca1e60 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "2.2.0" [deps] ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197" +CodecInflate64 = "6309b1aa-fc58-479c-8956-599a07234577" CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" InputBuffers = "0c81fc1b-5583-44fc-8770-48be1e1cca08" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" @@ -13,6 +14,7 @@ Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a" [compat] ArgCheck = "2" +CodecInflate64 = "0.1" CodecZlib = "0.7" InputBuffers = "1" PrecompileTools = "1" diff --git a/README.md b/README.md index 238dfc6..e907db2 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ The central directory makes it fast to read just one random entry out of a very When writing it is important to close the writer so the central directory gets written out. +More details on the file format can be found at https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + ### Reading Zip archives Archives can be read from any `AbstractVector{UInt8}` containing the data of a zip archive. @@ -85,6 +87,13 @@ ZipWriter(filename) do w end ``` +### Supported Compression Methods + +| Compression Method | Reading | Writing | +|--------------------|---------|---------| +| 0 - Store (none) | Yes | Yes | +| 8 - Deflate | Yes | Yes | +| 9 - Deflate64 | Yes | No | ### Limitations @@ -92,7 +101,6 @@ end 1. Ignores time stamps. 1. Cannot write an archive fully in streaming mode. See https://github.com/madler/zipflow if you need this functionality. 1. Encryption and decryption not supported. -1. Only deflated and uncompressed data are supported. There is no support for bzip2 or zstd. 1. Multi disk archives not supported. 1. Cannot recover data from a corrupted archive. Especially if the end of the archive is corrupted. 
diff --git a/src/ZipArchives.jl b/src/ZipArchives.jl index c967b32..11fb866 100644 --- a/src/ZipArchives.jl +++ b/src/ZipArchives.jl @@ -47,6 +47,7 @@ end module ZipArchives using CodecZlib: DeflateCompressorStream, DeflateDecompressorStream, DeflateCompressor +using CodecInflate64: Deflate64DecompressorStream using TranscodingStreams: TranscodingStreams, TranscodingStream, Noop, NoopStream using ArgCheck: @argcheck using Zlib_jll: Zlib_jll diff --git a/src/constants.jl b/src/constants.jl index 8d183be..3091924 100644 --- a/src/constants.jl +++ b/src/constants.jl @@ -4,6 +4,9 @@ const Store = UInt16(0) "Deflate compression method" const Deflate = UInt16(8) +"Deflate64 compression method" +const Deflate64 = UInt16(9) + #= see https://github.com/madler/zipflow/blob/2bef2123ebe519c17b18d2d0c3c71065088de952/zipflow.c#L214 =# diff --git a/src/reader.jl b/src/reader.jl index b134ccc..25ec83f 100644 --- a/src/reader.jl +++ b/src/reader.jl @@ -104,7 +104,7 @@ Return the compression method used for entry `i`. See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT for a current list of methods. -Only Store(0x0000) and Deflate(0x0008) supported for now. +Only Store(0), Deflate(8), and Deflate64(9) are supported for now. Note: if the zip file was corrupted, this might be wrong. """ @@ -774,8 +774,8 @@ end function zip_openentry(r::ZipReader, i::Int) compressed_size::Int64 = zip_compressed_size(r, i) method = zip_compression_method(r, i) - if method != Store && method != Deflate - throw(ArgumentError("invalid compression method: $(method). Only Store(0) and Deflate(8) supported for now")) + if method != Store && method != Deflate && method != Deflate64 + throw(ArgumentError("invalid compression method: $(method). 
Only Store(0), Deflate(8), and Deflate64(9) supported for now")) end entry_data_offset = zip_entry_data_offset(r, i) @@ -792,6 +792,8 @@ function zip_openentry(r::ZipReader, i::Int) return base_io elseif method == Deflate return DeflateDecompressorStream(base_io) + elseif method == Deflate64 + return Deflate64DecompressorStream(base_io) else # should throw and ArgumentError before this error("unreachable") diff --git a/test/Artifacts.toml b/test/Artifacts.toml index 85fedc7..5ea48dc 100644 --- a/test/Artifacts.toml +++ b/test/Artifacts.toml @@ -1,6 +1,6 @@ [fixture] -git-tree-sha1 = "6ab9957b72056a0c388b056305a24d9742cfb840" +git-tree-sha1 = "ed34b433321060f64bbdbda044851336788b9af6" [[fixture.download]] - url = "https://github.com/JuliaIO/ZipArchives.jl/releases/download/v0.2.1/fixture.tar.gz" - sha256 = "a0505b18c35455b7163060355401ed722b9fdcd177bf2f93ab5e696f758d9507" + sha256 = "99a9bb1d9cba1fae77fd7be224b654da1f247815845ef4ac682a546e8dc70ceb" + url = "https://github.com/JuliaIO/ZipArchives.jl/releases/download/v2.1.6/fixture.tar.gz" diff --git a/test/Fixture-README.md b/test/Fixture-README.md index 545b2d6..b3c5223 100644 --- a/test/Fixture-README.md +++ b/test/Fixture-README.md @@ -16,7 +16,7 @@ Add the file to the "fixture" directory, and a description to this file. Then run ```julia # This is the url that the artifact will be available from: -url_to_upload_to = "https://github.com/medyan-dev/ZipArchives.jl/releases/download/v0.2.1/fixture.tar.gz" +url_to_upload_to = "https://github.com/JuliaIO/ZipArchives.jl/releases/download/v2.1.6/fixture.tar.gz" # This is the path to the Artifacts.toml we will manipulate artifact_toml = "Artifacts.toml" fixture_hash = create_artifact() do artifact_dir @@ -30,14 +30,36 @@ bind_artifact!(artifact_toml, "fixture", fixture_hash; force=true, Finally, upload the new "fixture.tar.gz" to `url_to_upload_to` -## `win11-excel.xlsx` -Small excel file created on windows 11 in microsoft Excel version 2305. 
+## `dotnet-deflate64.zip` +This file is downloaded from https://github.com/dotnet/runtime-assets/blob/95277f38e68b66f1b48600d90d456c32c9ae0fa2/src/System.IO.Compression.TestData/ZipTestData/compat/deflate64.zip + +## `leftpad-core_2.13-0.1.11.jar` +Example jar file from https://mvnrepository.com/artifact/io.github.asakaev/leftpad-core_2.13/0.1.11 + +## `ubuntu22-7zip.zip` +Created with 7zip version 22.01 (x64) + +## `ubuntu22-files.zip` +Created with default ubuntu files program + +## `ubuntu22-infozip.zip` +Small zip file created with ubuntu22 Info-ZIP Zip 3.0 + +## `ubuntu22-old7zip.zip` +Created with 7zip version 16.02 p7zip 16.02 + +## `win11-7zip.zip` +Small zip file created with windows 11 7Zip 22.01 + +## `win11-deflate64.zip` +Large zip file created with windows 11 file explorer. +Designed to test the deflate64 decompressor. ## `win11-excel.ods` Small OpenDocument Spreadsheet file created on windows 11 in microsoft Excel version 2305. -## `win11-libreoffice.ods` -Small OpenDocument Spreadsheet file created on windows 11 in LibreOffice Calc 7.5 +## `win11-excel.xlsx` +Small excel file created on windows 11 in microsoft Excel version 2305. 
## `win11-explorer.zip` Small zip file created with windows 11 file explorer @@ -45,12 +67,12 @@ Small zip file created with windows 11 file explorer ## `win11-infozip.zip` Small zip file created with windows 11 Info-ZIP Zip 3.0 -## `win11-7zip.zip` -Small zip file created with windows 11 7Zip 22.01 - ## `win11-julia-p7zip.zip` Small zip file created with windows 11 p7zip_jll 17.4.0+0 +## `win11-libreoffice.ods` +Small OpenDocument Spreadsheet file created on windows 11 in LibreOffice Calc 7.5 + ## `win11-powerpoint.odp` Small odp file created on windows 11 in microsoft PowerPoint version 2305 @@ -60,17 +82,5 @@ Small pptx file created on windows 11 in microsoft PowerPoint version 2305 ## `ZipArchives.jl-main.zip` Zip file downloaded from a github on 20 JUN 2023 -## `leftpad-core_2.13-0.1.11.jar` -Example jar file from https://mvnrepository.com/artifact/io.github.asakaev/leftpad-core_2.13/0.1.11 - -## `ubuntu22-files.zip` -Created with default ubuntu files program - -## `ubuntu22-7zip.zip` -Created with 7zip version 22.01 (x64) - -## `ubuntu22-old7zip.zip` -Created with 7zip version 16.02 p7zip 16.02 - -## `ubuntu22-infozip.zip` -Small zip file created with ubuntu22 Info-ZIP Zip 3.0 \ No newline at end of file +## `zipfile-deflate64.zip` +Test file from https://github.com/brianhelba/zipfile-deflate64/blob/beec33184da6da4697a1994c0ac4c64cef8cff50/tests/data/deflate64.zip \ No newline at end of file diff --git a/test/Project.toml b/test/Project.toml index 0759be7..9c9a47b 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -9,6 +9,7 @@ OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" diff --git a/test/test_reader.jl 
b/test/test_reader.jl index 197a16a..2e4d678 100644 --- a/test/test_reader.jl +++ b/test/test_reader.jl @@ -4,6 +4,7 @@ using Base64: base64decode using Setfield: @set using p7zip_jll: p7zip_jll using OffsetArrays: Origin +using SHA: sha256 @testset "find_end_of_central_directory_record unit tests" begin find_eocd = ZipArchives.find_end_of_central_directory_record @@ -149,8 +150,8 @@ end data_b64 = "UEsDBD8AAgAOAHJb0FaLksVmIgAAABAAAAAJAAAAbHptYV9kYXRhCQQFAF0AAIAAADoaCWd+rnMR0beE5IbQKkMGbV//6/YgAFBLAQI/AD8AAgAOAHJb0FaLksVmIgAAABAAAAAJAAAAAAAAAAAAAACAAQAAAABsem1hX2RhdGFQSwUGAAAAAAEAAQA3AAAASQAAAAAA" data = base64decode(data_b64) r = ZipReader(data) - @test_throws ArgumentError("invalid compression method: 14. Only Store(0) and Deflate(8) supported for now") zip_test_entry(r, 1) - @test_throws ArgumentError("invalid compression method: 14. Only Store(0) and Deflate(8) supported for now") zip_openentry(r, 1) + @test_throws ArgumentError("invalid compression method: 14. Only Store(0), Deflate(8), and Deflate64(9) supported for now") zip_test_entry(r, 1) + @test_throws ArgumentError("invalid compression method: 14. Only Store(0), Deflate(8), and Deflate64(9) supported for now") zip_openentry(r, 1) @test zip_iscompressed(r, 1) @test zip_names(r) == ["lzma_data"] @test zip_compression_method(r, 1) === 0x000e @@ -233,8 +234,9 @@ end if zip_isdir(r, i) @test isdir(joinpath(tmpout,name)) else - entry_data = zip_readentry(r, i) - @test read(joinpath(tmpout,name)) == entry_data + sevenziphash = open(sha256, joinpath(tmpout,name)) + ziphash = zip_openentry(sha256, r, i) + @test sevenziphash == ziphash end end end