From 36fd5ca72d328c2580cb7c4b1e97f96fa6137190 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Thu, 17 Aug 2023 00:31:02 +0200 Subject: [PATCH 1/6] add mimetype to BoxAlgo --- src/squiz_box/box/package.d | 9 +++++++++ src/squiz_box/box/tar.d | 8 ++++++++ src/squiz_box/box/zip.d | 2 ++ 3 files changed, 19 insertions(+) diff --git a/src/squiz_box/box/package.d b/src/squiz_box/box/package.d index 3079758..d2ddb15 100644 --- a/src/squiz_box/box/package.d +++ b/src/squiz_box/box/package.d @@ -46,6 +46,7 @@ template isBoxAlgo(A) BoxEntry[] boxEntries; const(ubyte)[] bytes = algo.box(boxEntries).join(); UnboxEntry[] unboxEntries = algo.unbox(only(bytes), No.removePrefix).array; + string mt = algo.mimetype; })); } @@ -71,6 +72,9 @@ interface BoxAlgo { return unbox(inputRangeObject(bytes), removePrefix); } + + /// The mimetype of the compressed archive + @property string mimetype() const; } static assert(isBoxAlgo!BoxAlgo); @@ -94,6 +98,11 @@ private class CBoxAlgo(A) : BoxAlgo if (isBoxAlgo!A) { return inputRangeObject(algo.unbox(bytes, removePrefix)); } + + @property string mimetype() const + { + return algo.mimetype; + } } /// Build a BoxAlgo interface from a compile-time known box algo structure. 
diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index 0efd347..acc19e5 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -25,6 +25,8 @@ struct TarAlgo auto dataInput = new ByteRangeCursor!I(input); return TarUnbox(dataInput, removePrefix); } + + enum mimetype = "application/x-tar"; } static assert(isBoxAlgo!TarAlgo); @@ -46,6 +48,8 @@ struct TarGzAlgo auto dataInput = new ByteRangeCursor!II(ii); return TarUnbox(dataInput, removePrefix); } + + enum mimetype = "application/x-gtar"; } static assert(isBoxAlgo!TarGzAlgo); @@ -69,6 +73,8 @@ version (HaveSquizBzip2) auto dataInput = new ByteRangeCursor!II(ii); return TarUnbox(dataInput, removePrefix); } + + enum mimetype = "application/x-gtar"; } static assert(isBoxAlgo!TarBzip2Algo); @@ -93,6 +99,8 @@ version (HaveSquizLzma) auto dataInput = new ByteRangeCursor!II(ii); return TarUnbox(dataInput, removePrefix); } + + enum mimetype = "application/x-gtar"; } static assert(isBoxAlgo!TarXzAlgo); diff --git a/src/squiz_box/box/zip.d b/src/squiz_box/box/zip.d index 16d5b89..b09c437 100644 --- a/src/squiz_box/box/zip.d +++ b/src/squiz_box/box/zip.d @@ -27,6 +27,8 @@ struct ZipAlgo auto stream = new ByteRangeCursor!I(input); return ZipUnbox!Cursor(stream, removePrefix); } + + enum mimetype = "application/zip"; } static assert(isBoxAlgo!ZipAlgo); From e7b4c324a20cea14185176c8b2071c01b2fc6dc5 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Thu, 17 Aug 2023 00:31:37 +0200 Subject: [PATCH 2/6] disambiguate non-const `ubyte[]` range --- src/squiz_box/box/package.d | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/squiz_box/box/package.d b/src/squiz_box/box/package.d index d2ddb15..e4e9e35 100644 --- a/src/squiz_box/box/package.d +++ b/src/squiz_box/box/package.d @@ -57,7 +57,7 @@ interface BoxAlgo ByteRange box(BoxEntryRange entries, size_t chunkSize = defaultChunkSize); /// ditto - ByteRange box(I)(I entries, size_t chunkSize = defaultChunkSize) + final 
ByteRange box(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I && !is(I == BoxEntryRange)) { return box(inputRangeObject(entries), chunkSize); @@ -67,9 +67,19 @@ interface BoxAlgo UnboxEntryRange unbox(ByteRange bytes, Flag!"removePrefix" removePrefix = No.removePrefix); /// ditto - UnboxEntryRange unbox(I)(I bytes, Flag!"removePrefix" removePrefix = No.removePrefix) - if (isByteRange!I && !is(I == ByteRange)) - { + final UnboxEntryRange unbox(I)(I bytes, Flag!"removePrefix" removePrefix = No.removePrefix) + if (isByteRange!I && !is(I : ByteRange)) + { + // It is necessary to disambiguate `!is(I : ByteRange) with non-const `ubyte[]` range. + // Otherwise we can have infinite recursion and stack overflow at runtime. + // The assertion could be in the template constraints, but the static assertion gives + // opportunity of a helpful message. + // TODO: add an overload accepting a non-const `ubyte[]` range. Can be tested with + // requests `ReceiveAsRange` + enum message = "Squiz-Box requires range of `const(ubyte)[]` but received `ubyte[]`. 
" + ~ "Consider typecasting your range with `.map!(c => cast(const(ubyte)[])c)`"; + static assert(!is(ElementType!I == ubyte[]), message); + return unbox(inputRangeObject(bytes), removePrefix); } From 6bcd2041360c8360b477af361952e9d297322c4c Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Thu, 17 Aug 2023 00:32:02 +0200 Subject: [PATCH 3/6] tar skip extended headers --- src/squiz_box/box/tar.d | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index acc19e5..148ee99 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -191,8 +191,8 @@ enum Typeflag : ubyte directory = '5', fifo = '6', contiguousFile = '7', - posixExtended = 'g', - extended = 'x', + extendedGlobal = 'g', + extendedFile = 'x', gnuLongname = 'L', gnuLonglink = 'K', } @@ -572,9 +572,10 @@ struct TarInfo case Typeflag.directory: case Typeflag.fifo: case Typeflag.contiguousFile: - case Typeflag.posixExtended: - case Typeflag.extended: return decodeHeader(blk); + case Typeflag.extendedGlobal: + case Typeflag.extendedFile: + return skipExtendedDecodeHeader(cursor, blk); case Typeflag.gnuLongname: case Typeflag.gnuLonglink: return decodeGnuLongHeader(cursor, blk); @@ -620,6 +621,14 @@ struct TarInfo return info; } + private static TarInfo skipExtendedDecodeHeader(Cursor cursor, scope ref BlockInfo blk) + { + const sz = next512(blk.size); + cursor.ffw(sz); + + return TarInfo.decode(cursor); + } + private static TarInfo decodeGnuLongHeader(Cursor cursor, scope ref BlockInfo blk) { auto data = new char[next512(blk.size)]; From 413ed427e1135e15a9d051451a150fdf1fa9f8ee Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Thu, 17 Aug 2023 00:54:00 +0200 Subject: [PATCH 4/6] assertion message: extraction dir must exist --- src/squiz_box/box/package.d | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/squiz_box/box/package.d b/src/squiz_box/box/package.d index e4e9e35..39d997a 100644 --- 
a/src/squiz_box/box/package.d +++ b/src/squiz_box/box/package.d @@ -374,7 +374,8 @@ interface UnboxEntry : ArchiveEntry import std.stdio : File; import std.string : startsWith; - assert(exists(baseDirectory) && isDir(baseDirectory)); + assert(exists(baseDirectory) && isDir(baseDirectory), + "extracting to " ~ baseDirectory ~ ": must be a directory"); enforce( !this.isBomb, @@ -697,8 +698,7 @@ class InfoBoxEntry : BoxEntry /// Create a BoxEntry from the provided info. /// This allows to create archives out of generated data, without any backing file on disk. -InfoBoxEntry infoEntry(I)(BoxEntryInfo info, I data) -if (isByteRange!I) +InfoBoxEntry infoEntry(I)(BoxEntryInfo info, I data) if (isByteRange!I) in (info.type == EntryType.regular || data.empty, "symlinks and directories can't have data") { import std.datetime : Clock; From 23f4dea91f83ab95ac99c28309487414b472672b Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Thu, 17 Aug 2023 00:37:35 +0200 Subject: [PATCH 5/6] add download/upload examples --- ReadMe.md | 69 +++++++++++++++++++++++++++++++++++++++ examples/box_upload.d | 66 +++++++++++++++++++++++++++++++++++++ examples/download_unbox.d | 67 +++++++++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100755 examples/box_upload.d create mode 100755 examples/download_unbox.d diff --git a/ReadMe.md b/ReadMe.md index f5d18a0..bbd87e3 100644 --- a/ReadMe.md +++ b/ReadMe.md @@ -226,6 +226,75 @@ dirEntries(root, SpanMode.breadth, false) .writeBinaryFile(filename); ``` +### Download, list and extract archive + +This example uses [`requests`](https://github.com/ikod/dlang-requests) to download +an archive from the web, list the archive content and extract it with a single expression. +(`std.net.curl.byChunk` would also work and wouldn't require the const casting) + +Thanks to D ranges laziness, the archive is extracted as the data download progresses. 
+As such, it is possible to download and extract very large archives with minimal memory footprint +(and without creating an intermediate file on disk). + +```d +import squiz_box; +import requests; + +const url = "https://github.com/dlang/dmd/archive/master.tar.gz"; +const dest = "."; + +// Algorithm matched at runtime with url (using extension) +auto algo = boxAlgo(url); + +size_t downloadSz; + +auto rq = Request(); +rq.useStreaming = true; +rq.get(url).receiveAsRange() + .map!(c => cast(const(ubyte)[])c) // type-casting to const is necessary + .tee!(c => downloadSz += c.length) // trace download size + .unbox(algo) + .tee!(e => writeln(buildPath(dest, e.path))) // list archive content + .each!(e => e.extractTo(dest)); // extract +``` + +### Create archive, list and upload to web + +This example creates an archive and uses [`requests`](https://github.com/ikod/dlang-requests) to upload +it to the web. +As in the previous example, the data is uploaded as the archive creation progresses. + +```d +import squiz_box; +import requests; + +const postTo = "https://httpbin.org/post"; +const fmt = ".tar.xz"; +const src = "..."; +const prefix; + +size_t uploadSz; + +// Algorithm matched at runtime (using extension) +auto algo = boxAlgo(fmt); + +const exclusion = [".git", ".dub", ".vscode", "libsquiz-box.a", "build"]; + +auto archiveChunks = dirEntries(src, SpanMode.breadth, false) + .filter!(e => !e.isDir) + .filter!(e => !exclusion.any!(ex => e.name.canFind(ex))) + .tee!(e => writeln(e.name)) + .map!(e => fileEntry(e.name, src, prefix)) + .box(algo) + .tee!(c => uploadSz += c.length); + +auto rq = Request(); +auto resp = rq.post(postTo, archiveChunks, algo.mimetype); +enforce(resp.code < 300, format!"%s responded %s"(postTo, resp.code)); + +writefln!"POST %s - status %s (posted %s bytes)"(postTo, resp.code, uploadSz); +``` + ### Full control over the streaming process Sometimes, D ranges are not practical. 
Think of a receiver thread that diff --git a/examples/box_upload.d b/examples/box_upload.d new file mode 100755 index 0000000..6d3f17c --- /dev/null +++ b/examples/box_upload.d @@ -0,0 +1,66 @@ +#!/usr/bin/env dub +/+ dub.sdl: + name "box_upload" + description "an example for squiz-box: create archive and upload to the web" + dependency "squiz-box" path=".." + dependency "requests" version="~>2.1.1" ++/ + +module examples.box_upload; + +import squiz_box; +import requests; + +import std.algorithm; +import std.exception; +import std.getopt; +import std.format; +import std.file; +import std.path; +import std.range; +import std.stdio; + +void main(string[] args) +{ + string postTo = "https://httpbin.org/post"; + string fmt = ".tar.xz"; + string src = ".."; + string prefix; + + auto opts = getopt(args, + "post-to", &postTo, + "format", &fmt, + "src", &src, + "prefix", &prefix, + ); + + if (opts.helpWanted) + { + defaultGetoptPrinter("Squiz-box example, create archive, list and upload", opts.options); + } + + // Algorithm matched at runtime (using extension) + auto algo = boxAlgo(fmt); + + size_t numFiles; + size_t dataSz; + + const exclusion = [".git", ".dub", ".vscode", "libsquiz-box.a", "build"]; + + auto archiveChunks = dirEntries(src, SpanMode.breadth, false) + .filter!(e => !e.isDir) + .filter!(e => !exclusion.any!(ex => e.name.canFind(ex))) + .tee!(e => stdout.writeln(e.name)) + .tee!(e => numFiles += 1) + .map!(e => fileEntry(e.name, src, prefix)) + .box(algo) + .tee!(c => stderr.writefln!"uploaded %s bytes"(c.length)) + .tee!(c => dataSz += c.length); + + auto rq = Request(); + auto resp = rq.post(postTo, archiveChunks, algo.mimetype); + enforce(resp.code < 300, format!"%s responded %s"(postTo, resp.code)); + + writefln!"POST %s - status %s"(postTo, resp.code); + writefln!"Archived %s files. 
Uploaded %s bytes"(numFiles, dataSz); +} diff --git a/examples/download_unbox.d b/examples/download_unbox.d new file mode 100755 index 0000000..e6e4671 --- /dev/null +++ b/examples/download_unbox.d @@ -0,0 +1,67 @@ +#!/usr/bin/env dub +/+ dub.sdl: + name "download_unbox" + description "an example for squiz-box: download, list and extract archive" + dependency "squiz-box" path=".." + dependency "requests" version="~>2.1.1" ++/ + +module examples.download_unbox; + +import squiz_box; +import requests; + +import std.algorithm; +import std.getopt; +import std.file; +import std.path; +import std.range; +import std.stdio; + +void main(string[] args) +{ + string url = "https://github.com/dlang/dmd/archive/master.tar.gz"; + string dest; + + auto opts = getopt(args, + "url", "URL of archive to download", &url, + "dest", "The destination directory. Extracted files will disappear if not specified.", &dest, + ); + + if (opts.helpWanted) + { + defaultGetoptPrinter("Squiz-box, download, list and extract archive", opts.options); + } + + const outDir = dest.length ? dest : buildPath(tempDir, "squiz-box-example"); + + if (!exists(outDir)) + mkdirRecurse(outDir); + + scope(success) + { + if (!dest) + rmdirRecurse(outDir); + } + + // Algorithm matched at runtime with url (using extension) + auto algo = boxAlgo(url); + + writefln!"GET %s"(url); + + size_t dataSz; + size_t numFiles; + + auto rq = Request(); + rq.useStreaming = true; + rq.get(url).receiveAsRange() + .map!(c => cast(const(ubyte)[])c) + .tee!(c => stderr.writefln!"received %s bytes"(c.length)) + .tee!(c => dataSz += c.length) + .unbox(algo) + .tee!(e => stdout.writeln(buildPath(dest, e.path))) + .tee!(e => numFiles += 1) + .each!(e => e.extractTo(outDir)); + + writefln!"Downloaded %s bytes. 
Extracted %s files."(dataSz, numFiles); +} From 5f7b203fd33538f39d2903595720f55ac775b9cc Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Thu, 17 Aug 2023 00:55:24 +0200 Subject: [PATCH 6/6] test examples in CI --- .github/workflows/ci.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 131d0a4..f162808 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,3 +50,11 @@ jobs: - name: Test Dub package run: dub run --arch=x86_64 working-directory: .github/dub_test + + - name: Test download_unbox Example + run: ./download_unbox.d + working-directory: examples + + - name: Test box_upload Example + run: ./box_upload.d + working-directory: examples