Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Examples #21

Merged
merged 6 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,11 @@ jobs:
- name: Test Dub package
run: dub run --arch=x86_64
working-directory: .github/dub_test

- name: Test download_unbox Example
run: ./download_unbox.d
working-directory: examples

- name: Test box_upload Example
run: ./box_upload.d
working-directory: examples
69 changes: 69 additions & 0 deletions ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,75 @@ dirEntries(root, SpanMode.breadth, false)
.writeBinaryFile(filename);
```

### Download, list and extract archive

This example uses [`requests`](https://github.com/ikod/dlang-requests) to download
an archive from the web, list the archive content and extract it with a single expression.
(`std.net.curl.byChunk` would also work and wouldn't require the const casting)

Thanks to the laziness of D ranges, the archive is extracted as the data download progresses.
As such, it is possible to download and extract very large archives with minimal memory footprint
(and without creating an intermediate file on disk).

```d
import squiz_box;
import requests;

const url = "https://github.com/dlang/dmd/archive/master.tar.gz";
const dest = ".";

// Algorithm matched at runtime with url (using extension)
auto algo = boxAlgo(url);

size_t downloadSz;

auto rq = Request();
rq.useStreaming = true;
rq.get(url).receiveAsRange()
.map!(c => cast(const(ubyte)[])c) // type-casting to const is necessary
.tee!(c => downloadSz += c.length) // trace download size
.unbox(algo)
.tee!(e => writeln(buildPath(dest, e.path))) // list archive content
.each!(e => e.extractTo(dest)); // extract
```

### Create archive, list and upload to web

This example creates an archive and uses [`requests`](https://github.com/ikod/dlang-requests) to upload
it to the web.
As in the previous example, the data is uploaded as the archive creation progresses.

```d
import squiz_box;
import requests;

const postTo = "https://httpbin.org/post";
const fmt = ".tar.xz";
const src = "...";
const string prefix = null;

size_t uploadSz;

// Algorithm matched at runtime (using extension)
auto algo = boxAlgo(fmt);

const exclusion = [".git", ".dub", ".vscode", "libsquiz-box.a", "build"];

auto archiveChunks = dirEntries(src, SpanMode.breadth, false)
.filter!(e => !e.isDir)
.filter!(e => !exclusion.any!(ex => e.name.canFind(ex)))
.tee!(e => writeln(e.name))
.map!(e => fileEntry(e.name, src, prefix))
.box(algo)
.tee!(c => uploadSz += c.length);

auto rq = Request();
auto resp = rq.post(postTo, archiveChunks, algo.mimetype);
enforce(resp.code < 300, format!"%s responded %s"(postTo, resp.code));

writefln!"POST %s - status %s (posted %s bytes)"(postTo, resp.code, uploadSz);
```

### Full control over the streaming process

Sometimes, D ranges are not practical. Think of a receiver thread that
Expand Down
66 changes: 66 additions & 0 deletions examples/box_upload.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env dub
/+ dub.sdl:
name "box_upload"
description "an example for squiz-box: create archive and upload to the web"
dependency "squiz-box" path=".."
dependency "requests" version="~>2.1.1"
+/

module examples.box_upload;

import squiz_box;
import requests;

import std.algorithm;
import std.exception;
import std.getopt;
import std.format;
import std.file;
import std.path;
import std.range;
import std.stdio;

/// Example entry point: archives the files found under `src` and uploads the
/// resulting archive to `postTo` with an HTTP POST.
///
/// Recognized options (all optional): `--post-to`, `--format`, `--src`, `--prefix`.
/// With `--help`, the usage is printed and nothing is archived or uploaded.
///
/// Throws: through `enforce`, if the response status code is 300 or above.
void main(string[] args)
{
    string postTo = "https://httpbin.org/post";
    string fmt = ".tar.xz";
    string src = "..";
    string prefix;

    auto opts = getopt(args,
        "post-to", &postTo,
        "format", &fmt,
        "src", &src,
        "prefix", &prefix,
    );

    if (opts.helpWanted)
    {
        defaultGetoptPrinter("Squiz-box example, create archive, list and upload", opts.options);
        // Only usage information was requested: do not build or upload an archive.
        return;
    }

    // Algorithm matched at runtime (using extension)
    auto algo = boxAlgo(fmt);

    // Both counters are updated by the `tee` stages below, as the range is consumed.
    size_t numFiles;
    size_t dataSz;

    // Any entry whose name contains one of these substrings is left out of the archive.
    const exclusion = [".git", ".dub", ".vscode", "libsquiz-box.a", "build"];

    auto archiveChunks = dirEntries(src, SpanMode.breadth, false)
        .filter!(e => !e.isDir)
        .filter!(e => !exclusion.any!(ex => e.name.canFind(ex)))
        .tee!(e => stdout.writeln(e.name)) // list archived files on stdout
        .tee!(e => numFiles += 1)
        .map!(e => fileEntry(e.name, src, prefix))
        .box(algo)
        .tee!(c => stderr.writefln!"uploaded %s bytes"(c.length)) // progress goes to stderr
        .tee!(c => dataSz += c.length);

    auto rq = Request();
    auto resp = rq.post(postTo, archiveChunks, algo.mimetype);
    enforce(resp.code < 300, format!"%s responded %s"(postTo, resp.code));

    writefln!"POST %s - status %s"(postTo, resp.code);
    writefln!"Archived %s files. Uploaded %s bytes"(numFiles, dataSz);
}
67 changes: 67 additions & 0 deletions examples/download_unbox.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/usr/bin/env dub
/+ dub.sdl:
name "download_unbox"
description "an example for squiz-box: download, list and extract archive"
dependency "squiz-box" path=".."
dependency "requests" version="~>2.1.1"
+/

module examples.download_unbox;

import squiz_box;
import requests;

import std.algorithm;
import std.getopt;
import std.file;
import std.path;
import std.range;
import std.stdio;

/// Example entry point: downloads the archive at `url`, lists its entries on
/// stdout and extracts them while the download is still in progress.
///
/// Recognized options (all optional): `--url`, `--dest`.
/// Without a non-empty `--dest`, files are extracted to a directory under
/// `tempDir` which is removed once extraction has succeeded.
/// With `--help`, the usage is printed and nothing is downloaded.
void main(string[] args)
{
    string url = "https://github.com/dlang/dmd/archive/master.tar.gz";
    string dest;

    auto opts = getopt(args,
        "url", "URL of archive to download", &url,
        "dest", "The destination directory. Extracted files will disappear if not specified.", &dest,
    );

    if (opts.helpWanted)
    {
        defaultGetoptPrinter("Squiz-box, download, list and extract archive", opts.options);
        // Only usage information was requested: do not download or extract anything.
        return;
    }

    // A single flag drives both the choice of directory and its cleanup, so that an
    // empty (but non-null) `dest` cannot select the temp directory and then skip its removal.
    const useTempDir = dest.length == 0;
    const outDir = useTempDir ? buildPath(tempDir, "squiz-box-example") : dest;

    if (!exists(outDir))
        mkdirRecurse(outDir);

    scope(success)
    {
        if (useTempDir)
            rmdirRecurse(outDir);
    }

    // Algorithm matched at runtime with url (using extension)
    auto algo = boxAlgo(url);

    writefln!"GET %s"(url);

    // Both counters are updated by the `tee` stages below, as the range is consumed.
    size_t dataSz;
    size_t numFiles;

    auto rq = Request();
    rq.useStreaming = true;
    rq.get(url).receiveAsRange()
        .map!(c => cast(const(ubyte)[])c) // type-casting to const is necessary
        .tee!(c => stderr.writefln!"received %s bytes"(c.length)) // progress goes to stderr
        .tee!(c => dataSz += c.length)
        .unbox(algo)
        .tee!(e => stdout.writeln(buildPath(dest, e.path))) // list archive content
        .tee!(e => numFiles += 1)
        .each!(e => e.extractTo(outDir));

    writefln!"Downloaded %s bytes. Extracted %s files."(dataSz, numFiles);
}
33 changes: 26 additions & 7 deletions src/squiz_box/box/package.d
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ template isBoxAlgo(A)
BoxEntry[] boxEntries;
const(ubyte)[] bytes = algo.box(boxEntries).join();
UnboxEntry[] unboxEntries = algo.unbox(only(bytes), No.removePrefix).array;
string mt = algo.mimetype;
}));
}

Expand All @@ -56,7 +57,7 @@ interface BoxAlgo
ByteRange box(BoxEntryRange entries, size_t chunkSize = defaultChunkSize);

/// ditto
ByteRange box(I)(I entries, size_t chunkSize = defaultChunkSize)
final ByteRange box(I)(I entries, size_t chunkSize = defaultChunkSize)
if (isBoxEntryRange!I && !is(I == BoxEntryRange))
{
return box(inputRangeObject(entries), chunkSize);
Expand All @@ -66,11 +67,24 @@ interface BoxAlgo
UnboxEntryRange unbox(ByteRange bytes, Flag!"removePrefix" removePrefix = No.removePrefix);

/// ditto
UnboxEntryRange unbox(I)(I bytes, Flag!"removePrefix" removePrefix = No.removePrefix)
if (isByteRange!I && !is(I == ByteRange))
{
final UnboxEntryRange unbox(I)(I bytes, Flag!"removePrefix" removePrefix = No.removePrefix)
if (isByteRange!I && !is(I : ByteRange))
{
// It is necessary to disambiguate `!is(I : ByteRange)` with a non-const `ubyte[]` range.
// Otherwise we can have infinite recursion and stack overflow at runtime.
// The assertion could be in the template constraints, but the static assertion gives
// the opportunity to emit a helpful message.
// TODO: add an overload accepting a non-const `ubyte[]` range. Can be tested with
// requests `ReceiveAsRange`
enum message = "Squiz-Box requires range of `const(ubyte)[]` but received `ubyte[]`. "
~ "Consider typecasting your range with `.map!(c => cast(const(ubyte)[])c)`";
static assert(!is(ElementType!I == ubyte[]), message);

// Wrap the range with `inputRangeObject` and call `unbox` again with the wrapper
// (intended to reach the `ByteRange` overload rather than this template).
return unbox(inputRangeObject(bytes), removePrefix);
}

/// The mimetype of the compressed archive
@property string mimetype() const;
}

static assert(isBoxAlgo!BoxAlgo);
Expand All @@ -94,6 +108,11 @@ private class CBoxAlgo(A) : BoxAlgo if (isBoxAlgo!A)
{
return inputRangeObject(algo.unbox(bytes, removePrefix));
}

/// The mimetype reported by the wrapped `algo`.
@property string mimetype() const
{
return algo.mimetype;
}
}

/// Build a BoxAlgo interface from a compile-time known box algo structure.
Expand Down Expand Up @@ -355,7 +374,8 @@ interface UnboxEntry : ArchiveEntry
import std.stdio : File;
import std.string : startsWith;

assert(exists(baseDirectory) && isDir(baseDirectory));
assert(exists(baseDirectory) && isDir(baseDirectory),
"extracting to " ~ baseDirectory ~ ": must be a directory");

enforce(
!this.isBomb,
Expand Down Expand Up @@ -678,8 +698,7 @@ class InfoBoxEntry : BoxEntry

/// Create a BoxEntry from the provided info.
/// This allows to create archives out of generated data, without any backing file on disk.
InfoBoxEntry infoEntry(I)(BoxEntryInfo info, I data)
if (isByteRange!I)
InfoBoxEntry infoEntry(I)(BoxEntryInfo info, I data) if (isByteRange!I)
in (info.type == EntryType.regular || data.empty, "symlinks and directories can't have data")
{
import std.datetime : Clock;
Expand Down
25 changes: 21 additions & 4 deletions src/squiz_box/box/tar.d
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ struct TarAlgo
auto dataInput = new ByteRangeCursor!I(input);
return TarUnbox(dataInput, removePrefix);
}

enum mimetype = "application/x-tar";
}

static assert(isBoxAlgo!TarAlgo);
Expand All @@ -46,6 +48,8 @@ struct TarGzAlgo
auto dataInput = new ByteRangeCursor!II(ii);
return TarUnbox(dataInput, removePrefix);
}

enum mimetype = "application/x-gtar";
}

static assert(isBoxAlgo!TarGzAlgo);
Expand All @@ -69,6 +73,8 @@ version (HaveSquizBzip2)
auto dataInput = new ByteRangeCursor!II(ii);
return TarUnbox(dataInput, removePrefix);
}

enum mimetype = "application/x-gtar";
}

static assert(isBoxAlgo!TarBzip2Algo);
Expand All @@ -93,6 +99,8 @@ version (HaveSquizLzma)
auto dataInput = new ByteRangeCursor!II(ii);
return TarUnbox(dataInput, removePrefix);
}

enum mimetype = "application/x-gtar";
}

static assert(isBoxAlgo!TarXzAlgo);
Expand Down Expand Up @@ -183,8 +191,8 @@ enum Typeflag : ubyte
directory = '5',
fifo = '6',
contiguousFile = '7',
posixExtended = 'g',
extended = 'x',
extendedGlobal = 'g',
extendedFile = 'x',
gnuLongname = 'L',
gnuLonglink = 'K',
}
Expand Down Expand Up @@ -564,9 +572,10 @@ struct TarInfo
case Typeflag.directory:
case Typeflag.fifo:
case Typeflag.contiguousFile:
case Typeflag.posixExtended:
case Typeflag.extended:
return decodeHeader(blk);
case Typeflag.extendedGlobal:
case Typeflag.extendedFile:
return skipExtendedDecodeHeader(cursor, blk);
case Typeflag.gnuLongname:
case Typeflag.gnuLonglink:
return decodeGnuLongHeader(cursor, blk);
Expand Down Expand Up @@ -612,6 +621,14 @@ struct TarInfo
return info;
}

/// Handles an extended header block (`Typeflag.extendedGlobal` or
/// `Typeflag.extendedFile`): its data is not interpreted, the cursor is moved
/// past it and the header that follows is decoded and returned instead.
private static TarInfo skipExtendedDecodeHeader(Cursor cursor, scope ref BlockInfo blk)
{
    // `next512` presumably rounds the data size up to whole 512-byte blocks and
    // `ffw` advances the cursor by that amount -- TODO confirm against their definitions.
    cursor.ffw(next512(blk.size));
    return TarInfo.decode(cursor);
}

private static TarInfo decodeGnuLongHeader(Cursor cursor, scope ref BlockInfo blk)
{
auto data = new char[next512(blk.size)];
Expand Down
2 changes: 2 additions & 0 deletions src/squiz_box/box/zip.d
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ struct ZipAlgo
auto stream = new ByteRangeCursor!I(input);
return ZipUnbox!Cursor(stream, removePrefix);
}

enum mimetype = "application/zip";
}

static assert(isBoxAlgo!ZipAlgo);
Expand Down