Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP - Reading a file #24

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions example/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
<title></title>
</head>
<body>
<!-- for the new blob reading methods -->
<script src="https://cdn.jsdelivr.net/gh/eligrey/Blob.js/Blob.js"></script>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/polyfill.min.js"></script>
<script type="module">
import Zip from '../write.js'
import Reader from '../read.js'

const { readable, writable } = new Zip()
const writer = writable.getWriter()
Expand All @@ -24,10 +27,9 @@
let o = 0; chunks.forEach(c => {uint8.set(c, o); o += c.length})

const hash = await crypto.subtle.digest('sha-1', uint8)
const expected = '18282460561088092989-11685288625477285951386915632'
const expected = '831739241-296827476-199608789416608489701811849431'
const blob = new Blob([uint8], { type: 'application/zip' })
const a = document.createElement('a')

a.href = URL.createObjectURL(blob)
a.innerText = a.download = 'Archive.zip'

Expand All @@ -44,6 +46,14 @@
pump()

writer.write({
comment: 'dog',
name: '/dog.txt',
lastModified: new Date(0),
stream: () => new Response('woff').body
})

writer.write({
comment: 'cat',
name: '/cat.txt',
lastModified: new Date(0),
stream: () => new Response('mjau').body
Expand Down
79 changes: 79 additions & 0 deletions example/reading.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
<!DOCTYPE html>
<html lang="en" dir="ltr">
<head>
<meta charset="utf-8">
<title></title>
</head>
<body>
<!-- for the new blob reading methods -->
<script src="https://cdn.jsdelivr.net/gh/eligrey/Blob.js/Blob.js"></script>
<script type="module">
import reader from '../read.js'

// Manual test page: fetches one reference archive, lists its entries with the
// zip reader and prints them to the console. The trailing comments record how
// the reader coped with each archive when the list was written.
const jszip = 'https://cdn.jsdelivr.net/gh/Stuk/jszip/test/ref/'
const urls = [
  '/test/fixture/dog_cat.zip', // ok
  jszip + 'all-stream.zip', // problem reading
  jszip + 'all.7zip.zip', // problem reading
  jszip + 'all.windows.zip', // problem reading a compressed image
  jszip + 'all.zip', // problem reading
  jszip + 'all_appended_bytes.zip', // problem reading
  jszip + 'all_missing_bytes.zip', // fail as it should
  jszip + 'all_prepended_bytes.zip', // problem reading
  jszip + 'archive_comment.zip', // Ok
  jszip + 'backslash.zip', // ok
  jszip + 'data_descriptor.zip', // problem reading
  jszip + 'deflate-stream.zip',
  jszip + 'deflate.zip',
  jszip + 'empty.zip', // ok
  jszip + 'encrypted.zip',
  jszip + 'extra_attributes.zip', // ???
  jszip + 'folder.zip', // ok
  jszip + 'image.zip', // ok
  jszip + 'local_encoding_in_name.zip', // unsolvable?
  jszip + 'nested.zip', // ok
  jszip + 'nested_data_descriptor.zip',
  jszip + 'nested_zip64.zip',
  jszip + 'pile_of_poo.zip',
  jszip + 'slashes_and_izarc.zip',
  jszip + 'store-stream.zip',
  jszip + 'store.zip',
  jszip + 'subfolder.zip',
  jszip + 'text.zip',
  jszip + 'utf8.zip',
  jszip + 'utf8_in_name.zip',
  jszip + 'winrar_utf8_in_name.zip',
  jszip + 'zip64.zip',
  jszip + 'zip64_appended_bytes.zip',
  jszip + 'zip64_missing_bytes.zip'
]

// Change the index to inspect another archive
const url = urls[22]
console.log(url)

fetch(url).then(async res => {
  // fetch() only rejects on network errors; a 404 page is not a zip archive
  if (!res.ok) throw new Error(`Failed to fetch ${url}: ${res.status}`)

  const zip = await res.blob()
  const entries = []
  const utf8 = new TextDecoder()

  for await (const entry of reader(zip)) {
    const { comment, name, directory, compressionMethod, size, compressedSize } = entry
    console.log(entry)

    // Read the entry's bytes once and derive every representation from them
    const ab = await entry.arrayBuffer()

    entries.push({
      comment,
      name,
      directory,
      compressionMethod,
      size,
      compressedSize,
      ab,
      body: utf8.decode(ab),
      url: URL.createObjectURL(new Blob([ab], { type: 'd/d' }))
    })
  }

  console.table(entries)
}).catch(err => {
  // Surface fetch and parse failures instead of leaving the rejection unhandled
  console.error(err)
})
</script>
</body>
</html>
173 changes: 173 additions & 0 deletions read.js
Original file line number Diff line number Diff line change
@@ -1 +1,174 @@
// TODO: later
// Message of the errors thrown when the input cannot be parsed as a zip archive.
const ERR_BAD_FORMAT = 'File format is not recognized.'
// Upper bound, in bytes, assumed for the trailing zip file comment (64 KB).
const ZIP_COMMENT_MAX = 65536
// Minimum size, in bytes, of the "End of central directory record" (EOCDR).
const EOCDR_MIN = 22
// Largest tail of an archive that has to be searched for the EOCDR signature.
const EOCDR_MAX = EOCDR_MIN + ZIP_COMMENT_MAX

// Shared decoder (TextDecoder defaults to UTF-8) used for entry names and comments.
const decoder = new TextDecoder()

/**
 * A single file or directory of a zip archive, described by its
 * "central directory file header" record.
 *
 * All numeric fields are read lazily from the record (little-endian, as the
 * zip format stores them). The File-like methods (`stream`, `arrayBuffer`,
 * `text`) return the entry's bytes exactly as stored in the archive; no
 * decompression or CRC validation is performed yet.
 */
class Entry {
  /**
   * @param {DataView} dataView View over exactly one central directory record.
   * @param {Blob} [fileLike] The archive the record belongs to. Required by
   *   `stream()`, `arrayBuffer()` and `text()`.
   * @throws {Error} ERR_BAD_FORMAT when the record signature is missing.
   */
  constructor (dataView, fileLike) {
    // Big-endian read so the bytes 'P' 'K' 0x01 0x02 compare in file order.
    if (dataView.getUint32(0) !== 0x504b0102) {
      throw new Error(ERR_BAD_FORMAT)
    }
    this.dataView = dataView
    this._fileLike = fileLike
  }

  /** Version needed to extract. */
  get version () {
    return this.dataView.getUint16(6, true)
  }

  /** General purpose bit flag. */
  get bitFlag () {
    return this.dataView.getUint16(8, true)
  }

  /** True when bit 0 of the general purpose flag is set. */
  get encrypted () {
    return (this.bitFlag & 1) === 1
  }

  /** Compression method identifier (0 = stored, 8 = deflate, ...). */
  get compressionMethod () {
    return this.dataView.getUint16(10, true)
  }

  /** Packed MS-DOS timestamp: time in the low 16 bits, date in the high 16. */
  get lastModDateRaw () {
    return this.dataView.getUint32(12, true)
  }

  get crc32 () {
    return this.dataView.getUint32(16, true)
  }

  /** Number of bytes the entry's data occupies inside the archive. */
  get compressedSize () {
    return this.dataView.getUint32(20, true)
  }

  get filenameLength () {
    return this.dataView.getUint16(28, true)
  }

  /** Length of the extra field of the CENTRAL record (the local one may differ). */
  get extraFieldLength () {
    return this.dataView.getUint16(30, true)
  }

  get commentLength () {
    return this.dataView.getUint16(32, true)
  }

  /** True when the MS-DOS directory attribute (bit 4) is set. */
  get directory () {
    return (this.dataView.getUint8(38) & 16) === 16
  }

  /** Offset of the entry's local file header; a 4 byte field. */
  get offset () {
    return this.dataView.getUint32(42, true)
  }

  /** True when a 32 bit field is saturated, i.e. the real value is in a zip64 extra field. */
  get zip64 () {
    return this.size === 0xFFFFFFFF ||
      this.compressedSize === 0xFFFFFFFF ||
      this.offset === 0xFFFFFFFF
  }

  /** Per-entry comment, stored after the file name and the extra field. */
  get comment () {
    const dv = this.dataView
    const uint8 = new Uint8Array(dv.buffer, dv.byteOffset + 46 + this.filenameLength + this.extraFieldLength, this.commentLength)
    return decoder.decode(uint8)
  }

  // File like IDL methods

  /**
   * Modification time decoded from the MS-DOS timestamp. The format carries
   * no time zone and a 2 second resolution; it is interpreted as local time.
   * @returns {Date}
   */
  get lastModifiedDate () {
    const raw = this.lastModDateRaw
    const time = raw & 0xFFFF
    const date = raw >>> 16

    return new Date(
      (date >>> 9) + 1980, // 7 bits, years since 1980
      ((date >>> 5) & 0x0F) - 1, // 4 bits, 1-based month
      date & 0x1F, // 5 bits, day of month
      time >>> 11, // 5 bits, hours
      (time >>> 5) & 0x3F, // 6 bits, minutes
      (time & 0x1F) * 2 // 5 bits, seconds / 2
    )
  }

  /** Modification time in milliseconds since the Unix epoch. */
  get lastModified () {
    return this.lastModifiedDate.getTime()
  }

  get name () {
    const dv = this.dataView
    const uint8 = new Uint8Array(dv.buffer, dv.byteOffset + 46, this.filenameLength)
    return decoder.decode(uint8)
  }

  /** Uncompressed size in bytes. */
  get size () {
    return this.dataView.getUint32(24, true)
  }

  /**
   * Streams the entry's data as stored in the archive.
   *
   * The data starts right after the LOCAL file header, whose file name and
   * extra field lengths are independent of the ones in the central directory
   * (e.g. extra_attributes.zip has 24 bytes of extra field in the central
   * record but 28 in the local header). The local header is therefore read
   * first and the data position is derived from it.
   *
   * @returns {ReadableStream<Uint8Array>} Errors with ERR_BAD_FORMAT when no
   *   local file header is found at `offset`.
   * @throws {TypeError} When the entry was created without an archive.
   */
  stream () {
    const fileLike = this._fileLike
    if (!fileLike) {
      throw new TypeError('Entry has no archive to read from')
    }

    const { offset, compressedSize } = this
    let reader

    return new ReadableStream({
      async start () {
        const header = new DataView(
          await fileLike.slice(offset, offset + 30).arrayBuffer()
        )

        // 'P' 'K' 0x03 0x04, compared in file order
        if (header.byteLength < 30 || header.getUint32(0) !== 0x504b0304) {
          throw new Error(ERR_BAD_FORMAT)
        }

        const start = offset + 30 +
          header.getUint16(26, true) + // local file name length
          header.getUint16(28, true) // local extra field length

        // The size comes from the central record: the local header may hold
        // zeros when a data descriptor is used.
        reader = fileLike
          .slice(start, start + compressedSize)
          .stream()
          .getReader()
        // .pipeThrough(inflate) // TODO: optional inflate
        // .pipeThrough(crc) // TODO: crc32 validate
      },
      async pull (controller) {
        const { done, value } = await reader.read()
        if (done) {
          controller.close()
        } else {
          controller.enqueue(value)
        }
      },
      cancel (reason) {
        return reader.cancel(reason)
      }
    })
  }

  /** @returns {Promise<ArrayBuffer>} The entry's stored bytes. */
  arrayBuffer () {
    return new Response(this.stream()).arrayBuffer()
  }

  /** @returns {Promise<string>} The entry's stored bytes decoded as UTF-8. */
  text () {
    return new Response(this.stream()).text()
  }
}

/**
 * Locates the "End of central directory record" (EOCDR) of a zip archive and
 * yields one `Entry` per central directory record.
 *
 * @param {Blob} fileLike The archive; only `size` and `slice().arrayBuffer()` are used.
 * @yields {Entry} The archive's entries, in central directory order.
 * @throws {Error} ERR_BAD_FORMAT when no EOCDR is found, or when the central
 *   directory it points to lies outside of, or is cut short by, the file.
 */
async function * seekEOCDR (fileLike) {
  // "End of central directory record" is the last part of a zip archive, and is at least 22 bytes long.
  // Zip file comment is the last part of EOCDR and has max length of 64KB,
  // so we only have to search the last 64K + 22 bytes of an archive for EOCDR signature (0x06054b50).
  if (fileLike.size < EOCDR_MIN) throw new Error(ERR_BAD_FORMAT)

  // In most cases, the EOCDR is EOCDR_MIN bytes long
  const dv = await doSeek(EOCDR_MIN) ||
    await doSeek(Math.min(EOCDR_MAX, fileLike.size))

  if (!dv) throw new Error(ERR_BAD_FORMAT)

  // Offset of the first central directory record, from the start of the file
  const cdOffset = dv.getUint32(16, true)
  const fileslength = dv.getUint16(8, true)

  if (cdOffset >= fileLike.size) {
    throw new Error(ERR_BAD_FORMAT)
  }

  // Everything from the central directory to the end of the file
  const bytes = new Uint8Array(await fileLike.slice(cdOffset).arrayBuffer())

  const uint16e = (b, n) => b[n] | (b[n + 1] << 8)

  for (let i = 0, index = 0; i < fileslength; i++) {
    // The fixed part of a record is 46 bytes; the lengths live inside of it
    if (index + 46 > bytes.length) throw new Error(ERR_BAD_FORMAT)

    // Lengths must be read relative to the CURRENT record, not the first one
    const size =
      uint16e(bytes, index + 28) + // filenameLength
      uint16e(bytes, index + 30) + // extraFieldLength
      uint16e(bytes, index + 32) + // commentLength
      46

    if (index + size > bytes.length) throw new Error(ERR_BAD_FORMAT)

    yield new Entry(
      new DataView(bytes.buffer, index, size),
      fileLike
    )

    index += size
  }

  // seek last length bytes of file for EOCDR, scanning backwards so that the
  // record closest to the end of the file wins
  async function doSeek (length) {
    const ab = await fileLike.slice(fileLike.size - length).arrayBuffer()
    const bytes = new Uint8Array(ab)
    for (let i = bytes.length - EOCDR_MIN; i >= 0; i--) {
      if (bytes[i] === 0x50 && bytes[i + 1] === 0x4b && bytes[i + 2] === 0x05 && bytes[i + 3] === 0x06) {
        return new DataView(bytes.buffer, i, EOCDR_MIN)
      }
    }

    return null
  }
}

export default seekEOCDR
Binary file added test/fixture/dog_cat.zip
Binary file not shown.
4 changes: 0 additions & 4 deletions write.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,6 @@ class ZipTransformer {
const { header } = zipObject;
const data = getDataHelper(30 + nameBuf.length);

if (entry.level !== 0 && !entry.directory) {
header.view.setUint16(4, 0x0800);
}

header.view.setUint32(0, 0x14000808);
header.view.setUint16(6, (((date.getHours() << 6) | date.getMinutes()) << 5) | date.getSeconds() / 2, true);
header.view.setUint16(8, ((((date.getFullYear() - 1980) << 4) | (date.getMonth() + 1)) << 5) | date.getDate(), true);
Expand Down