Skip to content

Commit

Permalink
Merge pull request #30 from fasterthanlime/stored-entry
Browse files Browse the repository at this point in the history
chore!: Refactor StoredEntry a little bit
  • Loading branch information
fasterthanlime authored Jan 26, 2024
2 parents d932b77 + 0f59509 commit ab5e9d1
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 53 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ jobs:
uses: taiki-e/install-action@v2
with:
tool: just,nextest,cargo-llvm-cov
- name: Run cargo doc, deny warnings
run: |
export RUSTDOCFLAGS="-D warnings"
cargo doc --all-features --no-deps
- name: Run cargo clippy
run: |
cargo clippy --all-targets --all-features
Expand Down
27 changes: 16 additions & 11 deletions crates/jean/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
use clap::{Parser, Subcommand};
use humansize::{format_size, BINARY};
use rc_zip::{prelude::*, EntryContents};
use std::path::PathBuf;
use std::time::Duration;
use std::{borrow::Cow, fmt};

use std::{
borrow::Cow,
collections::HashSet,
fmt,
fs::File,
io::{self, Read},
path::PathBuf,
time::Duration,
};

struct Optional<T>(Option<T>);
Expand Down Expand Up @@ -73,7 +76,6 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
println!("Comment:\n{}", comment);
}

use std::collections::HashSet;
let mut creator_versions = HashSet::<rc_zip::Version>::new();
let mut reader_versions = HashSet::<rc_zip::Version>::new();
let mut methods = HashSet::<rc_zip::Method>::new();
Expand All @@ -96,8 +98,8 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
rc_zip::EntryContents::File => {
methods.insert(entry.method());
num_files += 1;
compressed_size += entry.compressed_size;
uncompressed_size += entry.uncompressed_size;
compressed_size += entry.inner.compressed_size;
uncompressed_size += entry.inner.uncompressed_size;
}
}
}
Expand Down Expand Up @@ -135,7 +137,7 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
} else {
Cow::from(entry.name().truncate_path(55))
},
size = format_size(entry.uncompressed_size, BINARY),
size = format_size(entry.inner.uncompressed_size, BINARY),
);
if verbose {
print!(
Expand Down Expand Up @@ -169,7 +171,7 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
let mut uncompressed_size: u64 = 0;
for entry in reader.entries() {
if let EntryContents::File = entry.contents() {
uncompressed_size += entry.uncompressed_size;
uncompressed_size += entry.inner.uncompressed_size;
}
}

Expand Down Expand Up @@ -250,10 +252,13 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
let mut entry_writer = File::create(path)?;
let entry_reader = entry.reader();
let before_entry_bytes = done_bytes;
let mut progress_reader =
ProgressRead::new(entry_reader, entry.uncompressed_size, |prog| {
let mut progress_reader = ProgressRead::new(
entry_reader,
entry.inner.uncompressed_size,
|prog| {
pbar.set_position(before_entry_bytes + prog.done);
});
},
);

let copied_bytes = std::io::copy(&mut progress_reader, &mut entry_writer)?;
done_bytes = before_entry_bytes + copied_bytes;
Expand Down
84 changes: 66 additions & 18 deletions crates/rc-zip/src/format/archive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::format::*;
/// along with a list of [entries][StoredEntry].
///
/// It is obtained via an [ArchiveReader](crate::reader::ArchiveReader), or via a higher-level API
/// like the [ReadZip](crate::reader::ReadZip) trait.
/// like the [ReadZip](crate::reader::sync::ReadZip) trait.
pub struct Archive {
pub(crate) size: u64,
pub(crate) encoding: Encoding,
Expand Down Expand Up @@ -87,12 +87,6 @@ pub struct StoredEntry {
/// This contains the entry's name, timestamps, comment, compression method.
pub entry: Entry,

/// CRC-32 hash as found in the central directory.
///
/// Note that this may be zero, and the actual CRC32 might be in the local header, or (more
/// commonly) in the data descriptor instead.
pub crc32: u32,

/// Offset of the local file header in the zip file
///
/// ```text
Expand All @@ -108,14 +102,6 @@ pub struct StoredEntry {
/// ```
pub header_offset: u64,

/// Size in bytes, after compression
pub compressed_size: u64,

/// Size in bytes, before compression
///
/// This will be zero for directories.
pub uncompressed_size: u64,

/// External attributes (zip)
pub external_attrs: u32,

Expand Down Expand Up @@ -151,19 +137,77 @@ pub struct StoredEntry {
/// but they are also made available here raw.
pub extra_fields: Vec<ExtraField>,

pub inner: StoredEntryInner,
}

#[derive(Clone, Copy, Debug)]
pub struct StoredEntryInner {
/// CRC-32 hash as found in the central directory.
///
/// Note that this may be zero, and the actual CRC32 might be in the local header, or (more
/// commonly) in the data descriptor instead.
pub crc32: u32,

/// Size in bytes, after compression
pub compressed_size: u64,

/// Size in bytes, before compression
///
/// This will be zero for directories.
pub uncompressed_size: u64,

/// True if this entry was read from a zip64 archive
pub is_zip64: bool,
}

impl StoredEntry {
/// Returns the entry's name
/// Returns the entry's name. See also
/// [sanitized_name()](StoredEntry::sanitized_name), which returns a
/// sanitized version of the name.
///
/// This should be a relative path, separated by `/`. However, there are zip files in the wild
/// with all sorts of evil variants, so, be conservative in what you accept.
/// This should be a relative path, separated by `/`. However, there are zip
/// files in the wild with all sorts of evil variants, so, be conservative
/// in what you accept.
pub fn name(&self) -> &str {
self.entry.name.as_ref()
}

/// Returns a sanitized version of the entry's name, if it
/// seems safe. In particular, if this method feels like the
/// entry name is trying to do a zip slip (cf.
/// <https://snyk.io/research/zip-slip-vulnerability>), it'll return
/// None.
///
/// Other than that, it will strip any leading slashes on non-Windows OSes.
pub fn sanitized_name(&self) -> Option<&str> {
let name = self.name();

// refuse entries with traversed/absolute path to mitigate zip slip
if name.contains("..") {
return None;
}

#[cfg(windows)]
{
if name.contains(":\\") || name.starts_with("\\") {
return None;
}
Some(name)
}

#[cfg(not(windows))]
{
// strip absolute prefix on entries pointing to root path
let mut entry_chars = name.chars();
let mut name = name;
while name.starts_with('/') {
entry_chars.next();
name = entry_chars.as_str()
}
Some(name)
}
}

/// The entry's comment, if any.
///
/// When reading a zip file, an empty comment results in None.
Expand All @@ -172,6 +216,7 @@ impl StoredEntry {
}

/// The compression method used for this entry
#[inline(always)]
pub fn method(&self) -> Method {
self.entry.method
}
Expand All @@ -184,20 +229,23 @@ impl StoredEntry {
/// epoch, if something went really wrong.
///
/// If you're reading this after the year 2038, or after the year 2108, godspeed.
#[inline(always)]
pub fn modified(&self) -> DateTime<Utc> {
self.entry.modified
}

/// This entry's "created" timestamp, if available.
///
/// See [StoredEntry::modified()] for caveats.
#[inline(always)]
pub fn created(&self) -> Option<&DateTime<Utc>> {
self.entry.created.as_ref()
}

/// This entry's "last accessed" timestamp, if available.
///
/// See [StoredEntry::modified()] for caveats.
#[inline(always)]
pub fn accessed(&self) -> Option<&DateTime<Utc>> {
self.entry.accessed.as_ref()
}
Expand Down
10 changes: 6 additions & 4 deletions crates/rc-zip/src/format/directory_header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,12 @@ impl DirectoryHeader {
reader_version: self.reader_version,
flags: self.flags,

crc32: self.crc32,
compressed_size,
uncompressed_size,
inner: StoredEntryInner {
crc32: self.crc32,
compressed_size,
uncompressed_size,
is_zip64,
},
header_offset,

uid,
Expand All @@ -240,7 +243,6 @@ impl DirectoryHeader {
extra_fields,

external_attrs: self.external_attrs,
is_zip64,
})
}
}
4 changes: 2 additions & 2 deletions crates/rc-zip/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
//!
//! ### Reading
//!
//! [ArchiveReader](ArchiveReader) is your first stop. It
//! [ArchiveReader](reader::ArchiveReader) is your first stop. It
//! ensures we are dealing with a valid zip archive, and reads the central
//! directory. It does not perform I/O itself, but rather, it is a state machine
//! that asks for reads at specific offsets.
//!
//! An [Archive](Archive) contains a full list of [entries](types::StoredEntry),
//! An [Archive] contains a full list of [entries](StoredEntry),
//! which you can then extract.
//!
//! ### Writing
Expand Down
24 changes: 8 additions & 16 deletions crates/rc-zip/src/reader/sync/entry_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,8 @@ where
rd: EOFNormalizer<R>,
eof: bool,
state: State,
// entry info required for extraction
// copy them over to reduce the amount of necessary lifetimes/references
compressed_size: u64,
uncompressed_size: u64,
inner: StoredEntryInner,
method: Method,
is_zip64: bool,
crc32: u32,
}

impl<R> io::Read for EntryReader<R>
Expand All @@ -76,7 +71,7 @@ where

trace!("local file header: {:#?}", header);
transition!(self.state => (S::ReadLocalHeader { buffer }) {
let limited_reader = LimitedReader::new(buffer, self.compressed_size);
let limited_reader = LimitedReader::new(buffer, self.inner.compressed_size);
let decoder: Box<dyn Decoder<LimitedReader>> = match self.method {
Method::Store => Box::new(StoreDecoder::new(limited_reader)),
Method::Deflate => Box::new(deflate::Decoder::new(limited_reader)),
Expand Down Expand Up @@ -158,7 +153,7 @@ where
buffer.available_space()
);

match DataDescriptorRecord::parse(buffer.data(), self.is_zip64) {
match DataDescriptorRecord::parse(buffer.data(), self.inner.is_zip64) {
Ok((_remaining, descriptor)) => {
trace!("data descriptor = {:#?}", descriptor);
transition!(self.state => (S::ReadDataDescriptor { metrics, header, .. }) {
Expand Down Expand Up @@ -195,16 +190,16 @@ where
ref header,
ref descriptor,
} => {
let expected_crc32 = if self.crc32 != 0 {
self.crc32
let expected_crc32 = if self.inner.crc32 != 0 {
self.inner.crc32
} else if let Some(descriptor) = descriptor.as_ref() {
descriptor.crc32
} else {
header.crc32
};

let expected_size = if self.uncompressed_size != 0 {
self.uncompressed_size
let expected_size = if self.inner.uncompressed_size != 0 {
self.inner.uncompressed_size
} else if let Some(descriptor) = descriptor.as_ref() {
descriptor.uncompressed_size
} else {
Expand Down Expand Up @@ -252,11 +247,8 @@ where
state: State::ReadLocalHeader {
buffer: circular::Buffer::with_capacity(Self::DEFAULT_BUFFER_SIZE),
},
compressed_size: entry.compressed_size,
uncompressed_size: entry.uncompressed_size,
method: entry.method(),
is_zip64: entry.is_zip64,
crc32: entry.crc32,
inner: entry.inner,
}
}
}
4 changes: 2 additions & 2 deletions crates/rc-zip/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ fn test_fsm() {
let sync_archive = bs.read_zip().unwrap();
for (se, e) in sync_archive.entries().zip(archive.entries()) {
assert_eq!(se.name(), e.name());
assert_eq!(se.compressed_size, e.compressed_size);
assert_eq!(se.uncompressed_size, e.uncompressed_size);
assert_eq!(se.inner.compressed_size, e.inner.compressed_size);
assert_eq!(se.inner.uncompressed_size, e.inner.uncompressed_size);
}
}

0 comments on commit ab5e9d1

Please sign in to comment.