Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore!: Refactor StoredEntry a little bit #30

Merged
merged 3 commits
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ jobs:
uses: taiki-e/install-action@v2
with:
tool: just,nextest,cargo-llvm-cov
- name: Run cargo doc, deny warnings
run: |
export RUSTDOCFLAGS="-D warnings"
cargo doc --all-features --no-deps
- name: Run cargo clippy
run: |
cargo clippy --all-targets --all-features
Expand Down
27 changes: 16 additions & 11 deletions crates/jean/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
use clap::{Parser, Subcommand};
use humansize::{format_size, BINARY};
use rc_zip::{prelude::*, EntryContents};
use std::path::PathBuf;
use std::time::Duration;
use std::{borrow::Cow, fmt};

use std::{
borrow::Cow,
collections::HashSet,
fmt,
fs::File,
io::{self, Read},
path::PathBuf,
time::Duration,
};

struct Optional<T>(Option<T>);
Expand Down Expand Up @@ -73,7 +76,6 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
println!("Comment:\n{}", comment);
}

use std::collections::HashSet;
let mut creator_versions = HashSet::<rc_zip::Version>::new();
let mut reader_versions = HashSet::<rc_zip::Version>::new();
let mut methods = HashSet::<rc_zip::Method>::new();
Expand All @@ -96,8 +98,8 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
rc_zip::EntryContents::File => {
methods.insert(entry.method());
num_files += 1;
compressed_size += entry.compressed_size;
uncompressed_size += entry.uncompressed_size;
compressed_size += entry.inner.compressed_size;
uncompressed_size += entry.inner.uncompressed_size;
}
}
}
Expand Down Expand Up @@ -135,7 +137,7 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
} else {
Cow::from(entry.name().truncate_path(55))
},
size = format_size(entry.uncompressed_size, BINARY),
size = format_size(entry.inner.uncompressed_size, BINARY),
);
if verbose {
print!(
Expand Down Expand Up @@ -169,7 +171,7 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
let mut uncompressed_size: u64 = 0;
for entry in reader.entries() {
if let EntryContents::File = entry.contents() {
uncompressed_size += entry.uncompressed_size;
uncompressed_size += entry.inner.uncompressed_size;
}
}

Expand Down Expand Up @@ -250,10 +252,13 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
let mut entry_writer = File::create(path)?;
let entry_reader = entry.reader();
let before_entry_bytes = done_bytes;
let mut progress_reader =
ProgressRead::new(entry_reader, entry.uncompressed_size, |prog| {
let mut progress_reader = ProgressRead::new(
entry_reader,
entry.inner.uncompressed_size,
|prog| {
pbar.set_position(before_entry_bytes + prog.done);
});
},
);

let copied_bytes = std::io::copy(&mut progress_reader, &mut entry_writer)?;
done_bytes = before_entry_bytes + copied_bytes;
Expand Down
84 changes: 66 additions & 18 deletions crates/rc-zip/src/format/archive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::format::*;
/// along with a list of [entries][StoredEntry].
///
/// It is obtained via an [ArchiveReader](crate::reader::ArchiveReader), or via a higher-level API
/// like the [ReadZip](crate::reader::ReadZip) trait.
/// like the [ReadZip](crate::reader::sync::ReadZip) trait.
pub struct Archive {
pub(crate) size: u64,
pub(crate) encoding: Encoding,
Expand Down Expand Up @@ -87,12 +87,6 @@ pub struct StoredEntry {
/// This contains the entry's name, timestamps, comment, compression method.
pub entry: Entry,

/// CRC-32 hash as found in the central directory.
///
/// Note that this may be zero, and the actual CRC32 might be in the local header, or (more
/// commonly) in the data descriptor instead.
pub crc32: u32,

/// Offset of the local file header in the zip file
///
/// ```text
Expand All @@ -108,14 +102,6 @@ pub struct StoredEntry {
/// ```
pub header_offset: u64,

/// Size in bytes, after compression
pub compressed_size: u64,

/// Size in bytes, before compression
///
/// This will be zero for directories.
pub uncompressed_size: u64,

/// External attributes (zip)
pub external_attrs: u32,

Expand Down Expand Up @@ -151,19 +137,77 @@ pub struct StoredEntry {
/// but they are also made available here raw.
pub extra_fields: Vec<ExtraField>,

pub inner: StoredEntryInner,
}

/// The subset of a [StoredEntry]'s metadata that is also needed while
/// decompressing the entry's data: CRC-32, sizes, and whether the entry came
/// from a zip64 archive. It is `Copy`, so readers can carry it by value
/// instead of borrowing the whole [StoredEntry].
#[derive(Clone, Copy, Debug)]
pub struct StoredEntryInner {
    /// CRC-32 hash as found in the central directory.
    ///
    /// Note that this may be zero, and the actual CRC32 might be in the local header, or (more
    /// commonly) in the data descriptor instead.
    pub crc32: u32,

    /// Size in bytes, after compression
    pub compressed_size: u64,

    /// Size in bytes, before compression
    ///
    /// This will be zero for directories.
    pub uncompressed_size: u64,

    /// True if this entry was read from a zip64 archive
    pub is_zip64: bool,
}

impl StoredEntry {
/// Returns the entry's name
/// Returns the entry's name. See also
/// [sanitized_name()](StoredEntry::sanitized_name), which returns a
/// sanitized version of the name.
///
/// This should be a relative path, separated by `/`. However, there are zip files in the wild
/// with all sorts of evil variants, so, be conservative in what you accept.
/// This should be a relative path, separated by `/`. However, there are zip
/// files in the wild with all sorts of evil variants, so, be conservative
/// in what you accept.
pub fn name(&self) -> &str {
self.entry.name.as_ref()
}

/// Returns a sanitized version of the entry's name, if it
/// seems safe. In particular, if this method feels like the
/// entry name is trying to do a zip slip (cf.
/// <https://snyk.io/research/zip-slip-vulnerability>), it'll return
/// None.
///
/// Other than that, it will strip any leading slashes on non-Windows OSes.
pub fn sanitized_name(&self) -> Option<&str> {
let name = self.name();

// refuse entries with traversed/absolute path to mitigate zip slip
if name.contains("..") {
return None;
}

#[cfg(windows)]
{
if name.contains(":\\") || name.starts_with("\\") {
return None;
}
Some(name)
}

#[cfg(not(windows))]
{
// strip absolute prefix on entries pointing to root path
let mut entry_chars = name.chars();
let mut name = name;
while name.starts_with('/') {
entry_chars.next();
name = entry_chars.as_str()
}
Some(name)
}
}

/// The entry's comment, if any.
///
/// When reading a zip file, an empty comment results in None.
Expand All @@ -172,6 +216,7 @@ impl StoredEntry {
}

    /// The compression method used for this entry (e.g. store or deflate).
    #[inline(always)]
    pub fn method(&self) -> Method {
        self.entry.method
    }
Expand All @@ -184,20 +229,23 @@ impl StoredEntry {
    /// This entry's "last modified" timestamp. May fall back to the UNIX
    /// epoch, if something went really wrong.
    ///
    /// If you're reading this after the year 2038, or after the year 2108, godspeed.
    #[inline(always)]
    pub fn modified(&self) -> DateTime<Utc> {
        self.entry.modified
    }

    /// This entry's "created" timestamp, if available.
    ///
    /// See [StoredEntry::modified()] for caveats.
    #[inline(always)]
    pub fn created(&self) -> Option<&DateTime<Utc>> {
        self.entry.created.as_ref()
    }

    /// This entry's "last accessed" timestamp, if available.
    ///
    /// See [StoredEntry::modified()] for caveats.
    #[inline(always)]
    pub fn accessed(&self) -> Option<&DateTime<Utc>> {
        self.entry.accessed.as_ref()
    }
Expand Down
10 changes: 6 additions & 4 deletions crates/rc-zip/src/format/directory_header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,12 @@ impl DirectoryHeader {
reader_version: self.reader_version,
flags: self.flags,

crc32: self.crc32,
compressed_size,
uncompressed_size,
inner: StoredEntryInner {
crc32: self.crc32,
compressed_size,
uncompressed_size,
is_zip64,
},
header_offset,

uid,
Expand All @@ -240,7 +243,6 @@ impl DirectoryHeader {
extra_fields,

external_attrs: self.external_attrs,
is_zip64,
})
}
}
4 changes: 2 additions & 2 deletions crates/rc-zip/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
//!
//! ### Reading
//!
//! [ArchiveReader](ArchiveReader) is your first stop. It
//! [ArchiveReader](reader::ArchiveReader) is your first stop. It
//! ensures we are dealing with a valid zip archive, and reads the central
//! directory. It does not perform I/O itself, but rather, it is a state machine
//! that asks for reads at specific offsets.
//!
//! An [Archive](Archive) contains a full list of [entries](types::StoredEntry),
//! An [Archive] contains a full list of [entries](StoredEntry),
//! which you can then extract.
//!
//! ### Writing
Expand Down
24 changes: 8 additions & 16 deletions crates/rc-zip/src/reader/sync/entry_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,8 @@ where
rd: EOFNormalizer<R>,
eof: bool,
state: State,
// entry info required for extraction
// copy them over to reduce the amount of necessary lifetimes/references
compressed_size: u64,
uncompressed_size: u64,
inner: StoredEntryInner,
method: Method,
is_zip64: bool,
crc32: u32,
}

impl<R> io::Read for EntryReader<R>
Expand All @@ -76,7 +71,7 @@ where

trace!("local file header: {:#?}", header);
transition!(self.state => (S::ReadLocalHeader { buffer }) {
let limited_reader = LimitedReader::new(buffer, self.compressed_size);
let limited_reader = LimitedReader::new(buffer, self.inner.compressed_size);
let decoder: Box<dyn Decoder<LimitedReader>> = match self.method {
Method::Store => Box::new(StoreDecoder::new(limited_reader)),
Method::Deflate => Box::new(deflate::Decoder::new(limited_reader)),
Expand Down Expand Up @@ -158,7 +153,7 @@ where
buffer.available_space()
);

match DataDescriptorRecord::parse(buffer.data(), self.is_zip64) {
match DataDescriptorRecord::parse(buffer.data(), self.inner.is_zip64) {
Ok((_remaining, descriptor)) => {
trace!("data descriptor = {:#?}", descriptor);
transition!(self.state => (S::ReadDataDescriptor { metrics, header, .. }) {
Expand Down Expand Up @@ -195,16 +190,16 @@ where
ref header,
ref descriptor,
} => {
let expected_crc32 = if self.crc32 != 0 {
self.crc32
let expected_crc32 = if self.inner.crc32 != 0 {
self.inner.crc32
} else if let Some(descriptor) = descriptor.as_ref() {
descriptor.crc32
} else {
header.crc32
};

let expected_size = if self.uncompressed_size != 0 {
self.uncompressed_size
let expected_size = if self.inner.uncompressed_size != 0 {
self.inner.uncompressed_size
} else if let Some(descriptor) = descriptor.as_ref() {
descriptor.uncompressed_size
} else {
Expand Down Expand Up @@ -252,11 +247,8 @@ where
state: State::ReadLocalHeader {
buffer: circular::Buffer::with_capacity(Self::DEFAULT_BUFFER_SIZE),
},
compressed_size: entry.compressed_size,
uncompressed_size: entry.uncompressed_size,
method: entry.method(),
is_zip64: entry.is_zip64,
crc32: entry.crc32,
inner: entry.inner,
}
}
}
4 changes: 2 additions & 2 deletions crates/rc-zip/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ fn test_fsm() {
let sync_archive = bs.read_zip().unwrap();
for (se, e) in sync_archive.entries().zip(archive.entries()) {
assert_eq!(se.name(), e.name());
assert_eq!(se.compressed_size, e.compressed_size);
assert_eq!(se.uncompressed_size, e.uncompressed_size);
assert_eq!(se.inner.compressed_size, e.inner.compressed_size);
assert_eq!(se.inner.uncompressed_size, e.inner.uncompressed_size);
}
}
Loading