diff --git a/Cargo.toml b/Cargo.toml index f781b1e..aedf05a 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ plist = "1.7.0" regex = "1.11.1" base64 = "0.22.1" chrono = "0.4.38" +walkdir = "2.5.0" [dev-dependencies] simplelog = "0.12.2" diff --git a/benches/big_sur_benchmark.rs b/benches/big_sur_benchmark.rs index 63f80e7..2d5df65 100644 --- a/benches/big_sur_benchmark.rs +++ b/benches/big_sur_benchmark.rs @@ -5,11 +5,12 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and limitations under the License. -use std::path::PathBuf; +use std::{fs::File, path::PathBuf}; use criterion::{criterion_group, criterion_main, Criterion}; use macos_unifiedlogs::{ dsc::SharedCacheStrings, + filesystem::LogarchiveProvider, parser::{build_log, collect_shared_strings, collect_strings, collect_timesync, parse_log}, timesync::TimesyncBoot, unified_log::UnifiedLogData, @@ -17,7 +18,8 @@ use macos_unifiedlogs::{ }; fn big_sur_parse_log(path: &str) { - let _ = parse_log(&path).unwrap(); + let handle = File::open(PathBuf::from(path).as_path()).unwrap(); + let _ = parse_log(handle).unwrap(); } fn bench_build_log( @@ -49,20 +51,17 @@ fn big_sur_single_log_benchpress(c: &mut Criterion) { fn big_sur_build_log_benchbress(c: &mut Criterion) { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let 
shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("Persist/0000000000000004.tracev3"); let exclude_missing = false; + let handle = File::open(test_path.as_path()).unwrap(); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let log_data = parse_log(handle).unwrap(); c.bench_function("Benching Building One Big Sur Log", |b| { b.iter(|| { diff --git a/benches/high_sierra_benchmark.rs b/benches/high_sierra_benchmark.rs index c91a43b..9e82d0c 100644 --- a/benches/high_sierra_benchmark.rs +++ b/benches/high_sierra_benchmark.rs @@ -5,18 +5,21 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and limitations under the License. -use std::path::PathBuf; +use std::{fs::File, path::PathBuf}; use criterion::{criterion_group, criterion_main, Criterion}; use macos_unifiedlogs::{ dsc::SharedCacheStrings, + filesystem::LogarchiveProvider, parser::{build_log, collect_shared_strings, collect_strings, collect_timesync, parse_log}, timesync::TimesyncBoot, unified_log::UnifiedLogData, uuidtext::UUIDText, }; + fn high_sierra_parse_log(path: &str) { - let _ = parse_log(&path).unwrap(); + let handle = File::open(PathBuf::from(path).as_path()).unwrap(); + let _ = parse_log(handle).unwrap(); } fn bench_build_log( @@ -49,20 +52,16 @@ fn high_sierra_single_log_benchpress(c: &mut Criterion) { fn high_sierra_build_log_benchbress(c: &mut Criterion) { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_high_sierra.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let 
timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("Persist/0000000000000002.tracev3"); let exclude_missing = false; + let handle = File::open(test_path.as_path()).unwrap(); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let log_data = parse_log(handle).unwrap(); c.bench_function("Benching Building One High Sierra Log", |b| { b.iter(|| { diff --git a/benches/monterey_benchmark.rs b/benches/monterey_benchmark.rs index 954ffa0..b8b581c 100644 --- a/benches/monterey_benchmark.rs +++ b/benches/monterey_benchmark.rs @@ -5,18 +5,20 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and limitations under the License. 
-use std::path::PathBuf; +use std::{fs::File, path::PathBuf}; use criterion::{criterion_group, criterion_main, Criterion}; use macos_unifiedlogs::{ dsc::SharedCacheStrings, + filesystem::LogarchiveProvider, parser::{build_log, collect_shared_strings, collect_strings, collect_timesync, parse_log}, timesync::TimesyncBoot, unified_log::UnifiedLogData, uuidtext::UUIDText, }; fn monterey_parse_log(path: &str) { - let _ = parse_log(&path).unwrap(); + let handle = File::open(PathBuf::from(path).as_path()).unwrap(); + let _ = parse_log(handle).unwrap(); } fn bench_build_log( @@ -48,20 +50,17 @@ fn monterey_single_log_benchpress(c: &mut Criterion) { fn monterey_build_log_benchbress(c: &mut Criterion) { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_monterey.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("Persist/0000000000000004.tracev3"); let exclude_missing = false; + let handle = File::open(test_path.as_path()).unwrap(); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let log_data = parse_log(handle).unwrap(); c.bench_function("Benching Building One Monterey Log", |b| { b.iter(|| { diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 06a1479..9647b2e 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -1,7 +1,2 @@ [workspace] -members = [ - "unifiedlog_parser", - 
"unifiedlog_parser_json", - "parse_tracev3", - "unifiedlog_iterator", -] +members = ["unifiedlog_parser"] diff --git a/examples/parse_tracev3/Cargo.toml b/examples/parse_tracev3/Cargo.toml deleted file mode 100644 index b0e0104..0000000 --- a/examples/parse_tracev3/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "parse_tracev3" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -simplelog = "0.12.2" -serde_json = "1.0.122" -log = "0.4.22" -macos-unifiedlogs = {path = "../../"} \ No newline at end of file diff --git a/examples/parse_tracev3/src/main.rs b/examples/parse_tracev3/src/main.rs deleted file mode 100644 index 350f99e..0000000 --- a/examples/parse_tracev3/src/main.rs +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2022 Mandiant, Inc. All Rights Reserved -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the License -// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and limitations under the License. 
- -use log::LevelFilter; -use macos_unifiedlogs::dsc::SharedCacheStrings; -use macos_unifiedlogs::parser::{build_log, parse_log}; -use macos_unifiedlogs::timesync::TimesyncBoot; -use macos_unifiedlogs::unified_log::LogData; -use macos_unifiedlogs::uuidtext::UUIDText; - -use simplelog::{Config, SimpleLogger}; -use std::env; -use std::error::Error; -use std::fs::OpenOptions; -use std::io::Write; -use std::path::Path; - -fn main() { - println!("Starting Unified Log parser..."); - // Set logging to Error only, since we are parsing only a tracev3, we wont have enough data to build the whole log - SimpleLogger::init(LevelFilter::Error, Config::default()) - .expect("Failed to initialize simple logger"); - - let args: Vec = env::args().collect(); - if args.len() == 2 { - let archive_path = &args[1]; - parse_trace_file(archive_path); - } else { - println!("Expected an argument for a tracev3 file") - } -} - -// Parse single tracev3 file -fn parse_trace_file(path: &str) { - let log_data = parse_log(path).unwrap(); - let filename = Path::new(path); - // Pass empty UUID, UUID cache, timesync files - let string_results: Vec = Vec::new(); - let shared_strings_results: Vec = Vec::new(); - let timesync_data: Vec = Vec::new(); - let exclude_missing = false; - - // We only get minimal data since we dont have the log metadata - let (results, _) = build_log( - &log_data, - &string_results, - &shared_strings_results, - ×ync_data, - exclude_missing, - ); - output(&results, filename.file_name().unwrap().to_str().unwrap()).unwrap(); - println!( - "\nParsed file: {} to {}.json", - path, - filename.file_name().unwrap().to_str().unwrap() - ) -} - -// Create JSON file -fn output(results: &Vec, output_name: &str) -> Result<(), Box> { - let mut json_file = OpenOptions::new() - .append(true) - .create(true) - .open(format!("{}.json", output_name))?; - - let serde_data = serde_json::to_string(&results)?; - - json_file.write_all(serde_data.as_bytes())?; - - Ok(()) -} diff --git 
a/examples/unifiedlog_iterator/Cargo.toml b/examples/unifiedlog_iterator/Cargo.toml deleted file mode 100644 index a2dcea6..0000000 --- a/examples/unifiedlog_iterator/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "unifiedlog_iterator" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -simplelog = "0.12.2" -csv = "1.3.0" -chrono = "0.4.38" -log = "0.4.22" -serde_json = "1.0.122" -macos-unifiedlogs = {path = "../../"} -clap = {version = "4.5.18", features = ["derive"]} \ No newline at end of file diff --git a/examples/unifiedlog_iterator/src/main.rs b/examples/unifiedlog_iterator/src/main.rs deleted file mode 100644 index 3e08d64..0000000 --- a/examples/unifiedlog_iterator/src/main.rs +++ /dev/null @@ -1,473 +0,0 @@ -// Copyright 2022 Mandiant, Inc. All Rights Reserved -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the License -// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and limitations under the License. 
- -use chrono::{SecondsFormat, TimeZone, Utc}; -use log::LevelFilter; -use macos_unifiedlogs::dsc::SharedCacheStrings; -use macos_unifiedlogs::iterator::UnifiedLogIterator; -use macos_unifiedlogs::parser::{ - build_log, collect_shared_strings, collect_shared_strings_system, collect_strings, - collect_strings_system, collect_timesync, collect_timesync_system, -}; -use macos_unifiedlogs::timesync::TimesyncBoot; -use macos_unifiedlogs::unified_log::{LogData, UnifiedLogData}; -use macos_unifiedlogs::uuidtext::UUIDText; -use simplelog::{Config, SimpleLogger}; -use std::error::Error; -use std::fs::OpenOptions; -use std::io::Write; -use std::path::PathBuf; -use std::{fs, io}; - -use clap::Parser; -use csv::Writer; - -#[derive(Parser, Debug)] -#[clap(version, about, long_about = None)] -struct Args { - /// Run on live system - #[clap(short, long, default_value = "false")] - live: String, - - /// Path to logarchive formatted directory - #[clap(short, long, default_value = "")] - input: String, - - /// Path to output file. Any directories must already exist - #[clap(short, long, default_value = "")] - output: String, - - /// Output format. Options: csv, jsonl. Default is autodetect. 
- #[clap(short, long, default_value = "auto")] - format: String, - - /// Append to output file - /// If false, will overwrite output file - #[clap(short, long, default_value = "false")] - append: bool, -} - -fn main() { - eprintln!("Starting Unified Log parser..."); - // Set logging level to warning - SimpleLogger::init(LevelFilter::Warn, Config::default()) - .expect("Failed to initialize simple logger"); - - let args = Args::parse(); - let output_format = if args.format.is_empty() || args.format == "auto" { - std::path::Path::new(&args.output) - .extension() - .and_then(std::ffi::OsStr::to_str) - .unwrap_or("csv") - .to_string() - } else { - args.format.clone() - }; - - let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap(); - - if !args.input.is_empty() { - parse_log_archive(&args.input, &mut writer); - } else if args.live != "false" { - parse_live_system(&mut writer); - } -} - -// Parse a provided directory path. Currently, expect the path to follow macOS log collect structure -fn parse_log_archive(path: &str, writer: &mut OutputWriter) { - let mut archive_path = PathBuf::from(path); - - // Parse all UUID files which contain strings and other metadata - let string_results = collect_strings(&archive_path.display().to_string()).unwrap(); - - archive_path.push("dsc"); - // Parse UUID cache files which also contain strings and other metadata - let shared_strings_results = - collect_shared_strings(&archive_path.display().to_string()).unwrap(); - archive_path.pop(); - - archive_path.push("timesync"); - // Parse all timesync files - let timesync_data = collect_timesync(&archive_path.display().to_string()).unwrap(); - archive_path.pop(); - - // Keep UUID, UUID cache, timesync files in memory while we parse all tracev3 files - // Allows for faster lookups - parse_trace_file( - &string_results, - &shared_strings_results, - ×ync_data, - path, - writer, - ); - - eprintln!("\nFinished parsing Unified Log data."); -} - -// Parse a live macOS 
system -fn parse_live_system(writer: &mut OutputWriter) { - let strings = collect_strings_system().unwrap(); - let shared_strings = collect_shared_strings_system().unwrap(); - let timesync_data = collect_timesync_system().unwrap(); - - parse_trace_file( - &strings, - &shared_strings, - ×ync_data, - "/private/var/db/diagnostics", - writer, - ); - - eprintln!("\nFinished parsing Unified Log data."); -} - -// Use the provided strings, shared strings, timesync data to parse the Unified Log data at provided path. -// Currently, expect the path to follow macOS log collect structure -fn parse_trace_file( - string_results: &[UUIDText], - shared_strings_results: &[SharedCacheStrings], - timesync_data: &[TimesyncBoot], - path: &str, - writer: &mut OutputWriter, -) { - // We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries) - // Some log entries have Oversize strings located in different tracev3 files. - // This is very rare. Seen in ~20 log entries out of ~700,000. 
Seen in ~700 out of ~18 million - let mut oversize_strings = UnifiedLogData { - header: Vec::new(), - catalog_data: Vec::new(), - oversize: Vec::new(), - }; - - let mut missing_data: Vec = Vec::new(); - - let mut archive_path = PathBuf::from(path); - archive_path.push("Persist"); - - let mut log_count = 0; - if archive_path.exists() { - let paths = fs::read_dir(&archive_path).unwrap(); - - // Loop through all tracev3 files in Persist directory - for log_path in paths { - let data = log_path.unwrap(); - let full_path = data.path().display().to_string(); - eprintln!("Parsing: {}", full_path); - - if data.path().exists() { - let count = iterate_chunks( - &full_path, - &mut missing_data, - string_results, - shared_strings_results, - timesync_data, - writer, - &mut oversize_strings, - ); - log_count += count; - } else { - eprintln!("File {} no longer on disk", full_path); - continue; - }; - } - } - - archive_path.pop(); - archive_path.push("Special"); - - if archive_path.exists() { - let paths = fs::read_dir(&archive_path).unwrap(); - - // Loop through all tracev3 files in Special directory - for log_path in paths { - let data = log_path.unwrap(); - let full_path = data.path().display().to_string(); - eprintln!("Parsing: {}", full_path); - - if data.path().exists() { - let count = iterate_chunks( - &full_path, - &mut missing_data, - string_results, - shared_strings_results, - timesync_data, - writer, - &mut oversize_strings, - ); - log_count += count; - } else { - eprintln!("File {} no longer on disk", full_path); - continue; - }; - } - } - - archive_path.pop(); - archive_path.push("Signpost"); - - if archive_path.exists() { - let paths = fs::read_dir(&archive_path).unwrap(); - - // Loop through all tracev3 files in Signpost directory - for log_path in paths { - let data = log_path.unwrap(); - let full_path = data.path().display().to_string(); - eprintln!("Parsing: {}", full_path); - - if data.path().exists() { - let count = iterate_chunks( - &full_path, - &mut 
missing_data, - string_results, - shared_strings_results, - timesync_data, - writer, - &mut oversize_strings, - ); - log_count += count; - } else { - eprintln!("File {} no longer on disk", full_path); - continue; - }; - } - } - archive_path.pop(); - archive_path.push("HighVolume"); - - if archive_path.exists() { - let paths = fs::read_dir(&archive_path).unwrap(); - - // Loop through all tracev3 files in HighVolume directory - for log_path in paths { - let data = log_path.unwrap(); - let full_path = data.path().display().to_string(); - eprintln!("Parsing: {}", full_path); - - if data.path().exists() { - let count = iterate_chunks( - &full_path, - &mut missing_data, - string_results, - shared_strings_results, - timesync_data, - writer, - &mut oversize_strings, - ); - log_count += count; - } else { - eprintln!("File {} no longer on disk", full_path); - continue; - }; - } - } - archive_path.pop(); - - archive_path.push("logdata.LiveData.tracev3"); - - // Check if livedata exists. We only have it if 'log collect' was used - if archive_path.exists() { - eprintln!("Parsing: logdata.LiveData.tracev3"); - - let count = iterate_chunks( - &archive_path.display().to_string(), - &mut missing_data, - string_results, - shared_strings_results, - timesync_data, - writer, - &mut oversize_strings, - ); - log_count += count; - archive_path.pop(); - } - - let include_missing = false; - println!("Oversize cache size: {}", oversize_strings.oversize.len()); - println!("Logs with missing Oversize strings: {}", missing_data.len()); - println!("Checking Oversize cache one more time..."); - - // Since we have all Oversize entries now. 
Go through any log entries that we were not able to build before - for mut leftover_data in missing_data { - // Add all of our previous oversize data to logs for lookups - leftover_data.oversize = oversize_strings.oversize.clone(); - - // Exclude_missing = false - // If we fail to find any missing data its probably due to the logs rolling - // Ex: tracev3A rolls, tracev3B references Oversize entry in tracev3A will trigger missing data since tracev3A is gone - let (results, _) = build_log( - &leftover_data, - string_results, - shared_strings_results, - timesync_data, - include_missing, - ); - log_count += results.len(); - - output(&results, writer).unwrap(); - } - eprintln!("Parsed {} log entries", log_count); -} - -fn iterate_chunks( - path: &str, - missing: &mut Vec, - strings_data: &[UUIDText], - shared_strings: &[SharedCacheStrings], - timesync_data: &[TimesyncBoot], - writer: &mut OutputWriter, - oversize_strings: &mut UnifiedLogData, -) -> usize { - let log_bytes = fs::read(path).unwrap(); - let log_iterator = UnifiedLogIterator { - data: log_bytes, - header: Vec::new(), - }; - - // Exclude missing data from returned output. Keep separate until we parse all oversize entries. 
- // Then after parsing all logs, go through all missing data and check all parsed oversize entries again - let exclude_missing = true; - - let mut count = 0; - for mut chunk in log_iterator { - chunk.oversize.append(&mut oversize_strings.oversize); - let (results, missing_logs) = build_log( - &chunk, - strings_data, - shared_strings, - timesync_data, - exclude_missing, - ); - count += results.len(); - oversize_strings.oversize = chunk.oversize; - output(&results, writer).unwrap(); - if missing_logs.catalog_data.is_empty() - && missing_logs.header.is_empty() - && missing_logs.oversize.is_empty() - { - continue; - } - // Track possible missing log data due to oversize strings being in another file - missing.push(missing_logs); - } - - count -} - -pub struct OutputWriter { - writer: OutputWriterEnum, -} - -enum OutputWriterEnum { - Csv(Box>>), - Json(Box), -} - -impl OutputWriter { - pub fn new( - output_path: &str, - output_format: &str, - append: bool, - ) -> Result> { - let writer: Box = if !output_path.is_empty() { - Box::new( - OpenOptions::new() - .write(true) - .create(true) - .truncate(!append) - .append(append) - .open(output_path)?, - ) - } else { - Box::new(io::stdout()) - }; - - let writer_enum = match output_format { - "csv" => { - let mut csv_writer = Writer::from_writer(writer); - // Write CSV headers - csv_writer.write_record([ - "Timestamp", - "Event Type", - "Log Type", - "Subsystem", - "Thread ID", - "PID", - "EUID", - "Library", - "Library UUID", - "Activity ID", - "Category", - "Process", - "Process UUID", - "Message", - "Raw Message", - "Boot UUID", - "System Timezone Name", - ])?; - csv_writer.flush()?; - OutputWriterEnum::Csv(Box::new(csv_writer)) - } - "jsonl" => OutputWriterEnum::Json(writer), - _ => { - eprintln!("Unsupported output format: {}", output_format); - std::process::exit(1); - } - }; - - Ok(OutputWriter { - writer: writer_enum, - }) - } - - pub fn write_record(&mut self, record: &LogData) -> Result<(), Box> { - match &mut 
self.writer { - OutputWriterEnum::Csv(csv_writer) => { - let date_time = Utc.timestamp_nanos(record.time as i64); - csv_writer.write_record(&[ - date_time.to_rfc3339_opts(SecondsFormat::Millis, true), - record.event_type.to_owned(), - record.log_type.to_owned(), - record.subsystem.to_owned(), - record.thread_id.to_string(), - record.pid.to_string(), - record.euid.to_string(), - record.library.to_owned(), - record.library_uuid.to_owned(), - record.activity_id.to_string(), - record.category.to_owned(), - record.process.to_owned(), - record.process_uuid.to_owned(), - record.message.to_owned(), - record.raw_message.to_owned(), - record.boot_uuid.to_owned(), - record.timezone_name.to_owned(), - ])?; - } - OutputWriterEnum::Json(json_writer) => { - writeln!(json_writer, "{}", serde_json::to_string(record).unwrap())?; - } - } - Ok(()) - } - - pub fn flush(&mut self) -> Result<(), Box> { - match &mut self.writer { - OutputWriterEnum::Csv(csv_writer) => csv_writer.flush()?, - OutputWriterEnum::Json(json_writer) => json_writer.flush()?, - } - Ok(()) - } -} - -// Append or create csv file -fn output(results: &Vec, writer: &mut OutputWriter) -> Result<(), Box> { - for data in results { - writer.write_record(data)?; - } - writer.flush()?; - Ok(()) -} diff --git a/examples/unifiedlog_parser/Cargo.toml b/examples/unifiedlog_parser/Cargo.toml index 92e3c9c..a2dcea6 100644 --- a/examples/unifiedlog_parser/Cargo.toml +++ b/examples/unifiedlog_parser/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "unifiedlog_parser" +name = "unifiedlog_iterator" version = "0.1.0" edition = "2021" @@ -10,5 +10,6 @@ simplelog = "0.12.2" csv = "1.3.0" chrono = "0.4.38" log = "0.4.22" +serde_json = "1.0.122" macos-unifiedlogs = {path = "../../"} clap = {version = "4.5.18", features = ["derive"]} \ No newline at end of file diff --git a/examples/unifiedlog_parser/src/main.rs b/examples/unifiedlog_parser/src/main.rs old mode 100755 new mode 100644 index 8ce6e9e..cf560a5 --- 
a/examples/unifiedlog_parser/src/main.rs +++ b/examples/unifiedlog_parser/src/main.rs @@ -8,37 +8,99 @@ use chrono::{SecondsFormat, TimeZone, Utc}; use log::LevelFilter; use macos_unifiedlogs::dsc::SharedCacheStrings; +use macos_unifiedlogs::filesystem::{LiveSystemProvider, LogarchiveProvider}; +use macos_unifiedlogs::iterator::UnifiedLogIterator; use macos_unifiedlogs::parser::{ - build_log, collect_shared_strings, collect_shared_strings_system, collect_strings, - collect_strings_system, collect_timesync, collect_timesync_system, parse_log, + build_log, collect_shared_strings, collect_strings, collect_timesync, parse_log, }; use macos_unifiedlogs::timesync::TimesyncBoot; +use macos_unifiedlogs::traits::FileProvider; use macos_unifiedlogs::unified_log::{LogData, UnifiedLogData}; use macos_unifiedlogs::uuidtext::UUIDText; use simplelog::{Config, SimpleLogger}; use std::error::Error; -use std::{fs, io}; -use std::fs::OpenOptions; -use std::io::Write; -use std::path::PathBuf; +use std::fmt::Display; +use std::fs; +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; -use clap::Parser; +use clap::{builder, Parser, ValueEnum}; use csv::Writer; +#[derive(Clone, Debug)] +enum RuntimeError { + FileOpen { path: String, message: String }, + FileParse { path: String, message: String }, +} + +impl Display for RuntimeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self { + RuntimeError::FileOpen { path, message } => { + f.write_str(&format!("Failed to open source file {}: {}", path, message)) + } + RuntimeError::FileParse { path, message } => { + f.write_str(&format!("Failed to parse {}: {}", path, message)) + } + } + } +} + #[derive(Parser, Debug)] #[clap(version, about, long_about = None)] struct Args { - /// Run on live system - #[clap(short, long, default_value = "false")] - live: String, + /// Mode of operation + #[clap(short, long)] + mode: Mode, - /// Path to logarchive formatted directory - #[clap(short, long, 
default_value = "")] - input: String, + /// Path to logarchive formatted directory (log-archive mode) or tracev3 file (single-file + /// mode) + #[clap(short, long)] + input: Option, /// Path to output file. Any directories must already exist - #[clap(short, long, default_value = "")] - output: String, + #[clap(short, long)] + output: Option, + + /// Output format. Options: csv, jsonl. Default is jsonl. + #[clap(short, long, default_value = Format::Jsonl)] + format: Format, + + /// Append to output file + /// If false, will overwrite output file + #[clap(short, long, default_value = "false")] + append: bool, +} + +#[derive(Parser, Debug, Clone, ValueEnum)] +enum Mode { + Live, + LogArchive, + SingleFile, +} + +#[derive(Parser, Debug, Clone, ValueEnum)] +enum Format { + Csv, + Jsonl, +} + +impl From for builder::OsStr { + fn from(value: Format) -> Self { + match value { + Format::Csv => "csv".into(), + Format::Jsonl => "jsonl".into(), + } + } +} + +impl From for &str { + fn from(value: Format) -> Self { + match value { + Format::Csv => "csv", + Format::Jsonl => "jsonl", + } + } } fn main() { @@ -48,34 +110,77 @@ fn main() { .expect("Failed to initialize simple logger"); let args = Args::parse(); - let mut writer = construct_writer(&args.output).unwrap(); - // Create headers for CSV file - output_header(&mut writer).unwrap(); - - if args.input != "" { - parse_log_archive(&args.input, &mut writer); - } else if args.live != "false" { - parse_live_system(&mut writer); + let output_format = args.format; + + let handle: Box = if let Some(path) = args.output { + Box::new( + fs::OpenOptions::new() + .append(true) + .create(true) + .open(path) + .unwrap(), + ) + } else { + Box::new(std::io::stdout()) + }; + + let mut writer = OutputWriter::new(Box::new(handle), output_format.into()).unwrap(); + + match (args.mode, args.input) { + (Mode::Live, None) => { + parse_live_system(&mut writer); + } + (Mode::LogArchive, Some(path)) => { + parse_log_archive(&path, &mut writer); + } + 
(Mode::SingleFile, Some(path)) => { + parse_single_file(&path, &mut writer); + } + _ => { + eprintln!("log-archive and single-file modes require an --input argument"); + } + } +} + +fn parse_single_file(path: &Path, writer: &mut OutputWriter) { + let results = match fs::File::open(path) + .map_err(|e| RuntimeError::FileOpen { + path: path.to_string_lossy().to_string(), + message: e.to_string(), + }) + .and_then(|mut reader| { + parse_log(&mut reader).map_err(|err| RuntimeError::FileParse { + path: path.to_string_lossy().to_string(), + message: format!("{}", err), + }) + }) + .and_then(|ref log| { + let (results, _) = build_log(log, &[], &[], &[], false); + Ok(results) + }) { + Ok(reader) => reader, + Err(e) => { + eprintln!("Failed to parse {:?}: {}", path, e); + return; + } + }; + for row in results { + if let Err(e) = writer.write_record(&row) { + eprintln!("Error writing record: {}", e); + }; } } // Parse a provided directory path. Currently, expect the path to follow macOS log collect structure -fn parse_log_archive(path: &str, writer: &mut Writer>) { - let mut archive_path = PathBuf::from(path); +fn parse_log_archive(path: &Path, writer: &mut OutputWriter) { + let provider = LogarchiveProvider::new(path); // Parse all UUID files which contain strings and other metadata - let string_results = collect_strings(&archive_path.display().to_string()).unwrap(); - - archive_path.push("dsc"); + let string_results = collect_strings(&provider).unwrap(); // Parse UUID cache files which also contain strings and other metadata - let shared_strings_results = - collect_shared_strings(&archive_path.display().to_string()).unwrap(); - archive_path.pop(); - - archive_path.push("timesync"); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); // Parse all timesync files - let timesync_data = collect_timesync(&archive_path.display().to_string()).unwrap(); - archive_path.pop(); + let timesync_data = collect_timesync(&provider).unwrap(); // Keep UUID, UUID cache, 
timesync files in memory while we parse all tracev3 files // Allows for faster lookups @@ -83,7 +188,7 @@ fn parse_log_archive(path: &str, writer: &mut Writer>) { &string_results, &shared_strings_results, ×ync_data, - path, + &provider, writer, ); @@ -91,30 +196,24 @@ fn parse_log_archive(path: &str, writer: &mut Writer>) { } // Parse a live macOS system -fn parse_live_system(writer: &mut Writer>) { - let strings = collect_strings_system().unwrap(); - let shared_strings = collect_shared_strings_system().unwrap(); - let timesync_data = collect_timesync_system().unwrap(); +fn parse_live_system(writer: &mut OutputWriter) { + let provider = LiveSystemProvider::default(); + let strings = collect_strings(&provider).unwrap(); + let shared_strings = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); - parse_trace_file( - &strings, - &shared_strings, - ×ync_data, - "/private/var/db/diagnostics", - writer - ); + parse_trace_file(&strings, &shared_strings, ×ync_data, &provider, writer); eprintln!("\nFinished parsing Unified Log data."); } // Use the provided strings, shared strings, timesync data to parse the Unified Log data at provided path. -// Currently, expect the path to follow macOS log collect structure fn parse_trace_file( string_results: &[UUIDText], shared_strings_results: &[SharedCacheStrings], timesync_data: &[TimesyncBoot], - path: &str, - writer: &mut Writer> + provider: &dyn FileProvider, + writer: &mut OutputWriter, ) { // We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries) // Some log entries have Oversize strings located in different tracev3 files. @@ -125,201 +224,31 @@ fn parse_trace_file( oversize: Vec::new(), }; - // Exclude missing data from returned output. Keep separate until we parse all oversize entries. 
- // Then at end, go through all missing data and check all parsed oversize entries again - let mut exclude_missing = true; let mut missing_data: Vec = Vec::new(); - let mut archive_path = PathBuf::from(path); - archive_path.push("Persist"); - + // Loop through all tracev3 files in Persist directory let mut log_count = 0; - if archive_path.exists() { - let paths = fs::read_dir(&archive_path).unwrap(); - - // Loop through all tracev3 files in Persist directory - for log_path in paths { - let data = log_path.unwrap(); - let full_path = data.path().display().to_string(); - eprintln!("Parsing: {}", full_path); - - let log_data = if data.path().exists() { - parse_log(&full_path).unwrap() - } else { - eprintln!("File {} no longer on disk", full_path); - continue; - }; - - // Get all constructed logs and any log data that failed to get constrcuted (exclude_missing = true) - let (results, missing_logs) = build_log( - &log_data, - string_results, - shared_strings_results, - timesync_data, - exclude_missing, - ); - // Track Oversize entries - oversize_strings - .oversize - .append(&mut log_data.oversize.to_owned()); - - // Track missing logs - missing_data.push(missing_logs); - log_count += results.len(); - output(&results, writer).unwrap(); - } - } - - archive_path.pop(); - archive_path.push("Special"); - - if archive_path.exists() { - let paths = fs::read_dir(&archive_path).unwrap(); - - // Loop through all tracev3 files in Special directory - for log_path in paths { - let data = log_path.unwrap(); - let full_path = data.path().display().to_string(); - eprintln!("Parsing: {}", full_path); - - let mut log_data = if data.path().exists() { - parse_log(&full_path).unwrap() - } else { - eprintln!("File {} no longer on disk", full_path); - continue; - }; - - // Append our old Oversize entries in case these logs point to other Oversize entries the previous tracev3 files - log_data.oversize.append(&mut oversize_strings.oversize); - let (results, missing_logs) = build_log( - 
&log_data, - string_results, - shared_strings_results, - timesync_data, - exclude_missing, - ); - // Track Oversize entries - oversize_strings.oversize = log_data.oversize; - // Track missing logs - missing_data.push(missing_logs); - log_count += results.len(); - - output(&results, writer).unwrap(); - } - } - - archive_path.pop(); - archive_path.push("Signpost"); - - if archive_path.exists() { - let paths = fs::read_dir(&archive_path).unwrap(); - - // Loop through all tracev3 files in Signpost directory - for log_path in paths { - let data = log_path.unwrap(); - let full_path = data.path().display().to_string(); - eprintln!("Parsing: {}", full_path); - - let mut log_data = if data.path().exists() { - parse_log(&full_path).unwrap() - } else { - eprintln!("File {} no longer on disk", full_path); - continue; - }; - - // Append our old Oversize entries in case these logs point to other Oversize entries the previous tracev3 files - log_data.oversize.append(&mut oversize_strings.oversize); - let (results, missing_logs) = build_log( - &log_data, - string_results, - shared_strings_results, - timesync_data, - exclude_missing, - ); - - // Signposts have not been seen with Oversize entries, but we track them in case a log entry refers to them - oversize_strings.oversize = log_data.oversize; - // Track missing logs - missing_data.push(missing_logs); - log_count += results.len(); - - output(&results, writer).unwrap(); - } - } - archive_path.pop(); - archive_path.push("HighVolume"); - - if archive_path.exists() { - let paths = fs::read_dir(&archive_path).unwrap(); - - // Loop through all tracev3 files in HighVolume directory - for log_path in paths { - let data = log_path.unwrap(); - let full_path = data.path().display().to_string(); - eprintln!("Parsing: {}", full_path); - - let mut log_data = if data.path().exists() { - parse_log(&full_path).unwrap() - } else { - eprintln!("File {} no longer on disk", full_path); - continue; - }; - - // Append our old Oversize entries in case 
these logs point to other Oversize entries the previous tracev3 files - log_data.oversize.append(&mut oversize_strings.oversize); - let (results, missing_logs) = build_log( - &log_data, - string_results, - shared_strings_results, - timesync_data, - exclude_missing, - ); - - // Track Oversize entries - oversize_strings.oversize = log_data.oversize; - // Track missing logs - missing_data.push(missing_logs); - log_count += results.len(); - - output(&results, writer).unwrap(); - } - } - archive_path.pop(); - - archive_path.push("logdata.LiveData.tracev3"); - - // Check if livedata exists. We only have it if 'log collect' was used - if archive_path.exists() { - eprintln!("Parsing: logdata.LiveData.tracev3"); - let mut log_data = parse_log(&archive_path.display().to_string()).unwrap(); - log_data.oversize.append(&mut oversize_strings.oversize); - let (results, missing_logs) = build_log( - &log_data, + for mut source in provider.tracev3_files() { + log_count += iterate_chunks( + source.reader(), + &mut missing_data, string_results, shared_strings_results, timesync_data, - exclude_missing, + writer, + &mut oversize_strings, ); - // Track missing data - missing_data.push(missing_logs); - log_count += results.len(); - - output(&results, writer).unwrap(); - // Track oversize entries - oversize_strings.oversize = log_data.oversize; - archive_path.pop(); + println!("count: {}", log_count); } - - exclude_missing = false; + let include_missing = false; println!("Oversize cache size: {}", oversize_strings.oversize.len()); - println!("Logs with missing oversize strings: {}", missing_data.len()); + println!("Logs with missing Oversize strings: {}", missing_data.len()); println!("Checking Oversize cache one more time..."); // Since we have all Oversize entries now. 
Go through any log entries that we were not able to build before for mut leftover_data in missing_data { // Add all of our previous oversize data to logs for lookups - leftover_data - .oversize = oversize_strings.oversize.clone(); + leftover_data.oversize = oversize_strings.oversize.clone(); // Exclude_missing = false // If we fail to find any missing data its probably due to the logs rolling @@ -329,7 +258,7 @@ fn parse_trace_file( string_results, shared_strings_results, timesync_data, - exclude_missing, + include_missing, ); log_count += results.len(); @@ -338,66 +267,150 @@ fn parse_trace_file( eprintln!("Parsed {} log entries", log_count); } -fn construct_writer(output_path: &str) -> Result>, Box> { - let writer = if output_path != "" { - Box::new(OpenOptions::new() - .append(true) - .create(true) - .open(output_path)?) as Box - } else { - Box::new(io::stdout()) as Box +fn iterate_chunks( + mut reader: impl Read, + missing: &mut Vec, + strings_data: &[UUIDText], + shared_strings: &[SharedCacheStrings], + timesync_data: &[TimesyncBoot], + writer: &mut OutputWriter, + oversize_strings: &mut UnifiedLogData, +) -> usize { + let mut buf = Vec::new(); + + if let Err(e) = reader.read_to_end(&mut buf) { + log::error!("Failed to read tracev3 file: {:?}", e); + return 0; + } + + let log_iterator = UnifiedLogIterator { + data: buf, + header: Vec::new(), }; - Ok(Writer::from_writer(writer)) + + // Exclude missing data from returned output. Keep separate until we parse all oversize entries. 
+ // Then after parsing all logs, go through all missing data and check all parsed oversize entries again + let exclude_missing = true; + + let mut count = 0; + for mut chunk in log_iterator { + chunk.oversize.append(&mut oversize_strings.oversize); + let (results, missing_logs) = build_log( + &chunk, + strings_data, + shared_strings, + timesync_data, + exclude_missing, + ); + count += results.len(); + oversize_strings.oversize = chunk.oversize; + output(&results, writer).unwrap(); + if missing_logs.catalog_data.is_empty() + && missing_logs.header.is_empty() + && missing_logs.oversize.is_empty() + { + continue; + } + // Track possible missing log data due to oversize strings being in another file + missing.push(missing_logs); + } + + count } -// Create csv file and create headers -fn output_header(writer: &mut Writer>) -> Result<(), Box> { - writer.write_record(&[ - "Timestamp", - "Event Type", - "Log Type", - "Subsystem", - "Thread ID", - "PID", - "EUID", - "Library", - "Library UUID", - "Activity ID", - "Category", - "Process", - "Process UUID", - "Message", - "Raw Message", - "Boot UUID", - "System Timezone Name", - ])?; - writer.flush()?; - Ok(()) +pub struct OutputWriter { + writer: OutputWriterEnum, +} + +enum OutputWriterEnum { + Csv(Writer>), + Json(Box), +} + +impl OutputWriter { + pub fn new(writer: Box, output_format: &str) -> Result> { + let writer_enum = match output_format { + "csv" => { + let mut csv_writer = Writer::from_writer(writer); + // Write CSV headers + csv_writer.write_record([ + "Timestamp", + "Event Type", + "Log Type", + "Subsystem", + "Thread ID", + "PID", + "EUID", + "Library", + "Library UUID", + "Activity ID", + "Category", + "Process", + "Process UUID", + "Message", + "Raw Message", + "Boot UUID", + "System Timezone Name", + ])?; + csv_writer.flush()?; + OutputWriterEnum::Csv(csv_writer) + } + "jsonl" => OutputWriterEnum::Json(writer), + _ => { + eprintln!("Unsupported output format: {}", output_format); + std::process::exit(1); + } 
+ }; + + Ok(OutputWriter { + writer: writer_enum, + }) + } + + pub fn write_record(&mut self, record: &LogData) -> Result<(), Box> { + match &mut self.writer { + OutputWriterEnum::Csv(csv_writer) => { + let date_time = Utc.timestamp_nanos(record.time as i64); + csv_writer.write_record(&[ + date_time.to_rfc3339_opts(SecondsFormat::Millis, true), + record.event_type.to_owned(), + record.log_type.to_owned(), + record.subsystem.to_owned(), + record.thread_id.to_string(), + record.pid.to_string(), + record.euid.to_string(), + record.library.to_owned(), + record.library_uuid.to_owned(), + record.activity_id.to_string(), + record.category.to_owned(), + record.process.to_owned(), + record.process_uuid.to_owned(), + record.message.to_owned(), + record.raw_message.to_owned(), + record.boot_uuid.to_owned(), + record.timezone_name.to_owned(), + ])?; + } + OutputWriterEnum::Json(json_writer) => { + writeln!(json_writer, "{}", serde_json::to_string(record).unwrap())?; + } + } + Ok(()) + } + + pub fn flush(&mut self) -> Result<(), Box> { + match &mut self.writer { + OutputWriterEnum::Csv(csv_writer) => csv_writer.flush()?, + OutputWriterEnum::Json(json_writer) => json_writer.flush()?, + } + Ok(()) + } } // Append or create csv file -fn output(results: &Vec, writer: &mut Writer>) -> Result<(), Box> { +fn output(results: &Vec, writer: &mut OutputWriter) -> Result<(), Box> { for data in results { - let date_time = Utc.timestamp_nanos(data.time as i64); - writer.write_record(&[ - date_time.to_rfc3339_opts(SecondsFormat::Millis, true), - data.event_type.to_owned(), - data.log_type.to_owned(), - data.subsystem.to_owned(), - data.thread_id.to_string(), - data.pid.to_string(), - data.euid.to_string(), - data.library.to_owned(), - data.library_uuid.to_owned(), - data.activity_id.to_string(), - data.category.to_owned(), - data.process.to_owned(), - data.process_uuid.to_owned(), - data.message.to_owned(), - data.raw_message.to_owned(), - data.boot_uuid.to_owned(), - 
data.timezone_name.to_owned(), - ])?; + writer.write_record(data)?; } writer.flush()?; Ok(()) diff --git a/examples/unifiedlog_parser_json/Cargo.toml b/examples/unifiedlog_parser_json/Cargo.toml deleted file mode 100644 index a309a52..0000000 --- a/examples/unifiedlog_parser_json/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] -name = "unifiedlog_parser_json" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -simplelog = "0.12.2" -serde_json = "1.0.122" -log = "0.4.22" -macos-unifiedlogs = {path = "../../"} -clap = {version = "4.5.15", features = ["derive"]} \ No newline at end of file diff --git a/examples/unifiedlog_parser_json/src/main.rs b/examples/unifiedlog_parser_json/src/main.rs deleted file mode 100644 index d5fb1b3..0000000 --- a/examples/unifiedlog_parser_json/src/main.rs +++ /dev/null @@ -1,362 +0,0 @@ -// Copyright 2022 Mandiant, Inc. All Rights Reserved -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the License -// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and limitations under the License. 
- -use log::LevelFilter; -use macos_unifiedlogs::dsc::SharedCacheStrings; -use macos_unifiedlogs::parser::{ - build_log, collect_shared_strings, collect_shared_strings_system, collect_strings, - collect_strings_system, collect_timesync, collect_timesync_system, parse_log, -}; -use macos_unifiedlogs::timesync::TimesyncBoot; -use macos_unifiedlogs::unified_log::{LogData, UnifiedLogData}; -use macos_unifiedlogs::uuidtext::UUIDText; -use simplelog::{Config, SimpleLogger}; -use std::error::Error; -use std::fs; -use std::fs::OpenOptions; -use std::io::Write; -use std::path::PathBuf; - -use clap::Parser; - -#[derive(Parser, Debug)] -#[clap(version, about, long_about = None)] -struct Args { - /// Run on live system - #[clap(short, long, default_value = "false")] - live: String, - - /// Path to logarchive formatted directory - #[clap(short, long, default_value = "")] - input: String, - - /// Path to output directory. Any directories must already exist - #[clap(short, long, default_value = ".")] - output: String, -} - -fn main() { - println!("Starting Unified Log parser..."); - // Set logging level to warning - SimpleLogger::init(LevelFilter::Warn, Config::default()) - .expect("Failed to initialize simple logger"); - - let args = Args::parse(); - - if args.input != "" && args.output != "" { - parse_log_archive(&args.input); - } else if args.live != "false" { - parse_live_system(); - } -} - -// Parse a provided directory path. 
Currently expect the path to follow macOS log collect structure -fn parse_log_archive(path: &str) { - let mut archive_path = PathBuf::from(path); - - // Parse all UUID files which contain strings and other metadata - let string_results = collect_strings(&archive_path.display().to_string()).unwrap(); - - archive_path.push("dsc"); - // Parse UUID cache files which also contain strings and other metadata - let shared_strings_results = - collect_shared_strings(&archive_path.display().to_string()).unwrap(); - archive_path.pop(); - - archive_path.push("timesync"); - // Parse all timesync files - let timesync_data = collect_timesync(&archive_path.display().to_string()).unwrap(); - archive_path.pop(); - - // Keep UUID, UUID cache, timesync files in memory while we parse all tracev3 files - // Allows for faster lookups - parse_trace_file( - &string_results, - &shared_strings_results, - ×ync_data, - path, - ); - - println!("\nFinished parsing Unified Log data. Saved results to json files"); -} - -// Parse a live macOS system -fn parse_live_system() { - let strings = collect_strings_system().unwrap(); - let shared_strings = collect_shared_strings_system().unwrap(); - let timesync_data = collect_timesync_system().unwrap(); - - parse_trace_file( - &strings, - &shared_strings, - ×ync_data, - "/private/var/db/diagnostics", - ); - - println!("\nFinished parsing Unified Log data. Saved results to json files"); -} - -// Use the provided strings, shared strings, timesync data to parse the Unified Log data at provided path. -// Currently expect the path to follow macOS log collect structure -fn parse_trace_file( - string_results: &[UUIDText], - shared_strings_results: &[SharedCacheStrings], - timesync_data: &[TimesyncBoot], - path: &str, -) { - // We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries) - // Some log entries have Oversize strings located in different tracev3 files. - // This is very rare. 
Seen in ~20 log entries out of ~700,000. Seen in ~700 out of ~18 million - let mut oversize_strings = UnifiedLogData { - header: Vec::new(), - catalog_data: Vec::new(), - oversize: Vec::new(), - }; - - // Exclude missing data from returned output. Keep separate until we parse all oversize entries. - // Then at end, go through all missing data and check all parsed oversize entries again - let mut exclude_missing = true; - let mut missing_data: Vec = Vec::new(); - - let mut archive_path = PathBuf::from(path); - archive_path.push("Persist"); - - let mut log_count = 0; - if archive_path.exists() { - let paths = fs::read_dir(&archive_path).unwrap(); - - // Loop through all tracev3 files in Persist directory - for log_path in paths { - let data = log_path.unwrap(); - let full_path = data.path().display().to_string(); - println!("Parsing: {}", full_path); - - let log_data = if data.path().exists() { - parse_log(&full_path).unwrap() - } else { - println!("File {} no longer on disk", full_path); - continue; - }; - - // Get all constructed logs and any log data that failed to get constrcuted (exclude_missing = true) - let (results, missing_logs) = build_log( - &log_data, - string_results, - shared_strings_results, - timesync_data, - exclude_missing, - ); - // Take all Oversize entries and add to tracker - oversize_strings - .oversize - .append(&mut log_data.oversize.clone()); - - // Add log entries that failed to find strings to missing tracker - // We will try parsing them again at the end once we have all Oversize entries - missing_data.push(missing_logs); - log_count += results.len(); - output( - &results, - &format!("persist_{}", data.file_name().to_str().unwrap()), - ) - .unwrap(); - } - } - - archive_path.pop(); - archive_path.push("Special"); - - if archive_path.exists() { - let paths = fs::read_dir(&archive_path).unwrap(); - - // Loop through all tracev3 files in Special directory - for log_path in paths { - let data = log_path.unwrap(); - let full_path = 
data.path().display().to_string(); - println!("Parsing: {}", full_path); - - let mut log_data = if data.path().exists() { - parse_log(&full_path).unwrap() - } else { - println!("File {} no longer on disk", full_path); - continue; - }; - // Append all previously parsed Oversize entries from tracker to current parsed tracev3 file - log_data.oversize.append(&mut oversize_strings.oversize); - - let (results, missing_logs) = build_log( - &log_data, - string_results, - shared_strings_results, - timesync_data, - exclude_missing, - ); - // Take all Oversize entries and add to tracker - oversize_strings.oversize = log_data.oversize; - - // Add log entries that failed to find strings to missing tracker - // We will try parsing them again at the end once we have all Oversize entries - missing_data.push(missing_logs); - log_count += results.len(); - - output( - &results, - &format!("special_{}", data.file_name().to_str().unwrap()), - ) - .unwrap(); - } - } - - archive_path.pop(); - archive_path.push("Signpost"); - - if archive_path.exists() { - let paths = fs::read_dir(&archive_path).unwrap(); - - // Loop through all tracev3 files in Signpost directory - for log_path in paths { - let data = log_path.unwrap(); - let full_path = data.path().display().to_string(); - println!("Parsing: {}", full_path); - - let mut log_data = if data.path().exists() { - parse_log(&full_path).unwrap() - } else { - println!("File {} no longer on disk", full_path); - continue; - }; - - // Append our old Oversize entries in case these logs point to other Oversize entries the previous tracev3 files - log_data.oversize.append(&mut oversize_strings.oversize); - let (results, missing_logs) = build_log( - &log_data, - string_results, - shared_strings_results, - timesync_data, - exclude_missing, - ); - - // Signposts have not been seen with Oversize entries, but we track them in case a log entry refers to them - oversize_strings.oversize = log_data.oversize; - // Track missing logs - 
missing_data.push(missing_logs); - log_count += results.len(); - - output( - &results, - &format!("signpost_{}", data.file_name().to_str().unwrap()), - ) - .unwrap(); - } - } - archive_path.pop(); - archive_path.push("HighVolume"); - - if archive_path.exists() { - let paths = fs::read_dir(&archive_path).unwrap(); - - // Loop through all tracev3 files in HighVolume directory - for log_path in paths { - let data = log_path.unwrap(); - let full_path = data.path().display().to_string(); - println!("Parsing: {}", full_path); - - let mut log_data = if data.path().exists() { - parse_log(&full_path).unwrap() - } else { - println!("File {} no longer on disk", full_path); - continue; - }; - - // Append our old Oversize entries in case these logs point to other Oversize entries the previous tracev3 files - log_data.oversize.append(&mut oversize_strings.oversize); - let (results, missing_logs) = build_log( - &log_data, - string_results, - shared_strings_results, - timesync_data, - exclude_missing, - ); - - // Track Oversize entries - oversize_strings.oversize = log_data.oversize; - // Track missing logs - missing_data.push(missing_logs); - log_count += results.len(); - - output( - &results, - &format!("highvolume_{}", data.file_name().to_str().unwrap()), - ) - .unwrap(); - } - } - archive_path.pop(); - - archive_path.push("logdata.LiveData.tracev3"); - - // Check if livedata exists. 
We only have it if 'log collect' was used - if archive_path.exists() { - println!("Parsing: logdata.LiveData.tracev3"); - let mut log_data = parse_log(&archive_path.display().to_string()).unwrap(); - log_data.oversize.append(&mut oversize_strings.oversize); - let (results, missing_logs) = build_log( - &log_data, - string_results, - shared_strings_results, - timesync_data, - exclude_missing, - ); - missing_data.push(missing_logs); - log_count += results.len(); - - output(&results, "liveData").unwrap(); - oversize_strings.oversize = log_data.oversize; - archive_path.pop(); - } - - // Include all log entries now, if any logs are missing data its because the data has rolled - exclude_missing = false; - for mut leftover_data in missing_data { - // Add all of our previous oversize data to logs for lookups - leftover_data - .oversize - .append(&mut oversize_strings.oversize.clone()); - - // Exclude_missing = false - // If we fail to find any missing data its probably due to the logs rolling - // Ex: tracev3A rolls, tracev3B references Oversize entry in tracev3A will trigger missing data since tracev3A is gone - let (results, _) = build_log( - &leftover_data, - string_results, - shared_strings_results, - timesync_data, - exclude_missing, - ); - log_count += results.len(); - - output(&results, "dataFoundInMultipleLogFiles").unwrap(); - } - println!("Parsed {} log entries", log_count); -} - -// Create JSON files in JSONL format -fn output(results: &Vec, output_name: &str) -> Result<(), Box> { - let args = Args::parse(); - let mut json_file = OpenOptions::new() - .append(true) - .create(true) - .open(format!("{}/{}.json", args.output, output_name))?; - - for log_data in results.iter() { - let serde_data = serde_json::to_string(log_data)?; - json_file.write_all(serde_data.as_bytes())?; - json_file.write_all(b"\n")?; // Add a newline after each JSON entry - } - - Ok(()) -} diff --git a/src/catalog.rs b/src/catalog.rs index 314f540..9bd8196 100755 --- a/src/catalog.rs +++ 
b/src/catalog.rs @@ -634,7 +634,7 @@ mod tests { assert_eq!(subsystems.subsystem_offset, 0); assert_eq!(subsystems.category_offset, 19); - let (_, subsystems) = CatalogChunk::parse_process_info_subystem(&data).unwrap(); + let (_, subsystems) = CatalogChunk::parse_process_info_subystem(data).unwrap(); assert_eq!(subsystems.identifer, 78); assert_eq!(subsystems.subsystem_offset, 0); assert_eq!(subsystems.category_offset, 47); @@ -667,7 +667,7 @@ mod tests { assert_eq!(subchunk.number_string_offsets, 3); assert_eq!(subchunk.string_offsets, [0, 19, 47]); - let (data, subchunk) = CatalogChunk::parse_catalog_subchunk(&data).unwrap(); + let (data, subchunk) = CatalogChunk::parse_catalog_subchunk(data).unwrap(); assert_eq!(subchunk.start, 820274802743600); assert_eq!(subchunk.end, 820313668399715); assert_eq!(subchunk.uncompressed_size, 61552); @@ -677,7 +677,7 @@ mod tests { assert_eq!(subchunk.number_string_offsets, 3); assert_eq!(subchunk.string_offsets, [0, 19, 47]); - let (_, subchunk) = CatalogChunk::parse_catalog_subchunk(&data).unwrap(); + let (_, subchunk) = CatalogChunk::parse_catalog_subchunk(data).unwrap(); assert_eq!(subchunk.start, 820313685231257); assert_eq!(subchunk.end, 820374429029888); assert_eq!(subchunk.uncompressed_size, 65536); diff --git a/src/chunks/firehose/activity.rs b/src/chunks/firehose/activity.rs index 2522b7a..f71d11b 100755 --- a/src/chunks/firehose/activity.rs +++ b/src/chunks/firehose/activity.rs @@ -223,6 +223,7 @@ impl FirehoseActivity { #[cfg(test)] mod tests { use super::FirehoseActivity; + use crate::filesystem::LogarchiveProvider; use crate::parser::{collect_shared_strings, collect_strings, parse_log}; use std::path::PathBuf; @@ -260,15 +261,16 @@ mod tests { fn test_get_firehose_activity_big_sur() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); + let provider = 
LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); test_path.push("dsc"); - let shared_strings_results = - collect_shared_strings(&test_path.display().to_string()).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); test_path.pop(); test_path.push("Persist/0000000000000004.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let activity_type = 0x2; @@ -280,7 +282,7 @@ mod tests { &firehose.firehose_activity, &string_results, &shared_strings_results, - firehose.format_string_location as u64, + u64::from(firehose.format_string_location), &preamble.first_number_proc_id, &preamble.second_number_proc_id, &catalog_data.catalog, diff --git a/src/chunks/firehose/firehose_log.rs b/src/chunks/firehose/firehose_log.rs index b33462c..d38100e 100755 --- a/src/chunks/firehose/firehose_log.rs +++ b/src/chunks/firehose/firehose_log.rs @@ -2747,8 +2747,8 @@ mod tests { assert_eq!(firehose.base_continous_time, 4197166166425); let mut firehouse_result_count = firehose.public_data.len(); - while data.len() != 0 { - let (test_data, firehose) = FirehosePreamble::parse_firehose_preamble(&data).unwrap(); + while !data.is_empty() { + let (test_data, firehose) = FirehosePreamble::parse_firehose_preamble(data).unwrap(); data = test_data; firehouse_result_count += firehose.public_data.len(); } @@ -2786,10 +2786,7 @@ mod tests { assert_eq!(firehose.firehose_non_activity.private_strings_offset, 0); assert_eq!(firehose.firehose_non_activity.private_strings_size, 0); assert_eq!(firehose.firehose_non_activity.unknown_message_string_ref, 0); - assert_eq!( - firehose.firehose_non_activity.firehose_formatters.main_exe, - false - ); + assert!(!firehose.firehose_non_activity.firehose_formatters.main_exe); assert_eq!(firehose.firehose_non_activity.subsystem_value, 14); 
assert_eq!(firehose.firehose_non_activity.ttl_value, 0); @@ -2979,19 +2976,17 @@ mod tests { 14968 ); - assert_eq!( + assert!( firehose.public_data[0] .firehose_non_activity .firehose_formatters - .main_exe, - true + .main_exe ); - assert_eq!( - firehose.public_data[0] + assert!( + !firehose.public_data[0] .firehose_non_activity .firehose_formatters - .shared_cache, - false + .shared_cache ); assert_eq!( firehose.public_data[0] @@ -3007,12 +3002,11 @@ mod tests { .large_shared_cache, 0 ); - assert_eq!( - firehose.public_data[0] + assert!( + !firehose.public_data[0] .firehose_non_activity .firehose_formatters - .absolute, - false + .absolute ); assert_eq!( firehose.public_data[0] @@ -3021,19 +3015,17 @@ mod tests { .uuid_relative, String::new() ); - assert_eq!( - firehose.public_data[0] + assert!( + !firehose.public_data[0] .firehose_non_activity .firehose_formatters - .main_plugin, - false + .main_plugin ); - assert_eq!( - firehose.public_data[0] + assert!( + !firehose.public_data[0] .firehose_non_activity .firehose_formatters - .pc_style, - false + .pc_style ); assert_eq!( firehose.public_data[0] @@ -3396,8 +3388,8 @@ mod tests { assert_eq!(firehose.base_continous_time, 0); let mut firehouse_result_count = firehose.public_data.len(); - while data.len() != 0 { - let (test_data, firehose) = FirehosePreamble::parse_firehose_preamble(&data).unwrap(); + while !data.is_empty() { + let (test_data, firehose) = FirehosePreamble::parse_firehose_preamble(data).unwrap(); data = test_data; firehouse_result_count += firehose.public_data.len(); } diff --git a/src/chunks/firehose/flags.rs b/src/chunks/firehose/flags.rs index e128950..db7945e 100644 --- a/src/chunks/firehose/flags.rs +++ b/src/chunks/firehose/flags.rs @@ -154,7 +154,7 @@ mod tests { let test_flags = 514; let (_, results) = FirehoseFormatters::firehose_formatter_flags(&test_data, &test_flags).unwrap(); - assert_eq!(results.main_exe, true); + assert!(results.main_exe); } #[test] @@ -167,7 +167,7 @@ mod tests { let 
test_flags = 516; let (_, results) = FirehoseFormatters::firehose_formatter_flags(&test_data, &test_flags).unwrap(); - assert_eq!(results.shared_cache, true); + assert!(results.shared_cache); } #[test] @@ -183,7 +183,7 @@ mod tests { let test_flags = 8; let (_, results) = FirehoseFormatters::firehose_formatter_flags(&test_data, &test_flags).unwrap(); - assert_eq!(results.absolute, true); + assert!(results.absolute); assert_eq!(results.main_exe_alt_index, 65408); } diff --git a/src/chunks/firehose/message.rs b/src/chunks/firehose/message.rs index 623ad2f..7b84355 100644 --- a/src/chunks/firehose/message.rs +++ b/src/chunks/firehose/message.rs @@ -622,6 +622,7 @@ mod tests { use crate::{ chunks::firehose::message::MessageData, + filesystem::LogarchiveProvider, parser::{collect_shared_strings, collect_strings, parse_log}, }; @@ -629,15 +630,16 @@ mod tests { fn test_extract_shared_strings_nonactivity() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let strings = collect_strings(&test_path.display().to_string()).unwrap(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let strings = collect_strings(&provider).unwrap(); test_path.push("dsc"); - let shared_strings_results = - collect_shared_strings(&test_path.display().to_string()).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); test_path.pop(); test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let test_offset = 1331408102; let test_first_proc_id = 45; @@ -669,15 +671,16 @@ mod tests { fn test_extract_shared_strings_nonactivity_bad_offset() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let strings = 
collect_strings(&test_path.display().to_string()).unwrap(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let strings = collect_strings(&provider).unwrap(); test_path.push("dsc"); - let shared_strings_results = - collect_shared_strings(&test_path.display().to_string()).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); test_path.pop(); test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let bad_offset = 7; let test_first_proc_id = 45; @@ -706,15 +709,14 @@ mod tests { fn test_extract_shared_strings_nonactivity_dynamic() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let strings = collect_strings(&test_path.display().to_string()).unwrap(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let strings = collect_strings(&provider).unwrap(); - test_path.push("dsc"); - let shared_strings_results = - collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let test_offset = 2420246585; let test_first_proc_id = 32; @@ -746,10 +748,12 @@ mod tests { fn test_extract_format_strings_nonactivity() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let strings = collect_strings(&test_path.display().to_string()).unwrap(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let strings = collect_strings(&provider).unwrap(); 
test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let test_offset = 14960; let test_first_proc_id = 45; @@ -777,10 +781,13 @@ mod tests { fn test_extract_format_strings_nonactivity_bad_offset() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let strings = collect_strings(&test_path.display().to_string()).unwrap(); + + let provider = LogarchiveProvider::new(test_path.as_path()); + let strings = collect_strings(&provider).unwrap(); test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let bad_offset = 1; let test_first_proc_id = 45; @@ -807,10 +814,12 @@ mod tests { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let strings = collect_strings(&test_path.display().to_string()).unwrap(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let strings = collect_strings(&provider).unwrap(); test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let test_offset = 2147519968; let test_first_proc_id = 38; @@ -839,10 +848,12 @@ mod tests { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let strings = collect_strings(&test_path.display().to_string()).unwrap(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let strings = collect_strings(&provider).unwrap(); 
test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let bad_offset = 55; let test_first_proc_id = 38; @@ -868,10 +879,14 @@ mod tests { fn test_extract_absolute_strings_nonactivity() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let strings = collect_strings(&test_path.display().to_string()).unwrap(); + + let provider = LogarchiveProvider::new(test_path.as_path()); + let strings = collect_strings(&provider).unwrap(); test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let test_offset = 396912; let test_absolute_offset = 280925241119206; @@ -898,10 +913,13 @@ mod tests { fn test_extract_absolute_strings_nonactivity_bad_offset() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let strings = collect_strings(&test_path.display().to_string()).unwrap(); + + let provider = LogarchiveProvider::new(test_path.as_path()); + let strings = collect_strings(&provider).unwrap(); test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let test_offset = 396912; let bad_offset = 12; @@ -929,10 +947,12 @@ mod tests { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let strings = collect_strings(&test_path.display().to_string()).unwrap(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let strings = 
collect_strings(&provider).unwrap(); test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let test_offset = 102; let test_absolute_offset = 102; @@ -963,10 +983,12 @@ mod tests { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let strings = collect_strings(&test_path.display().to_string()).unwrap(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let strings = collect_strings(&provider).unwrap(); test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let bad_offset = 111; let test_absolute_offset = 102; @@ -999,9 +1021,13 @@ mod tests { fn test_extract_alt_uuid_strings() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let strings = collect_strings(&test_path.display().to_string()).unwrap(); + + let provider = LogarchiveProvider::new(test_path.as_path()); + let strings = collect_strings(&provider).unwrap(); + test_path.push("Persist/0000000000000005.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let first_proc_id = 105; let second_proc_id = 240; @@ -1034,7 +1060,8 @@ mod tests { test_path.push("tests/test_data/system_logs_big_sur.logarchive"); test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let test_first_proc_id = 136; let test_second_proc_id = 
342; @@ -1052,11 +1079,12 @@ mod tests { fn test_get_uuid_image_path() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let strings = collect_strings(&test_path.display().to_string()).unwrap(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let strings = collect_strings(&provider).unwrap(); let test_uuid = "B736DF1625F538248E9527A8CEC4991E"; - let (_, image_path) = MessageData::get_uuid_image_path(&test_uuid, &strings).unwrap(); + let (_, image_path) = MessageData::get_uuid_image_path(test_uuid, &strings).unwrap(); assert_eq!(image_path, "/usr/libexec/opendirectoryd"); } diff --git a/src/chunks/firehose/nonactivity.rs b/src/chunks/firehose/nonactivity.rs index 4108f08..2402888 100755 --- a/src/chunks/firehose/nonactivity.rs +++ b/src/chunks/firehose/nonactivity.rs @@ -233,7 +233,10 @@ impl FirehoseNonActivity { #[cfg(test)] mod tests { use super::FirehoseNonActivity; - use crate::parser::{collect_shared_strings, collect_strings, parse_log}; + use crate::{ + filesystem::LogarchiveProvider, + parser::{collect_shared_strings, collect_strings, parse_log}, + }; use std::path::PathBuf; #[test] @@ -260,8 +263,8 @@ mod tests { nonactivity_results.firehose_formatters.uuid_relative, String::from("") ); - assert_eq!(nonactivity_results.firehose_formatters.main_exe, false); - assert_eq!(nonactivity_results.firehose_formatters.absolute, false); + assert!(!nonactivity_results.firehose_formatters.main_exe); + assert!(!nonactivity_results.firehose_formatters.absolute); assert_eq!(nonactivity_results.subsystem_value, 41); assert_eq!(nonactivity_results.ttl_value, 0); assert_eq!(nonactivity_results.data_ref_value, 0); @@ -277,15 +280,15 @@ mod tests { fn test_get_firehose_non_activity_big_sur() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let string_results = 
collect_strings(&test_path.display().to_string()).unwrap(); - test_path.push("dsc"); - let shared_strings_results = - collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + + let shared_strings_results = collect_shared_strings(&provider).unwrap(); test_path.push("Persist/0000000000000004.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(&test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let activity_type = 0x4; @@ -298,7 +301,7 @@ mod tests { &firehose.firehose_non_activity, &string_results, &shared_strings_results, - firehose.format_string_location as u64, + u64::from(firehose.format_string_location), &preamble.first_number_proc_id, &preamble.second_number_proc_id, &catalog_data.catalog, diff --git a/src/chunks/firehose/signpost.rs b/src/chunks/firehose/signpost.rs index 403ed2a..b6ba466 100755 --- a/src/chunks/firehose/signpost.rs +++ b/src/chunks/firehose/signpost.rs @@ -251,6 +251,7 @@ impl FirehoseSignpost { #[cfg(test)] mod tests { use crate::chunks::firehose::signpost::FirehoseSignpost; + use crate::filesystem::LogarchiveProvider; use crate::parser::{collect_shared_strings, collect_strings, parse_log}; use std::path::PathBuf; @@ -270,14 +271,14 @@ mod tests { assert_eq!(results.ttl_value, 0); assert_eq!(results.data_ref_value, 0); - assert_eq!(results.firehose_formatters.main_exe, true); - assert_eq!(results.firehose_formatters.shared_cache, false); + assert!(results.firehose_formatters.main_exe); + assert!(!results.firehose_formatters.shared_cache); assert_eq!(results.firehose_formatters.has_large_offset, 0); assert_eq!(results.firehose_formatters.large_shared_cache, 0); - assert_eq!(results.firehose_formatters.absolute, false); + assert!(!results.firehose_formatters.absolute); 
assert_eq!(results.firehose_formatters.uuid_relative, String::new()); - assert_eq!(results.firehose_formatters.main_plugin, false); - assert_eq!(results.firehose_formatters.pc_style, false); + assert!(!results.firehose_formatters.main_plugin); + assert!(!results.firehose_formatters.pc_style); assert_eq!(results.firehose_formatters.main_exe_alt_index, 0); } @@ -285,15 +286,15 @@ mod tests { fn test_get_firehose_signpost_big_sur() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); + let provider = LogarchiveProvider::new(test_path.as_path()); - test_path.push("dsc"); - let shared_strings_results = - collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); test_path.push("Signpost/0000000000000001.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + + let handle = std::fs::File::open(&test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let activity_type = 0x6; @@ -305,7 +306,7 @@ mod tests { &firehose.firehose_signpost, &string_results, &shared_strings_results, - firehose.format_string_location as u64, + u64::from(firehose.format_string_location), &preamble.first_number_proc_id, &preamble.second_number_proc_id, &catalog_data.catalog, diff --git a/src/chunks/firehose/trace.rs b/src/chunks/firehose/trace.rs index 0fef533..5bc5789 100755 --- a/src/chunks/firehose/trace.rs +++ b/src/chunks/firehose/trace.rs @@ -158,6 +158,7 @@ mod tests { use crate::{ chunks::firehose::trace::FirehoseTrace, + filesystem::LogarchiveProvider, parser::{collect_strings, parse_log}, }; @@ -204,10 +205,14 @@ mod tests { fn test_get_firehose_trace_strings() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); 
test_path.push("tests/test_data/system_logs_high_sierra.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); + let provider = LogarchiveProvider::new(test_path.as_path()); + + let string_results = collect_strings(&provider).unwrap(); test_path.push("logdata.LiveData.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + + let log_data = parse_log(handle).unwrap(); let activity_type = 0x3; @@ -217,7 +222,7 @@ mod tests { if firehose.unknown_log_activity_type == activity_type { let (_, message_data) = FirehoseTrace::get_firehose_trace_strings( &string_results, - firehose.format_string_location as u64, + u64::from(firehose.format_string_location), &preamble.first_number_proc_id, &preamble.second_number_proc_id, &catalog_data.catalog, diff --git a/src/decoders/dns.rs b/src/decoders/dns.rs index 8d70d5e..348dc63 100644 --- a/src/decoders/dns.rs +++ b/src/decoders/dns.rs @@ -635,7 +635,7 @@ mod tests { fn test_get_service_binding() { let test_data = "AAEAAAEAAwJoMgAEAAhoEJRAaBCVQAAGACAmBkcAAAAAAAAAAABoEJRAJgZHAAAAAAAAAAAAaBCVQA=="; - let result = get_service_binding(&test_data).unwrap(); + let result = get_service_binding(test_data).unwrap(); assert_eq!(result, "rdata: 1 . 
alpn=h2, ipv4 hint:104.16.148.64,104.16.149.64, ipv6 hint:2606:4700::6810:9440,2606:4700::6810:9540"); } diff --git a/src/filesystem.rs b/src/filesystem.rs new file mode 100644 index 0000000..cf5594f --- /dev/null +++ b/src/filesystem.rs @@ -0,0 +1,249 @@ +use crate::traits::{FileProvider, SourceFile}; +use std::fs::File; +use std::path::{Component, Path, PathBuf}; +use walkdir::WalkDir; + +pub struct LocalFile { + reader: File, + source: String, +} + +impl LocalFile { + fn new(path: &Path) -> std::io::Result<Self> { + Ok(Self { + reader: File::open(path)?, + source: path.as_os_str().to_string_lossy().to_string(), + }) + } +} + +impl SourceFile for LocalFile { + fn reader(&mut self) -> Box<&mut dyn std::io::Read> { + Box::new(&mut self.reader) + } + + fn source_path(&self) -> &str { + self.source.as_str() + } +} + +/// Provides an implementation of [`FileProvider`] that enumerates the +/// required files at the correct paths on a live macOS system. These files are only present on +/// macOS Sierra (10.12) and above. The implemented methods emit error log messages if any are +/// encountered while enumerating files or creating readers, but are otherwise infallible.
+#[derive(Default, Clone, Debug)] +pub struct LiveSystemProvider {} + +static TRACE_FOLDERS: &[&str] = &["HighVolume", "Special", "Signpost", "Persist"]; + +#[derive(Debug, PartialEq)] +pub enum LogFileType { + TraceV3, + UUIDText, + Dsc, + Timesync, + Invalid, +} + +fn only_hex_chars(val: &str) -> bool { + val.chars().all(|c| c.is_ascii_hexdigit()) +} + +impl From<&Path> for LogFileType { + fn from(path: &Path) -> Self { + let components = path.components().collect::<Vec<Component<'_>>>(); + let n = components.len(); + + if let (Some(&Component::Normal(parent)), Some(&Component::Normal(filename))) = + (components.get(n - 2), components.get(n - 1)) + { + let parent_s = parent.to_str().unwrap_or_default(); + let filename_s = filename.to_str().unwrap_or_default(); + + if filename_s == "logdata.LiveData.tracev3" + || (filename_s.ends_with(".tracev3") && TRACE_FOLDERS.contains(&parent_s)) + { + return Self::TraceV3; + } + + if filename_s.len() == 30 + && only_hex_chars(filename_s) + && parent_s.len() == 2 + && only_hex_chars(parent_s) + { + return Self::UUIDText; + } + + if filename_s.len() == 32 && only_hex_chars(filename_s) && parent_s == "dsc" { + return Self::Dsc; + } + + if filename_s.ends_with(".timesync") && parent_s == "timesync" { + return Self::Timesync; + } + } + + Self::Invalid + } +} + +impl FileProvider for LiveSystemProvider { + fn tracev3_files(&self) -> Box<dyn Iterator<Item = Box<dyn SourceFile>>> { + let path = PathBuf::from("/private/var/db/diagnostics"); + Box::new( + WalkDir::new(path) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter(|entry| matches!(LogFileType::from(entry.path()), LogFileType::TraceV3)) + .filter_map(|entry| { + Some(Box::new(LocalFile::new(entry.path()).ok()?)
as Box<dyn SourceFile>) }), ) } + + fn uuidtext_files(&self) -> Box<dyn Iterator<Item = Box<dyn SourceFile>>> { + let path = PathBuf::from("/private/var/db/uuidtext"); + Box::new( + WalkDir::new(path) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter(|entry| matches!(LogFileType::from(entry.path()), LogFileType::UUIDText)) + .filter_map(|entry| { + Some(Box::new(LocalFile::new(entry.path()).ok()?) as Box<dyn SourceFile>) + }), + ) + } + + fn dsc_files(&self) -> Box<dyn Iterator<Item = Box<dyn SourceFile>>> { + let path = PathBuf::from("/private/var/db/uuidtext/dsc"); + Box::new(WalkDir::new(path).into_iter().filter_map(|entry| { + if !matches!( + LogFileType::from(entry.as_ref().ok()?.path()), + LogFileType::Dsc + ) { + return None; + } + Some(Box::new(LocalFile::new(entry.ok()?.path()).ok()?) as Box<dyn SourceFile>) + })) + } + + fn timesync_files(&self) -> Box<dyn Iterator<Item = Box<dyn SourceFile>>> { + let path = PathBuf::from("/private/var/db/diagnostics/timesync"); + Box::new( + WalkDir::new(path) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter(|entry| matches!(LogFileType::from(entry.path()), LogFileType::Timesync)) + .filter_map(|entry| { + Some(Box::new(LocalFile::new(entry.path()).ok()?) as Box<dyn SourceFile>) + }), + ) + } +} + +pub struct LogarchiveProvider { + base: PathBuf, +} + +impl LogarchiveProvider { + pub fn new(path: &Path) -> Self { + Self { + base: path.to_path_buf(), + } + } +} + +impl FileProvider for LogarchiveProvider { + fn tracev3_files(&self) -> Box<dyn Iterator<Item = Box<dyn SourceFile>>> { + Box::new( + WalkDir::new(&self.base) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter(|entry| matches!(LogFileType::from(entry.path()), LogFileType::TraceV3)) + .filter_map(|entry| { + Some(Box::new(LocalFile::new(entry.path()).ok()?) as Box<dyn SourceFile>) + }), + ) + } + + fn uuidtext_files(&self) -> Box<dyn Iterator<Item = Box<dyn SourceFile>>> { + Box::new( + WalkDir::new(&self.base) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter(|entry| matches!(LogFileType::from(entry.path()), LogFileType::UUIDText)) + .filter_map(|entry| { + Some(Box::new(LocalFile::new(entry.path()).ok()?)
as Box<dyn SourceFile>) }), ) } + + fn dsc_files(&self) -> Box<dyn Iterator<Item = Box<dyn SourceFile>>> { + Box::new( + WalkDir::new(&self.base) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter(|entry| matches!(LogFileType::from(entry.path()), LogFileType::Dsc)) + .filter_map(|entry| { + Some(Box::new(LocalFile::new(entry.path()).ok()?) as Box<dyn SourceFile>) + }), + ) + } + + fn timesync_files(&self) -> Box<dyn Iterator<Item = Box<dyn SourceFile>>> { + Box::new( + WalkDir::new(&self.base) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter(|entry| matches!(LogFileType::from(entry.path()), LogFileType::Timesync)) + .filter_map(|entry| { + Some(Box::new(LocalFile::new(entry.path()).ok()?) as Box<dyn SourceFile>) + }), + ) + } +} + +#[cfg(test)] +mod tests { + use super::LogFileType; + use std::path::PathBuf; + + #[test] + fn test_only_hex() { + use super::only_hex_chars; + + let cases = vec![ + "A7563E1D7A043ED29587044987205172", + "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD", + ]; + + for case in cases { + assert!(only_hex_chars(case)); + } + } + + #[test] + fn test_validate_uuidtext_path() { + let valid_cases = vec![ + "/private/var/db/uuidtext/dsc/A7563E1D7A043ED29587044987205172", + "/private/var/db/uuidtext/dsc/DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD", + "./dsc/A7563E1D7A043ED29587044987B05172", + ]; + + for case in valid_cases { + let path = PathBuf::from(case); + println!("{:?}", path.components()); + let file_type = LogFileType::from(path.as_path()); + assert_eq!(file_type, LogFileType::Dsc); + } + } + + #[test] + fn test_validate_dsc_path() {} + + #[test] + fn test_validate_timesync_path() {} + + #[test] + fn test_validate_tracev3_path() {} +} diff --git a/src/iterator.rs b/src/iterator.rs index 93c02e8..edf8e09 100644 --- a/src/iterator.rs +++ b/src/iterator.rs @@ -125,6 +125,7 @@ fn nom_bytes<'a>(data: &'a [u8], size: &u64) -> nom::IResult<&'a [u8], &'a [u8]> mod tests { use super::UnifiedLogIterator; use crate::{ + filesystem::LogarchiveProvider, iterator::nom_bytes, parser::{build_log, collect_shared_strings, collect_strings, collect_timesync}, }; @@ -170,16 +171,11 @@
mod tests { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); + let provider = LogarchiveProvider::new(test_path.as_path()); - test_path.push("dsc"); - let shared_strings_results = - collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("Persist/0000000000000002.tracev3"); let buffer_results = fs::read(test_path.to_str().unwrap()).unwrap(); diff --git a/src/lib.rs b/src/lib.rs index c22603d..a594a7c 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,12 +38,14 @@ mod chunkset; mod decoders; pub mod dsc; mod error; +pub mod filesystem; mod header; pub mod iterator; mod message; pub mod parser; mod preamble; pub mod timesync; +pub mod traits; pub mod unified_log; mod util; pub mod uuidtext; diff --git a/src/message.rs b/src/message.rs index 612f2db..ed5fdce 100755 --- a/src/message.rs +++ b/src/message.rs @@ -1366,7 +1366,7 @@ mod tests { test_format, test_width, test_precision, - &test_type, + test_type, plus_minus, hashtag, ); @@ -1385,7 +1385,7 @@ mod tests { test_format, test_width, test_precision, - &test_type, + test_type, plus_minus, hashtag, ); diff --git a/src/parser.rs b/src/parser.rs index dbfdafa..67effeb 100755 --- a/src/parser.rs +++ b/src/parser.rs @@ -10,45 +10,26 @@ use log::{error, info}; use crate::dsc::SharedCacheStrings; use crate::error::ParserError; use crate::timesync::TimesyncBoot; +use crate::traits::FileProvider; use crate::unified_log::{LogData, UnifiedLogData}; use crate::uuidtext::UUIDText; -use std::fs; - -/// 
Parse the UUID files on a live system -pub fn collect_strings_system() -> Result<Vec<UUIDText>, ParserError> { - let uuidtext_path = String::from("/private/var/db/uuidtext"); - collect_strings(&uuidtext_path) -} - -/// Parse the dsc (shared cache strings) files on a live system -pub fn collect_shared_strings_system() -> Result<Vec<SharedCacheStrings>, ParserError> { - let dsc_path = String::from("/private/var/db/uuidtext/dsc"); - collect_shared_strings(&dsc_path) -} - -/// Parse the timesync files on a live system -pub fn collect_timesync_system() -> Result<Vec<TimesyncBoot>, ParserError> { - let timesync = String::from("/private/var/db/diagnostics/timesync"); - collect_timesync(&timesync) -} +use std::io::Read; +use std::path::PathBuf; /// Parse a tracev3 file and return the deconstructed log data -pub fn parse_log(full_path: &str) -> Result<UnifiedLogData, ParserError> { - let buffer_results = fs::read(full_path); +pub fn parse_log(mut reader: impl Read) -> Result<UnifiedLogData, ParserError> { + let mut buf = Vec::new(); + if let Err(e) = reader.read_to_end(&mut buf) { + error!( + "[macos-unifiedlogs] Failed to read the tracev3 file: {:?}", + e + ); + return Err(ParserError::Read); + } - let buffer = match buffer_results { - Ok(results) => results, - Err(err) => { - error!( - "[macos-unifiedlogs] Failed to read the tracev3 file {}: {:?}", - full_path, err - ); - return Err(ParserError::Read); - } - }; - info!("Read {} bytes for file {}", buffer.len(), full_path); + info!("Read {} bytes from tracev3 file", buf.len()); - let log_data_results = LogData::parse_unified_log(&buffer); + let log_data_results = LogData::parse_unified_log(&buf); match log_data_results { Ok((_, log_data)) => Ok(log_data), Err(err) => { @@ -97,255 +78,91 @@ pub fn build_log( } /// Parse all UUID files in provided directory.
The directory should follow the same layout as the live system (ex: path/to/files//) -pub fn collect_strings(path: &str) -> Result, ParserError> { - let paths = fs::read_dir(path).map_err(|err| { - error!("[macos-unifiedlogs] Failed to read directory path: {err:?}"); - ParserError::Dir - })?; - - let entries = paths - .flat_map(|path| { - path.map_err( - |err| error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",), - ) - .ok() - }) - .collect::>(); - - let mut uuidtext_vec: Vec = Vec::with_capacity(entries.len()); +pub fn collect_strings(provider: &dyn FileProvider) -> Result, ParserError> { + let mut uuidtext_vec: Vec = Vec::new(); // Start process to read a directory containing subdirectories that contain the uuidtext files - for dir_entry in entries { - let type_results = dir_entry.file_type(); - let entry_type = match type_results { - Ok(dir_type) => dir_type, - Err(err) => { - error!( - "[macos-unifiedlogs] Failed to get directory entry type: {:?}", - err - ); - continue; - } - }; - - if entry_type.is_file() { + for mut source in provider.uuidtext_files() { + let mut buf = Vec::new(); + let path = source.source_path().to_owned(); + if let Err(e) = source.reader().read_to_end(&mut buf) { + error!( + "[macos-unifiedlogs] Failed to read uuidfile {}: {:?}", + path, e + ); continue; - } - - let directory_results = dir_entry.file_name().into_string(); - let directory = match directory_results { - Ok(directory_path) => directory_path, - Err(err) => { - error!( - "[macos-unifiedlogs] Failed to convert path {:?} to string", - err - ); - continue; - } }; - // Currently expect the subdirectories to be structured like a live system (or .logarchive) - // they should be /private/var/db/uuidtext/<2 char values>/ (/private/var/db/uuidtext/1F/470CAE74D83AA1A6637FD0C5B1D365) - let first_two_uuid_chars = 2; - if directory.len() != first_two_uuid_chars { - continue; - } + info!("Read {} bytes for file {}", buf.len(), path); - let dir_path = dir_entry.path(); - let 
uuidtext_path_results = fs::read_dir(dir_path); - let uuidtext_path = match uuidtext_path_results { - Ok(uuid_path) => uuid_path, + let uuid_results = UUIDText::parse_uuidtext(&buf); + let mut uuidtext_data = match uuid_results { + Ok((_, results)) => results, Err(err) => { error!( - "[macos-unifiedlogs] Failed to read directory path for UUID files: {:?}", - err + "[macos-unifiedlogs] Failed to parse UUID file {}: {:?}", + path, err ); continue; } }; - // Read all uuidtext files in directory - for uuid_data in uuidtext_path { - let uuidtext_full_path = match uuid_data { - Ok(uuid_entry) => uuid_entry, - Err(err) => { - error!( - "[macos-unifiedlogs] Failed to get directory uuid entry: {:?}", - err - ); - continue; - } - }; - - let full_path = uuidtext_full_path.path(); - let buffer_results = fs::read(&full_path); - let buffer = match buffer_results { - Ok(results) => results, - Err(err) => { - error!("[macos-unifiedlogs] Failed to read UUID file: {:?}", err); - continue; - } - }; - info!( - "Read {} bytes for file {}", - buffer.len(), - full_path.display().to_string() - ); - - let uuid_results = UUIDText::parse_uuidtext(&buffer); - let mut uuidtext_data = match uuid_results { - Ok((_, results)) => results, - Err(err) => { - error!( - "[macos-unifiedlogs] Failed to parse UUID file {}: {:?}", - full_path.display().to_string(), - err - ); - continue; - } - }; - - // Track the uuidtext filename, this will be referenced by log entries via the Catalog (or log entry) - let uuid_file_name = uuidtext_full_path.file_name().into_string(); - match uuid_file_name { - // Only the last 14 characters of the UUID name are saved here. Limited chance of UUID collisions on a real system - Ok(uuid_file_string) => uuidtext_data.uuid = uuid_file_string, - Err(err) => { - error!("[macos-unifiedlogs] Failed to convert UUID filename {:?} to string. 
Unable to do base format string lookups", err); - continue; - } - } - - uuidtext_vec.push(uuidtext_data) - } + uuidtext_data.uuid = PathBuf::from(path) + .file_name() + .map(|f| f.to_string_lossy()) + .unwrap_or_default() + .to_string(); + uuidtext_vec.push(uuidtext_data) } Ok(uuidtext_vec) } /// Parse all dsc uuid files in provided directory -pub fn collect_shared_strings(path: &str) -> Result, ParserError> { - let paths_results = fs::read_dir(path); - - let paths = match paths_results { - Ok(results) => results, - Err(err) => { - error!( - "[macos-unifiedlogs] Failed to read dsc directory {}: {:?}", - path, err - ); - return Err(ParserError::Path); - } - }; - +pub fn collect_shared_strings( + provider: &dyn FileProvider, +) -> Result, ParserError> { let mut shared_strings_vec: Vec = Vec::new(); // Start process to read and parse uuid files related to dsc - for path in paths { - let data = match path { - Ok(path_results) => path_results, - Err(err) => { - error!( - "[macos-unifiedlogs] Failed to get dsc directory entry: {:?}", - err - ); - continue; - } - }; + for mut source in provider.dsc_files() { + let mut buf = Vec::new(); + if let Err(e) = source.reader().read_to_end(&mut buf) { + error!("[macos-unifiedlogs] Failed to read dsc file: {:?}", e); + continue; + } - let full_path = data.path(); - let buffer_results = fs::read(&full_path); - let buffer = match buffer_results { - Ok(results) => results, - Err(err) => { - error!( - "[macos-unifiedlogs] Failed to read dsc file {}: {:?}", - full_path.display().to_string(), - err - ); - continue; + match SharedCacheStrings::parse_dsc(&buf) { + Ok((_, mut results)) => { + results.dsc_uuid = PathBuf::from(source.source_path()) + .file_name() + .map(|fname| fname.to_string_lossy()) + .unwrap_or_default() + .to_string(); + shared_strings_vec.push(results); } - }; - - let shared_strings_data_results = SharedCacheStrings::parse_dsc(&buffer); - let mut shared_strings_data = match shared_strings_data_results { - Ok((_, 
results)) => results, Err(err) => { - error!( - "[macos-unifiedlogs] Failed to parse dsc file {}: {:?}", - full_path.display().to_string(), - err - ); - continue; + error!("[macos-unifiedlogs] Failed to parse dsc file: {:?}", err); } }; - - // Track the uuid filename, this will be referenced by log entries via the Catalog (or log entry) - let dsc_filename = data.file_name().into_string(); - match dsc_filename { - Ok(dsc_file_string) => shared_strings_data.dsc_uuid = dsc_file_string, - Err(err) => { - error!("[macos-unifiedlogs] Failed to convert dsc filename {:?} to string. Unable to do base format string lookups", err); - continue; - } - } - shared_strings_vec.push(shared_strings_data); } Ok(shared_strings_vec) } /// Parse all timesync files in provided directory -pub fn collect_timesync(path: &str) -> Result, ParserError> { - let paths_results = fs::read_dir(path); - - let paths = match paths_results { - Ok(results) => results, - Err(err) => { - error!( - "[macos-unifiedlogs] Failed to read timesync directory {}: {:?}", - path, err - ); - return Err(ParserError::Path); - } - }; - +pub fn collect_timesync(provider: &dyn FileProvider) -> Result, ParserError> { let mut timesync_data_vec: Vec = Vec::new(); // Start process to read and parse all timesync files - for path in paths { - let data = match path { - Ok(path_results) => path_results, - Err(err) => { - error!( - "[macos-unifiedlogs] Failed to get timesync directory entry: {:?}", - err - ); - continue; - } - }; - - let full_path = data.path(); - let buffer_results = fs::read(&full_path); - let buffer = match buffer_results { - Ok(results) => results, - Err(err) => { - error!( - "[macos-unifiedlogs] Failed to read timesync file {}: {:?}", - full_path.display().to_string(), - err - ); - continue; - } - }; - info!( - "Read {} bytes from timesync file {}", - buffer.len(), - full_path.display().to_string() - ); + for mut source in provider.timesync_files() { + let mut buffer = Vec::new(); + if let Err(e) = 
source.reader().read_to_end(&mut buffer) { + error!("[macos-unifiedlogs] Failed to read timesync file: {:?}", e); + continue; + } let timesync_results = TimesyncBoot::parse_timesync_data(&buffer); match timesync_results { Ok((_, mut timesync)) => timesync_data_vec.append(&mut timesync), Err(err) => { error!( - "[macos-unifiedlogs] Failed to parse timesync file {}: {:?}", - full_path.display().to_string(), + "[macos-unifiedlogs] Failed to parse timesync file: {:?}", err ); continue; @@ -357,26 +174,25 @@ pub fn collect_timesync(path: &str) -> Result, ParserError> { #[cfg(test)] mod tests { + use crate::filesystem::{LiveSystemProvider, LogarchiveProvider}; use crate::parser::{ build_log, collect_shared_strings, collect_strings, collect_timesync, parse_log, }; - #[cfg(target_os = "macos")] - use crate::parser::{ - collect_shared_strings_system, collect_strings_system, collect_timesync_system, - }; use std::path::PathBuf; #[test] #[cfg(target_os = "macos")] fn test_collect_strings_system() { - let uuidtext_results = collect_strings_system().unwrap(); + let system_provider = LiveSystemProvider::default(); + let uuidtext_results = collect_strings(&system_provider).unwrap(); assert!(uuidtext_results.len() > 100); } #[test] #[cfg(target_os = "macos")] fn test_collect_timesync_system() { - let timesync_results = collect_timesync_system().unwrap(); + let system_provider = LiveSystemProvider::default(); + let timesync_results = collect_timesync(&system_provider).unwrap(); assert!(timesync_results.len() > 1); } @@ -384,9 +200,12 @@ mod tests { #[cfg(target_os = "macos")] fn test_collect_timesync_archive() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - test_path.push("tests/test_data/system_logs_big_sur.logarchive/timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); + test_path.push("tests/test_data/system_logs_big_sur.logarchive"); + + let provider = LogarchiveProvider::new(test_path.as_path()); + + let 
timesync_data = collect_timesync(&provider).unwrap(); assert_eq!(timesync_data.len(), 5); assert_eq!(timesync_data[0].signature, 48048); assert_eq!(timesync_data[0].unknown, 0); @@ -406,7 +225,8 @@ mod tests { #[test] #[cfg(target_os = "macos")] fn test_collect_shared_strings_system() { - let shared_strings_results = collect_shared_strings_system().unwrap(); + let system_provider = LiveSystemProvider::default(); + let shared_strings_results = collect_shared_strings(&system_provider).unwrap(); assert!(shared_strings_results[0].ranges.len() > 1); assert!(shared_strings_results[0].uuids.len() > 1); assert!(shared_strings_results[0].number_ranges > 1); @@ -416,9 +236,9 @@ mod tests { #[test] fn test_shared_strings_archive() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - test_path.push("tests/test_data/system_logs_big_sur.logarchive/dsc"); - let shared_strings_results = - collect_shared_strings(&test_path.display().to_string()).unwrap(); + test_path.push("tests/test_data/system_logs_big_sur.logarchive"); + let provider = LogarchiveProvider::new(test_path.as_path()); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); assert_eq!(shared_strings_results.len(), 2); assert_eq!(shared_strings_results[0].number_uuids, 1976); assert_eq!(shared_strings_results[0].number_ranges, 2993); @@ -436,8 +256,9 @@ mod tests { fn test_collect_strings_archive() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); + let provider = LogarchiveProvider::new(test_path.as_path()); - let mut strings_results = collect_strings(&test_path.display().to_string()).unwrap(); + let mut strings_results = collect_strings(&provider).unwrap(); assert_eq!(strings_results.len(), 536); strings_results.sort_by(|a, b| a.uuid.cmp(&b.uuid)); @@ -463,7 +284,8 @@ mod tests { test_path.push("tests/test_data/system_logs_big_sur.logarchive"); test_path.push("Persist/0000000000000002.tracev3"); - let 
log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); assert_eq!(log_data.catalog_data[0].firehose.len(), 99); assert_eq!(log_data.catalog_data[0].simpledump.len(), 0); @@ -482,19 +304,15 @@ mod tests { fn test_build_log() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - - test_path.push("dsc"); - let shared_strings_results = - collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = std::fs::File::open(&test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); let exclude_missing = false; let (results, _) = build_log( diff --git a/src/timesync.rs b/src/timesync.rs index 201a79d..6e5f1c8 100755 --- a/src/timesync.rs +++ b/src/timesync.rs @@ -239,6 +239,7 @@ impl TimesyncBoot { #[cfg(test)] mod tests { + use crate::filesystem::LogarchiveProvider; use crate::parser::collect_timesync; use crate::timesync::TimesyncBoot; use std::fs::File; @@ -352,9 +353,10 @@ mod tests { #[test] fn test_get_timestamp() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - test_path.push("tests/test_data/system_logs_big_sur.logarchive/timesync"); + test_path.push("tests/test_data/system_logs_big_sur.logarchive"); + let provider = 
LogarchiveProvider::new(test_path.as_path()); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); let boot_uuid = "A2A9017676CF421C84DC9BBD6263FEE7"; let firehose_preamble_continous_time = 2818326118; @@ -365,15 +367,16 @@ mod tests { firehose_preamble_continous_time, 1, ); - assert_eq!(results, 1642304803060378889.0); + assert_eq!(results, 1_642_304_803_060_379_000.0); } #[test] fn test_get_arm_timestamp() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - test_path.push("tests/test_data/system_logs_monterey.logarchive/timesync"); + test_path.push("tests/test_data/system_logs_monterey.logarchive"); + let provider = LogarchiveProvider::new(test_path.as_path()); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); let boot_uuid = "3E12B435814B4C62918CEBC0826F06B8"; let firehose_preamble_continous_time = 2818326118; @@ -392,7 +395,9 @@ mod tests { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_monterey.logarchive/timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); + let provider = LogarchiveProvider::new(test_path.as_path()); + + let timesync_data = collect_timesync(&provider).unwrap(); let boot_uuid = "3E12B435814B4C62918CEBC0826F06B8"; let firehose_preamble_continous_time = 9898326118; @@ -403,6 +408,6 @@ mod tests { firehose_preamble_continous_time, 0, ); - assert_eq!(results, 1650767813342574583.0); + assert_eq!(results, 1_650_767_813_342_574_600.0); } } diff --git a/src/traits.rs b/src/traits.rs new file mode 100644 index 0000000..d57448a --- /dev/null +++ b/src/traits.rs @@ -0,0 +1,38 @@ +/// Implementing this trait allows library consumers to provide the files required by the parser in +/// arbitrary formats, as long as they are provided as an iterator of items that 
implement [Read]. +/// +/// For help mapping files to the correct filetype, see the +/// [`LogFileType`](crate::filesystem::LogFileType) enum's +/// [From]<&[Path](std::path::Path)> implementation. +pub trait FileProvider { +    /// Provides an iterator of `.tracev3` files from the +    /// `/private/var/db/diagnostics/(HighVolume|Signpost|Trace|Special)/`, plus the +    /// `livedata.LogData.tracev3` file if it was collected via `log collect`. +    fn tracev3_files(&self) -> Box>>; +    /// Provides an iterator of `UUIDText` string files from the `/var/db/uuidtext/XX/` directories, +    /// where the `XX` is any two uppercase hex characters, along with the filename (i.e., the +    /// filename from the _source_ file). This should be a 30-character name containing only hex +    /// digits. It is important that +    /// this is accurate, or else strings will not be able to be referenced from the source file. +    fn uuidtext_files(&self) -> Box>>; +    /// Provides an iterator of shared string files from the `/var/db/uuidtext/dsc` subdirectory, +    /// along with the filename (i.e., the filename from the _source_ file). This should be a +    /// 30-character name containing only hex digits. It is important that this is accurate, or +    /// else strings will not be able to be referenced from the source file. +    fn dsc_files(&self) -> Box>>; +    /// Provides an iterator of `.timesync` files from the `/var/db/diagnostics/timesync` subdirectory. +    fn timesync_files(&self) -> Box>>; +} +/// Defines an interface for providing a single unified log file. Parsing unified logs requires the +/// name of the original file in order to reconstruct format strings. +pub trait SourceFile { +    /// A reader for the given source file. 
+ fn reader(&mut self) -> Box<&mut dyn std::io::Read>; + /// The source path of the file on the machine from which it was collected, distinct from any + /// secondary storage location where, for instance, a file backing the `reader` might exist. + fn source_path(&self) -> &str; +} diff --git a/src/unified_log.rs b/src/unified_log.rs index 1b10069..a3cf6c1 100755 --- a/src/unified_log.rs +++ b/src/unified_log.rs @@ -907,6 +907,7 @@ mod tests { use crate::{ chunks::firehose::firehose_log::Firehose, + filesystem::LogarchiveProvider, parser::{collect_shared_strings, collect_strings, collect_timesync, iter_log, parse_log}, unified_log::UnifiedLogCatalogData, }; @@ -982,20 +983,17 @@ mod tests { fn test_build_log() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - test_path.push("dsc"); - let shared_strings_results = - collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("Persist/0000000000000002.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let reader = std::fs::File::open(test_path).unwrap(); + let log_data = parse_log(reader).unwrap(); let exclude_missing = false; let (results, _) = LogData::build_log( @@ -1005,10 +1003,11 @@ mod tests { ×ync_data, exclude_missing, ); + assert_eq!(results.len(), 207366); assert_eq!(results[0].process, "/usr/libexec/lightsoutmanagementd"); assert_eq!(results[0].subsystem, "com.apple.lom"); - 
assert_eq!(results[0].time, 1642302326434850732.0); + assert_eq!(results[0].time, 1_642_302_326_434_850_800.0); assert_eq!(results[0].activity_id, 0); assert_eq!(results[0].library, "/usr/libexec/lightsoutmanagementd"); assert_eq!(results[0].library_uuid, "6C3ADF991F033C1C96C4ADFAA12D8CED"); @@ -1190,8 +1189,9 @@ mod tests { test_path.push( "tests/test_data/system_logs_big_sur.logarchive/Persist/0000000000000002.tracev3", ); + let reader = std::fs::File::open(test_path).unwrap(); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let log_data = parse_log(reader).unwrap(); LogData::add_missing( &log_data.catalog_data[0], diff --git a/tests/big_sur_tests.rs b/tests/big_sur_tests.rs index 1b35fe9..b7dec8b 100755 --- a/tests/big_sur_tests.rs +++ b/tests/big_sur_tests.rs @@ -5,35 +5,31 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and limitations under the License. 
-use std::{fs, path::PathBuf}; +use std::{fs::File, path::PathBuf}; use macos_unifiedlogs::{ + filesystem::LogarchiveProvider, parser::{build_log, collect_shared_strings, collect_strings, collect_timesync, parse_log}, + traits::FileProvider, unified_log::{LogData, UnifiedLogData}, }; use regex::Regex; -fn collect_logs(path: &str) -> Vec { - let paths = fs::read_dir(path).unwrap(); - - let mut log_data_vec: Vec = Vec::new(); - for path in paths { - let data = path.unwrap(); - let full_path = data.path().display().to_string(); - let log_data = parse_log(&full_path).unwrap(); - log_data_vec.push(log_data); - } - - return log_data_vec; +fn collect_logs(provider: &dyn FileProvider) -> Vec { + provider + .tracev3_files() + .map(|mut file| parse_log(file.reader()).unwrap()) + .collect() } #[test] fn test_parse_log_big_sur() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - test_path.push("Persist/0000000000000004.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + + let handle = File::open(test_path.as_path()).unwrap(); + let log_data = parse_log(handle).unwrap(); assert_eq!(log_data.catalog_data[0].firehose.len(), 82); assert_eq!(log_data.catalog_data[0].simpledump.len(), 0); @@ -52,18 +48,15 @@ fn test_parse_log_big_sur() { fn test_big_sur_livedata() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let 
shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("logdata.LiveData.tracev3"); - let results = parse_log(&test_path.display().to_string()).unwrap(); + let handle = File::open(test_path.as_path()).unwrap(); + let results = parse_log(handle).unwrap(); test_path.pop(); let exclude_missing = false; @@ -107,19 +100,17 @@ fn test_big_sur_livedata() { fn test_build_log_big_sur() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("Persist/0000000000000004.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = File::open(test_path.as_path()).unwrap(); + + let log_data = parse_log(handle).unwrap(); let exclude_missing = false; let (results, _) = build_log( @@ -159,40 +150,12 @@ fn test_build_log_big_sur() { fn test_parse_all_logs_big_sur() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = 
collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("Persist"); - - let mut log_data = collect_logs(&test_path.display().to_string()); - test_path.pop(); - - test_path.push("HighVolume"); - let mut results = collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - - test_path.push("Special"); - let mut results = collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - - test_path.push("Signpost"); - let mut results = collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - test_path.push("logdata.LiveData.tracev3"); - let results = parse_log(&test_path.display().to_string()).unwrap(); - log_data.push(results); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); + let log_data = collect_logs(&provider); let mut log_data_vec: Vec = Vec::new(); let exclude_missing = false; @@ -317,35 +280,12 @@ fn test_parse_all_logs_big_sur() { fn test_parse_all_persist_logs_with_network_big_sur() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("Persist"); - - let mut log_data = collect_logs(&test_path.display().to_string()); - test_path.pop(); - - test_path.push("HighVolume"); - let mut results = 
collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - test_path.push("Special"); - let mut results = collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - - test_path.push("logdata.LiveData.tracev3"); - let results = parse_log(&test_path.display().to_string()).unwrap(); - log_data.push(results); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); + let log_data = collect_logs(&provider); let mut log_data_vec: Vec = Vec::new(); let exclude_missing = false; @@ -430,47 +370,19 @@ fn test_parse_all_persist_logs_with_network_big_sur() { assert_eq!(error_type, 215); assert_eq!(create_type, 687); assert_eq!(state_simple_dump, 34); - assert_eq!(signpost, 37); + assert_eq!(signpost, 62); } #[test] fn test_parse_all_logs_private_big_sur() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur_private_enabled.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); - test_path.push("Persist"); - - let mut log_data = collect_logs(&test_path.display().to_string()); - test_path.pop(); - - test_path.push("HighVolume"); - let mut results = collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - - test_path.push("Special"); - let mut results = collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - - 
test_path.push("Signpost"); - let mut results = collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - - test_path.push("logdata.LiveData.tracev3"); - let results = parse_log(&test_path.display().to_string()).unwrap(); - log_data.push(results); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); + let log_data = collect_logs(&provider); let mut log_data_vec: Vec = Vec::new(); let exclude_missing = false; @@ -511,40 +423,12 @@ fn test_parse_all_logs_private_big_sur() { fn test_parse_all_logs_private_with_public_mix_big_sur() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur_public_private_data_mix.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("Persist"); - - let mut log_data = collect_logs(&test_path.display().to_string()); - test_path.pop(); - - test_path.push("HighVolume"); - let mut results = collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - - test_path.push("Special"); - let mut results = collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - test_path.push("Signpost"); - let mut results = collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - - test_path.push("logdata.LiveData.tracev3"); - let results = 
parse_log(&test_path.display().to_string()).unwrap(); - log_data.push(results); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); + let log_data = collect_logs(&provider); let mut log_data_vec: Vec = Vec::new(); let exclude_missing = false; @@ -607,19 +491,17 @@ fn test_parse_all_logs_private_with_public_mix_big_sur() { fn test_parse_all_logs_private_with_public_mix_big_sur_single_file() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur_public_private_data_mix.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("Persist/0000000000000009.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = File::open(test_path.as_path()).unwrap(); + + let log_data = parse_log(handle).unwrap(); let exclude_missing = false; let (results, _) = build_log( @@ -661,19 +543,17 @@ fn test_parse_all_logs_private_with_public_mix_big_sur_single_file() { fn test_parse_all_logs_private_with_public_mix_big_sur_special_file() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur_public_private_data_mix.logarchive"); - let 
string_results = collect_strings(&test_path.display().to_string()).unwrap(); - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("Special/0000000000000008.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = File::open(test_path.as_path()).unwrap(); + + let log_data = parse_log(handle).unwrap(); let exclude_missing = false; let (results, _) = build_log( @@ -716,24 +596,22 @@ fn test_parse_all_logs_private_with_public_mix_big_sur_special_file() { fn test_big_sur_missing_oversize_strings() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); // livedata may have oversize string data in other tracev3 on disk test_path.push("logdata.LiveData.tracev3"); - let results = parse_log(&test_path.display().to_string()).unwrap(); + let handle = 
File::open(test_path.as_path()).unwrap(); + + let log_data = parse_log(handle).unwrap(); test_path.pop(); let exclude_missing = false; let (data, _) = build_log( - &results, + &log_data, &string_results, &shared_strings_results, ×ync_data, @@ -756,30 +634,30 @@ fn test_big_sur_missing_oversize_strings() { fn test_big_sur_oversize_strings_in_another_file() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_big_sur.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); // Get most recent Persist tracev3 file could contain oversize log entries test_path.push("Persist/0000000000000005.tracev3"); - let mut log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = File::open(test_path.as_path()).unwrap(); + + let mut log_data = parse_log(handle).unwrap(); test_path.pop(); test_path.pop(); // Get most recent Special tracev3 file that could contain oversize log entries test_path.push("Special/0000000000000005.tracev3"); - let mut special_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = File::open(test_path.as_path()).unwrap(); + let mut special_data = parse_log(handle).unwrap(); test_path.pop(); test_path.pop(); test_path.push("logdata.LiveData.tracev3"); - let mut results = parse_log(&test_path.display().to_string()).unwrap(); + let handle = File::open(test_path.as_path()).unwrap(); + let mut results = 
parse_log(handle).unwrap(); test_path.pop(); results.oversize.append(&mut log_data.oversize); diff --git a/tests/high_sierra_tests.rs b/tests/high_sierra_tests.rs index 8ffc50c..2bd0ae3 100755 --- a/tests/high_sierra_tests.rs +++ b/tests/high_sierra_tests.rs @@ -1,30 +1,24 @@ // Copyright 2022 Mandiant, Inc. All Rights Reserved -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software distributed under the License // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and limitations under the License. 
-use std::{fs, path::PathBuf}; +use std::{fs::File, path::PathBuf}; use macos_unifiedlogs::{ + filesystem::LogarchiveProvider, parser::{build_log, collect_shared_strings, collect_strings, collect_timesync, parse_log}, - unified_log::{LogData, UnifiedLogData}, + traits::FileProvider, + unified_log::UnifiedLogData, }; use regex::Regex; -fn collect_logs(path: &str) -> Vec { - let paths = fs::read_dir(path).unwrap(); - - let mut log_data_vec: Vec = Vec::new(); - for path in paths { - let data = path.unwrap(); - let full_path = data.path().display().to_string(); - let log_data = parse_log(&full_path).unwrap(); - log_data_vec.push(log_data); - } - - return log_data_vec; +fn collect_logs(provider: &dyn FileProvider) -> Vec { + provider + .tracev3_files() + .map(|mut file| parse_log(file.reader()).unwrap()) + .collect() } #[test] @@ -33,7 +27,8 @@ fn test_parse_log_high_sierra() { test_path.push("tests/test_data/system_logs_high_sierra.logarchive"); test_path.push("Persist/0000000000000001.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = File::open(test_path).unwrap(); + let log_data = parse_log(handle).unwrap(); assert_eq!(log_data.catalog_data[0].firehose.len(), 172); assert_eq!(log_data.catalog_data[0].simpledump.len(), 0); @@ -52,19 +47,16 @@ fn test_parse_log_high_sierra() { fn test_build_log_high_sierra() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_high_sierra.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let 
shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("Persist/0000000000000001.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = File::open(test_path.as_path()).unwrap(); + let log_data = parse_log(handle).unwrap(); let exclude_missing = false; let (results, _) = build_log( @@ -103,19 +95,16 @@ fn test_build_log_high_sierra() { fn test_build_log_complex_format_high_sierra() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_high_sierra.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("Persist/0000000000000001.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = File::open(test_path.as_path()).unwrap(); + let log_data = parse_log(handle).unwrap(); let exclude_missing = false; let (results, _) = build_log( @@ -159,19 +148,16 @@ fn test_build_log_complex_format_high_sierra() { fn test_build_log_negative_number_high_sierra() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_high_sierra.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - - test_path.push("dsc"); - let shared_strings_results = 
collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("Special/0000000000000003.tracev3"); + let handle = File::open(test_path.as_path()).unwrap(); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let log_data = parse_log(handle).unwrap(); let exclude_missing = false; let (results, _) = build_log( @@ -201,37 +187,13 @@ fn test_build_log_negative_number_high_sierra() { fn test_parse_all_logs_high_sierra() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_high_sierra.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("Persist"); - - let mut log_data = collect_logs(&test_path.display().to_string()); - test_path.pop(); - - test_path.push("HighVolume"); - let mut results = collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - - test_path.push("Special"); - let mut results = collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - - test_path.push("logdata.LiveData.tracev3"); - let results = parse_log(&test_path.display().to_string()).unwrap(); - log_data.push(results); - test_path.pop(); + let provider = 
LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); + let log_data = collect_logs(&provider); + let mut log_data_vec = Vec::new(); - let mut log_data_vec: Vec = Vec::new(); let exclude_missing = false; for logs in &log_data { let (mut data, _) = build_log( diff --git a/tests/monterey_tests.rs b/tests/monterey_tests.rs index 9153067..27097c3 100755 --- a/tests/monterey_tests.rs +++ b/tests/monterey_tests.rs @@ -5,26 +5,21 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and limitations under the License. -use std::{fs, path::PathBuf}; +use std::{fs::File, path::PathBuf}; use macos_unifiedlogs::{ + filesystem::LogarchiveProvider, parser::{build_log, collect_shared_strings, collect_strings, collect_timesync, parse_log}, + traits::FileProvider, unified_log::{LogData, UnifiedLogData}, }; use regex::Regex; -fn collect_logs(path: &str) -> Vec { - let paths = fs::read_dir(path).unwrap(); - - let mut log_data_vec: Vec = Vec::new(); - for path in paths { - let data = path.unwrap(); - let full_path = data.path().display().to_string(); - let log_data = parse_log(&full_path).unwrap(); - log_data_vec.push(log_data); - } - - return log_data_vec; +fn collect_logs(provider: &dyn FileProvider) -> Vec { + provider + .tracev3_files() + .map(|mut file| parse_log(file.reader()).unwrap()) + .collect() } #[test] @@ -33,7 +28,9 @@ fn test_parse_log_monterey() { test_path.push("tests/test_data/system_logs_monterey.logarchive"); test_path.push("Persist/000000000000000a.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = File::open(test_path.as_path()).unwrap(); + + let log_data = parse_log(handle).unwrap(); 
assert_eq!(log_data.catalog_data[0].firehose.len(), 17); assert_eq!(log_data.catalog_data[0].simpledump.len(), 383); @@ -52,19 +49,17 @@ fn test_parse_log_monterey() { fn test_build_log_monterey() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_monterey.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); test_path.push("Persist/000000000000000a.tracev3"); - let log_data = parse_log(&test_path.display().to_string()).unwrap(); + let handle = File::open(test_path.as_path()).unwrap(); + + let log_data = parse_log(handle).unwrap(); let exclude_missing = false; let (results, _) = build_log( @@ -104,35 +99,12 @@ fn test_build_log_monterey() { fn test_parse_all_logs_monterey() { let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); test_path.push("tests/test_data/system_logs_monterey.logarchive"); - let string_results = collect_strings(&test_path.display().to_string()).unwrap(); - - test_path.push("dsc"); - let shared_strings_results = collect_shared_strings(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("timesync"); - let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); - test_path.pop(); - - test_path.push("Persist"); - - let mut log_data = collect_logs(&test_path.display().to_string()); - test_path.pop(); - - test_path.push("HighVolume"); - let mut results = 
collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - - test_path.push("Special"); - let mut results = collect_logs(&test_path.display().to_string()); - log_data.append(&mut results); - test_path.pop(); - test_path.push("logdata.LiveData.tracev3"); - let results = parse_log(&test_path.display().to_string()).unwrap(); - log_data.push(results); - test_path.pop(); + let provider = LogarchiveProvider::new(test_path.as_path()); + let string_results = collect_strings(&provider).unwrap(); + let shared_strings_results = collect_shared_strings(&provider).unwrap(); + let timesync_data = collect_timesync(&provider).unwrap(); + let log_data = collect_logs(&provider); let mut log_data_vec: Vec = Vec::new(); let exclude_missing = false; @@ -148,7 +120,7 @@ fn test_parse_all_logs_monterey() { ); log_data_vec.append(&mut data); } - assert_eq!(log_data_vec.len(), 2299798); + assert_eq!(log_data_vec.len(), 2397109); let mut unknown_strings = 0; let mut invalid_offsets = 0;