From aeb58ae1ea1e1376c34d32335b77d4dc003e33f4 Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 14 Nov 2024 14:38:17 +0100
Subject: [PATCH 01/13] add a rustfmt.toml file matching the existing
 formatting defaults in this repo; helpful when using this repo as a git
 submodule of another workspace

---
 rustfmt.toml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 rustfmt.toml

diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..57d44b4
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,5 @@
+newline_style = "Unix"
+tab_spaces = 4
+max_width = 100
+chain_width = 60
+use_small_heuristics = "Default"

From 897f0aa828cb66f75c24247dfeaf208052365b99 Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 14 Nov 2024 14:38:34 +0100
Subject: [PATCH 02/13] fix file name case typo in test data path

---
 src/dsc.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dsc.rs b/src/dsc.rs
index 3391846..43e837e 100755
--- a/src/dsc.rs
+++ b/src/dsc.rs
@@ -317,7 +317,7 @@ mod tests {
     #[should_panic(expected = "Incomplete(Unknown)")]
     fn test_bad_file() {
         let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
-        test_path.push("tests/test_data/Bad Data/DSC/badfile");
+        test_path.push("tests/test_data/Bad Data/DSC/Badfile");

         let buffer = fs::read(test_path).unwrap();
         let (_, _) = SharedCacheStrings::parse_dsc(&buffer).unwrap();

From ce00e4c3539ef0eb2703e5e4e508613138746ea9 Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 14 Nov 2024 14:39:33 +0100
Subject: [PATCH 03/13] ignore filesystem tests on non-macOS systems; update
 test values to match the downloadable test data

---
 src/parser.rs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 85af918..7942880 100755
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -375,18 +375,21 @@ mod tests {
     use std::path::PathBuf;

     #[test]
+    #[cfg(target_os = "macos")]
     fn test_collect_strings_system() {
         let uuidtext_results = collect_strings_system().unwrap();
         assert!(uuidtext_results.len() > 100);
     }

     #[test]
+    #[cfg(target_os = "macos")]
     fn test_collect_timesync_system() {
         let timesync_results = collect_timesync_system().unwrap();
         assert!(timesync_results.len() > 1);
     }

     #[test]
+    #[cfg(target_os = "macos")]
     fn test_collect_timesync_archive() {
         let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
         test_path.push("tests/test_data/system_logs_big_sur.logarchive/timesync");
@@ -409,6 +412,7 @@ mod tests {
     }

     #[test]
+    #[cfg(target_os = "macos")]
     fn test_collect_shared_strings_system() {
         let shared_strings_results = collect_shared_strings_system().unwrap();
         assert!(shared_strings_results[0].ranges.len() > 1);
@@ -444,9 +448,9 @@ mod tests {
         let strings_results = collect_strings(&test_path.display().to_string()).unwrap();
         assert_eq!(strings_results.len(), 536);
         assert_eq!(strings_results[0].signature, 1719109785);
-        assert_eq!(strings_results[0].uuid, "5283D7FC2531558F2C1ACE9AF26A0F");
+        assert_eq!(strings_results[0].uuid, "D9B97EA2CD39C7A9AF1888E041B9E1");
         assert_eq!(strings_results[0].entry_descriptors.len(), 2);
-        assert_eq!(strings_results[0].footer_data.len(), 48096);
+        assert_eq!(strings_results[0].footer_data.len(), 238974);
         assert_eq!(strings_results[0].number_entries, 2);
         assert_eq!(strings_results[0].unknown_minor_version, 1);
         assert_eq!(strings_results[0].unknown_major_version, 2);
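Note on patch 03: gating the filesystem-dependent tests behind #[cfg(target_os = "macos")] removes them from compilation entirely on other targets, so they neither run nor need their macOS-only helpers to build. A minimal sketch of the pattern, with a hypothetical test body that is not part of this series:

    #[test]
    #[cfg(target_os = "macos")] // compiled and run only on macOS hosts
    fn test_live_log_store_exists() {
        // hypothetical check against the live unified log store
        assert!(std::path::Path::new("/var/db/diagnostics").exists());
    }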
From 9994b2410634f3c162e126285071a392f5e024c8 Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 14 Nov 2024 15:16:52 +0100
Subject: [PATCH 04/13] revert test_collect_strings_archive assertions to the
 previous values

---
 src/parser.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 7942880..5750375 100755
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -448,9 +448,9 @@ mod tests {
         let strings_results = collect_strings(&test_path.display().to_string()).unwrap();
         assert_eq!(strings_results.len(), 536);
         assert_eq!(strings_results[0].signature, 1719109785);
-        assert_eq!(strings_results[0].uuid, "D9B97EA2CD39C7A9AF1888E041B9E1");
+        assert_eq!(strings_results[0].uuid, "5283D7FC2531558F2C1ACE9AF26A0F");
         assert_eq!(strings_results[0].entry_descriptors.len(), 2);
-        assert_eq!(strings_results[0].footer_data.len(), 238974);
+        assert_eq!(strings_results[0].footer_data.len(), 48096);
         assert_eq!(strings_results[0].number_entries, 2);
         assert_eq!(strings_results[0].unknown_minor_version, 1);
         assert_eq!(strings_results[0].unknown_major_version, 2);

From 6f76f6d59ba5cdb923854f0af3d928692a8697ed Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 14 Nov 2024 15:40:44 +0100
Subject: [PATCH 05/13] make collect_strings directory ordering deterministic

---
 src/parser.rs | 51 +++++++++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 5750375..9d1d45b 100755
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -100,31 +100,24 @@ pub fn build_log(
 pub fn collect_strings(path: &str) -> Result<Vec<UUIDText>, ParserError> {
     let paths_results = fs::read_dir(path);

-    let paths = match paths_results {
-        Ok(path) => path,
-        Err(err) => {
-            error!(
-                "[macos-unifiedlogs] Failed to read directory path: {:?}",
-                err
-            );
-            return Err(ParserError::Dir);
-        }
-    };
-
-    let mut uuidtext_vec: Vec<UUIDText> = Vec::new();
+    let paths = paths_results.map_err(|err| {
+        error!("[macos-unifiedlogs] Failed to read directory path: {err:?}");
+        ParserError::Dir
+    })?;
+
+    let mut entries = paths
+        .flat_map(|path| {
+            path.inspect_err(|err| {
+                error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",)
+            })
+            .ok()
+        })
+        .collect::<Vec<_>>();
+    entries.sort_by(|a, b| a.file_name().as_os_str().cmp(b.file_name().as_os_str()));
+
+    let mut uuidtext_vec: Vec<UUIDText> = Vec::with_capacity(entries.len());
     // Start process to read a directory containing subdirectories that contain the uuidtext files
-    for path in paths {
-        let dir_entry = match path {
-            Ok(entry) => entry,
-            Err(err) => {
-                error!(
-                    "[macos-unifiedlogs] Failed to get directory entry: {:?}",
-                    err
-                );
-                continue;
-            }
-        };
-
+    for dir_entry in entries {
         let type_results = dir_entry.file_type();
         let entry_type = match type_results {
             Ok(dir_type) => dir_type,
@@ -448,12 +441,18 @@ mod tests {
         let strings_results = collect_strings(&test_path.display().to_string()).unwrap();
         assert_eq!(strings_results.len(), 536);
         assert_eq!(strings_results[0].signature, 1719109785);
-        assert_eq!(strings_results[0].uuid, "5283D7FC2531558F2C1ACE9AF26A0F");
+        assert_eq!(strings_results[0].uuid, "B6B65F4DC53ED38FEB0DDF61809853");
         assert_eq!(strings_results[0].entry_descriptors.len(), 2);
-        assert_eq!(strings_results[0].footer_data.len(), 48096);
+        assert_eq!(strings_results[0].footer_data.len(), 1707);
         assert_eq!(strings_results[0].number_entries, 2);
         assert_eq!(strings_results[0].unknown_minor_version, 1);
         assert_eq!(strings_results[0].unknown_major_version, 2);
+
+        assert_eq!(strings_results[1].uuid, "D9B97EA2CD39C7A9AF1888E041B9E1");
+        assert_eq!(strings_results[1].footer_data.len(), 238974);
+
+        assert_eq!(strings_results[2].uuid, "2578ECF07936A6A882574764C7C785");
+        assert_eq!(strings_results[2].footer_data.len(), 68714);
     }

     #[test]
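Note on patch 05: the refactor folds two verbose match blocks into combinator chains and sorts the directory entries so results stop depending on readdir order. A stripped-down sketch of the same pattern using only the standard library (the function name and plain io::Result error type are illustrative, not the crate's API):

    use std::fs;

    fn sorted_entries(path: &str) -> std::io::Result<Vec<fs::DirEntry>> {
        // propagate the read_dir error, silently skip unreadable entries
        let mut entries = fs::read_dir(path)?
            .flat_map(|entry| entry.ok())
            .collect::<Vec<_>>();
        // deterministic order: sort by file name before any positional use
        entries.sort_by(|a, b| a.file_name().cmp(&b.file_name()));
        Ok(entries)
    }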
From 7c3ece8a434049ca4534c5373af535c49fb7bec9 Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 14 Nov 2024 15:42:05 +0100
Subject: [PATCH 06/13] made clippy happy

---
 src/parser.rs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 9d1d45b..b526785 100755
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -361,10 +361,12 @@ pub fn collect_timesync(path: &str) -> Result<Vec<TimesyncBoot>, ParserError> {
 #[cfg(test)]
 mod tests {
     use crate::parser::{
-        build_log, collect_shared_strings, collect_shared_strings_system, collect_strings,
-        collect_strings_system, collect_timesync, collect_timesync_system, parse_log,
+        build_log, collect_shared_strings, collect_strings, collect_timesync, parse_log,
+    };
+    #[cfg(target_os = "macos")]
+    use crate::parser::{
+        collect_shared_strings_system, collect_strings_system, collect_timesync_system,
     };
-
     use std::path::PathBuf;

     #[test]
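Note on patch 06: the warning it silences follows from patch 03; once the *_system tests vanish on non-macOS targets, their imports become unused there, so the use item gets the same cfg gate as its only callers. The shape of the fix, shortened to a single import:

    #[cfg(target_os = "macos")] // import exists only where its callers exist
    use crate::parser::collect_strings_system;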
"00B3D870FB3AE8BDC1BA3A60D0B9A0"); + assert_eq!(strings_results[1].footer_data.len(), 2164); - assert_eq!(strings_results[2].uuid, "2578ECF07936A6A882574764C7C785"); - assert_eq!(strings_results[2].footer_data.len(), 68714); + assert_eq!(strings_results[2].uuid, "014C44534A3A748476ABD88D376918"); + assert_eq!(strings_results[2].footer_data.len(), 19011); } #[test] From 604d39c7de69cbe42a8c5562d634b6512d4dfb21 Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Tue, 19 Nov 2024 15:33:52 +0100 Subject: [PATCH 08/13] chg: [unifiedlog_iterator] support jsonl output and more params --- examples/unifiedlog_iterator/src/main.rs | 189 +++++++++++++++-------- 1 file changed, 126 insertions(+), 63 deletions(-) diff --git a/examples/unifiedlog_iterator/src/main.rs b/examples/unifiedlog_iterator/src/main.rs index bd82e41..54bc75a 100644 --- a/examples/unifiedlog_iterator/src/main.rs +++ b/examples/unifiedlog_iterator/src/main.rs @@ -40,6 +40,15 @@ struct Args { /// Path to output file. Any directories must already exist #[clap(short, long, default_value = "")] output: String, + + /// Output format. Options: csv, jsonl. Default is autodetect. + #[clap(short, long, default_value = "auto")] + format: String, + + /// Append to output file + /// If false, will overwrite output file + #[clap(short, long, default_value = "false")] + append: bool, } fn main() { @@ -49,9 +58,18 @@ fn main() { .expect("Failed to initialize simple logger"); let args = Args::parse(); - let mut writer = construct_writer(&args.output).unwrap(); - // Create headers for CSV file - output_header(&mut writer).unwrap(); + let output_format = if args.format.is_empty() || args.format == "auto" { + std::path::Path::new(&args.output) + .extension() + .and_then(std::ffi::OsStr::to_str) + .unwrap_or("csv") + .to_string() + } else { + args.format.clone() + }; + + + let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap(); if args.input != "" { parse_log_archive(&args.input, &mut writer); @@ -61,7 +79,7 @@ fn main() { } // Parse a provided directory path. Currently, expect the path to follow macOS log collect structure -fn parse_log_archive(path: &str, writer: &mut Writer>) { +fn parse_log_archive(path: &str, writer: &mut OutputWriter) { let mut archive_path = PathBuf::from(path); // Parse all UUID files which contain strings and other metadata @@ -92,7 +110,7 @@ fn parse_log_archive(path: &str, writer: &mut Writer>) { } // Parse a live macOS system -fn parse_live_system(writer: &mut Writer>) { +fn parse_live_system(writer: &mut OutputWriter) { let strings = collect_strings_system().unwrap(); let shared_strings = collect_shared_strings_system().unwrap(); let timesync_data = collect_timesync_system().unwrap(); @@ -115,7 +133,7 @@ fn parse_trace_file( shared_strings_results: &[SharedCacheStrings], timesync_data: &[TimesyncBoot], path: &str, - writer: &mut Writer>, + writer: &mut OutputWriter, ) { // We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries) // Some log entries have Oversize strings located in different tracev3 files. 
From 604d39c7de69cbe42a8c5562d634b6512d4dfb21 Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas
Date: Tue, 19 Nov 2024 15:33:52 +0100
Subject: [PATCH 08/13] chg: [unifiedlog_iterator] support jsonl output and
 more params

---
 examples/unifiedlog_iterator/src/main.rs | 189 +++++++++++++++--------
 1 file changed, 126 insertions(+), 63 deletions(-)

diff --git a/examples/unifiedlog_iterator/src/main.rs b/examples/unifiedlog_iterator/src/main.rs
index bd82e41..54bc75a 100644
--- a/examples/unifiedlog_iterator/src/main.rs
+++ b/examples/unifiedlog_iterator/src/main.rs
@@ -40,6 +40,15 @@ struct Args {
     /// Path to output file. Any directories must already exist
     #[clap(short, long, default_value = "")]
     output: String,
+
+    /// Output format. Options: csv, jsonl. Default is autodetect.
+    #[clap(short, long, default_value = "auto")]
+    format: String,
+
+    /// Append to output file
+    /// If false, will overwrite output file
+    #[clap(short, long, default_value = "false")]
+    append: bool,
 }
@@ -49,9 +58,18 @@ fn main() {
         .expect("Failed to initialize simple logger");

     let args = Args::parse();
-    let mut writer = construct_writer(&args.output).unwrap();
-    // Create headers for CSV file
-    output_header(&mut writer).unwrap();
+    let output_format = if args.format.is_empty() || args.format == "auto" {
+        std::path::Path::new(&args.output)
+            .extension()
+            .and_then(std::ffi::OsStr::to_str)
+            .unwrap_or("csv")
+            .to_string()
+    } else {
+        args.format.clone()
+    };
+
+
+    let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap();

     if args.input != "" {
         parse_log_archive(&args.input, &mut writer);
@@ -61,7 +79,7 @@ fn main() {
 }

 // Parse a provided directory path. Currently, expect the path to follow macOS log collect structure
-fn parse_log_archive(path: &str, writer: &mut Writer<Box<dyn Write>>) {
+fn parse_log_archive(path: &str, writer: &mut OutputWriter) {
     let mut archive_path = PathBuf::from(path);

     // Parse all UUID files which contain strings and other metadata
@@ -92,7 +110,7 @@ fn parse_log_archive(path: &str, writer: &mut Writer<Box<dyn Write>>) {
 }

 // Parse a live macOS system
-fn parse_live_system(writer: &mut Writer<Box<dyn Write>>) {
+fn parse_live_system(writer: &mut OutputWriter) {
     let strings = collect_strings_system().unwrap();
     let shared_strings = collect_shared_strings_system().unwrap();
     let timesync_data = collect_timesync_system().unwrap();
@@ -115,7 +133,7 @@ fn parse_live_system(writer: &mut Writer<Box<dyn Write>>) {
     shared_strings_results: &[SharedCacheStrings],
     timesync_data: &[TimesyncBoot],
     path: &str,
-    writer: &mut Writer<Box<dyn Write>>,
+    writer: &mut OutputWriter,
 ) {
     // We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries)
     // Some log entries have Oversize strings located in different tracev3 files.
@@ -302,7 +320,7 @@ fn iterate_chunks(
     strings_data: &[UUIDText],
     shared_strings: &[SharedCacheStrings],
     timesync_data: &[TimesyncBoot],
-    writer: &mut Writer<Box<dyn Write>>,
+    writer: &mut OutputWriter,
     oversize_strings: &mut UnifiedLogData,
 ) -> usize {
     let log_bytes = fs::read(path).unwrap();
@@ -338,71 +356,116 @@ fn iterate_chunks(
     count
 }

-fn construct_writer(output_path: &str) -> Result<Writer<Box<dyn Write>>, Box<dyn Error>> {
-    let writer = if output_path != "" {
-        Box::new(
-            OpenOptions::new()
-                .append(true)
-                .create(true)
-                .open(output_path)?,
-        ) as Box<dyn Write>
-    } else {
-        Box::new(io::stdout()) as Box<dyn Write>
-    };
-    Ok(Writer::from_writer(writer))
+pub struct OutputWriter {
+    writer: OutputWriterEnum,
 }

-// Create csv file and create headers
-fn output_header(writer: &mut Writer<Box<dyn Write>>) -> Result<(), Box<dyn Error>> {
-    writer.write_record(&[
-        "Timestamp",
-        "Event Type",
-        "Log Type",
-        "Subsystem",
-        "Thread ID",
-        "PID",
-        "EUID",
-        "Library",
-        "Library UUID",
-        "Activity ID",
-        "Category",
-        "Process",
-        "Process UUID",
-        "Message",
-        "Raw Message",
-        "Boot UUID",
-        "System Timezone Name",
-    ])?;
-    writer.flush()?;
-    Ok(())
+enum OutputWriterEnum {
+    Csv(Writer<Box<dyn Write>>),
+    Json(Box<dyn Write>),
 }

+impl OutputWriter {
+    pub fn new(output_path: &str, output_format: &str, append: bool) -> Result<OutputWriter, Box<dyn Error>> {
+        let writer: Box<dyn Write> = if output_path != "" {
+            Box::new(
+                OpenOptions::new()
+                    .write(true)
+                    .create(true)
+                    .truncate(!append)
+                    .append(append)
+                    .open(output_path)?,
+            )
+        } else {
+            Box::new(io::stdout())
+        };
+
+        let writer_enum = match output_format {
+            "csv" => {
+                let mut csv_writer = Writer::from_writer(writer);
+                // Write CSV headers
+                csv_writer.write_record(&[
+                    "Timestamp",
+                    "Event Type",
+                    "Log Type",
+                    "Subsystem",
+                    "Thread ID",
+                    "PID",
+                    "EUID",
+                    "Library",
+                    "Library UUID",
+                    "Activity ID",
+                    "Category",
+                    "Process",
+                    "Process UUID",
+                    "Message",
+                    "Raw Message",
+                    "Boot UUID",
+                    "System Timezone Name",
+                ])?;
+                csv_writer.flush()?;
+                OutputWriterEnum::Csv(csv_writer)
+            }
+            "jsonl" => OutputWriterEnum::Json(writer),
+            _ => {
+                eprintln!("Unsupported output format: {}", output_format);
+                std::process::exit(1);
+            },
+        };
+
+        Ok(OutputWriter {
+            writer: writer_enum,
+        })
+    }
+
+    pub fn write_record(&mut self, record: &LogData) -> Result<(), Box<dyn Error>> {
+        match &mut self.writer {
+            OutputWriterEnum::Csv(csv_writer) => {
+                let date_time = Utc.timestamp_nanos(record.time as i64);
+                csv_writer.write_record(&[
+                    date_time.to_rfc3339_opts(SecondsFormat::Millis, true),
+                    record.event_type.to_owned(),
+                    record.log_type.to_owned(),
+                    record.subsystem.to_owned(),
+                    record.thread_id.to_string(),
+                    record.pid.to_string(),
+                    record.euid.to_string(),
+                    record.library.to_owned(),
+                    record.library_uuid.to_owned(),
+                    record.activity_id.to_string(),
+                    record.category.to_owned(),
+                    record.process.to_owned(),
+                    record.process_uuid.to_owned(),
+                    record.message.to_owned(),
+                    record.raw_message.to_owned(),
+                    record.boot_uuid.to_owned(),
+                    record.timezone_name.to_owned(),
+                ])?;
+            }
+            OutputWriterEnum::Json(json_writer) => {
+                writeln!(json_writer, "{}", serde_json::to_string(record).unwrap())?;
+            }
+        }
+        Ok(())
+    }
+
+    pub fn flush(&mut self) -> Result<(), Box<dyn Error>> {
+        match &mut self.writer {
+            OutputWriterEnum::Csv(csv_writer) => csv_writer.flush()?,
+            OutputWriterEnum::Json(json_writer) => json_writer.flush()?,
+        }
+        Ok(())
+    }
+}
+
+
 // Append or create csv file
 fn output(
     results: &Vec<LogData>,
-    writer: &mut Writer<Box<dyn Write>>,
+    writer: &mut OutputWriter,
 ) -> Result<(), Box<dyn Error>> {
     for data in results {
-        let date_time = Utc.timestamp_nanos(data.time as i64);
-        writer.write_record(&[
-            date_time.to_rfc3339_opts(SecondsFormat::Millis, true),
-            data.event_type.to_owned(),
-            data.log_type.to_owned(),
-            data.subsystem.to_owned(),
-            data.thread_id.to_string(),
-            data.pid.to_string(),
-            data.euid.to_string(),
-            data.library.to_owned(),
-            data.library_uuid.to_owned(),
-            data.activity_id.to_string(),
-            data.category.to_owned(),
-            data.process.to_owned(),
-            data.process_uuid.to_owned(),
-            data.message.to_owned(),
-            data.raw_message.to_owned(),
-            data.boot_uuid.to_owned(),
-            data.timezone_name.to_owned(),
-        ])?;
+        writer.write_record(&data)?;
     }
     writer.flush()?;
     Ok(())
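Note on patch 08: OutputWriter hides both backends behind one write_record/flush surface, so the parsing loop no longer knows which format was chosen. A hedged usage sketch of the new API; the file names and the records vector are hypothetical:

    // CSV: the header row is written once inside OutputWriter::new
    let mut csv_out = OutputWriter::new("logs.csv", "csv", false)?;
    // JSONL: one serde_json object per line, appending to an existing file
    let mut jsonl_out = OutputWriter::new("logs.jsonl", "jsonl", true)?;
    for record in &records {
        csv_out.write_record(record)?;
        jsonl_out.write_record(record)?;
    }
    csv_out.flush()?;
    jsonl_out.flush()?;

An empty output path falls back to stdout, matching the old construct_writer behavior.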
From 8e9ffb45839e32ed47546d977273d1e1ab7d3d41 Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas
Date: Tue, 19 Nov 2024 15:43:12 +0100
Subject: [PATCH 09/13] fix: [unifiedlog_iterator] add the forgotten
 serde_json dependency

---
 examples/unifiedlog_iterator/Cargo.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/unifiedlog_iterator/Cargo.toml b/examples/unifiedlog_iterator/Cargo.toml
index 24372c5..a2dcea6 100644
--- a/examples/unifiedlog_iterator/Cargo.toml
+++ b/examples/unifiedlog_iterator/Cargo.toml
@@ -10,5 +10,6 @@ simplelog = "0.12.2"
 csv = "1.3.0"
 chrono = "0.4.38"
 log = "0.4.22"
+serde_json = "1.0.122"
 macos-unifiedlogs = {path = "../../"}
 clap = {version = "4.5.18", features = ["derive"]}
\ No newline at end of file

From 51e6026d2a933b445e1c01b4a20f4c883e853a97 Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 21 Nov 2024 16:20:39 +0100
Subject: [PATCH 10/13] shorter LogPreamble parse implementation using more
 nom combinators

---
 Cargo.toml      |  1 +
 src/lib.rs      |  1 +
 src/preamble.rs | 58 ++++++++++++++++++++++++++-----------------------
 3 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index b29356e..9f0a356 100755
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,6 +22,7 @@ simplelog = "0.12.2"
 csv = "1.3.0"
 chrono = "0.4.38"
 criterion = "0.5.1"
+anyhow = "1.0.93"

 [[bench]]
 name = "high_sierra_benchmark"
diff --git a/src/lib.rs b/src/lib.rs
index 9a4d9f5..c22603d 100755
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -31,6 +31,7 @@
     clippy::checked_conversions,
     clippy::unnecessary_cast
 )]
+
 mod catalog;
 mod chunks;
 mod chunkset;
diff --git a/src/preamble.rs b/src/preamble.rs
index ea8e0da..a6b1052 100644
--- a/src/preamble.rs
+++ b/src/preamble.rs
@@ -5,41 +5,41 @@
 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and limitations under the License.

-use std::mem::size_of;
-
 use nom::{
-    bytes::complete::take,
     number::complete::{le_u32, le_u64},
+    sequence::tuple,
+    IResult,
 };

-#[derive(Debug)]
+#[derive(Debug, Clone, Copy)]
 pub struct LogPreamble {
     pub chunk_tag: u32,
     pub chunk_sub_tag: u32,
     pub chunk_data_size: u64,
 }
+
 impl LogPreamble {
     /// Get the preamble (first 16 bytes of all Unified Log entries (chunks)) to detect the log (chunk) type. Ex: Firehose, Statedump, Simpledump, Catalog, etc
-    pub fn detect_preamble(data: &[u8]) -> nom::IResult<&[u8], LogPreamble> {
-        let mut preamble = LogPreamble {
-            chunk_tag: 0,
-            chunk_sub_tag: 0,
-            chunk_data_size: 0,
-        };
-
-        let (input, tag) = take(size_of::<u32>())(data)?;
-        let (input, sub_tag) = take(size_of::<u32>())(input)?;
-        let (input, data_size) = take(size_of::<u64>())(input)?;
-
-        let (_, trace_tag) = le_u32(tag)?;
-        let (_, trace_sub_tag) = le_u32(sub_tag)?;
-        let (_, trace_data_size) = le_u64(data_size)?;
-
-        preamble.chunk_tag = trace_tag;
-        preamble.chunk_sub_tag = trace_sub_tag;
-        preamble.chunk_data_size = trace_data_size;
+    /// Do not consume the input
+    pub fn detect_preamble(input: &[u8]) -> IResult<&[u8], Self> {
+        let (_, preamble) = Self::parse(input)?;
         Ok((input, preamble))
     }
+
+    /// Get the preamble (first 16 bytes of all Unified Log entries (chunks)) to detect the log (chunk) type. Ex: Firehose, Statedump, Simpledump, Catalog, etc
+    /// And consume the input
+    pub fn parse(input: &[u8]) -> IResult<&[u8], Self> {
+        let (input, (chunk_tag, chunk_sub_tag, chunk_data_size)) =
+            tuple((le_u32, le_u32, le_u64))(input)?;
+        Ok((
+            input,
+            LogPreamble {
+                chunk_tag,
+                chunk_sub_tag,
+                chunk_data_size,
+            },
+        ))
+    }
 }

 #[cfg(test)]
@@ -47,22 +47,26 @@ mod tests {
     use super::LogPreamble;

     #[test]
-    fn test_detect_preamble() {
-        let test_preamble_header = [
+    fn test_detect_preamble() -> anyhow::Result<()> {
+        let test_preamble_header = &[
             0, 16, 0, 0, 17, 0, 0, 0, 208, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
         ];
-        let (_, preamble_data) = LogPreamble::detect_preamble(&test_preamble_header).unwrap();
+        let (output, preamble_data) = LogPreamble::detect_preamble(test_preamble_header)?;
+        assert_eq!(output, test_preamble_header);
         assert_eq!(preamble_data.chunk_tag, 0x1000);
         assert_eq!(preamble_data.chunk_sub_tag, 0x11);
         assert_eq!(preamble_data.chunk_data_size, 0xd0);

-        let test_catalog_chunk = [11, 96, 0, 0, 17, 0, 0, 0, 176, 31, 0, 0, 0, 0, 0, 0];
-        let (_, preamble_data) = LogPreamble::detect_preamble(&test_catalog_chunk).unwrap();
+        let test_catalog_chunk = &[11, 96, 0, 0, 17, 0, 0, 0, 176, 31, 0, 0, 0, 0, 0, 0];
+        let (output, preamble_data) = LogPreamble::parse(test_catalog_chunk)?;
+        assert_eq!(output.len(), 0);
         assert_eq!(preamble_data.chunk_tag, 0x600b);
         assert_eq!(preamble_data.chunk_sub_tag, 0x11);
         assert_eq!(preamble_data.chunk_data_size, 0x1fb0);
+
+        Ok(())
     }
 }
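Note on patch 10: nom's tuple combinator runs its parsers in sequence and returns the remaining input plus a tuple of values, which matches the 4 + 4 + 8 byte preamble layout exactly. A self-contained sketch against nom 7, reusing the catalog-chunk test vector from the patch:

    use nom::number::complete::{le_u32, le_u64};
    use nom::sequence::tuple;
    use nom::IResult;

    fn parse_preamble(input: &[u8]) -> IResult<&[u8], (u32, u32, u64)> {
        tuple((le_u32, le_u32, le_u64))(input)
    }

    fn main() {
        let data = [11, 96, 0, 0, 17, 0, 0, 0, 176, 31, 0, 0, 0, 0, 0, 0];
        let (rest, (tag, sub_tag, size)) = parse_preamble(&data).unwrap();
        assert_eq!((tag, sub_tag, size, rest.len()), (0x600b, 0x11, 0x1fb0, 0));
    }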
From ef252b1b8386536623925b495f14ec85cc4a86ae Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 21 Nov 2024 16:53:50 +0100
Subject: [PATCH 11/13] add Unicode-3.0 to the allowed licenses

---
 deny.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/deny.toml b/deny.toml
index e67fffc..498541b 100644
--- a/deny.toml
+++ b/deny.toml
@@ -105,6 +105,7 @@ allow = [
     "BSL-1.0",
    "Unlicense",
     "Unicode-DFS-2016",
+    "Unicode-3.0",
 ]
 # List of explicitly disallowed licenses
 # See https://spdx.org/licenses/ for list of possible licenses
From c9eb4be2c0afc86e9ac4f33096ca650498ccbc2c Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas
Date: Wed, 27 Nov 2024 14:01:08 +0100
Subject: [PATCH 12/13] fix: [unifiedlog_iterator] syntax corrections + parser
 fix

---
 examples/unifiedlog_iterator/src/main.rs | 45 ++++++++++++------------
 src/parser.rs                            |  2 +-
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/examples/unifiedlog_iterator/src/main.rs b/examples/unifiedlog_iterator/src/main.rs
index 54bc75a..3e08d64 100644
--- a/examples/unifiedlog_iterator/src/main.rs
+++ b/examples/unifiedlog_iterator/src/main.rs
@@ -68,10 +68,9 @@ fn main() {
         args.format.clone()
     };

-
     let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap();

-    if args.input != "" {
+    if !args.input.is_empty() {
         parse_log_archive(&args.input, &mut writer);
     } else if args.live != "false" {
         parse_live_system(&mut writer);
@@ -160,7 +159,7 @@ fn parse_trace_file(
             eprintln!("Parsing: {}", full_path);

             if data.path().exists() {
-                let count = iterate_chunks( 
+                let count = iterate_chunks(
                     &full_path,
                     &mut missing_data,
                     string_results,
@@ -190,7 +189,7 @@ fn parse_trace_file(
             eprintln!("Parsing: {}", full_path);

             if data.path().exists() {
-                let count = iterate_chunks( 
+                let count = iterate_chunks(
                     &full_path,
                     &mut missing_data,
                     string_results,
@@ -220,7 +219,7 @@ fn parse_trace_file(
             eprintln!("Parsing: {}", full_path);

             if data.path().exists() {
-                let count = iterate_chunks( 
+                let count = iterate_chunks(
                     &full_path,
                     &mut missing_data,
                     string_results,
@@ -249,7 +248,7 @@ fn parse_trace_file(
             eprintln!("Parsing: {}", full_path);

             if data.path().exists() {
-                let count = iterate_chunks( 
+                let count = iterate_chunks(
                     &full_path,
                     &mut missing_data,
                     string_results,
@@ -273,7 +272,7 @@ fn parse_trace_file(
     if archive_path.exists() {
         eprintln!("Parsing: logdata.LiveData.tracev3");

-        let count = iterate_chunks( 
+        let count = iterate_chunks(
             &archive_path.display().to_string(),
             &mut missing_data,
             string_results,
@@ -294,8 +293,7 @@ fn parse_trace_file(
     // Since we have all Oversize entries now. Go through any log entries that we were not able to build before
     for mut leftover_data in missing_data {
         // Add all of our previous oversize data to logs for lookups
-        leftover_data
-            .oversize = oversize_strings.oversize.clone();
+        leftover_data.oversize = oversize_strings.oversize.clone();

         // Exclude_missing = false
         // If we fail to find any missing data its probably due to the logs rolling
@@ -346,7 +344,10 @@ fn iterate_chunks(
         count += results.len();
         oversize_strings.oversize = chunk.oversize;
         output(&results, writer).unwrap();
-        if missing_logs.catalog_data.is_empty() && missing_logs.header.is_empty() && missing_logs.oversize.is_empty() {
+        if missing_logs.catalog_data.is_empty()
+            && missing_logs.header.is_empty()
+            && missing_logs.oversize.is_empty()
+        {
             continue;
         }
         // Track possible missing log data due to oversize strings being in another file
@@ -361,13 +362,17 @@ pub struct OutputWriter {
 }

 enum OutputWriterEnum {
-    Csv(Writer<Box<dyn Write>>),
+    Csv(Box<Writer<Box<dyn Write>>>),
     Json(Box<dyn Write>),
 }

 impl OutputWriter {
-    pub fn new(output_path: &str, output_format: &str, append: bool) -> Result<OutputWriter, Box<dyn Error>> {
-        let writer: Box<dyn Write> = if output_path != "" {
+    pub fn new(
+        output_path: &str,
+        output_format: &str,
+        append: bool,
+    ) -> Result<OutputWriter, Box<dyn Error>> {
+        let writer: Box<dyn Write> = if !output_path.is_empty() {
             Box::new(
                 OpenOptions::new()
@@ -384,7 +389,7 @@ impl OutputWriter {
             "csv" => {
                 let mut csv_writer = Writer::from_writer(writer);
                 // Write CSV headers
-                csv_writer.write_record(&[
+                csv_writer.write_record([
                     "Timestamp",
                     "Event Type",
                     "Log Type",
@@ -404,13 +409,13 @@ impl OutputWriter {
                     "System Timezone Name",
                 ])?;
                 csv_writer.flush()?;
-                OutputWriterEnum::Csv(csv_writer)
+                OutputWriterEnum::Csv(Box::new(csv_writer))
             }
             "jsonl" => OutputWriterEnum::Json(writer),
             _ => {
                 eprintln!("Unsupported output format: {}", output_format);
                 std::process::exit(1);
-            },
+            }
         };
@@ -458,14 +463,10 @@ impl OutputWriter {
     }
 }

-
 // Append or create csv file
-fn output(
-    results: &Vec<LogData>,
-    writer: &mut OutputWriter,
-) -> Result<(), Box<dyn Error>> {
+fn output(results: &Vec<LogData>, writer: &mut OutputWriter) -> Result<(), Box<dyn Error>> {
     for data in results {
-        writer.write_record(&data)?;
+        writer.write_record(data)?;
     }
     writer.flush()?;
     Ok(())
diff --git a/src/parser.rs b/src/parser.rs
index 09cc789..618b752 100755
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -105,7 +105,7 @@ pub fn collect_strings(path: &str) -> Result<Vec<UUIDText>, ParserError> {
     let entries = paths
         .flat_map(|path| {
-            path.inspect_err(|err| {
+            path.map_err(|err| {
                 error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",)
             })
             .ok()
From 200ee974f8d7f5a83764d34aabbc4336abeb18fc Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas
Date: Wed, 27 Nov 2024 14:02:31 +0100
Subject: [PATCH 13/13] fix: [parser] cargo fmt fix

---
 src/parser.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 618b752..dbfdafa 100755
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -105,9 +105,9 @@ pub fn collect_strings(path: &str) -> Result<Vec<UUIDText>, ParserError> {
     let entries = paths
         .flat_map(|path| {
-            path.map_err(|err| {
-                error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",)
-            })
+            path.map_err(
+                |err| error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",),
+            )
             .ok()
         })
         .collect::<Vec<_>>();
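Note on patch 12: the parser fix swaps inspect_err for map_err, plausibly because Result::inspect_err was only stabilized in Rust 1.76 and map_err keeps the crate buildable on older toolchains. Both spellings log and then discard the error; a minimal sketch of the equivalence, outside the crate:

    // map_err turns the error into the closure's () return value, ok() then drops it
    let parsed: Option<u32> = "42".parse::<u32>()
        .map_err(|err| eprintln!("parse failed: {err:?}"))
        .ok();
    assert_eq!(parsed, Some(42));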