From aeb58ae1ea1e1376c34d32335b77d4dc003e33f4 Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 14 Nov 2024 14:38:17 +0100
Subject: [PATCH 01/13] add a rustfmt.toml file matching the existing
 formatting defaults in this repo; helpful when using this repo as a git
 submodule of another workspace

---
 rustfmt.toml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 rustfmt.toml

diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..57d44b4
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,5 @@
+newline_style = "Unix"
+tab_spaces = 4
+max_width = 100
+chain_width = 60
+use_small_heuristics = "Default"

From 897f0aa828cb66f75c24247dfeaf208052365b99 Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 14 Nov 2024 14:38:34 +0100
Subject: [PATCH 02/13] fix file name case typo in test data path

---
 src/dsc.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dsc.rs b/src/dsc.rs
index 3391846..43e837e 100755
--- a/src/dsc.rs
+++ b/src/dsc.rs
@@ -317,7 +317,7 @@ mod tests {
     #[should_panic(expected = "Incomplete(Unknown)")]
     fn test_bad_file() {
         let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
-        test_path.push("tests/test_data/Bad Data/DSC/badfile");
+        test_path.push("tests/test_data/Bad Data/DSC/Badfile");

         let buffer = fs::read(test_path).unwrap();
         let (_, _) = SharedCacheStrings::parse_dsc(&buffer).unwrap();

From ce00e4c3539ef0eb2703e5e4e508613138746ea9 Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 14 Nov 2024 14:39:33 +0100
Subject: [PATCH 03/13] ignore filesystem tests on non-macOS systems; update
 test values to match the downloadable test data

---
 src/parser.rs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 85af918..7942880 100755
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -375,18 +375,21 @@ mod tests {
     use std::path::PathBuf;

     #[test]
+    #[cfg(target_os = "macos")]
     fn test_collect_strings_system() {
         let uuidtext_results = collect_strings_system().unwrap();
         assert!(uuidtext_results.len() > 100);
     }

     #[test]
+    #[cfg(target_os = "macos")]
     fn test_collect_timesync_system() {
         let timesync_results = collect_timesync_system().unwrap();
         assert!(timesync_results.len() > 1);
     }

     #[test]
+    #[cfg(target_os = "macos")]
     fn test_collect_timesync_archive() {
         let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
         test_path.push("tests/test_data/system_logs_big_sur.logarchive/timesync");
@@ -409,6 +412,7 @@ mod tests {
     }

     #[test]
+    #[cfg(target_os = "macos")]
     fn test_collect_shared_strings_system() {
         let shared_strings_results = collect_shared_strings_system().unwrap();
         assert!(shared_strings_results[0].ranges.len() > 1);
@@ -444,9 +448,9 @@ mod tests {
         let strings_results = collect_strings(&test_path.display().to_string()).unwrap();
         assert_eq!(strings_results.len(), 536);
         assert_eq!(strings_results[0].signature, 1719109785);
-        assert_eq!(strings_results[0].uuid, "5283D7FC2531558F2C1ACE9AF26A0F");
+        assert_eq!(strings_results[0].uuid, "D9B97EA2CD39C7A9AF1888E041B9E1");
         assert_eq!(strings_results[0].entry_descriptors.len(), 2);
-        assert_eq!(strings_results[0].footer_data.len(), 48096);
+        assert_eq!(strings_results[0].footer_data.len(), 238974);
         assert_eq!(strings_results[0].number_entries, 2);
         assert_eq!(strings_results[0].unknown_minor_version, 1);
         assert_eq!(strings_results[0].unknown_major_version, 2);
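Note on patch 03: gating the filesystem-dependent tests behind #[cfg(target_os = "macos")] removes them from compilation entirely on other targets, so they neither run nor need their macOS-only helpers to build. A minimal sketch of the pattern, with a hypothetical test body that is not part of this series:

    #[test]
    #[cfg(target_os = "macos")] // compiled and run only on macOS hosts
    fn test_live_log_store_exists() {
        // hypothetical check against the live unified log store
        assert!(std::path::Path::new("/var/db/diagnostics").exists());
    }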
From 9994b2410634f3c162e126285071a392f5e024c8 Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 14 Nov 2024 15:16:52 +0100
Subject: [PATCH 04/13] revert test_collect_strings_archive assertions to the
 previous values

---
 src/parser.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 7942880..5750375 100755
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -448,9 +448,9 @@ mod tests {
         let strings_results = collect_strings(&test_path.display().to_string()).unwrap();
         assert_eq!(strings_results.len(), 536);
         assert_eq!(strings_results[0].signature, 1719109785);
-        assert_eq!(strings_results[0].uuid, "D9B97EA2CD39C7A9AF1888E041B9E1");
+        assert_eq!(strings_results[0].uuid, "5283D7FC2531558F2C1ACE9AF26A0F");
         assert_eq!(strings_results[0].entry_descriptors.len(), 2);
-        assert_eq!(strings_results[0].footer_data.len(), 238974);
+        assert_eq!(strings_results[0].footer_data.len(), 48096);
         assert_eq!(strings_results[0].number_entries, 2);
         assert_eq!(strings_results[0].unknown_minor_version, 1);
         assert_eq!(strings_results[0].unknown_major_version, 2);

From 6f76f6d59ba5cdb923854f0af3d928692a8697ed Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 14 Nov 2024 15:40:44 +0100
Subject: [PATCH 05/13] make collect_strings directory ordering deterministic

---
 src/parser.rs | 51 +++++++++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 5750375..9d1d45b 100755
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -100,31 +100,24 @@ pub fn build_log(
 pub fn collect_strings(path: &str) -> Result<Vec<UUIDText>, ParserError> {
     let paths_results = fs::read_dir(path);

-    let paths = match paths_results {
-        Ok(path) => path,
-        Err(err) => {
-            error!(
-                "[macos-unifiedlogs] Failed to read directory path: {:?}",
-                err
-            );
-            return Err(ParserError::Dir);
-        }
-    };
-
-    let mut uuidtext_vec: Vec<UUIDText> = Vec::new();
+    let paths = paths_results.map_err(|err| {
+        error!("[macos-unifiedlogs] Failed to read directory path: {err:?}");
+        ParserError::Dir
+    })?;
+
+    let mut entries = paths
+        .flat_map(|path| {
+            path.inspect_err(|err| {
+                error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",)
+            })
+            .ok()
+        })
+        .collect::<Vec<_>>();
+    entries.sort_by(|a, b| a.file_name().as_os_str().cmp(b.file_name().as_os_str()));
+
+    let mut uuidtext_vec: Vec<UUIDText> = Vec::with_capacity(entries.len());
     // Start process to read a directory containing subdirectories that contain the uuidtext files
-    for path in paths {
-        let dir_entry = match path {
-            Ok(entry) => entry,
-            Err(err) => {
-                error!(
-                    "[macos-unifiedlogs] Failed to get directory entry: {:?}",
-                    err
-                );
-                continue;
-            }
-        };
-
+    for dir_entry in entries {
         let type_results = dir_entry.file_type();
         let entry_type = match type_results {
             Ok(dir_type) => dir_type,
@@ -448,12 +441,18 @@ mod tests {
         let strings_results = collect_strings(&test_path.display().to_string()).unwrap();
         assert_eq!(strings_results.len(), 536);
         assert_eq!(strings_results[0].signature, 1719109785);
-        assert_eq!(strings_results[0].uuid, "5283D7FC2531558F2C1ACE9AF26A0F");
+        assert_eq!(strings_results[0].uuid, "B6B65F4DC53ED38FEB0DDF61809853");
         assert_eq!(strings_results[0].entry_descriptors.len(), 2);
-        assert_eq!(strings_results[0].footer_data.len(), 48096);
+        assert_eq!(strings_results[0].footer_data.len(), 1707);
         assert_eq!(strings_results[0].number_entries, 2);
         assert_eq!(strings_results[0].unknown_minor_version, 1);
         assert_eq!(strings_results[0].unknown_major_version, 2);
+
+        assert_eq!(strings_results[1].uuid, "D9B97EA2CD39C7A9AF1888E041B9E1");
+        assert_eq!(strings_results[1].footer_data.len(), 238974);
+
+        assert_eq!(strings_results[2].uuid, "2578ECF07936A6A882574764C7C785");
+        assert_eq!(strings_results[2].footer_data.len(), 68714);
     }

     #[test]
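Note on patch 05: the refactor folds two verbose match blocks into combinator chains and sorts the directory entries so results stop depending on readdir order. A stripped-down sketch of the same pattern using only the standard library (the function name and plain io::Result error type are illustrative, not the crate's API):

    use std::fs;

    fn sorted_entries(path: &str) -> std::io::Result<Vec<fs::DirEntry>> {
        // propagate the read_dir error, silently skip unreadable entries
        let mut entries = fs::read_dir(path)?
            .flat_map(|entry| entry.ok())
            .collect::<Vec<_>>();
        // deterministic order: sort by file name before any positional use
        entries.sort_by(|a, b| a.file_name().cmp(&b.file_name()));
        Ok(entries)
    }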
From 7c3ece8a434049ca4534c5373af535c49fb7bec9 Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 14 Nov 2024 15:42:05 +0100
Subject: [PATCH 06/13] made clippy happy

---
 src/parser.rs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 9d1d45b..b526785 100755
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -361,10 +361,12 @@ pub fn collect_timesync(path: &str) -> Result<Vec<TimesyncBoot>, ParserError> {
 #[cfg(test)]
 mod tests {
     use crate::parser::{
-        build_log, collect_shared_strings, collect_shared_strings_system, collect_strings,
-        collect_strings_system, collect_timesync, collect_timesync_system, parse_log,
+        build_log, collect_shared_strings, collect_strings, collect_timesync, parse_log,
+    };
+    #[cfg(target_os = "macos")]
+    use crate::parser::{
+        collect_shared_strings_system, collect_strings_system, collect_timesync_system,
     };
-
     use std::path::PathBuf;

     #[test]
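Note on patch 06: the warning it silences follows from patch 03; once the *_system tests vanish on non-macOS targets, their imports become unused there, so the use item gets the same cfg gate as its only callers. The shape of the fix, shortened to a single import:

    #[cfg(target_os = "macos")] // import exists only where its callers exist
    use crate::parser::collect_strings_system;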
"00B3D870FB3AE8BDC1BA3A60D0B9A0"); + assert_eq!(strings_results[1].footer_data.len(), 2164); - assert_eq!(strings_results[2].uuid, "2578ECF07936A6A882574764C7C785"); - assert_eq!(strings_results[2].footer_data.len(), 68714); + assert_eq!(strings_results[2].uuid, "014C44534A3A748476ABD88D376918"); + assert_eq!(strings_results[2].footer_data.len(), 19011); } #[test] From 604d39c7de69cbe42a8c5562d634b6512d4dfb21 Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Tue, 19 Nov 2024 15:33:52 +0100 Subject: [PATCH 08/13] chg: [unifiedlog_iterator] support jsonl output and more params --- examples/unifiedlog_iterator/src/main.rs | 189 +++++++++++++++-------- 1 file changed, 126 insertions(+), 63 deletions(-) diff --git a/examples/unifiedlog_iterator/src/main.rs b/examples/unifiedlog_iterator/src/main.rs index bd82e41..54bc75a 100644 --- a/examples/unifiedlog_iterator/src/main.rs +++ b/examples/unifiedlog_iterator/src/main.rs @@ -40,6 +40,15 @@ struct Args { /// Path to output file. Any directories must already exist #[clap(short, long, default_value = "")] output: String, + + /// Output format. Options: csv, jsonl. Default is autodetect. + #[clap(short, long, default_value = "auto")] + format: String, + + /// Append to output file + /// If false, will overwrite output file + #[clap(short, long, default_value = "false")] + append: bool, } fn main() { @@ -49,9 +58,18 @@ fn main() { .expect("Failed to initialize simple logger"); let args = Args::parse(); - let mut writer = construct_writer(&args.output).unwrap(); - // Create headers for CSV file - output_header(&mut writer).unwrap(); + let output_format = if args.format.is_empty() || args.format == "auto" { + std::path::Path::new(&args.output) + .extension() + .and_then(std::ffi::OsStr::to_str) + .unwrap_or("csv") + .to_string() + } else { + args.format.clone() + }; + + + let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap(); if args.input != "" { parse_log_archive(&args.input, &mut writer); @@ -61,7 +79,7 @@ fn main() { } // Parse a provided directory path. Currently, expect the path to follow macOS log collect structure -fn parse_log_archive(path: &str, writer: &mut Writer>) { +fn parse_log_archive(path: &str, writer: &mut OutputWriter) { let mut archive_path = PathBuf::from(path); // Parse all UUID files which contain strings and other metadata @@ -92,7 +110,7 @@ fn parse_log_archive(path: &str, writer: &mut Writer>) { } // Parse a live macOS system -fn parse_live_system(writer: &mut Writer>) { +fn parse_live_system(writer: &mut OutputWriter) { let strings = collect_strings_system().unwrap(); let shared_strings = collect_shared_strings_system().unwrap(); let timesync_data = collect_timesync_system().unwrap(); @@ -115,7 +133,7 @@ fn parse_trace_file( shared_strings_results: &[SharedCacheStrings], timesync_data: &[TimesyncBoot], path: &str, - writer: &mut Writer>, + writer: &mut OutputWriter, ) { // We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries) // Some log entries have Oversize strings located in different tracev3 files. 
From 604d39c7de69cbe42a8c5562d634b6512d4dfb21 Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas
Date: Tue, 19 Nov 2024 15:33:52 +0100
Subject: [PATCH 08/13] chg: [unifiedlog_iterator] support jsonl output and
 more params

---
 examples/unifiedlog_iterator/src/main.rs | 189 +++++++++++++++--------
 1 file changed, 126 insertions(+), 63 deletions(-)

diff --git a/examples/unifiedlog_iterator/src/main.rs b/examples/unifiedlog_iterator/src/main.rs
index bd82e41..54bc75a 100644
--- a/examples/unifiedlog_iterator/src/main.rs
+++ b/examples/unifiedlog_iterator/src/main.rs
@@ -40,6 +40,15 @@ struct Args {
     /// Path to output file. Any directories must already exist
     #[clap(short, long, default_value = "")]
     output: String,
+
+    /// Output format. Options: csv, jsonl. Default is autodetect.
+    #[clap(short, long, default_value = "auto")]
+    format: String,
+
+    /// Append to output file
+    /// If false, will overwrite output file
+    #[clap(short, long, default_value = "false")]
+    append: bool,
 }
@@ -49,9 +58,18 @@ fn main() {
         .expect("Failed to initialize simple logger");

     let args = Args::parse();
-    let mut writer = construct_writer(&args.output).unwrap();
-    // Create headers for CSV file
-    output_header(&mut writer).unwrap();
+    let output_format = if args.format.is_empty() || args.format == "auto" {
+        std::path::Path::new(&args.output)
+            .extension()
+            .and_then(std::ffi::OsStr::to_str)
+            .unwrap_or("csv")
+            .to_string()
+    } else {
+        args.format.clone()
+    };
+
+
+    let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap();

     if args.input != "" {
         parse_log_archive(&args.input, &mut writer);
@@ -61,7 +79,7 @@ fn main() {
 }

 // Parse a provided directory path. Currently, expect the path to follow macOS log collect structure
-fn parse_log_archive(path: &str, writer: &mut Writer<Box<dyn Write>>) {
+fn parse_log_archive(path: &str, writer: &mut OutputWriter) {
     let mut archive_path = PathBuf::from(path);

     // Parse all UUID files which contain strings and other metadata
@@ -92,7 +110,7 @@ fn parse_log_archive(path: &str, writer: &mut Writer<Box<dyn Write>>) {
 }

 // Parse a live macOS system
-fn parse_live_system(writer: &mut Writer<Box<dyn Write>>) {
+fn parse_live_system(writer: &mut OutputWriter) {
     let strings = collect_strings_system().unwrap();
     let shared_strings = collect_shared_strings_system().unwrap();
     let timesync_data = collect_timesync_system().unwrap();
@@ -115,7 +133,7 @@ fn parse_live_system(writer: &mut Writer<Box<dyn Write>>) {
     shared_strings_results: &[SharedCacheStrings],
     timesync_data: &[TimesyncBoot],
     path: &str,
-    writer: &mut Writer<Box<dyn Write>>,
+    writer: &mut OutputWriter,
 ) {
     // We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries)
     // Some log entries have Oversize strings located in different tracev3 files.
@@ -302,7 +320,7 @@ fn iterate_chunks(
     strings_data: &[UUIDText],
     shared_strings: &[SharedCacheStrings],
     timesync_data: &[TimesyncBoot],
-    writer: &mut Writer<Box<dyn Write>>,
+    writer: &mut OutputWriter,
     oversize_strings: &mut UnifiedLogData,
 ) -> usize {
     let log_bytes = fs::read(path).unwrap();
@@ -338,71 +356,116 @@ fn iterate_chunks(
     count
 }

-fn construct_writer(output_path: &str) -> Result<Writer<Box<dyn Write>>, Box<dyn Error>> {
-    let writer = if output_path != "" {
-        Box::new(
-            OpenOptions::new()
-                .append(true)
-                .create(true)
-                .open(output_path)?,
-        ) as Box<dyn Write>
-    } else {
-        Box::new(io::stdout()) as Box<dyn Write>
-    };
-    Ok(Writer::from_writer(writer))
+pub struct OutputWriter {
+    writer: OutputWriterEnum,
 }

-// Create csv file and create headers
-fn output_header(writer: &mut Writer<Box<dyn Write>>) -> Result<(), Box<dyn Error>> {
-    writer.write_record(&[
-        "Timestamp",
-        "Event Type",
-        "Log Type",
-        "Subsystem",
-        "Thread ID",
-        "PID",
-        "EUID",
-        "Library",
-        "Library UUID",
-        "Activity ID",
-        "Category",
-        "Process",
-        "Process UUID",
-        "Message",
-        "Raw Message",
-        "Boot UUID",
-        "System Timezone Name",
-    ])?;
-    writer.flush()?;
-    Ok(())
+enum OutputWriterEnum {
+    Csv(Writer<Box<dyn Write>>),
+    Json(Box<dyn Write>),
 }

+impl OutputWriter {
+    pub fn new(output_path: &str, output_format: &str, append: bool) -> Result<OutputWriter, Box<dyn Error>> {
+        let writer: Box<dyn Write> = if output_path != "" {
+            Box::new(
+                OpenOptions::new()
+                    .write(true)
+                    .create(true)
+                    .truncate(!append)
+                    .append(append)
+                    .open(output_path)?,
+            )
+        } else {
+            Box::new(io::stdout())
+        };
+
+        let writer_enum = match output_format {
+            "csv" => {
+                let mut csv_writer = Writer::from_writer(writer);
+                // Write CSV headers
+                csv_writer.write_record(&[
+                    "Timestamp",
+                    "Event Type",
+                    "Log Type",
+                    "Subsystem",
+                    "Thread ID",
+                    "PID",
+                    "EUID",
+                    "Library",
+                    "Library UUID",
+                    "Activity ID",
+                    "Category",
+                    "Process",
+                    "Process UUID",
+                    "Message",
+                    "Raw Message",
+                    "Boot UUID",
+                    "System Timezone Name",
+                ])?;
+                csv_writer.flush()?;
+                OutputWriterEnum::Csv(csv_writer)
+            }
+            "jsonl" => OutputWriterEnum::Json(writer),
+            _ => {
+                eprintln!("Unsupported output format: {}", output_format);
+                std::process::exit(1);
+            },
+        };
+
+        Ok(OutputWriter {
+            writer: writer_enum,
+        })
+    }
+
+    pub fn write_record(&mut self, record: &LogData) -> Result<(), Box<dyn Error>> {
+        match &mut self.writer {
+            OutputWriterEnum::Csv(csv_writer) => {
+                let date_time = Utc.timestamp_nanos(record.time as i64);
+                csv_writer.write_record(&[
+                    date_time.to_rfc3339_opts(SecondsFormat::Millis, true),
+                    record.event_type.to_owned(),
+                    record.log_type.to_owned(),
+                    record.subsystem.to_owned(),
+                    record.thread_id.to_string(),
+                    record.pid.to_string(),
+                    record.euid.to_string(),
+                    record.library.to_owned(),
+                    record.library_uuid.to_owned(),
+                    record.activity_id.to_string(),
+                    record.category.to_owned(),
+                    record.process.to_owned(),
+                    record.process_uuid.to_owned(),
+                    record.message.to_owned(),
+                    record.raw_message.to_owned(),
+                    record.boot_uuid.to_owned(),
+                    record.timezone_name.to_owned(),
+                ])?;
+            }
+            OutputWriterEnum::Json(json_writer) => {
+                writeln!(json_writer, "{}", serde_json::to_string(record).unwrap())?;
+            }
+        }
+        Ok(())
+    }
+
+    pub fn flush(&mut self) -> Result<(), Box<dyn Error>> {
+        match &mut self.writer {
+            OutputWriterEnum::Csv(csv_writer) => csv_writer.flush()?,
+            OutputWriterEnum::Json(json_writer) => json_writer.flush()?,
+        }
+        Ok(())
+    }
+}
+
+
 // Append or create csv file
 fn output(
     results: &Vec<LogData>,
-    writer: &mut Writer<Box<dyn Write>>,
+    writer: &mut OutputWriter,
 ) -> Result<(), Box<dyn Error>> {
     for data in results {
-        let date_time = Utc.timestamp_nanos(data.time as i64);
-        writer.write_record(&[
-            date_time.to_rfc3339_opts(SecondsFormat::Millis, true),
-            data.event_type.to_owned(),
-            data.log_type.to_owned(),
-            data.subsystem.to_owned(),
-            data.thread_id.to_string(),
-            data.pid.to_string(),
-            data.euid.to_string(),
-            data.library.to_owned(),
-            data.library_uuid.to_owned(),
-            data.activity_id.to_string(),
-            data.category.to_owned(),
-            data.process.to_owned(),
-            data.process_uuid.to_owned(),
-            data.message.to_owned(),
-            data.raw_message.to_owned(),
-            data.boot_uuid.to_owned(),
-            data.timezone_name.to_owned(),
-        ])?;
+        writer.write_record(&data)?;
     }
     writer.flush()?;
     Ok(())
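Note on patch 08: OutputWriter hides both backends behind one write_record/flush surface, so the parsing loop no longer knows which format was chosen. A hedged usage sketch of the new API; the file names and the records vector are hypothetical:

    // CSV: the header row is written once inside OutputWriter::new
    let mut csv_out = OutputWriter::new("logs.csv", "csv", false)?;
    // JSONL: one serde_json object per line, appending to an existing file
    let mut jsonl_out = OutputWriter::new("logs.jsonl", "jsonl", true)?;
    for record in &records {
        csv_out.write_record(record)?;
        jsonl_out.write_record(record)?;
    }
    csv_out.flush()?;
    jsonl_out.flush()?;

An empty output path falls back to stdout, matching the old construct_writer behavior.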
From 8e9ffb45839e32ed47546d977273d1e1ab7d3d41 Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas
Date: Tue, 19 Nov 2024 15:43:12 +0100
Subject: [PATCH 09/13] fix: [unifiedlog_iterator] add the forgotten
 serde_json dependency

---
 examples/unifiedlog_iterator/Cargo.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/unifiedlog_iterator/Cargo.toml b/examples/unifiedlog_iterator/Cargo.toml
index 24372c5..a2dcea6 100644
--- a/examples/unifiedlog_iterator/Cargo.toml
+++ b/examples/unifiedlog_iterator/Cargo.toml
@@ -10,5 +10,6 @@ simplelog = "0.12.2"
 csv = "1.3.0"
 chrono = "0.4.38"
 log = "0.4.22"
+serde_json = "1.0.122"
 macos-unifiedlogs = {path = "../../"}
 clap = {version = "4.5.18", features = ["derive"]}
\ No newline at end of file

From 51e6026d2a933b445e1c01b4a20f4c883e853a97 Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 21 Nov 2024 16:20:39 +0100
Subject: [PATCH 10/13] shorter LogPreamble parse implementation using more
 nom combinators

---
 Cargo.toml      |  1 +
 src/lib.rs      |  1 +
 src/preamble.rs | 58 ++++++++++++++++++++++++++-----------------------
 3 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index b29356e..9f0a356 100755
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,6 +22,7 @@ simplelog = "0.12.2"
 csv = "1.3.0"
 chrono = "0.4.38"
 criterion = "0.5.1"
+anyhow = "1.0.93"

 [[bench]]
 name = "high_sierra_benchmark"
diff --git a/src/lib.rs b/src/lib.rs
index 9a4d9f5..c22603d 100755
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -31,6 +31,7 @@
     clippy::checked_conversions,
     clippy::unnecessary_cast
 )]
+
 mod catalog;
 mod chunks;
 mod chunkset;
diff --git a/src/preamble.rs b/src/preamble.rs
index ea8e0da..a6b1052 100644
--- a/src/preamble.rs
+++ b/src/preamble.rs
@@ -5,41 +5,41 @@
 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and limitations under the License.

-use std::mem::size_of;
-
 use nom::{
-    bytes::complete::take,
     number::complete::{le_u32, le_u64},
+    sequence::tuple,
+    IResult,
 };

-#[derive(Debug)]
+#[derive(Debug, Clone, Copy)]
 pub struct LogPreamble {
     pub chunk_tag: u32,
     pub chunk_sub_tag: u32,
     pub chunk_data_size: u64,
 }
+
 impl LogPreamble {
     /// Get the preamble (first 16 bytes of all Unified Log entries (chunks)) to detect the log (chunk) type. Ex: Firehose, Statedump, Simpledump, Catalog, etc
-    pub fn detect_preamble(data: &[u8]) -> nom::IResult<&[u8], LogPreamble> {
-        let mut preamble = LogPreamble {
-            chunk_tag: 0,
-            chunk_sub_tag: 0,
-            chunk_data_size: 0,
-        };
-
-        let (input, tag) = take(size_of::<u32>())(data)?;
-        let (input, sub_tag) = take(size_of::<u32>())(input)?;
-        let (input, data_size) = take(size_of::<u64>())(input)?;
-
-        let (_, trace_tag) = le_u32(tag)?;
-        let (_, trace_sub_tag) = le_u32(sub_tag)?;
-        let (_, trace_data_size) = le_u64(data_size)?;
-
-        preamble.chunk_tag = trace_tag;
-        preamble.chunk_sub_tag = trace_sub_tag;
-        preamble.chunk_data_size = trace_data_size;
+    /// Do not consume the input
+    pub fn detect_preamble(input: &[u8]) -> IResult<&[u8], Self> {
+        let (_, preamble) = Self::parse(input)?;
         Ok((input, preamble))
     }
+
+    /// Get the preamble (first 16 bytes of all Unified Log entries (chunks)) to detect the log (chunk) type. Ex: Firehose, Statedump, Simpledump, Catalog, etc
+    /// And consume the input
+    pub fn parse(input: &[u8]) -> IResult<&[u8], Self> {
+        let (input, (chunk_tag, chunk_sub_tag, chunk_data_size)) =
+            tuple((le_u32, le_u32, le_u64))(input)?;
+        Ok((
+            input,
+            LogPreamble {
+                chunk_tag,
+                chunk_sub_tag,
+                chunk_data_size,
+            },
+        ))
+    }
 }

 #[cfg(test)]
@@ -47,22 +47,26 @@ mod tests {
     use super::LogPreamble;

     #[test]
-    fn test_detect_preamble() {
-        let test_preamble_header = [
+    fn test_detect_preamble() -> anyhow::Result<()> {
+        let test_preamble_header = &[
             0, 16, 0, 0, 17, 0, 0, 0, 208, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
         ];
-        let (_, preamble_data) = LogPreamble::detect_preamble(&test_preamble_header).unwrap();
+        let (output, preamble_data) = LogPreamble::detect_preamble(test_preamble_header)?;
+        assert_eq!(output, test_preamble_header);
         assert_eq!(preamble_data.chunk_tag, 0x1000);
         assert_eq!(preamble_data.chunk_sub_tag, 0x11);
         assert_eq!(preamble_data.chunk_data_size, 0xd0);

-        let test_catalog_chunk = [11, 96, 0, 0, 17, 0, 0, 0, 176, 31, 0, 0, 0, 0, 0, 0];
-        let (_, preamble_data) = LogPreamble::detect_preamble(&test_catalog_chunk).unwrap();
+        let test_catalog_chunk = &[11, 96, 0, 0, 17, 0, 0, 0, 176, 31, 0, 0, 0, 0, 0, 0];
+        let (output, preamble_data) = LogPreamble::parse(test_catalog_chunk)?;
+        assert_eq!(output.len(), 0);
         assert_eq!(preamble_data.chunk_tag, 0x600b);
         assert_eq!(preamble_data.chunk_sub_tag, 0x11);
         assert_eq!(preamble_data.chunk_data_size, 0x1fb0);
+
+        Ok(())
     }
 }
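Note on patch 10: nom's tuple combinator runs its parsers in sequence and returns the remaining input plus a tuple of values, which matches the 4 + 4 + 8 byte preamble layout exactly. A self-contained sketch against nom 7, reusing the catalog-chunk test vector from the patch:

    use nom::number::complete::{le_u32, le_u64};
    use nom::sequence::tuple;
    use nom::IResult;

    fn parse_preamble(input: &[u8]) -> IResult<&[u8], (u32, u32, u64)> {
        tuple((le_u32, le_u32, le_u64))(input)
    }

    fn main() {
        let data = [11, 96, 0, 0, 17, 0, 0, 0, 176, 31, 0, 0, 0, 0, 0, 0];
        let (rest, (tag, sub_tag, size)) = parse_preamble(&data).unwrap();
        assert_eq!((tag, sub_tag, size, rest.len()), (0x600b, 0x11, 0x1fb0, 0));
    }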
From ef252b1b8386536623925b495f14ec85cc4a86ae Mon Sep 17 00:00:00 2001
From: jrx
Date: Thu, 21 Nov 2024 16:53:50 +0100
Subject: [PATCH 11/13] add Unicode-3.0 to the allowed licenses

---
 deny.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/deny.toml b/deny.toml
index e67fffc..498541b 100644
--- a/deny.toml
+++ b/deny.toml
@@ -105,6 +105,7 @@ allow = [
     "BSL-1.0",
    "Unlicense",
     "Unicode-DFS-2016",
+    "Unicode-3.0",
 ]
 # List of explicitly disallowed licenses
 # See https://spdx.org/licenses/ for list of possible licenses
From c9eb4be2c0afc86e9ac4f33096ca650498ccbc2c Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas
Date: Wed, 27 Nov 2024 14:01:08 +0100
Subject: [PATCH 12/13] fix: [unifiedlog_iterator] syntax corrections + parser
 fix

---
 examples/unifiedlog_iterator/src/main.rs | 45 ++++++++++++------------
 src/parser.rs                            |  2 +-
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/examples/unifiedlog_iterator/src/main.rs b/examples/unifiedlog_iterator/src/main.rs
index 54bc75a..3e08d64 100644
--- a/examples/unifiedlog_iterator/src/main.rs
+++ b/examples/unifiedlog_iterator/src/main.rs
@@ -68,10 +68,9 @@ fn main() {
         args.format.clone()
     };

-
     let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap();

-    if args.input != "" {
+    if !args.input.is_empty() {
         parse_log_archive(&args.input, &mut writer);
     } else if args.live != "false" {
         parse_live_system(&mut writer);
@@ -160,7 +159,7 @@ fn parse_trace_file(
             eprintln!("Parsing: {}", full_path);

             if data.path().exists() {
-                let count = iterate_chunks( 
+                let count = iterate_chunks(
                     &full_path,
                     &mut missing_data,
                     string_results,
@@ -190,7 +189,7 @@ fn parse_trace_file(
             eprintln!("Parsing: {}", full_path);

             if data.path().exists() {
-                let count = iterate_chunks( 
+                let count = iterate_chunks(
                     &full_path,
                     &mut missing_data,
                     string_results,
@@ -220,7 +219,7 @@ fn parse_trace_file(
             eprintln!("Parsing: {}", full_path);

             if data.path().exists() {
-                let count = iterate_chunks( 
+                let count = iterate_chunks(
                     &full_path,
                     &mut missing_data,
                     string_results,
@@ -249,7 +248,7 @@ fn parse_trace_file(
             eprintln!("Parsing: {}", full_path);

             if data.path().exists() {
-                let count = iterate_chunks( 
+                let count = iterate_chunks(
                     &full_path,
                     &mut missing_data,
                     string_results,
@@ -273,7 +272,7 @@ fn parse_trace_file(
     if archive_path.exists() {
         eprintln!("Parsing: logdata.LiveData.tracev3");

-        let count = iterate_chunks( 
+        let count = iterate_chunks(
             &archive_path.display().to_string(),
             &mut missing_data,
             string_results,
@@ -294,8 +293,7 @@ fn parse_trace_file(
     // Since we have all Oversize entries now. Go through any log entries that we were not able to build before
     for mut leftover_data in missing_data {
         // Add all of our previous oversize data to logs for lookups
-        leftover_data
-            .oversize = oversize_strings.oversize.clone();
+        leftover_data.oversize = oversize_strings.oversize.clone();

         // Exclude_missing = false
         // If we fail to find any missing data its probably due to the logs rolling
@@ -346,7 +344,10 @@ fn iterate_chunks(
         count += results.len();
         oversize_strings.oversize = chunk.oversize;
         output(&results, writer).unwrap();
-        if missing_logs.catalog_data.is_empty() && missing_logs.header.is_empty() && missing_logs.oversize.is_empty() {
+        if missing_logs.catalog_data.is_empty()
+            && missing_logs.header.is_empty()
+            && missing_logs.oversize.is_empty()
+        {
             continue;
         }
         // Track possible missing log data due to oversize strings being in another file
@@ -361,13 +362,17 @@ pub struct OutputWriter {
 }

 enum OutputWriterEnum {
-    Csv(Writer<Box<dyn Write>>),
+    Csv(Box<Writer<Box<dyn Write>>>),
     Json(Box<dyn Write>),
 }

 impl OutputWriter {
-    pub fn new(output_path: &str, output_format: &str, append: bool) -> Result<OutputWriter, Box<dyn Error>> {
-        let writer: Box<dyn Write> = if output_path != "" {
+    pub fn new(
+        output_path: &str,
+        output_format: &str,
+        append: bool,
+    ) -> Result<OutputWriter, Box<dyn Error>> {
+        let writer: Box<dyn Write> = if !output_path.is_empty() {
             Box::new(
                 OpenOptions::new()
@@ -384,7 +389,7 @@ impl OutputWriter {
             "csv" => {
                 let mut csv_writer = Writer::from_writer(writer);
                 // Write CSV headers
-                csv_writer.write_record(&[
+                csv_writer.write_record([
                     "Timestamp",
                     "Event Type",
                     "Log Type",
@@ -404,13 +409,13 @@ impl OutputWriter {
                     "System Timezone Name",
                 ])?;
                 csv_writer.flush()?;
-                OutputWriterEnum::Csv(csv_writer)
+                OutputWriterEnum::Csv(Box::new(csv_writer))
             }
             "jsonl" => OutputWriterEnum::Json(writer),
             _ => {
                 eprintln!("Unsupported output format: {}", output_format);
                 std::process::exit(1);
-            },
+            }
         };
@@ -458,14 +463,10 @@ impl OutputWriter {
     }
 }

-
 // Append or create csv file
-fn output(
-    results: &Vec<LogData>,
-    writer: &mut OutputWriter,
-) -> Result<(), Box<dyn Error>> {
+fn output(results: &Vec<LogData>, writer: &mut OutputWriter) -> Result<(), Box<dyn Error>> {
     for data in results {
-        writer.write_record(&data)?;
+        writer.write_record(data)?;
     }
     writer.flush()?;
     Ok(())
diff --git a/src/parser.rs b/src/parser.rs
index 09cc789..618b752 100755
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -105,7 +105,7 @@ pub fn collect_strings(path: &str) -> Result<Vec<UUIDText>, ParserError> {
     let entries = paths
         .flat_map(|path| {
-            path.inspect_err(|err| {
+            path.map_err(|err| {
                 error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",)
             })
             .ok()
From 200ee974f8d7f5a83764d34aabbc4336abeb18fc Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas
Date: Wed, 27 Nov 2024 14:02:31 +0100
Subject: [PATCH 13/13] fix: [parser] cargo fmt fix

---
 src/parser.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 618b752..dbfdafa 100755
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -105,9 +105,9 @@ pub fn collect_strings(path: &str) -> Result<Vec<UUIDText>, ParserError> {
     let entries = paths
         .flat_map(|path| {
-            path.map_err(|err| {
-                error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",)
-            })
+            path.map_err(
+                |err| error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",),
+            )
             .ok()
         })
         .collect::<Vec<_>>();
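Note on patch 12: the parser fix swaps inspect_err for map_err, plausibly because Result::inspect_err was only stabilized in Rust 1.76 and map_err keeps the crate buildable on older toolchains. Both spellings log and then discard the error; a minimal sketch of the equivalence, outside the crate:

    // map_err turns the error into the closure's () return value, ok() then drops it
    let parsed: Option<u32> = "42".parse::<u32>()
        .map_err(|err| eprintln!("parse failed: {err:?}"))
        .ok();
    assert_eq!(parsed, Some(42));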