diff --git a/examples/unifiedlog_iterator/Cargo.toml b/examples/unifiedlog_iterator/Cargo.toml index 24372c5..a2dcea6 100644 --- a/examples/unifiedlog_iterator/Cargo.toml +++ b/examples/unifiedlog_iterator/Cargo.toml @@ -10,5 +10,6 @@ simplelog = "0.12.2" csv = "1.3.0" chrono = "0.4.38" log = "0.4.22" +serde_json = "1.0.122" macos-unifiedlogs = {path = "../../"} clap = {version = "4.5.18", features = ["derive"]} \ No newline at end of file diff --git a/examples/unifiedlog_iterator/src/main.rs b/examples/unifiedlog_iterator/src/main.rs index bd82e41..3e08d64 100644 --- a/examples/unifiedlog_iterator/src/main.rs +++ b/examples/unifiedlog_iterator/src/main.rs @@ -40,6 +40,15 @@ struct Args { /// Path to output file. Any directories must already exist #[clap(short, long, default_value = "")] output: String, + + /// Output format. Options: csv, jsonl. Default is autodetect. + #[clap(short, long, default_value = "auto")] + format: String, + + /// Append to output file + /// If false, will overwrite output file + #[clap(short, long, default_value = "false")] + append: bool, } fn main() { @@ -49,11 +58,19 @@ fn main() { .expect("Failed to initialize simple logger"); let args = Args::parse(); - let mut writer = construct_writer(&args.output).unwrap(); - // Create headers for CSV file - output_header(&mut writer).unwrap(); + let output_format = if args.format.is_empty() || args.format == "auto" { + std::path::Path::new(&args.output) + .extension() + .and_then(std::ffi::OsStr::to_str) + .unwrap_or("csv") + .to_string() + } else { + args.format.clone() + }; - if args.input != "" { + let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap(); + + if !args.input.is_empty() { parse_log_archive(&args.input, &mut writer); } else if args.live != "false" { parse_live_system(&mut writer); @@ -61,7 +78,7 @@ fn main() { } // Parse a provided directory path. Currently, expect the path to follow macOS log collect structure -fn parse_log_archive(path: &str, writer: &mut Writer>) { +fn parse_log_archive(path: &str, writer: &mut OutputWriter) { let mut archive_path = PathBuf::from(path); // Parse all UUID files which contain strings and other metadata @@ -92,7 +109,7 @@ fn parse_log_archive(path: &str, writer: &mut Writer>) { } // Parse a live macOS system -fn parse_live_system(writer: &mut Writer>) { +fn parse_live_system(writer: &mut OutputWriter) { let strings = collect_strings_system().unwrap(); let shared_strings = collect_shared_strings_system().unwrap(); let timesync_data = collect_timesync_system().unwrap(); @@ -115,7 +132,7 @@ fn parse_trace_file( shared_strings_results: &[SharedCacheStrings], timesync_data: &[TimesyncBoot], path: &str, - writer: &mut Writer>, + writer: &mut OutputWriter, ) { // We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries) // Some log entries have Oversize strings located in different tracev3 files. @@ -142,7 +159,7 @@ fn parse_trace_file( eprintln!("Parsing: {}", full_path); if data.path().exists() { - let count = iterate_chunks( + let count = iterate_chunks( &full_path, &mut missing_data, string_results, @@ -172,7 +189,7 @@ fn parse_trace_file( eprintln!("Parsing: {}", full_path); if data.path().exists() { - let count = iterate_chunks( + let count = iterate_chunks( &full_path, &mut missing_data, string_results, @@ -202,7 +219,7 @@ fn parse_trace_file( eprintln!("Parsing: {}", full_path); if data.path().exists() { - let count = iterate_chunks( + let count = iterate_chunks( &full_path, &mut missing_data, string_results, @@ -231,7 +248,7 @@ fn parse_trace_file( eprintln!("Parsing: {}", full_path); if data.path().exists() { - let count = iterate_chunks( + let count = iterate_chunks( &full_path, &mut missing_data, string_results, @@ -255,7 +272,7 @@ fn parse_trace_file( if archive_path.exists() { eprintln!("Parsing: logdata.LiveData.tracev3"); - let count = iterate_chunks( + let count = iterate_chunks( &archive_path.display().to_string(), &mut missing_data, string_results, @@ -276,8 +293,7 @@ fn parse_trace_file( // Since we have all Oversize entries now. Go through any log entries that we were not able to build before for mut leftover_data in missing_data { // Add all of our previous oversize data to logs for lookups - leftover_data - .oversize = oversize_strings.oversize.clone(); + leftover_data.oversize = oversize_strings.oversize.clone(); // Exclude_missing = false // If we fail to find any missing data its probably due to the logs rolling @@ -302,7 +318,7 @@ fn iterate_chunks( strings_data: &[UUIDText], shared_strings: &[SharedCacheStrings], timesync_data: &[TimesyncBoot], - writer: &mut Writer>, + writer: &mut OutputWriter, oversize_strings: &mut UnifiedLogData, ) -> usize { let log_bytes = fs::read(path).unwrap(); @@ -328,7 +344,10 @@ fn iterate_chunks( count += results.len(); oversize_strings.oversize = chunk.oversize; output(&results, writer).unwrap(); - if missing_logs.catalog_data.is_empty() && missing_logs.header.is_empty() && missing_logs.oversize.is_empty() { + if missing_logs.catalog_data.is_empty() + && missing_logs.header.is_empty() + && missing_logs.oversize.is_empty() + { continue; } // Track possible missing log data due to oversize strings being in another file @@ -338,71 +357,116 @@ fn iterate_chunks( count } -fn construct_writer(output_path: &str) -> Result>, Box> { - let writer = if output_path != "" { - Box::new( - OpenOptions::new() - .append(true) - .create(true) - .open(output_path)?, - ) as Box - } else { - Box::new(io::stdout()) as Box - }; - Ok(Writer::from_writer(writer)) +pub struct OutputWriter { + writer: OutputWriterEnum, } -// Create csv file and create headers -fn output_header(writer: &mut Writer>) -> Result<(), Box> { - writer.write_record(&[ - "Timestamp", - "Event Type", - "Log Type", - "Subsystem", - "Thread ID", - "PID", - "EUID", - "Library", - "Library UUID", - "Activity ID", - "Category", - "Process", - "Process UUID", - "Message", - "Raw Message", - "Boot UUID", - "System Timezone Name", - ])?; - writer.flush()?; - Ok(()) +enum OutputWriterEnum { + Csv(Box>>), + Json(Box), +} + +impl OutputWriter { + pub fn new( + output_path: &str, + output_format: &str, + append: bool, + ) -> Result> { + let writer: Box = if !output_path.is_empty() { + Box::new( + OpenOptions::new() + .write(true) + .create(true) + .truncate(!append) + .append(append) + .open(output_path)?, + ) + } else { + Box::new(io::stdout()) + }; + + let writer_enum = match output_format { + "csv" => { + let mut csv_writer = Writer::from_writer(writer); + // Write CSV headers + csv_writer.write_record([ + "Timestamp", + "Event Type", + "Log Type", + "Subsystem", + "Thread ID", + "PID", + "EUID", + "Library", + "Library UUID", + "Activity ID", + "Category", + "Process", + "Process UUID", + "Message", + "Raw Message", + "Boot UUID", + "System Timezone Name", + ])?; + csv_writer.flush()?; + OutputWriterEnum::Csv(Box::new(csv_writer)) + } + "jsonl" => OutputWriterEnum::Json(writer), + _ => { + eprintln!("Unsupported output format: {}", output_format); + std::process::exit(1); + } + }; + + Ok(OutputWriter { + writer: writer_enum, + }) + } + + pub fn write_record(&mut self, record: &LogData) -> Result<(), Box> { + match &mut self.writer { + OutputWriterEnum::Csv(csv_writer) => { + let date_time = Utc.timestamp_nanos(record.time as i64); + csv_writer.write_record(&[ + date_time.to_rfc3339_opts(SecondsFormat::Millis, true), + record.event_type.to_owned(), + record.log_type.to_owned(), + record.subsystem.to_owned(), + record.thread_id.to_string(), + record.pid.to_string(), + record.euid.to_string(), + record.library.to_owned(), + record.library_uuid.to_owned(), + record.activity_id.to_string(), + record.category.to_owned(), + record.process.to_owned(), + record.process_uuid.to_owned(), + record.message.to_owned(), + record.raw_message.to_owned(), + record.boot_uuid.to_owned(), + record.timezone_name.to_owned(), + ])?; + } + OutputWriterEnum::Json(json_writer) => { + writeln!(json_writer, "{}", serde_json::to_string(record).unwrap())?; + } + } + Ok(()) + } + + pub fn flush(&mut self) -> Result<(), Box> { + match &mut self.writer { + OutputWriterEnum::Csv(csv_writer) => csv_writer.flush()?, + OutputWriterEnum::Json(json_writer) => json_writer.flush()?, + } + Ok(()) + } } // Append or create csv file -fn output( - results: &Vec, - writer: &mut Writer>, -) -> Result<(), Box> { +fn output(results: &Vec, writer: &mut OutputWriter) -> Result<(), Box> { for data in results { - let date_time = Utc.timestamp_nanos(data.time as i64); - writer.write_record(&[ - date_time.to_rfc3339_opts(SecondsFormat::Millis, true), - data.event_type.to_owned(), - data.log_type.to_owned(), - data.subsystem.to_owned(), - data.thread_id.to_string(), - data.pid.to_string(), - data.euid.to_string(), - data.library.to_owned(), - data.library_uuid.to_owned(), - data.activity_id.to_string(), - data.category.to_owned(), - data.process.to_owned(), - data.process_uuid.to_owned(), - data.message.to_owned(), - data.raw_message.to_owned(), - data.boot_uuid.to_owned(), - data.timezone_name.to_owned(), - ])?; + writer.write_record(data)?; } writer.flush()?; Ok(()) diff --git a/src/parser.rs b/src/parser.rs index 09cc789..dbfdafa 100755 --- a/src/parser.rs +++ b/src/parser.rs @@ -105,9 +105,9 @@ pub fn collect_strings(path: &str) -> Result, ParserError> { let entries = paths .flat_map(|path| { - path.inspect_err(|err| { - error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",) - }) + path.map_err( + |err| error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",), + ) .ok() }) .collect::>();