From 604d39c7de69cbe42a8c5562d634b6512d4dfb21 Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Tue, 19 Nov 2024 15:33:52 +0100 Subject: [PATCH] chg: [unifiedlog_iterator] support jsonl output and more params --- examples/unifiedlog_iterator/src/main.rs | 189 +++++++++++++++-------- 1 file changed, 126 insertions(+), 63 deletions(-) diff --git a/examples/unifiedlog_iterator/src/main.rs b/examples/unifiedlog_iterator/src/main.rs index bd82e41..54bc75a 100644 --- a/examples/unifiedlog_iterator/src/main.rs +++ b/examples/unifiedlog_iterator/src/main.rs @@ -40,6 +40,15 @@ struct Args { /// Path to output file. Any directories must already exist #[clap(short, long, default_value = "")] output: String, + + /// Output format. Options: csv, jsonl. Default is autodetect. + #[clap(short, long, default_value = "auto")] + format: String, + + /// Append to output file + /// If false, will overwrite output file + #[clap(short, long, default_value = "false")] + append: bool, } fn main() { @@ -49,9 +58,18 @@ fn main() { .expect("Failed to initialize simple logger"); let args = Args::parse(); - let mut writer = construct_writer(&args.output).unwrap(); - // Create headers for CSV file - output_header(&mut writer).unwrap(); + let output_format = if args.format.is_empty() || args.format == "auto" { + std::path::Path::new(&args.output) + .extension() + .and_then(std::ffi::OsStr::to_str) + .unwrap_or("csv") + .to_string() + } else { + args.format.clone() + }; + + + let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap(); if args.input != "" { parse_log_archive(&args.input, &mut writer); @@ -61,7 +79,7 @@ fn main() { } // Parse a provided directory path. Currently, expect the path to follow macOS log collect structure -fn parse_log_archive(path: &str, writer: &mut Writer>) { +fn parse_log_archive(path: &str, writer: &mut OutputWriter) { let mut archive_path = PathBuf::from(path); // Parse all UUID files which contain strings and other metadata @@ -92,7 +110,7 @@ fn parse_log_archive(path: &str, writer: &mut Writer>) { } // Parse a live macOS system -fn parse_live_system(writer: &mut Writer>) { +fn parse_live_system(writer: &mut OutputWriter) { let strings = collect_strings_system().unwrap(); let shared_strings = collect_shared_strings_system().unwrap(); let timesync_data = collect_timesync_system().unwrap(); @@ -115,7 +133,7 @@ fn parse_trace_file( shared_strings_results: &[SharedCacheStrings], timesync_data: &[TimesyncBoot], path: &str, - writer: &mut Writer>, + writer: &mut OutputWriter, ) { // We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries) // Some log entries have Oversize strings located in different tracev3 files. @@ -302,7 +320,7 @@ fn iterate_chunks( strings_data: &[UUIDText], shared_strings: &[SharedCacheStrings], timesync_data: &[TimesyncBoot], - writer: &mut Writer>, + writer: &mut OutputWriter, oversize_strings: &mut UnifiedLogData, ) -> usize { let log_bytes = fs::read(path).unwrap(); @@ -338,71 +356,116 @@ fn iterate_chunks( count } -fn construct_writer(output_path: &str) -> Result>, Box> { - let writer = if output_path != "" { - Box::new( - OpenOptions::new() - .append(true) - .create(true) - .open(output_path)?, - ) as Box - } else { - Box::new(io::stdout()) as Box - }; - Ok(Writer::from_writer(writer)) +pub struct OutputWriter { + writer: OutputWriterEnum, } -// Create csv file and create headers -fn output_header(writer: &mut Writer>) -> Result<(), Box> { - writer.write_record(&[ - "Timestamp", - "Event Type", - "Log Type", - "Subsystem", - "Thread ID", - "PID", - "EUID", - "Library", - "Library UUID", - "Activity ID", - "Category", - "Process", - "Process UUID", - "Message", - "Raw Message", - "Boot UUID", - "System Timezone Name", - ])?; - writer.flush()?; - Ok(()) +enum OutputWriterEnum { + Csv(Writer>), + Json(Box), } +impl OutputWriter { + pub fn new(output_path: &str, output_format: &str, append: bool) -> Result> { + let writer: Box = if output_path != "" { + Box::new( + OpenOptions::new() + .write(true) + .create(true) + .truncate(!append) + .append(append) + .open(output_path)?, + ) + } else { + Box::new(io::stdout()) + }; + + let writer_enum = match output_format { + "csv" => { + let mut csv_writer = Writer::from_writer(writer); + // Write CSV headers + csv_writer.write_record(&[ + "Timestamp", + "Event Type", + "Log Type", + "Subsystem", + "Thread ID", + "PID", + "EUID", + "Library", + "Library UUID", + "Activity ID", + "Category", + "Process", + "Process UUID", + "Message", + "Raw Message", + "Boot UUID", + "System Timezone Name", + ])?; + csv_writer.flush()?; + OutputWriterEnum::Csv(csv_writer) + } + "jsonl" => OutputWriterEnum::Json(writer), + _ => { + eprintln!("Unsupported output format: {}", output_format); + std::process::exit(1); + }, + }; + + Ok(OutputWriter { + writer: writer_enum, + }) + } + + pub fn write_record(&mut self, record: &LogData) -> Result<(), Box> { + match &mut self.writer { + OutputWriterEnum::Csv(csv_writer) => { + let date_time = Utc.timestamp_nanos(record.time as i64); + csv_writer.write_record(&[ + date_time.to_rfc3339_opts(SecondsFormat::Millis, true), + record.event_type.to_owned(), + record.log_type.to_owned(), + record.subsystem.to_owned(), + record.thread_id.to_string(), + record.pid.to_string(), + record.euid.to_string(), + record.library.to_owned(), + record.library_uuid.to_owned(), + record.activity_id.to_string(), + record.category.to_owned(), + record.process.to_owned(), + record.process_uuid.to_owned(), + record.message.to_owned(), + record.raw_message.to_owned(), + record.boot_uuid.to_owned(), + record.timezone_name.to_owned(), + ])?; + } + OutputWriterEnum::Json(json_writer) => { + writeln!(json_writer, "{}", serde_json::to_string(record).unwrap())?; + } + } + Ok(()) + } + + pub fn flush(&mut self) -> Result<(), Box> { + match &mut self.writer { + OutputWriterEnum::Csv(csv_writer) => csv_writer.flush()?, + OutputWriterEnum::Json(json_writer) => json_writer.flush()?, + } + Ok(()) + } +} + + // Append or create csv file fn output( results: &Vec, - writer: &mut Writer>, + writer: &mut OutputWriter, ) -> Result<(), Box> { for data in results { - let date_time = Utc.timestamp_nanos(data.time as i64); - writer.write_record(&[ - date_time.to_rfc3339_opts(SecondsFormat::Millis, true), - data.event_type.to_owned(), - data.log_type.to_owned(), - data.subsystem.to_owned(), - data.thread_id.to_string(), - data.pid.to_string(), - data.euid.to_string(), - data.library.to_owned(), - data.library_uuid.to_owned(), - data.activity_id.to_string(), - data.category.to_owned(), - data.process.to_owned(), - data.process_uuid.to_owned(), - data.message.to_owned(), - data.raw_message.to_owned(), - data.boot_uuid.to_owned(), - data.timezone_name.to_owned(), - ])?; + writer.write_record(&data)?; } writer.flush()?; Ok(())