Skip to content

Commit

Permalink
Merge branch 'main' into timesync-hashmap
Browse files Browse the repository at this point in the history
  • Loading branch information
puffyCid committed Nov 29, 2024
2 parents 3630d0e + 21a286f commit 83793da
Show file tree
Hide file tree
Showing 9 changed files with 209 additions and 129 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ simplelog = "0.12.2"
csv = "1.3.1"
chrono = "0.4.38"
criterion = "0.5.1"
anyhow = "1.0.93"

[[bench]]
name = "high_sierra_benchmark"
Expand Down
1 change: 1 addition & 0 deletions deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ allow = [
"BSL-1.0",
"Unlicense",
"Unicode-DFS-2016",
"Unicode-3.0",
]
# List of explicitly disallowed licenses
# See https://spdx.org/licenses/ for list of possible licenses
Expand Down
1 change: 1 addition & 0 deletions examples/unifiedlog_iterator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ simplelog = "0.12.2"
csv = "1.3.0"
chrono = "0.4.38"
log = "0.4.22"
serde_json = "1.0.122"
macos-unifiedlogs = {path = "../../"}
clap = {version = "4.5.18", features = ["derive"]}
196 changes: 129 additions & 67 deletions examples/unifiedlog_iterator/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,15 @@ struct Args {
/// Path to output file. Any directories must already exist
#[clap(short, long, default_value = "")]
output: String,

/// Output format. Options: csv, jsonl. Default is autodetect.
#[clap(short, long, default_value = "auto")]
format: String,

/// Append to output file
/// If false, will overwrite output file
#[clap(short, long, default_value = "false")]
append: bool,
}

fn main() {
Expand All @@ -50,19 +59,27 @@ fn main() {
.expect("Failed to initialize simple logger");

let args = Args::parse();
let mut writer = construct_writer(&args.output).unwrap();
// Create headers for CSV file
output_header(&mut writer).unwrap();
let output_format = if args.format.is_empty() || args.format == "auto" {
std::path::Path::new(&args.output)
.extension()
.and_then(std::ffi::OsStr::to_str)
.unwrap_or("csv")
.to_string()
} else {
args.format.clone()
};

if args.input != "" {
let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap();

if !args.input.is_empty() {
parse_log_archive(&args.input, &mut writer);
} else if args.live != "false" {
parse_live_system(&mut writer);
}
}

// Parse a provided directory path. Currently, expect the path to follow macOS log collect structure
fn parse_log_archive(path: &str, writer: &mut Writer<Box<dyn Write>>) {
fn parse_log_archive(path: &str, writer: &mut OutputWriter) {
let mut archive_path = PathBuf::from(path);

// Parse all UUID files which contain strings and other metadata
Expand Down Expand Up @@ -93,7 +110,7 @@ fn parse_log_archive(path: &str, writer: &mut Writer<Box<dyn Write>>) {
}

// Parse a live macOS system
fn parse_live_system(writer: &mut Writer<Box<dyn Write>>) {
fn parse_live_system(writer: &mut OutputWriter) {
let strings = collect_strings_system().unwrap();
let shared_strings = collect_shared_strings_system().unwrap();
let timesync_data = collect_timesync_system().unwrap();
Expand All @@ -116,7 +133,7 @@ fn parse_trace_file(
shared_strings_results: &[SharedCacheStrings],
timesync_data: &HashMap<String, TimesyncBoot>,
path: &str,
writer: &mut Writer<Box<dyn Write>>,
writer: &mut OutputWriter,
) {
// We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries)
// Some log entries have Oversize strings located in different tracev3 files.
Expand Down Expand Up @@ -302,7 +319,7 @@ fn iterate_chunks(
strings_data: &[UUIDText],
shared_strings: &[SharedCacheStrings],
timesync_data: &HashMap<String, TimesyncBoot>,
writer: &mut Writer<Box<dyn Write>>,
writer: &mut OutputWriter,
oversize_strings: &mut UnifiedLogData,
) -> usize {
let log_bytes = fs::read(path).unwrap();
Expand Down Expand Up @@ -341,71 +358,116 @@ fn iterate_chunks(
count
}

fn construct_writer(output_path: &str) -> Result<Writer<Box<dyn Write>>, Box<dyn Error>> {
let writer = if output_path != "" {
Box::new(
OpenOptions::new()
.append(true)
.create(true)
.open(output_path)?,
) as Box<dyn Write>
} else {
Box::new(io::stdout()) as Box<dyn Write>
};
Ok(Writer::from_writer(writer))
/// Output sink for parsed unified-log records. Wraps one of two backends
/// (CSV or JSON lines) selected at construction time from the CLI format flag.
pub struct OutputWriter {
    // Concrete backend; see `OutputWriterEnum` for the two supported formats
    writer: OutputWriterEnum,
}

// Create csv file and create headers
fn output_header(writer: &mut Writer<Box<dyn Write>>) -> Result<(), Box<dyn Error>> {
writer.write_record(&[
"Timestamp",
"Event Type",
"Log Type",
"Subsystem",
"Thread ID",
"PID",
"EUID",
"Library",
"Library UUID",
"Activity ID",
"Category",
"Process",
"Process UUID",
"Message",
"Raw Message",
"Boot UUID",
"System Timezone Name",
])?;
writer.flush()?;
Ok(())
/// Backend selected by `OutputWriter`: CSV (header row written up front)
/// or JSON lines (one serialized record per line).
enum OutputWriterEnum {
    // csv::Writer over any writable destination (file or stdout); boxed so
    // the variant stays pointer-sized rather than embedding the whole writer
    Csv(Box<Writer<Box<dyn Write>>>),
    // Raw byte stream; each record is written as a single JSON document line
    Json(Box<dyn Write>),
}

impl OutputWriter {
    /// Create a writer targeting `output_path`, or stdout when the path is empty.
    ///
    /// `output_format` selects the backend: `"csv"` writes a header row up
    /// front, `"jsonl"` emits one JSON document per record. Any other value
    /// prints an error and terminates the process (fatal CLI usage error).
    /// `append` controls whether an existing file is appended to or truncated.
    ///
    /// # Errors
    /// Returns an error if the output file cannot be opened or the CSV header
    /// cannot be written.
    pub fn new(
        output_path: &str,
        output_format: &str,
        append: bool,
    ) -> Result<Self, Box<dyn Error>> {
        let writer: Box<dyn Write> = if !output_path.is_empty() {
            Box::new(
                OpenOptions::new()
                    .write(true)
                    .create(true)
                    // Truncate only when not appending; with append(true)
                    // writes always go to the end of the existing file
                    .truncate(!append)
                    .append(append)
                    .open(output_path)?,
            )
        } else {
            Box::new(io::stdout())
        };

        let writer_enum = match output_format {
            "csv" => {
                let mut csv_writer = Writer::from_writer(writer);
                // Write the CSV header row once, before any records
                csv_writer.write_record([
                    "Timestamp",
                    "Event Type",
                    "Log Type",
                    "Subsystem",
                    "Thread ID",
                    "PID",
                    "EUID",
                    "Library",
                    "Library UUID",
                    "Activity ID",
                    "Category",
                    "Process",
                    "Process UUID",
                    "Message",
                    "Raw Message",
                    "Boot UUID",
                    "System Timezone Name",
                ])?;
                csv_writer.flush()?;
                OutputWriterEnum::Csv(Box::new(csv_writer))
            }
            "jsonl" => OutputWriterEnum::Json(writer),
            _ => {
                eprintln!("Unsupported output format: {}", output_format);
                std::process::exit(1);
            }
        };

        Ok(OutputWriter {
            writer: writer_enum,
        })
    }

    /// Serialize a single log record to the configured backend.
    ///
    /// CSV output mirrors the column order of the header written by `new`;
    /// JSONL output is one serde_json document per line.
    ///
    /// # Errors
    /// Propagates serialization failures and underlying writer errors.
    pub fn write_record(&mut self, record: &LogData) -> Result<(), Box<dyn Error>> {
        match &mut self.writer {
            OutputWriterEnum::Csv(csv_writer) => {
                // record.time is treated as nanoseconds since the Unix epoch;
                // rendered as RFC 3339 with millisecond precision
                let date_time = Utc.timestamp_nanos(record.time as i64);
                csv_writer.write_record(&[
                    date_time.to_rfc3339_opts(SecondsFormat::Millis, true),
                    record.event_type.to_owned(),
                    record.log_type.to_owned(),
                    record.subsystem.to_owned(),
                    record.thread_id.to_string(),
                    record.pid.to_string(),
                    record.euid.to_string(),
                    record.library.to_owned(),
                    record.library_uuid.to_owned(),
                    record.activity_id.to_string(),
                    record.category.to_owned(),
                    record.process.to_owned(),
                    record.process_uuid.to_owned(),
                    record.message.to_owned(),
                    record.raw_message.to_owned(),
                    record.boot_uuid.to_owned(),
                    record.timezone_name.to_owned(),
                ])?;
            }
            OutputWriterEnum::Json(json_writer) => {
                // Propagate serialization failures via `?` instead of
                // panicking with unwrap(); serde_json::Error boxes into
                // Box<dyn Error> automatically
                writeln!(json_writer, "{}", serde_json::to_string(record)?)?;
            }
        }
        Ok(())
    }

    /// Flush buffered output on whichever backend is active.
    ///
    /// # Errors
    /// Propagates flush errors from the underlying writer.
    pub fn flush(&mut self) -> Result<(), Box<dyn Error>> {
        match &mut self.writer {
            OutputWriterEnum::Csv(csv_writer) => csv_writer.flush()?,
            OutputWriterEnum::Json(json_writer) => json_writer.flush()?,
        }
        Ok(())
    }
}

// Append or create csv file
fn output(
results: &Vec<LogData>,
writer: &mut Writer<Box<dyn Write>>,
) -> Result<(), Box<dyn Error>> {
fn output(results: &Vec<LogData>, writer: &mut OutputWriter) -> Result<(), Box<dyn Error>> {
for data in results {
let date_time = Utc.timestamp_nanos(data.time as i64);
writer.write_record(&[
date_time.to_rfc3339_opts(SecondsFormat::Millis, true),
data.event_type.to_owned(),
data.log_type.to_owned(),
data.subsystem.to_owned(),
data.thread_id.to_string(),
data.pid.to_string(),
data.euid.to_string(),
data.library.to_owned(),
data.library_uuid.to_owned(),
data.activity_id.to_string(),
data.category.to_owned(),
data.process.to_owned(),
data.process_uuid.to_owned(),
data.message.to_owned(),
data.raw_message.to_owned(),
data.boot_uuid.to_owned(),
data.timezone_name.to_owned(),
])?;
writer.write_record(data)?;
}
writer.flush()?;
Ok(())
Expand Down
5 changes: 5 additions & 0 deletions rustfmt.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
newline_style = "Unix"
tab_spaces = 4
max_width = 100
chain_width = 60
use_small_heuristics = "Default"
2 changes: 1 addition & 1 deletion src/dsc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ mod tests {
#[should_panic(expected = "Incomplete(Unknown)")]
fn test_bad_file() {
let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
test_path.push("tests/test_data/Bad Data/DSC/badfile");
test_path.push("tests/test_data/Bad Data/DSC/Badfile");

let buffer = fs::read(test_path).unwrap();
let (_, _) = SharedCacheStrings::parse_dsc(&buffer).unwrap();
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
clippy::checked_conversions,
clippy::unnecessary_cast
)]

mod catalog;
mod chunks;
mod chunkset;
Expand Down
Loading

0 comments on commit 83793da

Please sign in to comment.