From f1f68a6747dc6a2e2004a4f691b1c8f4dfa281a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20=C4=8Euri=C5=A1?= Date: Thu, 23 Nov 2023 15:14:32 +0100 Subject: [PATCH] feat: YAML serializer (#53) * feat: draft an implementation of a YAML serializer for protobufs. This introduces the `yara-x-proto-yaml` crate, which has a minimalistic API that takes a protobuf message and produces a YAML representation of it. * fix: make the order of map entries stable The `map` type in a protobuf is internally represented by a `HashMap`, which doesn't guarantee that items are always iterated in the same order. The order may vary between executions, even if the map's content is the same. This means that we are forced to sort the items in the map before serializing them to YAML. * feat: protobuf to yaml serialization * fix: errors that occurred while merging * fix: clippy warnings * chore: update comments * feat: add support for PE module output in dumper --------- Co-authored-by: Victor M. Alvarez --- Cargo.lock | 122 +++++++ Cargo.toml | 2 + docs/Module Developer's Guide.md | 21 ++ yara-x-cli/Cargo.toml | 8 +- yara-x-cli/src/commands/dump.rs | 200 +++++++++++ yara-x-cli/src/commands/mod.rs | 2 + yara-x-cli/src/main.rs | 2 + yara-x-proto-yaml/Cargo.toml | 27 ++ yara-x-proto-yaml/build.rs | 13 + yara-x-proto-yaml/src/lib.rs | 325 ++++++++++++++++++ yara-x-proto-yaml/src/tests/mod.rs | 29 ++ yara-x-proto-yaml/src/tests/test.proto | 20 ++ yara-x-proto-yaml/src/tests/testdata/1.in | 28 ++ yara-x-proto-yaml/src/tests/testdata/1.out | 17 + yara-x-proto-yaml/src/yaml.proto | 13 + yara-x-proto/src/yara.proto | 1 + yara-x/src/modules/macho/mod.rs | 44 ++- yara-x/src/modules/macho/tests/mod.rs | 7 + .../macho/tests/testdata/macho_ppc_file.out | 4 +- .../testdata/macho_x86_64_dylib_file.out | 8 +- .../macho/tests/testdata/macho_x86_file.out | 4 +- .../macho/tests/testdata/tiny_universal.out | 8 +- yara-x/src/modules/protos/macho.proto | 32 +- 23 files changed, 893 insertions(+), 
44 deletions(-) create mode 100644 yara-x-cli/src/commands/dump.rs create mode 100644 yara-x-proto-yaml/Cargo.toml create mode 100644 yara-x-proto-yaml/build.rs create mode 100644 yara-x-proto-yaml/src/lib.rs create mode 100644 yara-x-proto-yaml/src/tests/mod.rs create mode 100644 yara-x-proto-yaml/src/tests/test.proto create mode 100644 yara-x-proto-yaml/src/tests/testdata/1.in create mode 100644 yara-x-proto-yaml/src/tests/testdata/1.out create mode 100644 yara-x-proto-yaml/src/yaml.proto diff --git a/Cargo.lock b/Cargo.lock index ad6da3b10..4a53a28d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -67,6 +67,21 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anes" version = "0.1.6" @@ -453,6 +468,20 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets 0.48.5", +] + [[package]] name = "ciborium" version = "0.2.1" @@ -508,6 +537,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2275f18819641850fa26c89acc84d465c1bf91ce57bc2748b28c420473352f64" dependencies = [ "clap_builder", + "clap_derive", ] [[package]] @@ -522,6 +552,18 @@ dependencies = [ "strsim", ] 
+[[package]] +name = "clap_derive" +version = "4.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 2.0.39", +] + [[package]] name = "clap_lex" version = "0.6.0" @@ -534,6 +576,17 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "colored_json" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cff32df5cfea75e6484eeff0b4e48ad3977fb6582676a7862b3590dddc7a87" +dependencies = [ + "serde", + "serde_json", + "yansi 0.5.1", +] + [[package]] name = "compact_str" version = "0.7.1" @@ -594,6 +647,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "core-foundation-sys" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" + [[package]] name = "cpp_demangle" version = "0.3.5" @@ -1409,6 +1468,29 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "iana-time-zone" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "id-arena" version = "2.2.1" @@ -2384,6 
+2466,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "protobuf-json-mapping" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523039a90666b229b5260fb91c20686ef309b9d1b1fc3cacb283a0895753ec44" +dependencies = [ + "protobuf", + "protobuf-support", + "thiserror", +] + [[package]] name = "protobuf-parse" version = "3.3.0" @@ -2787,6 +2880,7 @@ version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ + "indexmap 2.1.0", "itoa", "ryu", "serde", @@ -3903,6 +3997,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.51.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.42.0" @@ -4187,6 +4290,7 @@ dependencies = [ "anyhow", "ascii_tree", "clap", + "colored_json", "crossbeam", "crossterm 0.27.0", "enable-ansi-support", @@ -4196,13 +4300,16 @@ dependencies = [ "log", "pprof", "protobuf", + "protobuf-json-mapping", "serde_json", + "strum_macros", "superconsole", "wild", "yansi 0.5.1", "yara-x", "yara-x-fmt", "yara-x-parser", + "yara-x-proto-yaml", ] [[package]] @@ -4259,6 +4366,21 @@ dependencies = [ "protobuf-parse", ] +[[package]] +name = "yara-x-proto-yaml" +version = "0.1.0" +dependencies = [ + "chrono", + "globwalk", + "goldenfile", + "itertools 0.12.0", + "protobuf", + "protobuf-codegen", + "protobuf-parse", + "protobuf-support", + "yansi 0.5.1", +] + [[package]] name = "yara-x-py" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 9fd4ddd34..1b977c6db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ members = [ 
"yara-x-macros", "yara-x-parser", "yara-x-proto", + "yara-x-proto-yaml", "yara-x-py", ] resolver = "2" @@ -83,6 +84,7 @@ yara-x-fmt = { path = "yara-x-fmt" } yara-x-macros = { path = "yara-x-macros" } yara-x-parser = { path = "yara-x-parser" } yara-x-proto = { path = "yara-x-proto" } +yara-x-proto-yaml = { path = "yara-x-proto-yaml" } [profile.release] diff --git a/docs/Module Developer's Guide.md b/docs/Module Developer's Guide.md index 33af9ae42..ec8ccb5f7 100644 --- a/docs/Module Developer's Guide.md +++ b/docs/Module Developer's Guide.md @@ -146,6 +146,27 @@ actually field tags (i.e: a unique number identifying each field in a message). This may be confusing if you are not familiar with protobuf's syntax, so again: explore the protobuf's [documentation](https://developers.google.com/protocol-buffers). +One thing that can be done with integer fields is to represent them in some other way. +This optional representation is shown in the output of the `yr dump` command. This command provides +two output formats: JSON and YAML. Both can be colorized with the `-c|--color` option. +The YAML format also supports custom representations for integer fields. Let's say +for some fields it makes sense to show them as hexadecimal numbers. This can be done by +adding the `[(yara.field_options).yaml_fmt = "..."];` option to the field. +Currently supported formats are `"x"` (hexadecimal number) and `"t"` (human-readable timestamp). +For example: + +``` +message Macho { + optional uint32 magic = 1 [(yara.field_options).yaml_fmt = "x"]; +} +``` + +This marks the `magic` field as a hexadecimal number, so it is shown as +`magic: 0xfeedfacf` instead of `4277009103`. The other format currently supported is +for timestamps. If you want to show an integer field as a timestamp, you can do it by +adding the `[(yara.field_options).yaml_fmt = "t"];` option to the field; the +human-readable timestamp is then shown in a YAML comment after the integer value. 
+ Also notice that we are defining our fields as `optional`. In `proto2` fields must be either `optional` or `required`, while in `proto3` they are always optional and can't be forced to be required. We are going to discuss this topic diff --git a/yara-x-cli/Cargo.toml b/yara-x-cli/Cargo.toml index ca8f35737..29d65d292 100644 --- a/yara-x-cli/Cargo.toml +++ b/yara-x-cli/Cargo.toml @@ -36,21 +36,25 @@ logging = ["dep:log", "dep:env_logger"] [dependencies] ascii_tree = { workspace = true } anyhow = { workspace = true } -clap = { workspace = true, features=["cargo"] } +clap = { workspace = true, features=["cargo", "derive"] } globwalk = { workspace = true } enable-ansi-support = { workspace = true } env_logger = { workspace = true , optional = true } log = { workspace = true, optional = true } protobuf = { workspace = true } -serde_json = { workspace = true } +protobuf-json-mapping = "3.3.0" +serde_json = { workspace = true, features = ["preserve_order"] } yansi = { workspace = true } yara-x = { workspace = true } yara-x-parser = { workspace = true, features = ["ascii-tree"] } +yara-x-proto-yaml = { workspace = true } yara-x-fmt = { workspace = true } +colored_json = "4.0.0" crossbeam = "0.8.2" crossterm = "0.27.0" indent = "0.1.1" pprof = { version = "0.13.0", features = ["flamegraph"], optional=true } +strum_macros = "0.25" superconsole = "0.2.0" wild = "2.1.0" diff --git a/yara-x-cli/src/commands/dump.rs b/yara-x-cli/src/commands/dump.rs new file mode 100644 index 000000000..ba361d338 --- /dev/null +++ b/yara-x-cli/src/commands/dump.rs @@ -0,0 +1,200 @@ +use anyhow::Error; +use clap::{ + arg, value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum, +}; + +use colored_json::ToColoredJson; +use protobuf::MessageDyn; +use protobuf_json_mapping::print_to_string; +use std::fs::File; +use std::io::{stdin, stdout, Read}; +use std::path::PathBuf; +use strum_macros::Display; +use yansi::{Color::Cyan, Paint}; + +use yara_x_proto_yaml::Serializer; + +#[derive(Debug, 
Clone, ValueEnum, Display)] +enum SupportedModules { + Lnk, + Macho, + Elf, + Pe, +} + +#[derive(Debug, Clone, ValueEnum)] +enum OutputFormats { + Json, + Yaml, +} + +/// Creates the `dump` command. +/// The `dump` command dumps information about binary files. +/// +/// # Returns +/// +/// Returns a `Command` struct that represents the `dump` command. +pub fn dump() -> Command { + super::command("dump") + .about("Dump information about binary files") + .arg( + arg!() + .help("Path to binary file") + .value_parser(value_parser!(PathBuf)) + .required(false), + ) + .arg( + arg!(-o --"output-format" ) + .help("Desired output format") + .value_parser(value_parser!(OutputFormats)) + .required(false), + ) + .arg( + arg!(-c - -"color") + .help("Use colorful output") + ) + .arg( + Arg::new("modules") + .long("modules") + .short('m') + .help("Name of the module or comma-separated list of modules to be used for parsing") + .required(false) + .action(ArgAction::Append) + .value_parser(value_parser!(SupportedModules)), + ) +} + +// Obtains information about a module by calling dumper crate. +// +// # Arguments +// +// * `output_format`: The output format. +// * `module`: The module name. +// * `output`: The output protobuf structure to be dumped. +// +// # Returns +// +// Returns a `Result<(), Error>` indicating whether the operation was +// successful or not. 
+fn obtain_module_info( + output_format: Option<&OutputFormats>, + module: &SupportedModules, + output: &dyn MessageDyn, +) -> Result<(), Error> { + match output_format { + Some(OutputFormats::Json) => { + println!("{}", Cyan.paint(module).bold()); + println!(">>>"); + println!("{}", print_to_string(output)?.to_colored_json_auto()?); + println!("<<<"); + } + Some(OutputFormats::Yaml) | None => { + println!("{}", Cyan.paint(module).bold()); + println!(">>>"); + let mut serializer = Serializer::new(stdout()); + serializer.serialize(output).expect("Failed to serialize"); + println!("\n<<<"); + } + } + Ok(()) +} + +/// Executes the `dump` command. +/// +/// # Arguments +/// +/// * `args`: The arguments passed to the `dump` command. +/// +/// # Returns +/// +/// Returns a `Result<(), anyhow::Error>` indicating whether the operation was +/// successful or not. +pub fn exec_dump(args: &ArgMatches) -> anyhow::Result<()> { + let mut buffer = Vec::new(); + + let file = args.get_one::("FILE"); + let output_format = args.get_one::("output-format"); + let modules = args.get_many::("modules"); + let colors_flag = args.get_flag("color"); + + // Disable colors if the flag is not set. + if !colors_flag { + Paint::disable(); + } + + // Get the input. + if let Some(file) = file { + File::open(file.as_path())?.read_to_end(&mut buffer)? + } else { + stdin().read_to_end(&mut buffer)? 
+ }; + + if let Some(modules) = modules { + for module in modules { + if let Some(output) = match module { + SupportedModules::Lnk => { + yara_x::mods::invoke_mod_dyn::(&buffer) + } + SupportedModules::Macho => yara_x::mods::invoke_mod_dyn::< + yara_x::mods::Macho, + >(&buffer), + SupportedModules::Elf => { + yara_x::mods::invoke_mod_dyn::(&buffer) + } + SupportedModules::Pe => { + yara_x::mods::invoke_mod_dyn::(&buffer) + } + } { + obtain_module_info(output_format, module, &*output)?; + } + } + } else { + // Module was not specified therefore we have to obtain ouput for every supported module and decide which is valid. + if let Some(lnk_output) = + yara_x::mods::invoke_mod::(&buffer) + { + if lnk_output.is_lnk() { + obtain_module_info( + output_format, + &SupportedModules::Lnk, + &*lnk_output, + )?; + } + } + if let Some(macho_output) = + yara_x::mods::invoke_mod::(&buffer) + { + if macho_output.has_magic() { + obtain_module_info( + output_format, + &SupportedModules::Macho, + &*macho_output, + )?; + } + } + if let Some(elf_output) = + yara_x::mods::invoke_mod::(&buffer) + { + if elf_output.has_type() { + obtain_module_info( + output_format, + &SupportedModules::Elf, + &*elf_output, + )?; + } + } + if let Some(pe_output) = + yara_x::mods::invoke_mod::(&buffer) + { + if pe_output.is_pe() { + obtain_module_info( + output_format, + &SupportedModules::Pe, + &*pe_output, + )?; + } + } + } + + Ok(()) +} diff --git a/yara-x-cli/src/commands/mod.rs b/yara-x-cli/src/commands/mod.rs index f6370db38..269345bd7 100644 --- a/yara-x-cli/src/commands/mod.rs +++ b/yara-x-cli/src/commands/mod.rs @@ -1,12 +1,14 @@ mod check; mod compile; mod debug; +mod dump; mod fmt; mod scan; pub use check::*; pub use compile::*; pub use debug::*; +pub use dump::*; pub use fmt::*; pub use scan::*; diff --git a/yara-x-cli/src/main.rs b/yara-x-cli/src/main.rs index eb12f0f59..c5bf0ba1a 100644 --- a/yara-x-cli/src/main.rs +++ b/yara-x-cli/src/main.rs @@ -42,6 +42,7 @@ fn main() -> anyhow::Result<()> 
{ commands::compile(), commands::check(), commands::debug(), + commands::dump(), commands::fmt(), ]) .get_matches_from(wild::args()); @@ -59,6 +60,7 @@ fn main() -> anyhow::Result<()> { Some(("check", args)) => commands::exec_check(args), Some(("fmt", args)) => commands::exec_fmt(args), Some(("scan", args)) => commands::exec_scan(args), + Some(("dump", args)) => commands::exec_dump(args), Some(("compile", args)) => commands::exec_compile(args), _ => unreachable!(), }; diff --git a/yara-x-proto-yaml/Cargo.toml b/yara-x-proto-yaml/Cargo.toml new file mode 100644 index 000000000..3bfb51616 --- /dev/null +++ b/yara-x-proto-yaml/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "yara-x-proto-yaml" +version.workspace = true +description.workspace = true +authors.workspace = true +edition.workspace = true +homepage.workspace = true +readme.workspace = true +license.workspace = true +rust-version.workspace = true + +[dependencies] +protobuf = { workspace = true } +itertools = { workspace = true } +yansi = { workspace = true } + +protobuf-support = "3.3.0" +chrono = "0.4.0" + +[dev-dependencies] +goldenfile = "1.5.2" +globwalk = { workspace = true } + +[build-dependencies] +protobuf = { workspace = true } +protobuf-codegen = { workspace = true } +protobuf-parse = { workspace = true } diff --git a/yara-x-proto-yaml/build.rs b/yara-x-proto-yaml/build.rs new file mode 100644 index 000000000..8d04a4c3a --- /dev/null +++ b/yara-x-proto-yaml/build.rs @@ -0,0 +1,13 @@ +use protobuf_codegen::Codegen; + +fn main() { + println!("cargo:rerun-if-changed=src"); + Codegen::new() + .pure() + .cargo_out_dir("protos") + .include("src/tests") + .include("src") + .input("src/tests/test.proto") + .input("src/yaml.proto") + .run_from_script(); +} diff --git a/yara-x-proto-yaml/src/lib.rs b/yara-x-proto-yaml/src/lib.rs new file mode 100644 index 000000000..1f2f6348e --- /dev/null +++ b/yara-x-proto-yaml/src/lib.rs @@ -0,0 +1,325 @@ +use chrono::prelude::{DateTime, NaiveDateTime, Utc}; +use 
itertools::Itertools; +use protobuf::MessageDyn; +use protobuf_support::text_format::escape_bytes_to; +use std::cmp::Ordering; +use std::io::{Error, Write}; +use yansi::Color; +use yansi::Paint; + +use protobuf::descriptor::FieldDescriptorProto; +use protobuf::reflect::ReflectFieldRef::{Map, Optional, Repeated}; +use protobuf::reflect::ReflectValueRef; +use protobuf::reflect::{FieldDescriptor, MessageRef}; + +use crate::yaml::exts::field_options; + +#[cfg(test)] +mod tests; + +include!(concat!(env!("OUT_DIR"), "/protos/mod.rs")); + +const INDENTATION: u16 = 4; + +// A struct that represents colors for output +struct ColorsConfig; + +impl ColorsConfig { + const STRING: Color = Color::Green; + const FIELD_NAME: Color = Color::Blue; + const REPEATED_NAME: Color = Color::Yellow; + const COMMENT: Color = Color::RGB(222, 184, 135); // Brown +} + +// A struct that represents options for a field values +#[derive(Debug, Default, Clone)] +struct ValueOptions { + is_hex: bool, + is_timestamp: bool, +} + +/// Serializes a protobuf to YAML format. +/// +/// Takes a protobuf message and produces a YAML representation of it. The +/// produced YAML intends to be as human-friendly as possible, by including +/// comments that clarify the meaning of certain values, like timestamps. +pub struct Serializer { + indent: u16, + output: W, +} + +impl Serializer { + /// Creates a new YAML serializer that writes its output to `w`. + pub fn new(w: W) -> Self { + Self { output: w, indent: 0 } + } + + /// Serializes the given protobuf message. 
+ pub fn serialize(&mut self, msg: &dyn MessageDyn) -> Result<(), Error> { + self.write_msg(&MessageRef::new(msg)) + } +} + +impl Serializer { + fn get_value_options( + &mut self, + field_descriptor: &FieldDescriptorProto, + ) -> ValueOptions { + field_options + .get(&field_descriptor.options) + .map(|options| ValueOptions { + // Default for boolean is false + is_hex: options.yaml_fmt() == "x", + is_timestamp: options.yaml_fmt() == "t", + }) + .unwrap_or_default() + } + + fn print_integer_value_with_options + ToString + Copy>( + &mut self, + value: T, + value_options: &ValueOptions, + ) -> Result<(), std::io::Error> { + let field_value = if value_options.is_hex { + format!("0x{:x}", value.into()) + } else if value_options.is_timestamp { + let timestamp = DateTime::::from_naive_utc_and_offset( + NaiveDateTime::from_timestamp_opt(value.into(), 0).unwrap(), + Utc, + ); + format!( + "{} {}", + value.to_string(), + self.write_as_a_comment(timestamp.to_string()) + ) + } else { + value.to_string() + }; + write!(self.output, "{}", field_value) + } + + fn quote_bytes(&mut self, bytes: &[u8]) -> String { + let mut result = String::new(); + result.push('"'); + escape_bytes_to(bytes, &mut result); + result.push('"'); + result + } + + fn write_as_a_comment(&mut self, value: String) -> Paint { + ColorsConfig::COMMENT.paint(format!("{} {}", "#", value)) + } + + fn write_field_name(&mut self, name: &str) -> Result<(), Error> { + write!(self.output, "{}:", ColorsConfig::FIELD_NAME.paint(name).bold()) + } + + fn write_repeated_name(&mut self, name: &str) -> Result<(), Error> { + write!( + self.output, + "{}:", + ColorsConfig::REPEATED_NAME.paint(name).bold() + ) + } + + fn write_msg(&mut self, msg: &MessageRef) -> Result<(), Error> { + let descriptor = msg.descriptor_dyn(); + + // Iterator that returns only the non-empty fields in the message. 
+ let mut non_empty_fields = descriptor + .fields() + .filter(|field| match field.get_reflect(&**msg) { + Optional(optional) => optional.value().is_some(), + Repeated(repeated) => !repeated.is_empty(), + Map(map) => !map.is_empty(), + }) + .peekable(); + + while let Some(field) = non_empty_fields.next() { + match field.get_reflect(&**msg) { + Optional(optional) => { + let value = optional.value().unwrap(); + self.write_field_name(field.name())?; + self.indent += INDENTATION; + self.write_name_value_separator(&value)?; + self.write_value(&field, &value)?; + self.indent -= INDENTATION; + } + Repeated(repeated) => { + self.write_repeated_name(field.name())?; + self.newline()?; + let mut items = repeated.into_iter().peekable(); + while let Some(value) = items.next() { + write!( + self.output, + "{}{} ", + " ".repeat((INDENTATION - 2) as usize), + ColorsConfig::REPEATED_NAME.paint("-").bold() + )?; + self.indent += INDENTATION; + self.write_value(&field, &value)?; + self.indent -= INDENTATION; + if items.peek().is_some() { + self.newline()?; + } + } + } + Map(map) => { + self.write_field_name(field.name())?; + self.indent += INDENTATION; + self.newline()?; + + // Iteration order is not stable (i.e: the order in which + // items are returned can vary from one execution to the + // other), because the underlying data structure is a + // HashMap. For this reason items are wrapped in a KV + // struct (which implement the Ord trait) and sorted. + // Key-value pairs are sorted by key. 
+ let mut items = map + .into_iter() + .map(|(key, value)| KV { key, value }) + .sorted() + .peekable(); + + while let Some(item) = items.next() { + // We have to escape possible \n in key as it is interpreted as string + // it is covered in tests + let escaped_key = + self.quote_bytes(item.key.to_string().as_bytes()); + self.write_field_name(escaped_key.as_str())?; + self.indent += INDENTATION; + self.write_name_value_separator(&item.value)?; + self.write_value(&field, &item.value)?; + self.indent -= INDENTATION; + if items.peek().is_some() { + self.newline()?; + } + } + self.indent -= INDENTATION; + } + } + + if non_empty_fields.peek().is_some() { + self.newline()?; + } + } + + Ok(()) + } + + fn write_value( + &mut self, + field_descriptor: &FieldDescriptor, + value: &ReflectValueRef, + ) -> Result<(), Error> { + let value_options = self.get_value_options(field_descriptor.proto()); + match value { + ReflectValueRef::U32(v) => { + self.print_integer_value_with_options(*v, &value_options)? + } + ReflectValueRef::U64(v) => self + .print_integer_value_with_options(*v as i64, &value_options)?, + ReflectValueRef::I32(v) => { + self.print_integer_value_with_options(*v, &value_options)? + } + ReflectValueRef::I64(v) => { + self.print_integer_value_with_options(*v, &value_options)? 
+ } + ReflectValueRef::F32(v) => write!(self.output, "{:.1}", v)?, + ReflectValueRef::F64(v) => write!(self.output, "{:.1}", v)?, + ReflectValueRef::Bool(v) => write!(self.output, "{}", v)?, + ReflectValueRef::String(v) => { + let quoted_string = self.quote_bytes(v.as_bytes()); + write!( + self.output, + "{}", + ColorsConfig::STRING.paint("ed_string) + )?; + } + ReflectValueRef::Bytes(v) => { + let quoted_string = self.quote_bytes(v); + write!( + self.output, + "{}", + ColorsConfig::STRING.paint("ed_string) + )?; + } + ReflectValueRef::Enum(d, v) => match d.value_by_number(*v) { + Some(e) => write!(self.output, "{}", e.name())?, + None => write!(self.output, "{}", v)?, + }, + ReflectValueRef::Message(msg) => self.write_msg(msg)?, + } + Ok(()) + } + + fn newline(&mut self) -> Result<(), Error> { + writeln!(self.output)?; + for _ in 0..self.indent { + write!(self.output, " ")?; + } + Ok(()) + } + + fn write_name_value_separator( + &mut self, + value: &ReflectValueRef, + ) -> Result<(), Error> { + if let ReflectValueRef::Message(_) = value { + self.newline()? + } else { + write!(self.output, " ")? + } + Ok(()) + } +} + +/// Helper type that allows to sort the entries in protobuf map. 
+struct KV<'a> { + key: ReflectValueRef<'a>, + value: ReflectValueRef<'a>, +} + +impl PartialOrd for KV<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for KV<'_> { + fn cmp(&self, other: &Self) -> Ordering { + match self.key { + ReflectValueRef::U32(v) => { + v.cmp(&other.key.to_u32().unwrap_or_default()) + } + ReflectValueRef::U64(v) => { + v.cmp(&other.key.to_u64().unwrap_or_default()) + } + ReflectValueRef::I32(v) => { + v.cmp(&other.key.to_i32().unwrap_or_default()) + } + ReflectValueRef::I64(v) => { + v.cmp(&other.key.to_i64().unwrap_or_default()) + } + ReflectValueRef::Bool(v) => { + v.cmp(&other.key.to_bool().unwrap_or_default()) + } + ReflectValueRef::String(v) => { + v.cmp(other.key.to_str().unwrap_or_default()) + } + _ => { + // Protobuf doesn't support map keys of any other type + // except the ones listed above. + panic!("unsupported type in map key") + } + } + } +} + +impl PartialEq for KV<'_> { + fn eq(&self, other: &Self) -> bool { + self.key.to_str().eq(&other.key.to_str()) + } +} + +impl Eq for KV<'_> {} diff --git a/yara-x-proto-yaml/src/tests/mod.rs b/yara-x-proto-yaml/src/tests/mod.rs new file mode 100644 index 000000000..c0345aee6 --- /dev/null +++ b/yara-x-proto-yaml/src/tests/mod.rs @@ -0,0 +1,29 @@ +use protobuf::text_format::parse_from_str; +use std::fs; +use yansi::Paint; + +use crate::Serializer; + +#[test] +fn yaml_serializer() { + // Disable colors for testing. + Paint::disable(); + + // Create goldenfile mint. + let mut mint = goldenfile::Mint::new("."); + + for entry in globwalk::glob("src/tests/testdata/*.in").unwrap().flatten() { + // Path to the .in file. + let in_path = entry.into_path(); + // Path to the .out file. 
+ let out_path = in_path.with_extension("out"); + + let input = fs::read_to_string(in_path).expect("Unable to read"); + let test_pb = parse_from_str::(&input).unwrap(); + + let output_file = mint.new_goldenfile(out_path).unwrap(); + let mut serializer = Serializer::new(output_file); + + serializer.serialize(&test_pb).expect("Unable to serialize"); + } +} diff --git a/yara-x-proto-yaml/src/tests/test.proto b/yara-x-proto-yaml/src/tests/test.proto new file mode 100644 index 000000000..895ba2868 --- /dev/null +++ b/yara-x-proto-yaml/src/tests/test.proto @@ -0,0 +1,20 @@ +syntax = "proto2"; + +import "yaml.proto"; + +package test; + +message SubMessage { + optional int32 int32_dec = 1; + optional string str = 2; + map map_string_string = 3; +} + +message Message { + optional int32 int32_hex = 1 [(yaml.field_options).yaml_fmt = "x"]; + optional int64 timestamp = 2 [(yaml.field_options).yaml_fmt = "t"]; + optional int32 int32_dec = 3; + optional string str = 4; + repeated SubMessage repeated_msg = 5; + optional SubMessage nested_msg = 6; +} diff --git a/yara-x-proto-yaml/src/tests/testdata/1.in b/yara-x-proto-yaml/src/tests/testdata/1.in new file mode 100644 index 000000000..b14c56417 --- /dev/null +++ b/yara-x-proto-yaml/src/tests/testdata/1.in @@ -0,0 +1,28 @@ +int32_hex: 123 +timestamp: 999999999 +int32_dec: 123 +str: "foo" +repeated_msg { + int32_dec: 456 + str: "bar\nbar" +} +repeated_msg { + int32_dec: 789 + str: "baz" + map_string_string { + key: "foo\nbar" + value: "bar" + } +} +nested_msg { + int32_dec: 1234 + str: "qux\nfoo" + map_string_string { + key: "foo" + value: "bar" + } + map_string_string { + key: "bar" + value: "baz" + } +} \ No newline at end of file diff --git a/yara-x-proto-yaml/src/tests/testdata/1.out b/yara-x-proto-yaml/src/tests/testdata/1.out new file mode 100644 index 000000000..a29655e04 --- /dev/null +++ b/yara-x-proto-yaml/src/tests/testdata/1.out @@ -0,0 +1,17 @@ +int32_hex: 0x7b +timestamp: 999999999 # 2001-09-09 01:46:39 UTC +int32_dec: 
123 +str: "foo" +repeated_msg: + - int32_dec: 456 + str: "bar\nbar" + - int32_dec: 789 + str: "baz" + map_string_string: + "foo\nbar": "bar" +nested_msg: + int32_dec: 1234 + str: "qux\nfoo" + map_string_string: + "bar": "baz" + "foo": "bar" \ No newline at end of file diff --git a/yara-x-proto-yaml/src/yaml.proto b/yara-x-proto-yaml/src/yaml.proto new file mode 100644 index 000000000..575e76de3 --- /dev/null +++ b/yara-x-proto-yaml/src/yaml.proto @@ -0,0 +1,13 @@ +syntax = "proto2"; + +package yaml; + +import "google/protobuf/descriptor.proto"; + +message FieldOptions { + optional string yaml_fmt = 3; + } + +extend google.protobuf.FieldOptions { + optional FieldOptions field_options = 51504; + } diff --git a/yara-x-proto/src/yara.proto b/yara-x-proto/src/yara.proto index f3620b896..475e44eb2 100644 --- a/yara-x-proto/src/yara.proto +++ b/yara-x-proto/src/yara.proto @@ -16,6 +16,7 @@ message ModuleOptions { message FieldOptions { optional string name = 1; optional bool ignore = 2; + optional string yaml_fmt = 3; } message MessageOptions { diff --git a/yara-x/src/modules/macho/mod.rs b/yara-x/src/modules/macho/mod.rs index 7b654aa9c..702f56334 100644 --- a/yara-x/src/modules/macho/mod.rs +++ b/yara-x/src/modules/macho/mod.rs @@ -540,6 +540,24 @@ fn should_swap_bytes(magic: u32) -> bool { matches!(magic, MH_CIGAM | MH_CIGAM_64 | FAT_CIGAM | FAT_CIGAM_64) } +/// Convert a decimal number representation to a version string representation. +/// The decimal number is expected to be in the format +/// `major(rest of digits).minor(previous 2 digits).patch(last 2 digits)`. +/// +/// # Arguments +/// +/// * `decimal_number`: The decimal number to convert. +/// +/// # Returns +/// +/// A string representation of the version number. 
+fn convert_to_version_string(decimal_number: u32) -> String { + let major = decimal_number >> 16; + let minor = (decimal_number >> 8) & 0xFF; + let patch = decimal_number & 0xFF; + format!("{}.{}.{}", major, minor, patch) +} + /// Convert a Mach-O Relative Virtual Address (RVA) to an offset within the /// file. /// @@ -1605,8 +1623,12 @@ fn handle_dylib_command( .to_string(), ), timestamp: Some(dy.dylib.timestamp), - compatibility_version: Some(dy.dylib.compatibility_version), - current_version: Some(dy.dylib.current_version), + compatibility_version: Some(convert_to_version_string( + dy.dylib.compatibility_version, + )), + current_version: Some(convert_to_version_string( + dy.dylib.current_version, + )), ..Default::default() }; macho_file.dylibs.push(dylib); @@ -1726,8 +1748,7 @@ fn handle_segment_command( segname: Some( std::str::from_utf8(&sg.segname) .unwrap_or_default() - .trim_end_matches('\0') - .to_string(), + .replace('\0', ""), ), vmaddr: Some(sg.vmaddr as u64), vmsize: Some(sg.vmsize as u64), @@ -1759,14 +1780,12 @@ fn handle_segment_command( segname: Some( std::str::from_utf8(&sec.segname) .unwrap_or_default() - .trim_end_matches('\0') - .to_string(), + .replace('\0', ""), ), sectname: Some( std::str::from_utf8(&sec.sectname) .unwrap_or_default() - .trim_end_matches('\0') - .to_string(), + .replace('\0', ""), ), addr: Some(sec.addr as u64), size: Some(sec.size as u64), @@ -1846,8 +1865,7 @@ fn handle_segment_command_64( segname: Some( std::str::from_utf8(&sg.segname) .unwrap_or_default() - .trim_end_matches('\0') - .to_string(), + .replace('\0', ""), ), vmaddr: Some(sg.vmaddr), vmsize: Some(sg.vmsize), @@ -1879,14 +1897,12 @@ fn handle_segment_command_64( segname: Some( std::str::from_utf8(&sec.segname) .unwrap_or_default() - .trim_end_matches('\0') - .to_string(), + .replace('\0', ""), ), sectname: Some( std::str::from_utf8(&sec.sectname) .unwrap_or_default() - .trim_end_matches('\0') - .to_string(), + .replace('\0', ""), ), addr: Some(sec.addr), 
size: Some(sec.size), diff --git a/yara-x/src/modules/macho/tests/mod.rs b/yara-x/src/modules/macho/tests/mod.rs index 487abf4e5..027414ec1 100644 --- a/yara-x/src/modules/macho/tests/mod.rs +++ b/yara-x/src/modules/macho/tests/mod.rs @@ -144,6 +144,13 @@ fn test_should_swap_bytes() { assert_eq!(should_swap_bytes(FAT_MAGIC_64), false); } +#[test] +fn test_convert_to_version_string() { + assert_eq!(convert_to_version_string(65536), "1.0.0"); + assert_eq!(convert_to_version_string(102895360), "1570.15.0"); + assert_eq!(convert_to_version_string(0), "0.0.0"); +} + #[test] fn test_rva_to_offset() { let macho = create_test_macho_file(); diff --git a/yara-x/src/modules/macho/tests/testdata/macho_ppc_file.out b/yara-x/src/modules/macho/tests/testdata/macho_ppc_file.out index 1798e72ce..336da472f 100644 --- a/yara-x/src/modules/macho/tests/testdata/macho_ppc_file.out +++ b/yara-x/src/modules/macho/tests/testdata/macho_ppc_file.out @@ -230,7 +230,7 @@ segments { dylibs { name: "/usr/lib/libSystem.B.dylib" timestamp: 1111112572 - compatibility_version: 65536 - current_version: 4653313 + compatibility_version: "1.0.0" + current_version: "71.1.1" } entry_point: 3768 diff --git a/yara-x/src/modules/macho/tests/testdata/macho_x86_64_dylib_file.out b/yara-x/src/modules/macho/tests/testdata/macho_x86_64_dylib_file.out index e7d9595c8..c5c88a074 100644 --- a/yara-x/src/modules/macho/tests/testdata/macho_x86_64_dylib_file.out +++ b/yara-x/src/modules/macho/tests/testdata/macho_x86_64_dylib_file.out @@ -78,12 +78,12 @@ segments { dylibs { name: "fact_x86_64.dylib" timestamp: 1 - compatibility_version: 0 - current_version: 0 + compatibility_version: "0.0.0" + current_version: "0.0.0" } dylibs { name: "/usr/lib/libSystem.B.dylib" timestamp: 2 - compatibility_version: 65536 - current_version: 79495168 + compatibility_version: "1.0.0" + current_version: "1213.0.0" } diff --git a/yara-x/src/modules/macho/tests/testdata/macho_x86_file.out 
b/yara-x/src/modules/macho/tests/testdata/macho_x86_file.out index 3e9cef6ac..acd5b6d6e 100644 --- a/yara-x/src/modules/macho/tests/testdata/macho_x86_file.out +++ b/yara-x/src/modules/macho/tests/testdata/macho_x86_file.out @@ -152,8 +152,8 @@ segments { dylibs { name: "/usr/lib/libSystem.B.dylib" timestamp: 2 - compatibility_version: 65536 - current_version: 79495168 + compatibility_version: "1.0.0" + current_version: "1213.0.0" } rpaths { cmd: 2147483676 diff --git a/yara-x/src/modules/macho/tests/testdata/tiny_universal.out b/yara-x/src/modules/macho/tests/testdata/tiny_universal.out index 34befd9fc..fab280f7b 100644 --- a/yara-x/src/modules/macho/tests/testdata/tiny_universal.out +++ b/yara-x/src/modules/macho/tests/testdata/tiny_universal.out @@ -169,8 +169,8 @@ file { dylibs { name: "/usr/lib/libSystem.B.dylib" timestamp: 2 - compatibility_version: 65536 - current_version: 79495168 + compatibility_version: "1.0.0" + current_version: "1213.0.0" } entry_point: 3808 stack_size: 0 @@ -352,8 +352,8 @@ file { dylibs { name: "/usr/lib/libSystem.B.dylib" timestamp: 2 - compatibility_version: 65536 - current_version: 79495168 + compatibility_version: "1.0.0" + current_version: "1213.0.0" } entry_point: 3808 stack_size: 0 diff --git a/yara-x/src/modules/protos/macho.proto b/yara-x/src/modules/protos/macho.proto index 6fe995ad0..7b44ae6b3 100644 --- a/yara-x/src/modules/protos/macho.proto +++ b/yara-x/src/modules/protos/macho.proto @@ -11,9 +11,9 @@ option (yara.module_options) = { message Dylib { optional string name = 1; - optional uint32 timestamp = 2; - optional uint32 compatibility_version = 3; - optional uint32 current_version = 4; + optional uint32 timestamp = 2 [(yara.field_options).yaml_fmt = "t"]; + optional string compatibility_version = 3; + optional string current_version = 4; } message RPath { @@ -25,13 +25,13 @@ message RPath { message Section { optional string segname = 1; optional string sectname = 2; - optional uint64 addr = 3; - optional uint64 size 
= 4; + optional uint64 addr = 3 [(yara.field_options).yaml_fmt = "x"]; + optional uint64 size = 4 [(yara.field_options).yaml_fmt = "x"]; optional uint32 offset = 5; optional uint32 align = 6; optional uint32 reloff = 7; optional uint32 nreloc = 8; - optional uint32 flags = 9; + optional uint32 flags = 9 [(yara.field_options).yaml_fmt = "x"]; optional uint32 reserved1 = 10; optional uint32 reserved2 = 11; optional uint32 reserved3 = 12; @@ -41,14 +41,14 @@ message Segment { optional uint32 cmd = 1; optional uint32 cmdsize = 2; optional string segname = 3; - optional uint64 vmaddr = 4; - optional uint64 vmsize = 5; + optional uint64 vmaddr = 4 [(yara.field_options).yaml_fmt = "x"]; + optional uint64 vmsize = 5 [(yara.field_options).yaml_fmt = "x"]; optional uint64 fileoff = 6; optional uint64 filesize = 7; - optional uint32 maxprot = 8; - optional uint32 initprot = 9; + optional uint32 maxprot = 8 [(yara.field_options).yaml_fmt = "x"]; + optional uint32 initprot = 9 [(yara.field_options).yaml_fmt = "x"]; optional uint32 nsects = 10; - optional uint32 flags = 11; + optional uint32 flags = 11 [(yara.field_options).yaml_fmt = "x"]; repeated Section sections = 12; } @@ -62,13 +62,13 @@ message FatArch { } message File { - optional uint32 magic = 1; + optional uint32 magic = 1 [(yara.field_options).yaml_fmt = "x"]; optional uint32 cputype = 2; optional uint32 cpusubtype = 3; optional uint32 filetype = 4; optional uint32 ncmds = 5; optional uint32 sizeofcmds = 6; - optional uint32 flags = 7; + optional uint32 flags = 7 [(yara.field_options).yaml_fmt = "x"]; optional uint32 reserved = 8; optional uint64 number_of_segments = 9; repeated Segment segments = 10; @@ -80,13 +80,13 @@ message File { message Macho { // Set Mach-O header and basic fields - optional uint32 magic = 1; + optional uint32 magic = 1 [(yara.field_options).yaml_fmt = "x"]; optional uint32 cputype = 2; optional uint32 cpusubtype = 3; optional uint32 filetype = 4; optional uint32 ncmds = 5; optional uint32 
sizeofcmds = 6; - optional uint32 flags = 7; + optional uint32 flags = 7 [(yara.field_options).yaml_fmt = "x"]; optional uint32 reserved = 8; optional uint64 number_of_segments = 9; repeated Segment segments = 10; @@ -96,7 +96,7 @@ message Macho { optional uint64 stack_size = 14; // Add fields for Mach-O fat binary header - optional uint32 fat_magic = 15; + optional uint32 fat_magic = 15 [(yara.field_options).yaml_fmt = "x"]; optional uint32 nfat_arch = 16; repeated FatArch fat_arch = 17;