Skip to content

Commit

Permalink
Merge branch 'VirusTotal:main' into lc-uuid-macho
Browse files Browse the repository at this point in the history
  • Loading branch information
latonis authored Dec 31, 2023
2 parents a5f2783 + f294f0c commit 289bc31
Show file tree
Hide file tree
Showing 48 changed files with 9,153 additions and 755 deletions.
457 changes: 250 additions & 207 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ criterion = "0.5.1"
enable-ansi-support = "0.2.1"
env_logger = "0.10.0"
fmmap = "0.3.2"
globwalk = "0.8.1"
globwalk = "0.9.0"
indenter = "0.3.3"
indexmap = "2.1.0"
intaglio = "1.9.1"
Expand Down
36 changes: 19 additions & 17 deletions docs/Module Developer's Guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ fn get_line(ctx: &mut ScanContext, n: i64) -> Option<RuntimeString> {
let cursor = io::Cursor::new(ctx.scanned_data());

if let Some(Ok(line)) = cursor.lines().nth(n as usize) {
Some(RuntimeString::from_bytes(ctx, line))
Some(RuntimeString::from_slice(ctx, line))
} else {
None
}
Expand Down Expand Up @@ -586,7 +586,7 @@ use the `RuntimeString` type. This type is an enum with three variants:

* `RuntimeString::Literal`
* `RuntimeString::ScannedDataSlice`
* `RuntimeString::Owned`
* `RuntimeString::Rc`

`RuntimeString::Literal` is used when the string is a literal in the YARA rule.
For example, if your rule uses the expression `my_module.my_func("foo")`, `"foo"`
Expand All @@ -601,9 +601,10 @@ is part of the scanned data, without having to make a copy of it. Internally,
this variant simply contains the offset within the data where the string starts
and its length, so it's very similar to Rust slices.

`RuntimeString::Owned` is a string owned by the function. This is the variant
used when the string you are returning from your function is not part of the
scanned data, and therefore needs to reside in its own memory.
`RuntimeString::Rc` is a reference-counted string that is released when all
references are dropped. This is the variant used when the string you are
returning from your function is not part of the scanned data, and therefore
needs to reside in its own memory.

Regardless of the variant, `RuntimeString` has an `as_bstr` method that allows
you to obtain a reference to the actual string. This method receives a `&ScanContext`
Expand All @@ -613,20 +614,21 @@ require that the string must be a valid UTF-8, as `&str` does. Aside from that,
more information in the documentation for the [bstr](https://docs.rs/bstr/latest/bstr/)
crate.

For creating an instance of `RuntimeString` you must use the associated function
`RuntimeString::from_bytes`. This function accepts any type implementing the
trait `AsRef<[u8]>`, so you can pass either a `&str`, `&[u8]` or `String` to it.
The `from_bytes` function is smart enough to figure out which variant of
`RuntimeString` is the most appropriate, depending on what you passed to it. If
a slice (e.g: `&str`, `&[u8]`) that lies within the boundaries of the scanned
data, it will return the `RuntimeString::ScannedDataSlice` variant. In all other
cases it will return the`RuntimeString::Owned` variant.
For creating an instance of `RuntimeString` you must either use `RuntimeString::new`
or `RuntimeString::from_slice`. `RuntimeString::new` creates the runtime string
by taking ownership of a `String`, `Vec<u8>`, or any type that implements
`Into<Vec<u8>>`.

On the other hand, `RuntimeString::from_slice` receives a `&[u8]`
and creates the runtime string by making a copy of the slice, except if the
slice lies within the boundaries of the scanned data, in which case the returned
variant is `RuntimeString::ScannedDataSlice`.

```rust
/// A function that always returns the string "foo".
#[module_export]
fn foo(ctx: &mut ScanContext) -> RuntimeString {
RuntimeString::from_bytes(ctx, "foo")
RuntimeString::from_slice(ctx, "foo".as_bytes())
}
```

Expand All @@ -638,8 +640,8 @@ fn uppercase(ctx: &mut ScanContext, s: RuntimeString) -> RuntimeString {
let s = s.as_bstr(ctx);
// &BStr has the same methods as &str, including to_uppercase.
let s = s.to_uppercase();
// Returns RuntimeString::Owned with the new string.
RuntimeString::from_bytes(ctx, s)
// Returns RuntimeString::Rc with the new string.
RuntimeString::new(s)
}
```

Expand All @@ -654,7 +656,7 @@ fn head(ctx: &mut ScanContext, n: i64) -> Option<RuntimeString> {
let head = ctx.scanned_data().get(0..n as usize)?;
// Returns RuntimeString::ScannedDataSlice, as the `head` slice is contained
// within the scanned data.
Some(RuntimeString::from_bytes(ctx, head))
Some(RuntimeString::from_slice(ctx, head))
}
```

Expand Down
208 changes: 67 additions & 141 deletions yara-x-cli/src/commands/dump.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
use anyhow::Error;
use clap::{
arg, value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum,
};

use colored_json::ToColoredJson;
use crossterm::tty::IsTty;
use protobuf::MessageDyn;
use protobuf::MessageField;
use protobuf_json_mapping::print_to_string;
use std::fs::File;
use std::io;
use std::io::{stdin, stdout, Read};
use std::path::PathBuf;
use strum_macros::Display;
use yansi::Color::Cyan;

use crate::help;
use yara_x::mods::*;
use yara_x_proto_yaml::Serializer;

#[derive(Debug, Clone, ValueEnum, Display)]
#[derive(Debug, Clone, ValueEnum, Display, PartialEq)]
enum SupportedModules {
Lnk,
Macho,
Expand All @@ -39,7 +38,8 @@ enum OutputFormats {
/// Returns a `Command` struct that represents the `dump` command.
pub fn dump() -> Command {
super::command("dump")
.about("Dump information about binary files")
.about("Show the data produced by YARA modules for a file")
.long_about(help::DUMP_LONG_HELP)
.arg(
arg!(<FILE>)
.help("Path to binary file")
Expand All @@ -52,60 +52,18 @@ pub fn dump() -> Command {
.value_parser(value_parser!(OutputFormats))
.required(false),
)
.arg(arg!(--"no-colors").help("Turn off colors in YAML output"))
.arg(
arg!(--"no-colors")
.help("Turn off colors in YAML output")
)
.arg(
Arg::new("modules")
.long("modules")
.short('m')
.help("Name of the module or comma-separated list of modules to be used for parsing")
.required(false)
.action(ArgAction::Append)
.value_parser(value_parser!(SupportedModules)),
Arg::new("module")
.long("module")
.short('m')
.help("Module name")
.required(false)
.action(ArgAction::Append)
.value_parser(value_parser!(SupportedModules)),
)
}

// Obtains information about a module by calling dumper crate.
//
// # Arguments
//
// * `output_format`: The output format.
// * `module`: The module name.
// * `output`: The output protobuf structure to be dumped.
//
// # Returns
//
// Returns a `Result<(), Error>` indicating whether the operation was
// successful or not.
fn obtain_module_info(
output_format: Option<&OutputFormats>,
module: &SupportedModules,
output: &dyn MessageDyn,
use_colors: bool,
) -> Result<(), Error> {
match output_format {
Some(OutputFormats::Json) => {
println!("{}", Cyan.paint(module).bold());
println!(">>>");
println!("{}", print_to_string(output)?.to_colored_json_auto()?);
println!("<<<");
}
Some(OutputFormats::Yaml) | None => {
println!("{}", Cyan.paint(module).bold());
println!(">>>");
let mut serializer = Serializer::new(stdout());
serializer
.with_colors(use_colors)
.serialize(output)
.expect("Failed to serialize");
println!("\n<<<");
}
}
Ok(())
}

/// Executes the `dump` command.
///
/// # Arguments
Expand All @@ -121,12 +79,12 @@ pub fn exec_dump(args: &ArgMatches) -> anyhow::Result<()> {

let file = args.get_one::<PathBuf>("FILE");
let output_format = args.get_one::<OutputFormats>("output-format");
let modules = args.get_many::<SupportedModules>("modules");
let requested_modules = args.get_many::<SupportedModules>("module");
let no_colors = args.get_flag("no-colors");

// By default use colors if output is stdout. When output is a standard
// file colors are disabled, and also when `--no-colors` is used.
let use_color = io::stdout().is_tty() && !no_colors;
let use_color = stdout().is_tty() && !no_colors;

// Get the input.
if let Some(file) = file {
Expand All @@ -135,95 +93,63 @@ pub fn exec_dump(args: &ArgMatches) -> anyhow::Result<()> {
stdin().read_to_end(&mut buffer)?
};

if let Some(modules) = modules {
for module in modules {
if let Some(output) = match module {
SupportedModules::Lnk => {
yara_x::mods::invoke_mod_dyn::<yara_x::mods::Lnk>(&buffer)
}
SupportedModules::Macho => yara_x::mods::invoke_mod_dyn::<
yara_x::mods::Macho,
>(&buffer),
SupportedModules::Elf => {
yara_x::mods::invoke_mod_dyn::<yara_x::mods::ELF>(&buffer)
}
SupportedModules::Pe => {
yara_x::mods::invoke_mod_dyn::<yara_x::mods::PE>(&buffer)
}
SupportedModules::Dotnet => yara_x::mods::invoke_mod_dyn::<
yara_x::mods::Dotnet,
>(&buffer),
} {
obtain_module_info(
output_format,
module,
&*output,
use_color,
)?;
}
let mut module_output = invoke_all(&buffer);

if let Some(modules) = requested_modules {
// The user asked explicitly for one or more modules, clear out
// those that weren't explicitly asked for.
let requested_modules: Vec<_> = modules.collect();

if !requested_modules.contains(&&SupportedModules::Dotnet) {
module_output.dotnet = MessageField::none()
}
if !requested_modules.contains(&&SupportedModules::Elf) {
module_output.elf = MessageField::none()
}
if !requested_modules.contains(&&SupportedModules::Lnk) {
module_output.lnk = MessageField::none()
}
if !requested_modules.contains(&&SupportedModules::Macho) {
module_output.macho = MessageField::none()
}
if !requested_modules.contains(&&SupportedModules::Pe) {
module_output.pe = MessageField::none()
}
} else {
// Module was not specified therefore we have to obtain output for every
// supported module and decide which is valid.
if let Some(lnk_output) =
yara_x::mods::invoke_mod::<yara_x::mods::Lnk>(&buffer)
{
if lnk_output.is_lnk() {
obtain_module_info(
output_format,
&SupportedModules::Lnk,
&*lnk_output,
use_color,
)?;
}
// Module was not specified, only show those that produced meaningful
// results, the rest are cleared out.
if !module_output.dotnet.is_dotnet() {
module_output.dotnet = MessageField::none()
}
if !module_output.elf.has_type() {
module_output.elf = MessageField::none()
}
if let Some(macho_output) =
yara_x::mods::invoke_mod::<yara_x::mods::Macho>(&buffer)
{
if macho_output.has_magic() {
obtain_module_info(
output_format,
&SupportedModules::Macho,
&*macho_output,
use_color,
)?;
}
if !module_output.lnk.is_lnk() {
module_output.lnk = MessageField::none()
}
if let Some(elf_output) =
yara_x::mods::invoke_mod::<yara_x::mods::ELF>(&buffer)
{
if elf_output.has_type() {
obtain_module_info(
output_format,
&SupportedModules::Elf,
&*elf_output,
use_color,
)?;
}
if !module_output.macho.has_magic() {
module_output.macho = MessageField::none()
}
if let Some(pe_output) =
yara_x::mods::invoke_mod::<yara_x::mods::PE>(&buffer)
{
if pe_output.is_pe() {
obtain_module_info(
output_format,
&SupportedModules::Pe,
&*pe_output,
use_color,
)?;
}
if !module_output.pe.is_pe() {
module_output.pe = MessageField::none()
}
if let Some(dotnet_output) =
yara_x::mods::invoke_mod::<yara_x::mods::Dotnet>(&buffer)
{
if dotnet_output.is_dotnet() {
obtain_module_info(
output_format,
&SupportedModules::Dotnet,
&*dotnet_output,
use_color,
)?;
}
}

match output_format {
Some(OutputFormats::Json) => {
println!(
"{}",
print_to_string(module_output.as_ref())?
.to_colored_json_auto()?
);
}
Some(OutputFormats::Yaml) | None => {
let mut serializer = Serializer::new(stdout());
serializer
.with_colors(use_color)
.serialize(module_output.as_ref())
.expect("Failed to serialize");
println!();
}
}

Expand Down
13 changes: 13 additions & 0 deletions yara-x-cli/src/help.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,16 @@ pub const COMPILED_RULES_HELP: &str = r#"Indicates that <RULES_PATH> is a file c
YARA rules can be compiled with the `yr compile` command. The file produced by
this command can be passed later to `yr scan` by using this flag."#;

/// Long description of the `dump` command, passed to `long_about` when the
/// command is built.
pub const DUMP_LONG_HELP: &str = r#"Show the data produced by YARA modules for a file
YARA modules analyze files and extract information from them. This command shows
all the data produced by one or more YARA modules for the given file. If no module
is explicitly specified with the `--module` option, any module for which YARA
produces information will be shown.
Examples:
yr dump --module pe SOMEFILE
yr dump --module pe --module dotnet SOMEFILE
"#;
Loading

0 comments on commit 289bc31

Please sign in to comment.