Skip to content

Commit

Permalink
clean third_party packages, fix some bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
LordCasser committed Jun 6, 2024
1 parent b73ddf0 commit 7d50415
Show file tree
Hide file tree
Showing 22 changed files with 286 additions and 79,922 deletions.
470 changes: 231 additions & 239 deletions Cargo.lock

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "weggli-enhance"
version = "0.4.3"
version = "0.4.4"
authors = ["LordCasser"]
edition = "2021"
license = "Apache-2.0"
Expand All @@ -10,29 +10,29 @@ description = "weggli is a fast and robust semantic search tool for C and C++ co
name = "weggli-enhance"

[dependencies]
tree-sitter = "0.20.10"
tree-sitter-c = "0.20.6"
log = "0.4.17"
tree-sitter = "0.22.6"
tree-sitter-c = "0.21.4"
log = "0.4.21"
clap = "2.34.0"
walkdir = "2.3.2"
rayon = "1.5.3"
colored = "2.0.0"
simplelog = "0.12.0"
walkdir = "2.5.0"
rayon = "1.10.0"
colored = "2.1.0"
simplelog = "0.12.2"
fancy-regex = "0.13.0"
rustc-hash = "1.1.0"
thread_local = "1.1"
thread_local = "1.1.8"
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.9"
serde-sarif = "0.4.2"
serde_json = "1.0.107"
serde_json = "1.0"



[target.'cfg(target_family = "unix")'.dependencies]
nix = "0.25.0"

[build-dependencies]
cc = "1.0.73"
cc = "*"

[dev-dependencies]
criterion = {version = "0.3",features = ["html_reports"]}
Expand Down
39 changes: 0 additions & 39 deletions build.rs

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion rules/test.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
issue: "test"
discription: "test"
description: "test"
rules:
- reason: "CVE-2022-3545"
regexes:
Expand Down
31 changes: 14 additions & 17 deletions src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,7 @@ fn _build_query_tree(
captures: Vec::new(),
negations: Vec::new(),
id,
regex_constraints: match regex_constraints {
Some(r) => r,
None => RegexMap::new(HashMap::new()),
},
regex_constraints: regex_constraints.unwrap_or_else(|| RegexMap::new(HashMap::new())),
};

// Skip the root node if it's a translation_unit.
Expand All @@ -64,8 +61,8 @@ fn _build_query_tree(
let mut variables = HashSet::new();

let sexp = if !is_multi_pattern {
// We want to wrap queries into a function_definition so we can easily
// extract the function that contains a match. Of course we should not do that
// We want to wrap queries into a function_definition, so we can easily
// extract the function that contains a match. Of course, we should not do that
// if the user specifies a function_definition as part of the query.
let needs_anchor = c.node().kind() == "compound_statement" && id == 0;
debug!("query needs anchor: {}", needs_anchor);
Expand Down Expand Up @@ -96,7 +93,7 @@ fn _build_query_tree(
} else {
// When building a QueryTree for a compound statement, we create a tree-sitter
// query with multiple root patterns for efficient searching.
// This code is only executed when creating sub queries so we can skip
// This code is only executed when creating subqueries, so we can skip
// the whole anchoring logic needed for the single pattern case.

assert!(c.goto_first_child());
Expand Down Expand Up @@ -136,8 +133,8 @@ fn _build_query_tree(
/// Iterates through `captures` starting at `offset` and returns the necessary query predicates as a string.
/// In addition, all captured variables are added to the `variables` set.
///
/// For constant captures (such as function or variable names), `process_captures` creates a equality predicate
/// (#eq @0 "memcpy"). For variables, we enforce equality between two occurences of the same variable (#eq @0 @1)
/// For constant captures (such as function or variable names), `process_captures` creates an equality predicate
/// (#eq? @0 "memcpy"). For variables, we enforce equality between two occurrences of the same variable (#eq? @0 @1)
fn process_captures(
captures: &[Capture],
offset: usize,
Expand All @@ -155,7 +152,7 @@ fn process_captures(
match c {
Capture::Display => (),
Capture::Check(s) => {
sexp += &format!(r#"(#eq? @{} "{}")"#, (i + offset), s);
sexp += &format!(r#"(#eq? @{} "{}")"#, i + offset, s);
}
Capture::Variable(var, _) => {
vars.entry(var.clone())
Expand Down Expand Up @@ -194,7 +191,7 @@ struct QueryBuilder {

impl QueryBuilder {
// Map from an AST node to its input source
fn get_text(&self, n: &tree_sitter::Node) -> &str {
fn get_text(&self, n: &Node) -> &str {
&self.query_source[n.byte_range()]
}

Expand All @@ -214,7 +211,7 @@ impl QueryBuilder {

// Returns true if `n` is a comparison binary expression
fn is_comparison_binary_exp(&self, n: Node) -> bool {
assert!(n.kind() == "binary_expression");
assert_eq!(n.kind(), "binary_expression");

if let Some(op) = n.child(1) {
[">", "<", "<=", ">="].contains(&op.kind())
Expand Down Expand Up @@ -306,10 +303,10 @@ impl QueryBuilder {
// filtered out by _build_query_tree
return Ok("".to_string());
} else if self.get_text(&label).to_uppercase() == "STRICT" {
if let Some(child) = c.node().named_child(1) {
return self.build(&mut child.walk(), depth, true, kind);
return if let Some(child) = c.node().named_child(1) {
self.build(&mut child.walk(), depth, true, kind)
} else {
return Ok("".to_string());
Ok("".to_string())
}
}
}
Expand Down Expand Up @@ -566,8 +563,8 @@ impl QueryBuilder {
if copy.goto_next_sibling() {
warn! {"sub expression '{}' with multiple arguments is not supported.
Do you want to match on a function call '$foo()' instead?",
self.get_text(&c.node()).to_string().red()};
warn! {"converting to function call..."};
self.get_text(&c.node()).to_string().red()}
warn! {"converting to function call..."}
return Ok(None);
}

Expand Down
69 changes: 0 additions & 69 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ pub struct Args {
pub code_path: PathBuf,
pub rule_path: PathBuf,
pub output_path: Option<String>,
pub before: usize,
pub after: usize,
pub extensions: Vec<String>,
pub limit: bool,
pub cpp: bool,
Expand Down Expand Up @@ -84,20 +82,6 @@ pub fn parse_arguments() -> Args {
.multiple(true)
.help("File extensions to include in the search."),
)
.arg(
Arg::with_name("before")
.long("before")
.short("B")
.takes_value(true)
.help("Lines to print before a match. Default = 5."),
)
.arg(
Arg::with_name("after")
.long("after")
.short("A")
.takes_value(true)
.help("Lines to print after a match. Default = 5."),
)
.arg(
Arg::with_name("limit")
.long("limit")
Expand Down Expand Up @@ -178,31 +162,6 @@ pub fn parse_arguments() -> Args {
let directory_rule = Path::new(matches.value_of("RULES").unwrap_or("."));
let directory_output = matches.value_of("output").map(|s| s.to_string());



//
// let output_path = if let Some(directory_output) = matches.value_of("output") {
// let tmp_path = Path::new(directory_output);
//
// if tmp_path.is_file(){
// if tmp_path.is_absolute() {
// tmp_path.to_path_buf()
// }else {
// std::env::current_dir().unwrap().join(directory_output)
// }
// }
// if tmp_path.is_dir() {
// if tmp_path.is_absolute() {
// tmp_path.to_path_buf().join("results.sarif")
// }else {
// std::env::current_dir().unwrap().join(directory_output).join("results.sarif")
// }
// std::env::current_dir().unwrap().join(directory_output).join("results.sarif")
// }
// } else {
// None
// };

let code_path = if directory_code.is_absolute() {
directory_code.to_path_buf()
} else {
Expand All @@ -215,16 +174,6 @@ pub fn parse_arguments() -> Args {
std::env::current_dir().unwrap().join(directory_rule)
};

let before = match matches.value_of("before") {
Some(v) => v.parse().unwrap_or(5),
None => 5,
};

let after = match matches.value_of("after") {
Some(v) => v.parse().unwrap_or(5),
None => 5,
};

let limit = matches.occurrences_of("limit") > 0;

let unique = matches.occurrences_of("unique") > 0;
Expand Down Expand Up @@ -263,8 +212,6 @@ pub fn parse_arguments() -> Args {
code_path,
rule_path: rules_path,
output_path: directory_output,
before,
after,
extensions,
limit,
cpp,
Expand Down Expand Up @@ -351,22 +298,6 @@ strict: Enable stricter matching. This turns off statement unwrapping and gree
files can also be specified via STDIN by setting the directory to '-'
and piping a list of filenames.
";

// pub const REGEX: &str = "\
// Filter variable matches based on a regular expression.
// This feature uses the Rust regex crate, so most Perl-style
// regular expression features are supported.
// (see https://docs.rs/regex/1.5.4/regex/#syntax)

// Examples:

// Find calls to functions starting with the string 'mem':
// weggli -R 'func=^mem' '$func(_);'

// Find memcpy calls where the last argument is NOT named 'size':
// weggli -R 's!=^size$' 'memcpy(_,_,$s);'
// ";

pub const UNIQUE: &str = "\
Enforce uniqueness of variable matches.
By default, two variables such as $a and $b can match on identical values.
Expand Down
36 changes: 13 additions & 23 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ use std::collections::{hash_map::Keys, HashMap};
use colored::Colorize;
use query::QueryTree;
use fancy_regex::Regex;
use tree_sitter::{Language, Parser, Query, Tree};
use tree_sitter::{Parser, Query, Tree};
use tree_sitter_c::language;

#[macro_use]
extern crate log;
Expand All @@ -31,9 +32,9 @@ mod util;
pub mod query;
pub mod result;

extern "C" {
fn tree_sitter_c() -> Language;
}
// extern "C" {
// fn tree_sitter_c() -> Language;
// }

#[derive(Debug, Clone)]
pub struct QueryError {
Expand All @@ -53,43 +54,32 @@ pub fn pattern_parse(source: &str) -> Tree {
parser.parse(source, None).unwrap()
}

// pub fn get_parser() -> Parser {
// let language = unsafe { tree_sitter_c() };
//
//
// let mut parser = Parser::new();
// if let Err(e) = parser.set_language(language) {
// eprintln!("{}", e);
// panic!();
// }
// parser
// }

pub fn get_parser() -> Parser {
let mut parser = Parser::new();
if let Err(e) = parser.set_language(tree_sitter_c::language()) {
if let Err(e) = parser.set_language(&language()) {
eprintln!("{}", e);
panic!();
}
parser
}
pub fn get_pattern_parser() -> Parser {
let language = unsafe { tree_sitter_c() };
// let language = unsafe { tree_sitter_c() };


let mut parser = Parser::new();
if let Err(e) = parser.set_language(language) {
if let Err(e) = parser.set_language(&language()) {
eprintln!("{}", e);
panic!();
}
parser
}

// Internal helper function to create a new tree-sitter query.
fn ts_query(sexpr: &str) -> Result<tree_sitter::Query, QueryError> {
fn ts_query(sexpr: &str) -> Result<Query, QueryError> {
// let language = unsafe { tree_sitter_c() };
let language = tree_sitter_c::language();
let language = language();

match Query::new(language, sexpr) {
match Query::new(&language, sexpr) {
Ok(q) => Ok(q),
Err(e) => {
let errmsg = format!( "Tree sitter query generation failed: {:?}\n {} \n sexpr: {}\n This is a bug! Can't recover :/", e.kind, e.message, sexpr);
Expand Down Expand Up @@ -186,7 +176,7 @@ const VALID_NODE_KINDS: &[&str] = &[
/// If `force` is true, syntax errors are ignored. Returns a cursor to the
/// root node.
fn validate_query<'a>(
tree: &'a tree_sitter::Tree,
tree: &'a Tree,
query: &str,
force: bool,
) -> Result<tree_sitter::TreeCursor<'a>, QueryError> {
Expand Down
Loading

0 comments on commit 7d50415

Please sign in to comment.