diff --git a/lib/bap/bap.mli b/lib/bap/bap.mli index 7ac7af6ec..493b6a827 100644 --- a/lib/bap/bap.mli +++ b/lib/bap/bap.mli @@ -6040,6 +6040,19 @@ module Std : sig unlifted instructions. *) val errors : t -> error list end + + module SupersetDisasm : sig + type 'a t + val raw_superset : + ?backend:string -> data:'a -> + ?f:(mem * Basic.full_insn option -> 'a t -> 'a t) -> + string -> 'a t + val trimmed_superset : + data:'a -> ?f:('a t -> mem -> Basic.full_insn option -> (addr option * edge) list -> 'a t) list -> + backend:string -> string -> 'a t + val converged_superset : unit t -> unit t + end + end (** Assembly instruction. diff --git a/lib/bap_disasm/abstract_ssa.ml b/lib/bap_disasm/abstract_ssa.ml new file mode 100644 index 000000000..cd4eb17d0 --- /dev/null +++ b/lib/bap_disasm/abstract_ssa.ml @@ -0,0 +1,50 @@ +open Bap_types.Std +open Bap_image_std +open Core_kernel.Std + +let stmt_def_vars = + object(self) + inherit [Exp.Set.t] Stmt.visitor + method enter_move def use accu = + if not Var.(is_virtual def) then + Set.add accu Exp.(Bil.Var def) + else accu + end + +let stmt_use_vars = + object(self) + inherit [Exp.Set.t] Stmt.visitor + method enter_move def use accu = + Set.add accu use + end + + +let stmt_def_freevars = + object(self) + inherit [Var.Set.t] Stmt.visitor + method enter_move def use accu = + if not Var.(is_virtual def) then + Set.add accu def + else accu + end + +let stmt_use_freevars = + object(self) + inherit [Var.Set.t] Stmt.visitor + method enter_move def use accu = + let free_vars = + Set.filter ~f:(fun v -> not Var.(is_virtual v)) (Exp.free_vars use) + in Set.union accu free_vars + end + +let def_ssa bil = + stmt_def_vars#run bil Exp.Set.empty + +let use_ssa bil = + stmt_use_vars#run bil Exp.Set.empty + +let def_freevars bil = + stmt_def_freevars#run bil Var.Set.empty + +let use_freevars bil = + stmt_use_freevars#run bil Var.Set.empty diff --git a/lib/bap_disasm/bap_disasm_std.ml b/lib/bap_disasm/bap_disasm_std.ml index 4d541968f..2798627fa 100644 --- a/lib/bap_disasm/bap_disasm_std.ml +++ b/lib/bap_disasm/bap_disasm_std.ml @@ -13,6 +13,7 @@ module Disasm_expert = struct type nonrec lifter = lifter module Basic = Bap_disasm_basic module Recursive = Bap_disasm_rec + module SupersetDisasm = Bap_disasm_superset module Linear = Bap_disasm_linear_sweep module Kind = Bap_insn_kind module Insn = Bap_disasm_basic.Insn diff --git a/lib/bap_disasm/bap_disasm_superset.ml b/lib/bap_disasm/bap_disasm_superset.ml new file mode 100644 index 000000000..50a4403fd --- /dev/null +++ b/lib/bap_disasm/bap_disasm_superset.ml @@ -0,0 +1,4 @@ +type 'a t = 'a Superset.t +let raw_superset = Superset.superset_disasm_of_file +let trimmed_superset = Trim.trimmed_disasm_of_file +let converged_superset s = Trim.Default.trim Features.(apply_featurepmap Features.default_features s) diff --git a/lib/bap_disasm/builder.ml b/lib/bap_disasm/builder.ml new file mode 100644 index 000000000..e5e89f043 --- /dev/null +++ b/lib/bap_disasm/builder.ml @@ -0,0 +1,10 @@ +open Core_kernel.Std +open Bap.Std +open Superset +open Trim + +module Builder + (Superset : Superset_intf) + (Reducer : Reducer) = struct + +end diff --git a/lib/bap_disasm/cfg_dot_layout.ml b/lib/bap_disasm/cfg_dot_layout.ml new file mode 100644 index 000000000..eb0dc9e56 --- /dev/null +++ b/lib/bap_disasm/cfg_dot_layout.ml @@ -0,0 +1,114 @@ +open Core_kernel.Std +open Bap_types.Std +open Bap_image_std +open Graphlib.Std + +type colored_superset = Superset_risg.t * Addr.Hash_set.t String.Map.t + * Superset.elem Addr.Map.t + +module Make(T : sig val instance : colored_superset end) = struct + open T + module Dottable = struct + type t = colored_superset + + module V = struct + type t = Superset_risg.G.V.t + end + + module E = struct + type t = Superset_risg.G.E.t + let src (s,_) = s + let dst (_,d) = d + end + + let iter_vertex f (g, _, _) = + Superset_risg.G.iter_vertex f g + + let iter_edges_e f (g, _, _) = + Superset_risg.G.iter_edges_e f g + + let graph_attributes _ = [ + `Fontsize 14; + ] + let default_vertex_attributes gr = [ + `Shape `Box; + (*`Height 1.0*.Memory.(length mem);*) + `Fontsize 14; + `Fontcolor 0x666699; + `Fontname "Monospace"; + `Width 1.0 + ] + + let red = 0xff0000 + let green = 0x009900 + let yellow = 0xffff00 + let blue = 0x0000ff + let orange = 0xff6600 + let purple = 0x660066 + let brown = 0x663300 + let cyan = 0x0099cc + + let vertex_name name = + let fmt = Format.str_formatter in + Addr.(pp_generic ~prefix:`none ~suffix:`none ~format:`dec + fmt name); + Format.flush_str_formatter () + + let vertex_attributes v = + let default_attrs = + [ + `Label ((vertex_name v)); + ] in + let g, colors, insn_map = instance in + let contains name = + match Map.find colors name with + | Some(s) -> + Hash_set.mem s v + | None -> false in + let find_update default_attrs name color = + if contains name then + `Color color :: default_attrs + else default_attrs in + let default_attrs = + find_update default_attrs "False Negatives" red in + let default_attrs = + find_update default_attrs "True Positives" green in + let default_attrs = + find_update default_attrs "False Positives" yellow in + let default_attrs = + match List.hd default_attrs with + | Some (`Color _) -> + default_attrs + | _ -> `Color 0X660000 :: default_attrs in + match Map.find insn_map v with + | Some(mem,insn) -> + let len = float_of_int Memory.(length mem) in + `Height (1.0 *. len) :: + default_attrs + | None -> default_attrs + + + let get_subgraph _ = None + let default_edge_attributes _ = [ + `Penwidth 1.0; + `Arrowsize 0.5; + `Headport `N; + `Tailport `S; + `Labelfloat true; + ] + + let edge_attributes (src,dst) = + (*let color,weight = match kind,arity with + | `Fall,`Many -> 0x660000, 4 + | `Fall,`Mono -> 0x000066, 8 + | `Cond,_ -> 0x006600, 2 + | `Jump,_ -> 0x000066, 2 in*) + [ + (*`Color color;*) + (*`Weight weight;*) + ] + end + module Dot = Graph.Graphviz.Dot(Dottable) + + include Dot +end diff --git a/lib/bap_disasm/common.ml b/lib/bap_disasm/common.ml new file mode 100644 index 000000000..522201730 --- /dev/null +++ b/lib/bap_disasm/common.ml @@ -0,0 +1,14 @@ +open Bap_types.Std +open Bap_image_std +open Core_kernel.Std + +let img_of_filename filename = + let img, errs = Image.create filename |> ok_exn in + List.iter errs ~f:(fun err -> + (Error.pp Format.std_formatter err); + ); + img + +let create_memory arch min_addr data = + let data = Bigstring.of_string data in + Memory.create (Arch.endian arch) min_addr data diff --git a/lib/bap_disasm/decision_tree_set.ml b/lib/bap_disasm/decision_tree_set.ml new file mode 100644 index 000000000..2103f7d47 --- /dev/null +++ b/lib/bap_disasm/decision_tree_set.ml @@ -0,0 +1,228 @@ +open Core_kernel.Std +open Bap_types.Std +open Bap_image_std +open Graphlib.Std +open Graph + +(** The decision set represents a set of potentially inter-dependent + decision trees and potential requirements of selection at each + node. Although a graph is used, the actual structure is acyclic. + The addr -> terminal map expresses relationships between the + graph with which it is paired with other members of the + enclosing decision_tree_set *) + +let conflicts_of_entries entries insn_map = + let visited_entries = Addr.Hash_set.create () in + Hash_set.fold entries ~init:[] ~f: + (fun conflicted_entries entry -> + if not (Hash_set.mem visited_entries entry) then ( + Hash_set.add visited_entries entry; + let in_entry_conflicts = + Superset_risg.conflicts_within_insn_at insn_map entry in + let conflicts = Addr.Hash_set.create () in + Hash_set.add conflicts entry; + Set.iter in_entry_conflicts + ~f:(fun conflict -> + (* A conflict that an entry may have may or may not *) + (* itself be an entry. *) + if Hash_set.mem entries conflict then ( + Hash_set.add visited_entries conflict; + Hash_set.add conflicts conflict; + ) + ); + if (Hash_set.length conflicts) > 1 then ( + conflicts :: conflicted_entries + ) else conflicted_entries + ) else conflicted_entries + ) + +let tails_of_conflicts conflicts insn_isg = + let possible_tails = Superset_risg.mergers_of_isg insn_isg in + (* This tail is the particular instruction + that is the fall through target of several potential + competitors. We use this instruction against the + leaders map because those will be the ones that fall + through to the tail; the tail can then be associated with + those that lead into it. *) + let tails, _ = Set.fold ~init:(Addr.Map.empty, Addr.Set.empty) + ~f:(fun (tails, added_choices) possible_tail -> + (* For each edge from tail, lookup the respective vertex; if it *) + (* is in the conflicts set, then it gets added to a sheath *) + (* of choices. *) + let f sheath poss_conflict = + let not_added = not (Set.mem added_choices poss_conflict) in + let is_conflict = Set.mem conflicts poss_conflict in + let is_connected = + match Superset_risg.G.find_all_edges + insn_isg possible_tail poss_conflict with + | [] -> false | _ -> true in + if not_added && is_conflict && is_connected then + poss_conflict :: sheath + else sheath in + let sheath = List.fold_left + (Superset_risg.G.succ insn_isg possible_tail) ~init:[] ~f + in + match sheath with + | [] | _ :: []-> tails, added_choices + | _ -> + let added_choices = + Set.inter added_choices (Addr.Set.of_list sheath) in + (Addr.Map.set tails ~key:possible_tail ~data:sheath, added_choices) + ) possible_tails in + tails + +let decision_tree_of_entries conflicted_entries entries tails insn_isg = + let visited = Addr.Hash_set.create () in + let visited_choices = Addr.Hash_set.create () in + let add_choices decision_tree current_vert = + let unvisited = + not (Hash_set.mem visited_choices current_vert) in + if unvisited then + let possible_tail = current_vert in + match Addr.Map.find tails possible_tail with + | Some(sheath) -> + List.iter sheath ~f:(fun competitor -> + Hash_set.add visited_choices competitor; + Superset_risg.G.add_edge decision_tree possible_tail + competitor; + ); + | _ -> () + else (); + in + let link_zero decision_tree entry = + let width = Addr.bitwidth entry in + let zero = Addr.(of_int ~width 0) in + Superset_risg.G.add_edge decision_tree zero entry + in + let f decision_tree entry = + let width = Addr.bitwidth entry in + let saved_vert = ref @@ + Addr.of_int ~width 0 in + let link_choices current_vert = + add_choices decision_tree entry; + let contained = Superset_risg.G.mem_vertex + decision_tree current_vert in + let is_new = Hash_set.mem visited current_vert in + if contained && is_new then ( + if not @@ Superset_risg.G.mem_edge decision_tree !saved_vert + current_vert then ( + Superset_risg.G.add_edge decision_tree !saved_vert + current_vert; + ); + saved_vert := current_vert; + ); + Hash_set.add visited current_vert + in + (* Would like to have fold_component; not available in this + version *) + Superset_risg.Dfs.prefix_component link_choices insn_isg entry; + in + let conflicted_trees = + List.filter_map conflicted_entries ~f:(fun conflicted -> + if Hash_set.length conflicted > 0 then + let decision_tree = Superset_risg.G.create () in + let f entry = + if not (Hash_set.mem visited entry) then ( + link_zero decision_tree entry; + f decision_tree entry) in + Hash_set.iter conflicted ~f; + Some(decision_tree) + else None + ) in + Hash_set.fold entries ~init:conflicted_trees + ~f:(fun all_trees entry -> + if not (Hash_set.mem visited entry) then + let decision_tree = Superset_risg.G.create () in + f decision_tree entry; + if Superset_risg.G.nb_vertex decision_tree > 0 then + decision_tree :: all_trees + else all_trees + else (all_trees) + ) + + +(** Accepts a per instruction control flow graph, and a map from addr *) +(** to (mem, insn) *) +let decision_trees_of_superset superset = + let open Superset in + let insn_map = Superset.get_map superset in + let insn_risg = Superset.get_graph superset in + (* Here, for each vertex, look up the insn from the map and *) + (* identify conflicts. *) + let conflicts = Superset_risg.find_all_conflicts insn_map in + (* entries variable: + We want to know the superset of all nodes that could be the + terminating point that would otherwise be the return instruction + of a function. *) + let entries = Superset_risg.entries_of_isg insn_risg in + (* + we need to keep track of the subset of potential choices + that fall in line with the normal control flow graph, and + leave the discovery of overlapping redirection to a + second pass, in order that when we do a map over all + instructions to check for conflicts, we know which are tails + in order to properly construct the sheath type. + *) + let tails = tails_of_conflicts conflicts insn_risg in + (* It may be that some entries are accidental indirections that *) + (* happen to preside at the intended entry. These must map to to an *) + (* entirely distinct interpretation. *) + let conflicted_entries = conflicts_of_entries entries insn_map in + (* For each of the potentially conflicting entries, construct a *) + (* decision tree. *) + let decision_trees = decision_tree_of_entries + conflicted_entries entries tails insn_risg in + decision_trees + +let calculate_deltas superset ?entries is_option = + let insn_risg = Superset.get_graph superset in + let entries = Option.value entries + ~default:(Superset_risg.entries_of_isg insn_risg) in + let add_data_of_insn dataset at = + Superset.with_data_of_insn superset at ~f:(Hash_set.add dataset) + in + let deltas = ref Addr.Map.empty in + let delta = ref None in + let make_deltas addr = + let insns, datas = + match !delta with + | Some (insns, datas) -> (insns, datas) + | None -> + let insns = Addr.Hash_set.create () in + let datas = Addr.Hash_set.create () in + delta := Some(insns, datas); + insns, datas in + if is_option addr then ( + deltas := Addr.Map.set !deltas addr (insns, datas); + delta := None + ) else ( + add_data_of_insn datas addr; + Hash_set.add insns addr; + ) + (* else if is in entries then store the delta in the deltas map *) + in + let visited = Addr.Hash_set.create () in + Hash_set.iter entries + ~f:(Superset_risg.iter_component ~visited ~post:make_deltas insn_risg); + !deltas + +(* TODO Need to find some way to cache results *) +(* TODO could split this into tail_options *) +let insn_is_option superset addr = + let open Superset in + let len = Superset.len_at superset addr in + let bound = Addr.(addr ++ len) in + let insn_risg = Superset.get_graph superset in + let previous = Superset_risg.G.pred insn_risg addr in + List.fold ~init:false previous ~f:(fun current descedant -> + if not current then + let further = Superset_risg.G.succ insn_risg descedant in + List.fold ~init:current further ~f:(fun current opt -> + if not current then + if Addr.(addr <= opt) && Addr.(opt < bound) then + true + else false + else current + ) + else current + ) diff --git a/lib/bap_disasm/features.ml b/lib/bap_disasm/features.ml new file mode 100644 index 000000000..65199e672 --- /dev/null +++ b/lib/bap_disasm/features.ml @@ -0,0 +1,924 @@ +open Core_kernel.Std +open Bap_types.Std +open Bap_image_std +open Bap_disasm_target_factory + +module Insn = Bap_disasm_insn +module Dis = Bap_disasm_basic + +(* TODO could have a feature that correlates def-use to text section *) +let default_features = [ + "ImgEntry"; + (*"NoExit";*) + (*"LoopsWithBreak";*) + "BranchViolations"; + (*"LayerViolations";*) + (*"TrimClamped";*) + "TrimLimitedClamped"; + "Callsites3"; + (*"TrimFixpointGrammar"; + "TrimFixpointTails";*) + (*"Clamped"; + "SCC"; + "LoopGrammar"; + "CallsiteLineage"; + "SSA";*) + (*"FreeVarSSA";*) + (*"Grammar";*) + (*"Constant";*) +] +let default_features = List.rev default_features + +let transform = Hash_set.fold ~init:Addr.Set.empty ~f:Set.add + +let find_free_insns superset = + let insn_map = Superset.get_map superset in + let insns = Map.to_sequence insn_map in + let insn_risg = Superset.get_graph superset in + let mem = Superset_risg.G.mem_vertex insn_risg in + let all_conflicts = Addr.Hash_set.create () in + let to_clamp = + Seq.fold ~init:(Addr.Set.empty) + ~f:(fun (to_clamp) (addr,(memory,_)) -> + let len = Memory.length memory in + let conflicts = Superset_risg.range_seq_of_conflicts + ~mem addr len in + let no_conflicts = Seq.is_empty conflicts in + Seq.iter conflicts ~f:(fun c -> + Hash_set.add all_conflicts c); + if no_conflicts && not Hash_set.(mem all_conflicts addr) then + Set.add to_clamp addr + else ( + to_clamp + ) + ) insns in + to_clamp +(*Hash_set.fold all_conflicts ~init:to_clamp ~f:Set.remove*) + +let restricted_clamp superset = + let insn_risg = Superset.get_graph superset in + let insn_map = Superset.get_map superset in + let entries = Superset_risg.entries_of_isg insn_risg in + let conflicts = Superset_risg.find_all_conflicts insn_map in + let to_clamp = ref Addr.Set.empty in + Hash_set.iter entries ~f:(fun entry -> + let b = ref false in + let pre v = + if Addr.(v = entry) then + b := false + else if not (!b) then + if Set.mem conflicts v then + b := true + else to_clamp := Set.add (!to_clamp) v + in Superset_risg.Dfs.iter_component ~pre insn_risg entry; + ); + !to_clamp + +let extended_clamp superset = + let to_clamp = find_free_insns superset in + let insn_map = Superset.get_map superset in + let insn_risg = Superset.get_graph superset in + (* TODO this doesn't merge with to_clamp, and the var names are misleading *) + Set.fold to_clamp ~init:Addr.Set.empty ~f:(fun to_clamp clamp -> + let _, to_clamp = Superset_risg.Dfs.fold_component + (fun addr (struck,to_clamp) -> + if struck then (struck,to_clamp) else + let conflicts = Superset_risg.conflicts_within_insn_at + insn_map addr in + let no_conflicts = Set.length conflicts = 0 in + (*let conflicts = Superset_risg.parent_conflict_at + insn_risg insn_map addr in + let no_conflicts = Set.length conflicts = 0 + && no_conflicts in*) + if no_conflicts then (struck, Set.(add to_clamp addr)) + else (true, to_clamp) + ) (false, to_clamp) insn_risg clamp in to_clamp + ) + +let extract_loop_addrs superset = + let insn_risg = Superset.get_graph superset in + let loop_addrs = Superset_risg.StrongComponents.scc_list insn_risg in + List.fold_left ~init:Addr.Map.empty loop_addrs + ~f:(fun addrs loop -> + if List.length loop >= 2 then + Option.value ~default:addrs + Option.(map List.(hd loop) ~f:(fun addr -> + Map.set addrs addr loop)) + else addrs + ) + +let extract_filtered_loop_addrs superset = + let loop_addrs = extract_loop_addrs superset in + Map.filteri loop_addrs ~f:(fun ~key ~data -> + List.length data > 20) + +let extract_constants superset = + let width = Addr.bitwidth Superset.(get_base superset) in + let s = Size.of_int_exn width in + let addrs = Image.words Superset.(get_img superset) s in + let insn_risg = Superset.get_graph superset in + Seq.fold ~init:Addr.Map.empty Table.(to_sequence addrs) + ~f:(fun constants (m, constant) -> + if Superset.contains_addr superset constant + && Superset_risg.G.(mem_vertex insn_risg constant) then + Map.set constants Memory.(min_addr m) constant + else constants + ) + +let stddev_of hs average pmap = + let deviation,deg_free = + Hash_set.fold ~init:(0.0,0) hs ~f:(fun (deviation,deg_free) addr -> + if Map.mem pmap addr then + let d = (Option.(value_exn Map.(find pmap addr)) -. average) in + let d = d *. d in + (deviation +. d, (deg_free+1)) + else (deviation, (deg_free)) + ) in + sqrt(deviation /. float_of_int (deg_free -1)) + +(* pre is called from descendant to ancestor order, so we want to + check for usage and put that into a map, and then for define on + post visitation, when coming back down from ancestors back to + descendants (as execution would move). *) +let pre_ssa superset lift factors var_use addr = + match Map.find (Superset.get_map superset) addr with + | Some (mem, insn) -> + let bil = lift (mem, insn) in + Option.value_map ~default:() bil ~f:(fun (mem,bil) -> + let use_vars = Abstract_ssa.use_ssa bil in + Set.iter use_vars ~f:(fun use_var -> + var_use := Map.set !var_use use_var addr + ) + ) + | None -> () + +let pre_freevarssa superset lift factors var_use addr = + match Map.find (Superset.get_map superset) addr with + | Some (mem, insn) -> + let bil = lift (mem, insn) in + Option.value_map ~default:() bil ~f:(fun (mem,bil) -> + let use_vars = Abstract_ssa.use_freevars bil in + Set.iter use_vars ~f:(fun use_var -> + var_use := Map.set !var_use use_var addr + ) + ) + | None -> () + +let post_ssa_with superset lift var_use addr f = + match Map.find (Superset.get_map superset) addr with + | Some (mem, insn) -> + let bil = lift (mem, insn) in + Option.value_map ~default:() bil ~f:(fun (mem,bil) -> + let use_vars = Abstract_ssa.use_ssa bil in + Set.iter use_vars ~f:(fun use_var -> + var_use := Map.remove !var_use use_var; + ); + let var_defs = Abstract_ssa.def_ssa bil in + Set.iter var_defs ~f:(fun var_def -> + match Map.find !var_use var_def with + | Some(waddr) -> + if not Addr.(waddr = addr) then ( + f waddr addr + ) + | None -> () + ); + Set.iter var_defs ~f:(fun write_reg -> + var_use := Map.remove !var_use write_reg + ) + ) + | None -> () + +let post_freevarssa_with superset lift var_use addr f = + match Map.find (Superset.get_map superset) addr with + | Some (mem, insn) -> + let bil = lift (mem, insn) in + Option.value_map ~default:() bil ~f:(fun (mem,bil) -> + let use_vars = Abstract_ssa.use_freevars bil in + let var_defs = Abstract_ssa.def_freevars bil in + Set.iter var_defs ~f:(fun var_def -> + match Map.find !var_use var_def with + | Some(waddr) -> + if not Set.(mem use_vars var_def) then ( + f waddr addr + ) + | None -> () + ); + Set.iter use_vars ~f:(fun use_var -> + var_use := Map.remove !var_use use_var; + ); + Set.iter var_defs ~f:(fun write_reg -> + var_use := Map.remove !var_use write_reg + ) + ) + | None -> () + +let extract_ssa_to_map superset = + let insn_risg = Superset.get_graph superset in + let var_use = ref Exp.Map.empty in + let defuse_map = ref Addr.Map.empty in + let add_to_map def use = + defuse_map := Map.set !defuse_map def use in + let module Target = (val target_of_arch + Superset.(get_arch superset)) in + let lift (mem, insn) = + try Superset.lift_insn Target.lift (mem,insn) with _ -> None in + let pre = pre_ssa superset lift () var_use in + let post addr = post_ssa_with superset lift var_use + addr add_to_map in + let entries = Superset_risg.entries_of_isg insn_risg in + Hash_set.iter entries ~f:(fun addr -> + Superset_risg.Dfs.iter_component ~pre ~post insn_risg addr; + var_use := Exp.Map.empty + ); + !defuse_map + +let extract_freevarssa_to_map superset = + let insn_risg = Superset.get_graph superset in + let var_use = ref Var.Map.empty in + let defuse_map = ref Addr.Map.empty in + let add_to_map def use = + defuse_map := Map.set !defuse_map def use in + let module Target = (val target_of_arch + Superset.(get_arch superset)) in + let lift (mem, insn) = + try Superset.lift_insn Target.lift (mem,insn) with _ -> None in + let pre = pre_freevarssa superset lift () var_use in + let post addr = post_freevarssa_with superset lift var_use + addr add_to_map in + let entries = Superset_risg.entries_of_isg insn_risg in + Hash_set.iter entries ~f:(fun addr -> + Superset_risg.Dfs.iter_component ~pre ~post insn_risg addr; + var_use := Var.Map.empty + ); + !defuse_map + + + +type window = (addr * (mem * Dis.full_insn option)) list + +let extract_cross_section_jmps superset = + let insn_risg = Superset.get_graph superset in + let segments = Superset.get_segments superset in + let cross_section_edges = Superset_risg.G.fold_edges + (fun src dst csedges -> + let s1 = Table.find_addr segments src in + let s2 = Table.find_addr segments dst in + match s1, s2 with + | Some (m1,_), Some (m2,_) -> + let a1 = Memory.(min_addr m1) in + let a2 = Memory.(min_addr m2) in + if not Addr.(a1 = a2) then + let ft1 = Superset.is_fall_through superset src dst in + let ft2 = Superset.is_fall_through superset dst src in + if (ft1 || ft2) then ( + Superset_risg.G.remove_edge insn_risg src dst; + Map.set csedges src dst + ) else csedges + else csedges + | _, _ -> csedges + ) insn_risg Addr.Map.empty in + cross_section_edges + +let extract_trim_clamped superset = + let to_clamp = find_free_insns superset in + let insn_risg = Superset.get_graph superset in + let visited = Addr.Hash_set.create () in + let datas = Addr.Hash_set.create () in + let insn_isg = Superset_risg.Oper.mirror insn_risg in + Set.iter to_clamp ~f:(fun c -> + if not Hash_set.(mem visited c) then + if Superset_risg.G.mem_vertex insn_isg c then ( + Superset.mark_descendents_at + ~insn_isg ~visited ~datas superset c + ) + ); + Hash_set.iter datas ~f:(fun d -> + if Hash_set.(mem visited d) || Set.(mem to_clamp d) then + Superset.clear_bad superset d + ); + Markup.check_convergence superset visited; + superset + +let time ?(name="") f x = + let t = Sys.time() in + let fx = f x in + let s = sprintf "%s execution time: %fs\n" name (Sys.time() -. t) in + print_endline s; + fx + +let extract_trim_limited_clamped superset = + let visited = Addr.Hash_set.create () in + let callsites = Superset.get_callsites ~threshold:0 superset in + let f s = Grammar.tag_callsites visited ~callsites s in + let superset = time ~name:"tagging callsites: " f superset in + let () = Markup.clear_bad superset in + let superset = time ~name:"extract_trim_clamped " + extract_trim_clamped superset in + Markup.check_convergence superset visited; + superset + +let fixpoint_descendants superset extractf depth = + let insn_isg = + Superset_risg.Oper.mirror Superset.(get_graph superset) in + let rec fix_descendants cur_features d = + if d >= depth then + cur_features + else + let visited = Addr.Hash_set.create () in + let subset_features = Addr.Hash_set.create () in + Hash_set.iter cur_features ~f:(fun cur -> + if not Hash_set.(mem visited cur) then + Superset_risg.iter_component ~pre:(fun v -> + if Hash_set.(mem cur_features v) + && not Addr.(cur = v) then + Hash_set.add subset_features v + ) ~visited insn_isg cur + else Hash_set.add subset_features cur + ); + fix_descendants subset_features (d+1) + in + let cur_features = extractf superset in + fix_descendants cur_features 0 + +let fixpoint_map superset feature_pmap = + let insn_isg = + Superset_risg.Oper.mirror Superset.(get_graph superset) in + let visited = Addr.Hash_set.create () in + let entries = Superset_risg.entries_of_isg insn_isg in + Hash_set.fold ~init:feature_pmap entries ~f:(fun feature_pmap cur -> + if not Hash_set.(mem visited cur) then + let prev = ref [] in + let feature_pmap = ref feature_pmap in + Superset_risg.iter_component ~pre:(fun v -> + match Map.find !feature_pmap v with + | None -> () + | Some(p) -> + prev := List.append p !prev; + feature_pmap := Map.set !feature_pmap v !prev; + ) ~visited insn_isg cur; + !feature_pmap + else feature_pmap + ) + +let fixpoint_grammar superset depth = + let extractf superset = + let insn_risg = Superset.get_graph superset in + Superset_risg.get_branches insn_risg in + fixpoint_descendants superset extractf depth + +let fixpoint_ssa superset depth = + let extractf superset = + let ssa_map = extract_ssa_to_map superset in + let ssa = Addr.Hash_set.create () in + List.iter Map.(data ssa_map) ~f:Hash_set.(add ssa); + ssa in + fixpoint_descendants superset extractf depth + +let fixpoint_freevarssa superset depth = + let extractf superset = + let freevars_map = extract_freevarssa_to_map superset in + let freevars = Addr.Hash_set.create () in + List.iter Map.(data freevars_map) ~f:Hash_set.(add freevars); + freevars in + fixpoint_descendants superset extractf depth + +let fixpoint_tails superset = + let insn_risg = Superset.(get_graph superset) in + let extractf superset = + let insn_isg = + Superset_risg.Oper.mirror insn_risg in + let insn_map = Superset.get_map superset in + let conflicts = Superset_risg.find_all_conflicts insn_map in + let tails_map = + Decision_tree_set.tails_of_conflicts conflicts insn_isg in + let tails = Addr.Hash_set.create () in + List.iter Map.(keys tails_map) ~f:Hash_set.(add tails); + tails + in + fixpoint_descendants superset extractf 4 + +let allfeatures = + "RestrictedClamped" :: + "ExtendedClamped" :: + "ClassicGrammar" :: + "LinearGrammar" :: + "UnfilteredGrammar" :: + "FalseBranchMap" :: + "FilteredFalseBranchMap" :: + "UnfilteredSCC" :: + "UnionFindBranches" :: (* TODO *) + "UnionFindCompatible" :: (* TODO *) + "FreeVarSSA" :: + "MirrorSCC" :: + "JmpTargetIntersection" :: (* TODO *) + "FixpointCallsites" :: (* TODO *) + "FixpointGrammar" :: + "FixpointSSA" :: (* TODO *) + "FixpointFreevarSSA" :: + "FixpointTails" :: + default_features + +type 'a extractor = ('a Superset.t -> Addr.Set.t) +type ('a,'b) mapextractor = ('a Superset.t -> 'b Addr.Map.t) +type 'a setfilter = ('a Superset.t -> Addr.Set.t -> Addr.Set.t) +type ('a, 'b) mapfilter = ('a Superset.t -> 'b Addr.Map.t -> 'b Addr.Map.t) +type 'a setexfilt = 'a extractor * 'a setfilter +type ('a, 'b) mapexfilt = ('a,'b) mapextractor * ('a, 'b) mapfilter +let unfiltered _ = ident +let exfiltset : (unit setexfilt) String.Map.t = String.Map.empty +let exfiltmap : ((unit, Addr.t) mapexfilt) String.Map.t = String.Map.empty + +let exfiltset = String.Map.set exfiltset "FixpointGrammar" + ((fun x -> transform (fixpoint_grammar x 0)), unfiltered) +let exfiltset = String.Map.set exfiltset "FixpointTails" + ((fun x -> transform (fixpoint_tails x)), unfiltered) +let exfiltset = String.Map.set exfiltset "FixpointFreevarSSA" + ((fun x -> transform (fixpoint_freevarssa x 0)), unfiltered) +let exfiltmap = String.Map.set + exfiltmap "SSA" (extract_ssa_to_map, unfiltered) +let get_branches superset = + let insn_risg = Superset.get_graph superset in + let branches = Superset_risg.get_branches insn_risg in + transform branches +let branch_map_of_branches superset branches = + let insn_risg = Superset.get_graph superset in + let img = Superset.get_img superset in + let name = Option.value_exn Image.(filename img) in + let true_positives = Metrics.true_positives superset name in + let branches = + Hash_set.fold true_positives ~init:branches ~f:Set.remove in + Set.fold branches ~init:Addr.Map.empty ~f:(fun fpbranchmap fpbranch -> + let target = + List.find_exn Superset_risg.G.(pred insn_risg fpbranch) + ~f:Superset.(is_fall_through superset fpbranch) in + Map.set fpbranchmap fpbranch target + ) +let extract_fp_branches superset = + let branches = get_branches superset in + branch_map_of_branches superset branches +let exfiltmap = String.Map.set + exfiltmap "FalseBranchMap" (extract_fp_branches, unfiltered) +let extract_fp_branches superset = + let branches = get_branches superset in + branch_map_of_branches superset branches +let extract_filter_fp_branches superset = + let superset = Invariants.tag_layer_violations superset in + let violations = Markup.collect_bad superset in + let _ = Markup.clear_bad superset in + let branches = get_branches superset in + let branches = Set.diff branches (transform violations) in + let superset = Invariants.tag_branch_violations superset in + let violations = Markup.collect_bad superset in + let _ = Markup.clear_bad superset in + let branches = Set.diff branches (transform violations) in + branch_map_of_branches superset branches +let exfiltmap = String.Map.set + exfiltmap "FilteredFalseBranchMap" (extract_filter_fp_branches, unfiltered) +let exfiltmap = String.Map.set + exfiltmap "FreeVarSSA" (extract_freevarssa_to_map, unfiltered) +let exfiltmap = String.Map.set + exfiltmap "SSA" (extract_ssa_to_map, unfiltered) +let linear_grammar superset = + let insn_risg = Superset.get_graph superset in + let entries = Superset_risg.entries_of_isg insn_risg in + transform Grammar.(linear_branch_sweep superset entries) +let exfiltset = String.Map.set + exfiltset "LinearGrammar" (linear_grammar, unfiltered) +let exfiltset = String.Map.set + exfiltset "UnfilteredGrammar" (get_branches, unfiltered) +let classic_grammar superset = + transform Grammar.(identify_branches superset) +let branch_violations superset = + let superset = Invariants.tag_branch_violations superset in + let violations = Markup.collect_bad superset in + let _ = Markup.clear_bad superset in + transform violations +let exfiltset = String.Map.set + exfiltset "BranchViolations" (branch_violations, unfiltered) +let layer_violations superset = + let superset = Invariants.tag_layer_violations superset in + let violations = Markup.collect_bad superset in + let _ = Markup.clear_bad superset in + transform violations +let exfiltset = String.Map.set + exfiltset "LayerViolations" (layer_violations, unfiltered) +let filtered_grammar superset = + let violations = (layer_violations superset) in + let branches = Set.diff (get_branches superset) violations in + Set.diff branches (branch_violations superset) +let exfiltset = String.Map.set + exfiltset "FilteredGrammar" (filtered_grammar, unfiltered) +let loop_grammar superset = + let superset = Invariants.tag_layer_violations superset in + let violations = Markup.collect_bad superset in + let _ = Markup.clear_bad superset in + let branches = get_branches superset in + let branches = Set.diff branches (transform violations) in + let superset = Invariants.tag_branch_violations superset in + let violations = Markup.collect_bad superset in + let _ = Markup.clear_bad superset in + let branches = Set.diff branches (transform violations) in + let loop_addrs = extract_loop_addrs superset in + let loop_addrs = + Map.fold ~init:Addr.Set.empty loop_addrs ~f:(fun ~key ~data addrs -> + if List.length data >= 2 then + List.fold ~init:addrs data ~f:Set.add + else addrs + ) in + Set.filter branches ~f:(fun x -> Set.(mem loop_addrs x)) +let exfiltset = String.Map.set + exfiltset "LoopGrammar" (loop_grammar, unfiltered) +let exfiltset = String.Map.set + exfiltset "ClassicGrammar" (classic_grammar, unfiltered) +let exfiltset = String.Map.set + exfiltset "Callsites3" ((fun x -> transform (Superset.get_callsites ~threshold:6 x)), unfiltered) +let exfiltset = String.Map.set + exfiltset "Clamped" (find_free_insns, unfiltered) +let exfiltset = String.Map.set + exfiltset "RestrictedClamped" (restricted_clamp, unfiltered) +let exfiltset = String.Map.set + exfiltset "ExtendedClamped" (extended_clamp, unfiltered) +let extract_loops superset = + let loop_addrs = extract_loop_addrs superset in + Map.fold ~init:Addr.Set.empty loop_addrs ~f:(fun ~key ~data addrs -> + if List.length data >= 2 then + List.fold ~init:addrs data ~f:Set.add + else addrs + ) +let exfiltset = String.Map.set + exfiltset "UnfilteredSCC" (extract_loops,unfiltered) +let extract_filter_loops superset = + let loop_addrs = extract_filtered_loop_addrs superset in + Map.fold ~init:Addr.Set.empty loop_addrs ~f:(fun ~key ~data addrs -> + List.fold ~init:addrs data ~f:Set.add + ) +let extract_loops_with_break superset = + let loop_addrs = extract_loop_addrs superset in + let insn_risg = Superset.get_graph superset in + Map.fold ~init:Addr.Set.empty loop_addrs ~f:(fun ~key ~data loops -> + let loop = List.fold ~init:Addr.Set.empty data ~f:Set.add in + let has_break = Seq.exists Seq.(of_list data) + ~f:(fun addr -> + let targets = Superset_risg.G.pred insn_risg addr in + Seq.exists Seq.(of_list targets) + ~f:(fun x -> not Set.(mem loop x)) + ) in + if has_break then Set.union loops loop else loops + ) +let exfiltset = String.Map.set + exfiltset "LoopsWithBreak" (extract_loops_with_break,unfiltered) +let exfiltset = String.Map.set + exfiltset "SCC" (extract_filter_loops,unfiltered) +let extract_mirror_filter_loops superset = + let insn_risg = Superset.get_graph superset in + let insn_risg = Superset_risg.Oper.mirror insn_risg in + let superset = Superset.rebuild ~insn_risg superset in + let loop_addrs = extract_filtered_loop_addrs superset in + Map.fold ~init:Addr.Set.empty loop_addrs ~f:(fun ~key ~data addrs -> + List.fold ~init:addrs data ~f:Set.add + ) +let exfiltset = String.Map.set + exfiltset "MirrorSCC" (extract_mirror_filter_loops,unfiltered) +let extract_constants_to_set superset = + let constants = extract_constants superset in + Map.fold constants ~init:Addr.Set.empty ~f:(fun ~key ~data consts -> + Set.add consts data + ) +let extract_exitless superset = + let returned = Addr.Hash_set.create () in + let insn_risg = Superset.get_graph superset in + let entries = Superset_risg.entries_of_isg insn_risg in + Hash_set.iter entries ~f:(fun entry -> + Superset_risg.iter_component insn_risg + ~pre:(Hash_set.add returned) entry + ); + Superset_risg.G.fold_vertex (fun v exitless -> + if not (Hash_set.mem returned v) + then Set.add exitless v else exitless + ) insn_risg Addr.Set.empty + + +let collect_descendants superset ?insn_isg ?visited ?datas targets = + let visited = Option.value visited ~default:(Addr.Hash_set.create ()) in + let datas = Option.value datas ~default:(Addr.Hash_set.create ()) in + let insn_isg = match insn_isg with + | None -> + let insn_risg = Superset.get_graph superset in + Superset_risg.Oper.mirror insn_risg + | Some insn_isg -> insn_isg in + Hash_set.iter targets ~f:(fun v -> + if not Hash_set.(mem visited v) then + Superset.mark_descendents_at ~insn_isg ~visited ~datas superset v + ) + +let exfiltset = String.Map.set + exfiltset "NoExit" (extract_exitless, unfiltered) +let exfiltset = String.Map.set + exfiltset "Constant" (extract_constants_to_set,unfiltered) +let extract_union_find_compatible superset = + Addr.Set.empty +(* TODO iterate over the superset and split it into the set of items + that can be merged together tenatively. Add the clamped, constants and + unfiltered grammar. For all added, maintain an Int.Map from union + find id to number of features. *) +let exfiltset = String.Map.set + exfiltset "UnionFindCompatible" (extract_union_find_compatible,unfiltered) +let extract_union_find_branches superset = + (*let insn_risg = Superset.get_graph superset in + let insn_map = Superset.get_map superset in + let branches = Superset_risg.get_branches insn_risg in + let components = + Superset_risg.DiscreteComponents.components_list insn_risg in*) + Addr.Set.empty +(*List.fold ~init:Addr.Set.empty components + ~f:(fun (compatible) component -> + List.fold component ~init:(compatible) + ~f:(fun (insns,datas) addr -> + let insns = Union_find.(create addr) :: insns in + let conflicts = + Seq.filter ~f:(Map.mem insn_map) + Superset_risg.(conflict_seq_at insn_map addr) in + let datas = + Seq.fold ~init:datas conflicts ~f:(fun datas conflict -> + (Union_find.create conflict) :: datas) in + insns, datas + ) + )*) +let exfiltset = String.Map.set + exfiltset "UnionFindBranches" (extract_union_find_branches,unfiltered) +let extract_img_entry superset = + let img = Superset.get_img superset in + let s = sprintf "entry: %s" + Addr.(to_string Image.(entry_point img)) in + print_endline s; + Set.add Addr.Set.empty Image.(entry_point img) +let exfiltset = String.Map.set + exfiltset "ImgEntry" (extract_img_entry, unfiltered) +let extract_trim_callsites superset = + let visited = Addr.Hash_set.create () in + let callsites = Superset.get_callsites ~threshold:2 superset in + let protection = Superset.get_callsites ~threshold:0 superset in + collect_descendants superset ~visited protection; + Markup.clear_bad superset; + let superset = Grammar.tag_callsites visited ~callsites superset in + Markup.check_convergence superset visited; + superset +let extract_trim_loops_with_break superset = + (*let loops = extract_loops_with_break superset in*) + superset +let extract_trim_entry superset = + let imgentry = extract_img_entry superset in + Set.iter imgentry ~f:Superset.(mark_descendents_at superset); + superset +let extract_trim_branch_violations superset = + Invariants.tag_branch_violations superset +let extract_trim_layer_violations superset = + Invariants.tag_layer_violations superset +let extract_trim_noexit superset = + let exitless = extract_exitless superset in + Set.iter exitless ~f:Superset.(mark_bad superset); + superset + +let extract_trim_fixpoint_grammar superset = + let gdesc = fixpoint_grammar superset 10 in + let insn_risg = Superset.get_graph superset in + let insn_isg = Superset_risg.Oper.mirror insn_risg in + let visited = Addr.Hash_set.create () in + let datas = Addr.Hash_set.create () in + let callsites = Superset.get_callsites ~threshold:0 superset in + let superset = Grammar.tag_callsites visited ~callsites superset in + Markup.clear_bad superset; + collect_descendants ~visited ~insn_isg superset gdesc; + Hash_set.iter datas ~f:(fun d -> + if Hash_set.(mem visited d) || Hash_set.(mem gdesc d) then + Superset.clear_bad superset d + ); + Markup.check_convergence superset visited; + Markup.check_convergence superset gdesc; + superset + +let extract_trim_fixpoint_ssa superset = + let gdesc = fixpoint_ssa superset 6 in + let insn_risg = Superset.get_graph superset in + let insn_isg = Superset_risg.Oper.mirror insn_risg in + let visited = Addr.Hash_set.create () in + let datas = Addr.Hash_set.create () in + let callsites = Superset.get_callsites ~threshold:0 superset in + (*collect_descendants ~visited ~insn_isg superset callsites;*) + let superset = Grammar.tag_callsites visited ~callsites superset in + Markup.clear_bad superset; + collect_descendants ~visited ~insn_isg superset gdesc; + Hash_set.iter datas ~f:(fun d -> + if Hash_set.(mem visited d) || Hash_set.(mem gdesc d) then + Superset.clear_bad superset d + ); + Markup.check_convergence superset visited; + superset + +let extract_trim_fixpoint_freevarssa superset = + let gdesc = fixpoint_freevarssa superset 6 in + let insn_risg = Superset.get_graph superset in + let insn_isg = Superset_risg.Oper.mirror insn_risg in + let visited = Addr.Hash_set.create () in + let datas = Addr.Hash_set.create () in + let callsites = Superset.get_callsites ~threshold:0 superset in + (*collect_descendants ~visited ~insn_isg superset callsites;*) + let superset = Grammar.tag_callsites visited ~callsites superset in + Markup.clear_bad superset; + collect_descendants ~visited ~insn_isg superset gdesc; + Hash_set.iter datas ~f:(fun d -> + if Hash_set.(mem visited d) || Hash_set.(mem gdesc d) then + Superset.clear_bad superset d + ); + Markup.check_convergence superset visited; + superset + +let extract_trim_fixpoint_tails superset = + let tdesc = fixpoint_tails superset in + let insn_risg = Superset.get_graph superset in + let insn_isg = Superset_risg.Oper.mirror insn_risg in + let visited = Addr.Hash_set.create () in + let datas = Addr.Hash_set.create () in + let callsites = Superset.get_callsites ~threshold:0 superset in + let superset = Grammar.tag_callsites visited ~callsites superset in + Markup.clear_bad superset; + Hash_set.iter tdesc ~f:(fun v -> + if not Hash_set.(mem visited v) then + Superset.mark_descendents_at ~insn_isg ~visited ~datas superset v + ); + Hash_set.iter datas ~f:(fun d -> + if Hash_set.(mem visited d) || Hash_set.(mem tdesc d) then + Superset.clear_bad superset d + ); + Markup.check_convergence superset visited; + superset + +type fkind = + | AppSuperset + (*| AppFactors + | AppFactorGraph*) + +let discard_edges superset = + let g = Superset.get_graph superset in + let insn_risg = Superset.get_graph superset in + let insn_map = Superset.get_map superset in + Superset_risg.G.iter_edges + (fun child parent -> + if not Superset.(is_fall_through superset parent child) then + match Map.find insn_map parent with + | None -> () + | Some (mem, insn) -> + match insn with + | Some(insn) -> + let insn = Insn.of_basic insn in + if Insn.(is Insn.call insn) then + Superset_risg.G.remove_edge insn_risg child parent + | None -> () + ) g; + (*let edges = Superset.get_non_fall_through_edges superset in*) + superset + +let featuremap : ((unit Superset.t -> unit Superset.t) * fkind) String.Map.t = String.Map.empty +let featuremap = String.Map.set + featuremap "Callsites3" (extract_trim_callsites,AppSuperset) +let featuremap = String.Map.set + featuremap "DiscardEdges" (discard_edges,AppSuperset) +let featuremap = String.Map.set + featuremap "LoopsWithBreak" (extract_trim_loops_with_break,AppSuperset) +let featuremap = String.Map.set + featuremap "ImgEntry" (extract_trim_entry,AppSuperset) +let featuremap = String.Map.set + featuremap "BranchViolations" (extract_trim_branch_violations,AppSuperset) +let featuremap = String.Map.set + featuremap "LayerViolations" (extract_trim_layer_violations,AppSuperset) +(*let featuremap = String.Map.set + featuremap "SCC" (extract_tag_loops,AppFactors)*) +let featuremap = String.Map.set + featuremap "NoExit" (extract_trim_noexit,AppSuperset) +let featuremap = String.Map.set + featuremap "TrimClamped" (extract_trim_clamped,AppSuperset) +let featuremap = String.Map.set + featuremap "TrimLimitedClamped" + (extract_trim_limited_clamped,AppSuperset) +let featuremap = String.Map.set + featuremap "TrimFixpointGrammar" + (extract_trim_fixpoint_grammar,AppSuperset) +let featuremap = String.Map.set + featuremap "TrimFixpointSSA" + (extract_trim_fixpoint_ssa,AppSuperset) +let featuremap = String.Map.set + featuremap "TrimFixpointFreevarSSA" + (extract_trim_fixpoint_freevarssa,AppSuperset) +let featuremap = String.Map.set + featuremap "TrimFixpointTails" + (extract_trim_fixpoint_tails,AppSuperset) + +let apply_featureset featureset superset = + let superset = List.fold ~init:(superset) featureset ~f:(fun (superset) feature -> + match Map.(find featuremap feature) with + | None -> superset + | Some (f, AppSuperset) -> + print_endline feature; + let superset = f superset in + let superset = Trim.Default.trim superset in + superset + ) in + superset + +let fdists = String.Map.empty +let fdists = String.Map.set fdists "FixpointGrammar" 5 +let fdists = String.Map.set fdists "FixpointFreevarSSA" 3 + +let make_featurepmap featureset superset = + List.fold ~f:(fun (feature_pmap) feature -> + let p = Map.find fdists feature in + let p = Option.value p ~default:2 in + match Map.(find exfiltset feature) with + | None -> feature_pmap + | Some (extract,filter) -> + print_endline feature; + let fset = extract superset in + Set.fold fset ~init:feature_pmap + ~f:(fun feature_pmap x -> + Map.update feature_pmap x ~f:(function + | Some l -> (p, x, feature) :: l + | None -> [(p, x, feature)] + ) + ) + ) ~init:Addr.Map.empty featureset + +let total_of_features l = + List.fold ~init:0 ~f:(fun x (y,_,_) -> x + y) l + +let apply_featurepmap featureset ?(threshold=50) superset = + let feature_pmap = make_featurepmap featureset superset in + let feature_pmap = fixpoint_map superset feature_pmap in + let feature_pmap = + Map.map feature_pmap ~f:(total_of_features) in + let feature_pmap = + Map.filter feature_pmap (fun total -> total > threshold) in + let visited = Addr.Hash_set.create () in + let insn_risg = Superset.get_graph superset in + let insn_isg = Superset_risg.Oper.mirror insn_risg in + let callsites = Superset.get_callsites ~threshold:0 superset in + let superset = Grammar.tag_callsites visited ~callsites superset in + Markup.clear_bad superset; + List.iter Map.(keys feature_pmap) ~f:(fun addr -> + Superset.mark_descendents_at superset ~insn_isg ~visited addr + ); + Markup.check_convergence superset visited; + superset +(*Trim.trim superset*) + +(*let collect_metrics superset tps pmap = + let avg hs = + let avg = + Hash_set.fold hs ~init:(0.0) ~f:(fun (avg) tp -> + let tp = Option.value Map.(find pmap tp) ~default:0.0 in + avg +. tp + ) in + avg /. (float_of_int Hash_set.(length hs)) in + let avg_tp = avg tps in + let min_tp, max_tp = + Map.fold pmap ~init:(None, None) + ~f:(fun ~key ~data (min_tp, max_tp) -> + let p = data in + if Hash_set.mem tps key then + let min_tp = min p Option.(value min_tp ~default:p) in + let max_tp = max p Option.(value max_tp ~default:p) in + (Some min_tp, Some max_tp) + else (min_tp, max_tp) + ) in + let min_tp = Option.value min_tp ~default:0.0 in + let max_tp = Option.value max_tp ~default:0.0 in + let true_positives = Hash_set.(length tps) in + let ro = Metrics.reduced_occlusion superset tps in + let fps = Metrics.false_positives superset ro in + let removed = Markup.collect_bad superset in + let fps = Hash_set.filter fps ~f:(fun x -> not Hash_set.(mem removed x)) in + let tps = Hash_set.filter tps ~f:(fun x -> not Hash_set.(mem removed x)) in + let avg_fp = avg fps in + let min_fp, max_fp = + Map.fold pmap ~init:(None, None) + ~f:(fun ~key ~data (min_fp, max_fp) -> + let p = data in + if Hash_set.mem fps key then + let min_fp = min p Option.(value min_fp ~default:p) in + let max_fp = max p Option.(value max_fp ~default:p) in + (Some min_fp, Some max_fp) + else (min_fp, max_fp) + ) in + let min_fp = Option.value min_fp ~default:0.0 in + let max_fp = Option.value max_fp ~default:0.0 in + let false_positives = Hash_set.(length fps) in + let tp_std_dev = stddev_of tps avg_tp pmap in + let fp_std_dev = stddev_of fps avg_fp pmap in + let insn_risg = Superset.get_graph superset in + let detected_insns = + Superset_risg.G.fold_vertex + (fun vert detected_insns -> Set.add detected_insns vert) + insn_risg Addr.Set.empty in + let tps = Hash_set.fold ~init:Addr.Set.empty tps ~f:(Set.add) in + let false_negatives = Set.length Set.(diff tps detected_insns) in + () +*) diff --git a/lib/bap_disasm/grammar.ml b/lib/bap_disasm/grammar.ml new file mode 100644 index 000000000..3975cd433 --- /dev/null +++ b/lib/bap_disasm/grammar.ml @@ -0,0 +1,166 @@ +open Core_kernel.Std +open Bap_types.Std +open Bap_image_std + +let identify_branches superset = + let deferred = ref Addr.Map.empty in + let insn_risg = Superset.get_graph superset in + let entries = Superset_risg.entries_of_isg insn_risg in + (* need to create a sequence of non-fall through edges *) + let insns = Addr.Hash_set.create () in + let branches = Addr.Hash_set.create () in + let tag_branches addr = + if Superset_risg.G.in_degree insn_risg addr = 2 then + let inbound = Superset_risg.G.pred insn_risg addr in + List.iter inbound ~f:(fun child -> + (* check for edges between instructions that are not + fall through, but for which *) + if Hash_set.mem insns child then + let ft = Superset.fall_through_of superset addr in + if not Addr.(ft = child) && + not Addr.(addr = child) then + deferred := Map.set !deferred ft (child, addr) + ); + in + let confirm_branches addr = + match Map.find !deferred addr with + | Some (child, branch) -> + if Hash_set.mem insns child then + Hash_set.add branches branch + | None -> () + in + let pre addr = + Hash_set.add insns addr; + tag_branches addr + in + let post addr = + Hash_set.remove insns addr in + Traverse.visit + ~pre ~post superset entries; + let pre addr = + Hash_set.add insns addr; + confirm_branches addr + in + Traverse.visit + ~pre ~post superset entries; + branches + +let increment_map_at m ?(x=1) addr = + m := Map.update !m addr + ~f:(fun hits -> Option.value_map hits ~default:1 + ~f:(fun hits -> hits +x)); + Option.value ~default:x Map.(find !m addr) + +let linear_branch_sweep superset entries = + let jmp_hit_cnt = ref Addr.Map.empty in + let update_hit_count = increment_map_at jmp_hit_cnt in + let pre jmps targets addr = + if Set.mem targets addr then ( + ignore (update_hit_count addr); + ); + match Map.find jmps addr with + | Some(branch) -> + ignore (update_hit_count branch); + | None -> (); + in + let post _ _ _ = () in + Traverse.visit_by_block superset ~pre ~post entries; + let final_jmps = Addr.Hash_set.create () in + Map.iteri !jmp_hit_cnt ~f:(fun ~key ~data -> + let jmp_addr = key in + let cnt = data in + if cnt = 2 then + Hash_set.add final_jmps jmp_addr; + ); + final_jmps + +let tag_callsites visited ?callsites superset = + let insn_risg = Superset.get_graph superset in + let callsites = Option.value callsites + ~default:(Superset.get_callsites ~threshold:6 superset) in + let insn_isg = Superset_risg.Oper.mirror insn_risg in + Hash_set.iter callsites ~f:(fun callsite -> + Superset_risg.iter_component ~visited insn_isg callsite; + ); + superset + +(* The objective here is to tag grammar structures while traversing *) +(* topologically in such a manner that we can converge the *) +(* probability of recognizing an intended sequence by the *) +(* compiler. After we've hit some recognition threshold, we begin *) +(* traversing forward from some activation point whereby we trim *) +(* occlusive instructions. To recognize grammars, we have several *) +(* means: one, loops are strongly connected components, and if *) +(* sequences must branch at some point only to reify at a common *) +(* point, expressing a path by which they can finally rejoin. *) +let tag_by_traversal ?(threshold=8) superset = + let insn_risg = Superset.get_graph superset in + let visited = Addr.Hash_set.create () in + (* TODO should be either in it's own module and designated function *) + let callsites = Superset.get_callsites ~threshold:6 superset in + let superset = tag_callsites visited ~callsites superset in + let superset = Invariants.tag_layer_violations superset in + let superset = Invariants.tag_branch_violations superset in + let insn_isg = Superset_risg.Oper.mirror insn_risg in + let entries = Superset_risg.entries_of_isg insn_risg in + let branches = Superset_risg.get_branches insn_risg in + (*let branches = identify_branches superset in*) + (*let branches = linear_branch_sweep superset entries in*) + (* TODO should delete this printf *) + printf "detected %d if structures, %d callsites\n" + (Hash_set.length branches) Hash_set.(length callsites); + let cur_total = ref 0 in + let positives = ref [] in + let entry = ref None in + let tps = Addr.Hash_set.create () in + (* In the case that our current starting point, entry, is none, set *) + (* it to being the address of the lambda parameter, addr. Then, set *) + (* the current total number of recognized grammar items to zero, *) + (* as well as the positives since we're starting over *) + let pre addr = + if Option.is_none !entry then ( + entry := Some(addr); + cur_total := 0; + positives := []; + ); + if Hash_set.mem branches addr || Hash_set.mem callsites addr then ( + cur_total := !cur_total + 1; + positives := addr :: !positives; + if !cur_total >= threshold then ( + let open Option in + (* TODO could add the remaining part of the positives from threshold *) + ignore (List.nth !positives threshold >>| + (fun convergent_point -> + Hash_set.add tps convergent_point)); + ) + ) in + (* TODO, post does not take into account that increments may occur *) + (* across layers and keep those isolated *) + let post addr = + entry := Option.value_map !entry ~default:!entry + ~f:(fun e -> if e = addr then None else Some(e)); + if Hash_set.mem branches addr || Hash_set.mem callsites addr then ( + cur_total := !cur_total - 1; + match !positives with + | _ :: remaining -> positives := remaining + | [] -> (); + ) in + Traverse.visit + ~pre ~post superset entries; + printf "marked %d convergences\n" (Hash_set.length tps); + let visited = Addr.Hash_set.create () in + Hash_set.iter tps ~f:(fun tp -> + if not (Hash_set.mem visited tp) then ( + Superset_risg.Dfs.prefix_component (fun tp -> + let mark_bad addr = + if Superset_risg.G.mem_vertex insn_risg addr then ( + Superset.mark_bad superset addr + ) in + Superset.with_data_of_insn superset tp ~f:mark_bad; + Hash_set.add visited tp; + ) insn_isg tp; + ) + ); + Hash_set.iter visited + ~f:(fun tp -> Superset.clear_bad superset tp); + superset diff --git a/lib/bap_disasm/insn_disasm_benchmark.ml b/lib/bap_disasm/insn_disasm_benchmark.ml new file mode 100644 index 000000000..e36570956 --- /dev/null +++ b/lib/bap_disasm/insn_disasm_benchmark.ml @@ -0,0 +1,46 @@ +open Bap_types.Std +open Bap_image_std +open Core_kernel.Std +open Or_error + +module Linear = Bap_disasm_linear_sweep +exception Inconsistent_img of string + +let read arch ic : (string * addr * addr) list = + let sym_of_sexp x = [%of_sexp:string * int64 * int64] x in + let addr_of_int64 x = + let width = Arch.addr_size arch |> Size.in_bits in + Addr.of_int64 ~width x in + List.(Sexp.input_sexps ic >>| sym_of_sexp >>| (fun (s, es, ef) -> + s, addr_of_int64 es, addr_of_int64 ef)) + +let read_addrs ic : addr list = + List.t_of_sexp Addr.t_of_sexp @@ Sexp.input_sexp ic + +let ground_truth_of_unstripped_bin bin : addr seq Or_error.t = + let tmp = Filename.temp_file "bw_" ".symbol" in + let cmd = sprintf "bap-byteweight dump -i symbols %S > %S" + bin tmp in + if Sys.command cmd = 0 + then return (Seq.of_list @@ In_channel.with_file tmp ~f:read_addrs) + else errorf + "failed to fetch symbols from unstripped binary, command `%s' + failed" cmd + +let linear_of_ground_truth bin = + let entrances = ground_truth_of_unstripped_bin bin |> ok_exn in + let img = Common.img_of_filename bin in + let arch = Image.arch img in + let segments = Image.segments img in + Seq.map entrances ~f:(fun entrance -> + let segment_for_entrance = + match Table.find_addr segments entrance with + | Some (mem, seg) -> mem + | None -> + raise (Inconsistent_img + "Image purports addr spans to not obey them") in + let mem_for_entrance = + Memory.view ~from:entrance segment_for_entrance |> ok_exn in + Linear.sweep arch mem_for_entrance |> ok_exn + ) + diff --git a/lib/bap_disasm/invariants.ml b/lib/bap_disasm/invariants.ml new file mode 100644 index 000000000..42e6c64d1 --- /dev/null +++ b/lib/bap_disasm/invariants.ml @@ -0,0 +1,118 @@ +open Bap_types.Std +open Bap_image_std +open Core_kernel.Std + +module type InvariantApplicator = sig + val apply : 'a Superset.t -> 'a Superset.t +end + + +let enforce_exclusivity insn_delta data_delta = + let insns_in_data = + Hash_set.fold ~init:[] data_delta ~f:(fun violators data -> + if Hash_set.mem insn_delta data then + data :: violators + else violators + ) in + let datas_in_insn = + Hash_set.fold ~init:[] insn_delta ~f:(fun violators insn -> + if Hash_set.mem data_delta insn then + insn :: violators + else violators + ) in + (insns_in_data, datas_in_insn) + +let mark_nonexclusive superset insn_delta data_delta ~mark = + let (data_violators, insn_violators) = + enforce_exclusivity insn_delta data_delta in + List.iter data_violators ~f:mark; + List.iter insn_violators ~f:mark + +(* Unfortunately, I can't build this with functional programming in *) +(* mind, because the ocamlgraph function (fold) required to do so is *) +(* missing from the DFS module. *) +let tag_layer_violations superset = + let insn_risg = Superset.get_graph superset in + let insn_map = Superset.get_map superset in + let add_data_of_insn dataset at = + Superset.with_data_of_insn superset at ~f:(Hash_set.add dataset) + in + let remove_data_of_insn dataset at = + Superset.with_data_of_insn superset at ~f:(Hash_set.remove dataset) + in + let conflicts = Superset_risg.find_all_conflicts insn_map in + let entries = Superset_risg.entries_of_isg insn_risg in + let tails = Decision_tree_set.tails_of_conflicts + conflicts insn_risg in + let options = Map.fold tails ~init:Addr.Set.empty ~f: + (fun ~key ~data options -> + List.fold ~init:options data ~f:Set.add) in + let is_option addr = + Set.mem options addr in + let insns = Addr.Hash_set.create () in + let datas = Addr.Hash_set.create () in + let pre deltas addr = + add_data_of_insn datas addr; + Hash_set.add insns addr in + let tag_violators deltas addr = + match Map.find deltas addr with + | Some (insn_delta, data_delta) -> + Hash_set.iter insn_delta ~f:(fun insn -> + let inbound = Superset_risg.G.pred insn_risg insn in + (* TODO what if we encounter a predecessor we haven't *) + (* visited before? *) + List.iter inbound ~f:(fun src -> + if Hash_set.mem data_delta src then ( + Superset.mark_bad superset insn; + ) (*else if Hash_set.mem datas src then ( + Superset.mark_bad superset insn; + )*) + ); + ); + (*mark_nonexclusive superset insn_delta data_delta + ~mark:(Superset.mark_bad superset)*) + | None -> (); + in + let post deltas addr = + tag_violators deltas addr; + Hash_set.remove insns addr; + remove_data_of_insn datas addr in + Traverse.visit_with_deltas + ~is_option ~pre ~post superset entries; + superset + +let tag_branch_violations superset = + let insn_risg = Superset.get_graph superset in + let add_data_of_insn dataset at = + Superset.with_data_of_insn superset at ~f:(Hash_set.add dataset) + in + (* TODO removing should move to an alternate set to track discrete lineages *) + let remove_data_of_insn dataset at = + Superset.with_data_of_insn superset at ~f:(Hash_set.remove dataset) + in + let insns = Addr.Hash_set.create () in + let datas = Addr.Hash_set.create () in + let pre addr = + add_data_of_insn datas addr; + Hash_set.add insns addr; + if Hash_set.mem datas addr then ( + Superset.mark_bad superset addr; + ); + let inbound = Superset_risg.G.pred insn_risg addr in + List.iter inbound ~f:(fun target -> + let ft = Superset.is_fall_through + superset addr target in + if not ft then ( + if Hash_set.mem datas target then + Superset.mark_bad superset addr; + ) + ) + in + let post addr = + (* TODO removing should move to a different set, for tracking + alternate lineages *) + Hash_set.remove insns addr; + remove_data_of_insn datas addr in + let entries = Superset_risg.entries_of_isg insn_risg in + Traverse.visit ~pre ~post superset entries; + superset diff --git a/lib/bap_disasm/markup.ml b/lib/bap_disasm/markup.ml new file mode 100644 index 000000000..baeb56540 --- /dev/null +++ b/lib/bap_disasm/markup.ml @@ -0,0 +1,58 @@ +open Bap_types.Std +open Bap_image_std +open Core_kernel.Std + +let mark_threshold_with_pmap ?visited ?datas superset pmap threshold = + let visited = Option.value visited + ~default:(Addr.Hash_set.create ()) in + let datas = Option.value datas + ~default:(Addr.Hash_set.create ()) in + let insn_risg = Superset.get_graph superset in + let insn_isg = Superset_risg.Oper.mirror insn_risg in + Map.iteri pmap ~f:(fun ~key ~data -> + let addr = key in + let p = data in + if p > threshold then ( + if Superset_risg.G.mem_vertex insn_risg addr then + Superset.mark_descendents_at + ~datas ~visited ~insn_isg superset addr; + ) + ) + +let mark_tps superset visited = + let insn_risg = Superset.get_graph superset in + (*if Superset_risg.G.mem_vertex insn_risg bad then*) + Hash_set.iter visited + ~f:(fun tp -> + if Superset_risg.G.mem_vertex insn_risg tp then + Superset.clear_bad superset tp) + +let collect_bad superset = + let visited = Addr.Hash_set.create () in + let _ = Superset.with_bad superset ~visited + ~pre:(fun _ _ -> ()) ~post:(fun _ _ -> ()) () in + visited + +let clear_bad superset = Superset.clear_all_bad superset + +(* TODO need to do more to make sure that those are actually + retained, since it may be the case that a descendant identifies as + bad as well. + 1) may be able to tack this onto trim, so that we disable it at + retain points. would make sense if the superset type was extensible. +*) +let enforce_uncertain superset visited datas pmap = + Map.iteri !pmap ~f:(fun ~key ~data -> + let addr = key in + let prob = data in + let mark_good addr = + Superset.clear_bad superset addr in + if prob >= 0.85 then + mark_good addr + ) + +let check_convergence superset visited = + Hash_set.iter visited ~f:(fun tp -> + Superset.clear_bad superset tp + ) + diff --git a/lib/bap_disasm/metrics.ml b/lib/bap_disasm/metrics.ml new file mode 100644 index 000000000..38625ae52 --- /dev/null +++ b/lib/bap_disasm/metrics.ml @@ -0,0 +1,288 @@ +open Bap_types.Std +open Bap_image_std +open Core_kernel.Std + + +type format_as = | Latex + | Standard +[@@deriving sexp] + +type metrics = { + name : string; + detected_insn_count : int; + false_negatives : int; + false_positives : int; + detected_entries : int; + actual_entries : int; + trimmed : int list; +} + +module InvariantTrackingApplicator = struct +end + +module type MetricsGathererInstance = sig + type acc = (Addr.Hash_set.t * Superset_risg.t ) + val accu : acc +end + +module MetricsGatheringReducer(M : MetricsGathererInstance) : Trim.Reducer = struct + type acc = M.acc + let accu = M.accu + let check_pre _ accu _ = accu + let check_post _ accu _ = accu + let check_elim _ _ _ = true + let mark superset (s,g) addr = + Hash_set.add s addr; + let src_graph = Superset.get_graph superset in + List.iter Superset_risg.G.(succ src_graph addr) ~f:(fun succ -> + Superset_risg.G.add_edge g addr succ + ); + List.iter Superset_risg.G.(pred src_graph addr) ~f:(fun pred -> + Superset_risg.G.add_edge g pred addr + ) +end + +module type PerMarkTracker = sig + type acc = (Addr.Set.t Addr.Map.t) ref + val accu : acc +end + +module PerMarkTrackingReducer(M : PerMarkTracker) : Trim.Reducer = + struct + type acc = M.acc + let accu = M.accu + let cur_root = ref None + let check_pre superset accu addr = + (match !cur_root with + | Some x -> + accu := + Map.update !accu x ~f:(fun s -> + match s with + | None -> Addr.Set.empty + | Some s -> Set.add s addr + ); + | None -> cur_root:=Some(addr)); + accu + let check_post superset (accu : acc) addr = + (match !cur_root with + | None -> () + | Some x -> if x = addr then cur_root := None); + accu + let check_elim _ _ _ = true + let mark superset accu addr = + let cur_root = Option.value_exn !cur_root in + accu := + Map.update !accu cur_root ~f:(fun s -> + match s with + | None -> Addr.Set.empty + | Some s -> Set.add s addr + ); + end + +let print_dot superset colorings = + (*if not (colorings = String.Map.empty) then*) + let img = Superset.get_img superset in + let fout = Out_channel.create @@ + Option.value_exn Image.(filename img) ^ ".dot" in + let superset_risg = Superset.get_graph superset in + let superset_isg = Superset_risg.Oper.mirror superset_risg in + let insn_map = Superset.get_map superset in + let module Layout = + Cfg_dot_layout.Make(struct + let instance = (superset_isg, colorings, insn_map) + end) in + Layout.Dot.output_graph fout (superset_isg, colorings, insn_map) + + +let format_standard metrics = + match metrics with + | Some metrics -> + sprintf "%s%d\n%s%d\n%s%d\n%s%d\n%s%d" + "Total instructions recovered: " metrics.detected_insn_count + "False negatives: " metrics.false_negatives + "False positives: " metrics.false_positives + "Detected function entrances: " metrics.detected_entries + "Actual function entrances: " metrics.actual_entries + | None -> "No metrics gathered!" + +let format_latex metrics = + match metrics with + | Some metrics -> + (match metrics.trimmed with + | (phase1 :: phase2 :: _) -> + sprintf "%s & %d & %d & %d & %d \\\\\n" + metrics.name + metrics.false_negatives + phase1 + phase2 + metrics.detected_insn_count; + | _ -> "Missing trim phases") + | None -> "No metrics gathered!" + +let true_positives_of_ground_truth ?insn_isg superset ground_truth = + let insn_isg = + match insn_isg with + | Some insn_isg -> insn_isg + | None -> + let insn_risg = Superset.get_graph superset in + Superset_risg.Oper.mirror insn_risg in + let true_positives = Addr.Hash_set.create () in + Set.iter ground_truth ~f:(fun addr -> + if Superset_risg.G.mem_vertex insn_isg addr then + Superset_risg.Dfs.prefix_component + (Hash_set.add true_positives) insn_isg addr; + ); + true_positives + +(* implement jmp_of_fp as a map from target to source in *) +(* True positive set is going to come up short because if it isn't in *) +(* the isg, it isn't going to be explored *) +let true_positives ?insn_isg superset f = + let function_starts = + Insn_disasm_benchmark.ground_truth_of_unstripped_bin f |> ok_exn + in + let ground_truth = + Addr.Set.of_list @@ Seq.to_list function_starts in + true_positives_of_ground_truth ?insn_isg superset ground_truth + +let reduced_occlusion superset tp = + let fps = Addr.Hash_set.create () in + Hash_set.iter tp ~f:(fun addr -> + let len = Superset.len_at superset addr in + Seq.iter (Superset_risg.seq_of_addr_range addr len) + ~f:(fun x -> Hash_set.add fps x); + Hash_set.remove fps addr; + ); + fps + +let false_positives superset ro = + let insn_risg = Superset.get_graph superset in + let fps = Addr.Hash_set.create () in + Hash_set.iter ro ~f:(fun v -> + if Superset_risg.G.mem_vertex insn_risg v then + Hash_set.add fps v + ); + fps + +let fn_insn_cnt superset tps = + let insn_risg = Superset.get_graph superset in + Hash_set.fold ~init:0 tps ~f:(fun count v -> + if Superset_risg.G.mem_vertex insn_risg v then count + else count+1) + +let check_tp_set true_positives s = + let n = Hash_set.length s in + let tp_of_s = + Hash_set.fold ~init:0 true_positives + ~f:(fun tp_of_s x -> + if Hash_set.mem s x + then tp_of_s + 1 else tp_of_s) in + let fp_of_s = n - tp_of_s in + fp_of_s, tp_of_s + +let check_fn_entries superset ground_truth = + let insn_risg = Superset.get_graph superset in + let detected_insns = + Superset_risg.G.fold_vertex + (fun vert detected_insns -> Set.add detected_insns vert) + insn_risg Addr.Set.empty in + Set.diff ground_truth detected_insns + +(* adjust this to collect metrics into the metrics field, and then *) +(* split the printing out into a separate function *) +let gather_metrics ~bin superset = + let insn_map = Superset.get_map superset in + let insn_risg = Superset.get_graph superset in + let function_starts = + Insn_disasm_benchmark.ground_truth_of_unstripped_bin bin |> ok_exn in + let ground_truth = + Addr.Set.of_list @@ Seq.to_list function_starts in + let insn_isg = Superset_risg.Oper.mirror insn_risg in + let ground_truth = + Set.(filter ground_truth ~f:(fun e -> + let img = Superset.get_img superset in + Superset.with_img ~accu:false img + ~f:(fun ~accu mem -> + accu || Memory.(contains mem e)) + )) in + let true_positives = true_positives_of_ground_truth ~insn_isg superset ground_truth in + let datas = Addr.Hash_set.create () in + let detected_insns = Addr.Hash_set.create () in + let dfs_find_conflicts addr = + Superset.with_descendents_at + ~insn_isg ~visited:detected_insns superset addr + ~f:(fun v -> Superset.with_data_of_insn superset v + ~f:(fun x -> Hash_set.add datas x)) in + let reduced_occlusion () = Hash_set.fold ~init:0 datas + ~f:(fun ro d -> + if Superset_risg.G.(mem_vertex insn_risg d) + then ro+1 else ro) in + let num_bytes = + Superset.with_img ~accu:0 Superset.(get_img superset) + ~f:(fun ~accu mem -> accu + Memory.(length mem)) in + let entries = Superset_risg.entries_of_isg insn_risg in + let branches = Grammar.linear_branch_sweep superset entries in + let fp_branches, tp_branches = check_tp_set true_positives branches in + printf "Num f.p. branches: %d, num tp branches: %d\n" fp_branches tp_branches; + printf "superset_isg_of_mem length %d\n" num_bytes; + let total_clean,_ = + Set.fold ground_truth ~init:(0,0) ~f:(fun (n,prev) x -> + dfs_find_conflicts x; + if Hash_set.length datas > prev then + ((n),Hash_set.(length datas)) + else ((n+1),prev) + ) in + printf "Number of functions precisely trimmed: %d of %d\n" + total_clean Set.(length ground_truth); + printf "Number of possible reduced false positives: %d\n" + Hash_set.(length datas); + printf "Reduced occlusion: %d\n" (reduced_occlusion ()); + printf "True positives: %d\n" Hash_set.(length true_positives); + let fn_entries = check_fn_entries superset ground_truth in + if not (Set.length fn_entries = 0) then + printf "Missed function entrances %s\n" + (List.to_string ~f:Addr.to_string @@ Set.to_list fn_entries); + printf "Occlusion: %d\n" + (Set.length @@ Superset_risg.find_all_conflicts insn_map); + printf "Instruction fns: %d\n" (fn_insn_cnt superset true_positives); + printf "superset_map length %d graph size: %d num edges %d\n" + Map.(length insn_map) + (Superset_risg.G.nb_vertex insn_risg) + (Superset_risg.G.nb_edges insn_risg); + let false_negatives = Set.(length fn_entries) in + let detected_entries = Set.(length ground_truth) - false_negatives in + let false_positives = Hash_set.fold detected_insns ~init:0 + ~f:(fun c v -> if not Set.(mem ground_truth v) then c+1 else c) in + let detected_insn_count = Superset_risg.G.nb_vertex insn_risg in + Some ({ + name = bin; + detected_insn_count = detected_insn_count; + false_positives = false_positives; + false_negatives = false_negatives; + detected_entries = detected_entries; + actual_entries = (Set.length ground_truth); + trimmed = []; + }) + +(*module Opts = struct + open Cmdliner + + let list_content_doc = sprintf + "Metrics may be collected against a symbol file" + let content_type = + Arg.(value & + opt (some string) (None) + & info ["metrics_data"] ~doc:list_content_doc) + + let list_formats_types = [ + "standard", Standard; + "latex", Latex; + ] + let list_formats_doc = sprintf + "Available output metrics formats: %s" @@ + Arg.doc_alts_enum list_formats_types + let metrics_format = + Arg.(value & opt (enum list_formats_types) Standard + & info ["metrics_format"] ~doc:list_formats_doc) + +end*) diff --git a/lib/bap_disasm/report.ml b/lib/bap_disasm/report.ml new file mode 100644 index 000000000..cd0f1cc03 --- /dev/null +++ b/lib/bap_disasm/report.ml @@ -0,0 +1,218 @@ +open Core_kernel.Std +open Bap_types.Std +open Bap_image_std + +let () = Random.self_init () + +type map_report = { + tp_to_tp : int; + tp_to_fp : int; + fp_to_tp : int; + fp_to_fp : int; +} [@@deriving sexp] + +type report = { + actual_tp_cases : int; + outbound_cases : int; + outbound_inflicted_fn : int; + raw_cases : int; + total_cases_identified : int; + overlap_space : float; + overlap_max : float; + overlap_min : float; + analysis_fp : int; + analysis_cleansed : int; + analysis_fn : int; + tp_cases_reported : int; + fp_cases_reported : int; + extp : addr option; + exfp : addr option; + map_details : map_report option; +} [@@deriving sexp] + +let format_map_details md = + match md with + | None -> "None" + | Some(md) -> sprintf "tptp %d tpfp %d fptp %d fpfp %d" + md.tp_to_tp md.tp_to_fp md.fp_to_tp md.fp_to_fp + +let format_report r = + sprintf "\tReport: \n%s %d \n%s%d\n%s%d\n%s%f\n%s%d\n%s%d\n%s%d\n%s %d\n%s %s\n%s%d\n%s%d\n%s%d\n%s%s\n%s%s\n%s\n\n" + "actual tp cases: " r.actual_tp_cases + "raw_cases: " r.raw_cases + "total identified: " r.total_cases_identified + "overlap: " r.overlap_space + "tp reported: " r.tp_cases_reported + "fp reported: " r.fp_cases_reported + "outbound cases: " r.outbound_cases + "outbound inflicted fn: " r.outbound_inflicted_fn + "map results: " (format_map_details r.map_details) + "analysis removed: " r.analysis_cleansed + "analysis fn: " r.analysis_fn + "analysis fp: " r.analysis_fp + "extp: " Option.(value_map r.extp ~f:Addr.to_string ~default:"None") + "exfp: " Option.(value_map r.exfp ~f:Addr.to_string ~default:"None") + "=================" + +let collect_set_report + (superset : 's) (extractf : 's -> 'e) (filterf : 's -> 'e -> 'e) tps ro fps pmap = + let extracted = extractf superset in + let actual_tp_cases = + Addr.Set.fold ~init:0 extracted ~f:(fun actual e -> + if Hash_set.mem tps e then actual+1 else actual + ) in + let raw_cases = Addr.Set.length extracted in + let filtered = filterf superset extracted in + let tp_cases_reported, fp_cases_reported = + Addr.Set.fold ~init:(0,0) filtered ~f:(fun (tpr, fpr) e -> + let t = if Hash_set.mem tps e then 1 else 0 in + let f = if Hash_set.mem fps e then 1 else 0 in + (tpr + t, fpr + f) + ) in + let outbound_cases = + Addr.Set.fold ~init:0 filtered ~f:(fun outbound e -> + if not Hash_set.(mem tps e) && not Hash_set.(mem fps e) + then outbound + 1 else outbound + ) in + let visited = Addr.Hash_set.create () in + let outbound = Addr.Hash_set.create () in + let insn_risg = Superset.get_graph superset in + let insn_isg = Superset_risg.Oper.mirror insn_risg in + Addr.Set.iter filtered + ~f:(fun e -> + if not Hash_set.(mem tps e) && not Hash_set.(mem fps e) then ( + if not (Hash_set.mem visited e) && + Superset_risg.G.mem_vertex insn_isg e then + Superset_risg.Dfs.prefix_component (fun tp -> + let mark_bad addr = + if Superset_risg.G.mem_vertex insn_isg addr then + Superset.mark_bad superset addr in + Superset.with_data_of_insn superset tp ~f:mark_bad; + Superset.with_data_of_insn superset tp ~f:(Hash_set.add outbound); + Hash_set.add visited tp; + ) insn_isg e + ) + ); + let outbound_inflicted_fn = + Hash_set.fold ~init:0 outbound ~f:(fun inflicted o -> + if Hash_set.mem tps o then inflicted+1 else inflicted + ) in + let (tp_max, tp_min, fp_max, fp_min) = Map.fold pmap ~init:(0.0, 0.0, 0.0, 0.0) + ~f:(fun ~key ~data (tp_max, tp_min, fp_max, fp_min) -> + let tp_max, tp_min = + if Hash_set.mem tps key then + max tp_max data, min tp_min data + else tp_max, tp_min in + let fp_max, fp_min = + if Hash_set.mem fps key then + max fp_max data, min fp_min data + else fp_max, fp_min in + tp_max, tp_min, fp_max, fp_min + ) in + let overlap_max = + if tp_min > fp_max then tp_min + else if fp_min > tp_max then fp_min + else max tp_min fp_min in + let overlap_min = + if tp_min < fp_max then tp_min + else if fp_min < tp_max then fp_min + else min tp_min fp_min in + let overlap_space = overlap_max -. overlap_min in + let visited = Addr.Hash_set.create () in + let datas = Addr.Hash_set.create () in + Markup.mark_threshold_with_pmap ~visited ~datas superset pmap 0.99; + let removed = Markup.collect_bad superset in + let analysis_cleansed = Hash_set.length removed in + let (analysis_fn,analysis_fp) = + Hash_set.fold removed ~init:(0,0) ~f:(fun (fn,fp) addr -> + let fn = if Hash_set.mem tps addr then fn+1 else fn in + let fp = if Hash_set.mem fps addr then fp+1 else fp in + fn,fp + ) in + let pick_addr s = + let s = Hash_set.to_array s in + let len = Array.(length s) in + if len = 0 then None else Some(Array.get s Random.(int len)) + in + let inter s1 s2 = + let r = Addr.Hash_set.create () in + Hash_set.iter s1 ~f:(fun x -> + if Hash_set.mem s2 x then Hash_set.add r x + ); r in + let filtered = + let r = Addr.Hash_set.create () in + Set.iter filtered ~f:Hash_set.(add r); + r in + let extp = pick_addr (inter tps filtered) in + let exfp = pick_addr (inter fps filtered) in + Markup.clear_bad superset; + let total_cases_identified = Hash_set.length filtered in + { + actual_tp_cases; + outbound_cases; + outbound_inflicted_fn; + raw_cases; + total_cases_identified; + overlap_space; + overlap_max; + overlap_min; + analysis_cleansed; + analysis_fp; + analysis_fn; + extp; + exfp; + tp_cases_reported; + fp_cases_reported; + map_details= None; + } + + +let collect_map_report + (superset : 's) (extractf : 's -> 'e) (filterf : 's -> 'e -> 'e) tps ro fps pmap = + let m = extractf superset in + let set_extractf superset = + Map.fold ~init:Addr.Set.empty m ~f:(fun ~key ~data s -> + Set.add Set.(add s key) data) in + let r = + collect_set_report superset set_extractf (fun _ x -> x) tps ro fps pmap in + let (tptp,tpfp,fptp,fpfp) = Map.fold ~init:(0,0,0,0) m + ~f:(fun ~key ~data (tptp,tpfp,fptp,fpfp) -> + if Hash_set.mem tps key then + if Hash_set.mem tps data then + (tptp+1,tpfp,fptp,fpfp) + else if Hash_set.mem fps data then + (tptp,tpfp+1,fptp,fpfp) + else + (tptp,tpfp,fptp,fpfp) + else if Hash_set.mem fps key then + if Hash_set.mem tps data then + (tptp,tpfp,fptp+1,fpfp) + else if Hash_set.mem fps data then + (tptp,tpfp,fptp,fpfp+1) + else (tptp,tpfp,fptp,fpfp) + else (tptp,tpfp,fptp,fpfp) + ) in + let map_details = Some { + tp_to_tp = tptp; + tp_to_fp = tpfp; + fp_to_tp = fptp; + fp_to_fp = fpfp; + } in + { + actual_tp_cases = r.actual_tp_cases; + outbound_cases = r.outbound_cases; + outbound_inflicted_fn = r.outbound_inflicted_fn; + raw_cases = r.raw_cases; + total_cases_identified = r.total_cases_identified; + overlap_space = r.overlap_space; + overlap_max = r.overlap_max; + overlap_min = r.overlap_min; + analysis_cleansed = r.analysis_cleansed; + analysis_fp = r.analysis_fp; + analysis_fn = r.analysis_fn; + tp_cases_reported = r.tp_cases_reported; + fp_cases_reported = r.fp_cases_reported; + extp = r.extp; + exfp = r.exfp; + map_details; + } diff --git a/lib/bap_disasm/sheathed.ml b/lib/bap_disasm/sheathed.ml new file mode 100644 index 000000000..7928b2d3b --- /dev/null +++ b/lib/bap_disasm/sheathed.ml @@ -0,0 +1,78 @@ +open Core_kernel.Std +open Superset_risg +open Common +open Graphlib.Std +open Graph +open Bap_types.Std +open Bap_image_std + + +let parents_of_insns insn_isg component = + Set.fold component ~init:Addr.Set.empty ~f:(fun potential_parents addr -> + Superset_risg.G.fold_succ (fun ancestor potential_parents -> + if not Set.(mem component ancestor) then + Set.add potential_parents ancestor + else potential_parents + ) insn_isg addr potential_parents + ) + +let filter_components ?(min_size=20) components = + List.fold_left components ~init:Addr.Set.empty + ~f:(fun keep component -> + let component = Addr.Set.of_list component in + if Set.length component > min_size then + Addr.Set.(union keep component) + else + keep + ) + +let tag_loop_contradictions ?(min_size=20) superset = + let insn_risg = Superset.get_graph superset in + let insn_map = Superset.get_map superset in + let keep = filter_components ~min_size @@ + StrongComponents.scc_list insn_risg in + (* Here we have to be careful; we only want to find instructions + that occur within a loop that produce a self-contradiction *) + let parents = parents_of_insns insn_risg keep in + let to_remove = + Superset_risg.conflicts_within_insns insn_map keep in + let to_remove = Set.inter to_remove parents in + let to_remove = Set.diff to_remove keep in + printf "tagged %d contradictions of %d parents of %d to keep\n" + Set.(length to_remove) + Set.(length parents) + Set.(length keep); + Set.iter to_remove ~f:(Superset.mark_bad superset); + Superset.rebuild ~insn_map ~insn_risg superset + +let default_tags = [tag_loop_contradictions] + +let tagged_disasm_of_file ?(backend="llvm") bin = + let superset = Trim.tagged_disasm_of_file + ~f:[(fun s x y z -> Superset.add_to_map s x y)] + ~data:() ~backend bin in + tag_loop_contradictions superset + +(* TODO belongs elsewhere or is a duplicate *) +let trimmed_disasm_of_file ?(backend="llvm") bin = + let superset = tagged_disasm_of_file ~backend bin in + Trim.Default.trim superset + +let sheaths_of_file ?(backend="llvm") bin = + let superset = tagged_disasm_of_file ~backend bin in + superset, Decision_tree_set.decision_trees_of_superset superset + +let trimmed_sheaths_of_file ?(backend="llvm") bin = + let superset = Trim.Default.trim (tagged_disasm_of_file ~backend bin) in + superset, Decision_tree_set.decision_trees_of_superset superset + +(* TODO test the below functions *) +let iter_decision_set ?(backend="llvm") bin ~f = + let superset, decision_trees = trimmed_sheaths_of_file ~backend bin in + List.iter decision_trees ~f + +let fold_decision_set ~init ?(backend="llvm") bin ~f = + let superset, decision_trees = + trimmed_sheaths_of_file ~backend bin in + List.fold decision_trees ~init ~f + diff --git a/lib/bap_disasm/superset.ml b/lib/bap_disasm/superset.ml new file mode 100644 index 000000000..a32ef21f7 --- /dev/null +++ b/lib/bap_disasm/superset.ml @@ -0,0 +1,394 @@ +open Core_kernel.Std +open Or_error +open Bap_types.Std +open Bap_image_std +open Bap_disasm_target_factory +(*open Format*) + +module Dis = Bap_disasm_basic +module Brancher = Bap_disasm_brancher + +type elem = mem * (Dis.full_insn option) + +type 'a t = { + arch : arch; + img : Image.t option; + brancher : Brancher.t; + data : 'a; + insn_map : (mem * (Dis.full_insn option)) Addr.Map.t; + (* TODO: needs to become an array *) + insn_risg : Superset_risg.t; + bad : Addr.Hash_set.t; + keep : Addr.Hash_set.t; + (* TODO registerable per-feature info? *) + (* marked data *) + (* visited *) + (* union_find *) +} [@@deriving fields] + +module type Superset_intf = sig +end + +let contains_addr superset addr = + let img = Option.value_exn superset.img in + let segments = Table.to_sequence Image.(segments img) in + Seq.fold segments ~init:false ~f:(fun status (mem, segment) -> + status || Memory.contains mem addr) + +let get_img superset = Option.(value_exn superset.img) + +let get_segments superset = + Image.segments Option.(value_exn superset.img) + +let get_endianness superset = + Image.endian Option.(value_exn superset.img) + +let get_arch superset = superset.arch + +let get_graph superset = superset.insn_risg + +let mark_bad superset addr = + Hash_set.add superset.bad addr + +let num_bad superset = + Hash_set.length superset.bad + +let clear_bad superset addr = + Hash_set.remove superset.bad addr + +let clear_all_bad superset = + Hash_set.clear superset.bad + +let with_bad superset ?visited ~pre ~post accu = + Hash_set.fold ~init:accu superset.bad ~f:(fun accu b -> + if Superset_risg.G.mem_vertex superset.insn_risg b then + Superset_risg.fold_component accu superset.insn_risg + ?visited ~pre ~post b + else accu + ) + +let get_map superset = superset.insn_map + +let get_base superset = + let insn_map = get_map superset in + let (base_addr, _) = Addr.Map.min_elt insn_map |> Option.value_exn in + base_addr + +let get_data superset = superset.data + +let len_at superset at = + let insn_map = get_map superset in + match Map.find insn_map at with + | None -> 0 + | Some(mem, _) -> Memory.length mem + +let fall_through_of superset addr = + let len = len_at superset addr in + Addr.(addr ++ len) + +let is_fall_through superset parent child = + let ft = fall_through_of superset parent in + (* TODO should check for edge *) + Addr.(child = ft) + +let get_callers superset addr = + let g = (get_graph superset) in + if Superset_risg.G.mem_vertex g addr && + Superset_risg.G.out_degree g addr > 0 then + let callers = Superset_risg.G.succ g addr in + List.filter callers ~f:(fun caller -> + not (is_fall_through superset caller addr)) + else [] + +let get_non_fall_through_edges superset = + let g = (get_graph superset) in + Superset_risg.G.fold_edges + (fun child parent jmps -> + if is_fall_through superset parent child then + Map.set jmps child parent + else jmps + ) g Addr.Map.empty + +let get_callsites ?(threshold=6) superset = + let g = (get_graph superset) in + let callsites = Addr.Hash_set.create () in + Superset_risg.G.iter_vertex + (fun v -> + let callers = Superset_risg.G.succ g v in + let num_callers = + List.fold callers ~init:0 ~f:(fun total caller -> + if not (is_fall_through superset caller v) then + total + 1 + else total) in + if num_callers > threshold then ( + Hash_set.add callsites v; + ) + ) g; + callsites + +let with_data_of_insn superset at ~f = + let len = len_at superset at in + let body = Superset_risg.seq_of_addr_range at len in + Seq.iter body ~f + +let with_descendents_at ?insn_isg ?visited superset addr ~f = + let insn_risg = get_graph superset in + if Superset_risg.G.mem_vertex insn_risg addr then + let insn_isg = + match insn_isg with + | Some insn_isg -> insn_isg + | None -> + Superset_risg.Oper.mirror insn_risg in + Superset_risg.iter_component ?visited ~pre:f insn_isg addr + +let mark_descendents_at ?insn_isg ?visited ?datas superset addr = + let datas = Option.value datas + ~default:(Addr.Hash_set.create ()) in + let mark_bad = mark_bad superset in + with_descendents_at ?insn_isg ?visited superset addr + ~f:(fun v -> + with_data_of_insn superset v ~f:mark_bad; + with_data_of_insn superset v ~f:(Hash_set.add datas); + ) + +let create ?insn_map ?insn_risg arch data = + let insn_map = Option.value insn_map ~default:Addr.Map.empty in + let insn_risg = Option.value insn_risg + ~default:(Superset_risg.G.create ()) in + { + arch = arch; + img = None; + brancher = Brancher.of_bil arch; + insn_map = insn_map; + insn_risg = insn_risg; + bad = Addr.Hash_set.create (); + keep = Addr.Hash_set.create (); + data = data; + } + +let rebuild ?data ?insn_map ?insn_risg superset = + let insn_map = Option.value insn_map ~default:superset.insn_map in + let data = Option.value data ~default:superset.data in + let insn_risg = Option.value insn_risg ~default:superset.insn_risg in + { + arch = superset.arch; + brancher = superset.brancher; + img = superset.img; + bad = superset.bad; + data = data; + insn_risg = insn_risg; + insn_map = insn_map; + keep = superset.keep; + } + +let drop superset = + rebuild ~data:() superset + +let remove superset addr = + Superset_risg.G.remove_vertex superset.insn_risg addr; + rebuild superset + +let add_to_map superset mem insn = + let insn_map = get_map superset in + let addr = (Memory.min_addr mem) in + let insn_map = Addr.Map.set insn_map addr (mem, insn) in + rebuild ~insn_map superset + +let add_to_graph superset mem insn = + let addr = Memory.min_addr mem in + Superset_risg.G.add_vertex superset.insn_risg addr; + rebuild superset + +let add superset mem insn = + let superset = add_to_graph superset mem insn in + let superset = add_to_map superset mem insn in + superset + +let replace superset mem insn = + let addr = Memory.min_addr mem in + let superset = remove superset addr in + add superset mem insn + +let format_cfg ?format superset = + let format = Option.value format ~default:Format.std_formatter in + Superset_risg.Gml.print format superset.insn_risg + +let isg_to_string superset = + let format = Format.str_formatter in + format_cfg ~format superset; + Format.flush_str_formatter () + +let next_chunk mem ~addr = + let next_addr = Addr.succ addr in + Memory.view ~from:next_addr mem + +let run_seq dis mem = + let open Seq.Generator in + let rec disasm cur_mem = + let elem = match Dis.insn_of_mem dis cur_mem with + | Ok (m, insn, _) -> (m, insn) + | Error _ -> (cur_mem, None) in + yield elem >>= fun () -> + match next_chunk mem ~addr:(Memory.min_addr cur_mem) with + | Ok next -> disasm next + | Error _ -> return () in + run (disasm mem) + +let run dis ~accu ~f mem = + Seq.fold ~init:accu ~f:(fun x y -> f y x) (run_seq dis mem) + +let disasm ?(backend="llvm") ~accu ~f arch mem = + Dis.with_disasm ~backend (Arch.to_string arch) + ~f:(fun d -> Ok(run d ~accu ~f mem)) + +let lift_insn lift_fn (mem,insn) = + let lift_fn = lift_fn mem in + let insn = Option.map insn ~f:lift_fn in + Option.map insn ~f:(fun bil -> (mem, bil |> ok_exn)) + +let lift arch insns = + let module Target = (val target_of_arch arch) in + let lifter = Target.lift in + let lifted_superset = Addr.Map.empty in + List.fold insns ~init:lifted_superset + ~f:(fun lifted_superset (mem, insn) -> + match lift_insn lifter (mem, insn) with + | Some (mem, bil) -> + let addr = Memory.min_addr mem in + Map.set lifted_superset ~key:addr + ~data:(bil, Memory.length mem) + | None -> lifted_superset + ) + +let memmap_all ?backend arch mem = + let filter_add elem memmap = + let (mem, insn) = elem in + Option.value_map insn ~default:memmap + ~f:(Memmap.add memmap mem) in + disasm ?backend ~accu:Memmap.empty ~f:filter_add arch mem |> ok_exn + +let sexp_of_mem mem = + let endianness = Memory.endian mem in + let maddr = Memory.min_addr mem in + let bstr_mem = Memory.to_string mem in + Tuple3.sexp_of_t + Addr.sexp_of_endian + Addr.sexp_of_t + String.sexp_of_t (endianness, maddr, bstr_mem) + +let mem_of_sexp sexp_mem = + let (endianness, maddr, mem) = + Tuple3.t_of_sexp + Addr.endian_of_sexp + Addr.t_of_sexp + String.t_of_sexp sexp_mem in + let mem = Bigstring.of_string mem in + Memory.create endianness maddr mem |> ok_exn + +let insn_map_to_string insn_map = + Sexp.to_string @@ Addr.Map.sexp_of_t + (fun (mem, _) -> sexp_of_mem mem) insn_map + +let insn_map_of_string map_str = + let map_sexp = Sexp.of_string map_str in + Addr.Map.t_of_sexp (fun m -> mem_of_sexp m, None) map_sexp + +let meta_of_string meta_str = + let sexp_meta = Sexp.of_string meta_str in + Arch.t_of_sexp sexp_meta + +let meta_to_string superset = + Sexp.to_string (Arch.sexp_of_t superset.arch) + +let import bin = + let insn_risg = Superset_risg.Gml.parse (bin ^ ".graph") in + let map_str = In_channel.read_all (bin ^ ".map") in + let insn_map = insn_map_of_string map_str in + let meta_str = In_channel.read_all (bin ^ ".meta") in + let arch = meta_of_string meta_str in + let superset = create ~insn_risg arch ~insn_map () in + superset + +let export bin superset = + let graph_f = Out_channel.create (bin ^ ".graph") in + let formatter = Format.formatter_of_out_channel graph_f in + let () = Superset_risg.Gml.print formatter superset.insn_risg in + let () = Out_channel.close graph_f in + let insn_map = get_map superset in + let map_str = insn_map_to_string insn_map in + Out_channel.write_all (bin ^ ".map") ~data:map_str; + let meta_str = meta_to_string superset in + Out_channel.write_all (bin ^ ".meta") ~data:meta_str + +let export_addrs bin superset = + let insn_map = get_map superset in + let addrs = Map.keys insn_map in + let addrs = List.map addrs ~f:Addr.to_string in + let base = Filename.basename bin in + let addrs_file = Out_channel.create ("./" ^ base ^ "_addrs.txt") in + Out_channel.output_lines addrs_file addrs + +let update_with_mem ?backend ?f superset mem = + let update = Option.value f ~default:(fun (m, i) a -> a) in + let f (mem, insn) superset = + let superset = add superset mem insn in + update (mem, insn) superset in + disasm ?backend ~accu:superset ~f superset.arch mem |> ok_exn + +let with_img ~accu img ~f = + let segments = Table.to_sequence @@ Image.segments img in + Seq.fold segments ~init:accu ~f:(fun accu (mem, segment) -> + if Image.Segment.is_executable segment then + f ~accu mem + else accu + ) + +let superset_of_img ~data ?f ~backend img = + let arch = Image.arch img in + let brancher = Brancher.of_bil arch in + let superset = { + data = data; + arch = arch; + bad = Addr.Hash_set.create (); + insn_risg = Superset_risg.G.create (); + insn_map = Addr.Map.empty; + brancher = brancher; + img = Some img; + keep = Addr.Hash_set.create (); + } in + with_img ~accu:superset img + ~f:(fun ~accu mem -> + update_with_mem ~backend accu mem ?f + ) + +let superset_disasm_of_file ?(backend="llvm") ~data ?f binary = + let img = Common.img_of_filename binary in + let r = superset_of_img ~data ~backend img ?f in + r + +let fold_insns superset f = + let insn_map = get_map superset in + Addr.Map.fold ~init:superset insn_map ~f:(fun ~key ~data superset -> + let mem, insn = data in + f ~superset ~mem ~insn + ) + +let with_graph superset f = + let insn_risg = superset.insn_risg in + f insn_risg + +let rebalance superset = + let insn_map = get_map superset in + let superset_risg = get_graph superset in + Superset_risg.G.iter_vertex (fun vert -> + if not Map.(mem insn_map vert) then ( + mark_bad superset vert; + ) + ) superset_risg; + let insn_map = Map.filteri ~f:(fun ~key ~data -> + let vert = key in + (*let (mem, insn) = data in + Option.is_some insn && *) + Superset_risg.G.(mem_vertex superset_risg vert) + ) insn_map in + rebuild ~insn_risg:superset_risg ~insn_map superset diff --git a/lib/bap_disasm/superset_risg.ml b/lib/bap_disasm/superset_risg.ml new file mode 100644 index 000000000..e72bc65ec --- /dev/null +++ b/lib/bap_disasm/superset_risg.ml @@ -0,0 +1,300 @@ +open Bap_types.Std +open Bap_image_std +open Graph +open Core_kernel.Std + +module G = Imperative.Digraph.ConcreteBidirectional(struct + type t = Addr.t + let compare = Addr.compare + let hash = Addr.hash + let equal = Addr.equal + end) +type t = G.t + +module P = Persistent.Digraph.ConcreteBidirectional(struct + type t = Addr.t + let compare = Addr.compare + let hash = Addr.hash + let equal = Addr.equal + end) + +module Kruskal = Kruskal.Make(G)(struct + type t = G.E.label + let compare _ _ = 0 + end) + +module Topological = Topological.Make(G) +module Dominator = Dominator.Make(G) +module Oper = Oper.I(G) +module StrongComponents = Components.Make(G) +(*module DiscreteComponents = Components.Undirected(G)*) +module Dfs = Graph.Traverse.Dfs(G) +module Bfs = Graph.Traverse.Bfs(G) +module Path = Path.Check(G) +module GmlOut = Gml.Print(G)(struct + let node (label : G.V.label) = + [ "addr", Gml.String (Addr.to_string label) ] + let edge _ = [] + end) +module B = struct + module G = struct + include P + end + include P + let copy g = g + let empty () = P.empty +end +module GmlIn = Gml.Parse(B)(struct + let node (labels : Gml.value_list) = + match labels with + | [] -> assert false + | fail :: [] -> assert false + | (id, idval) :: (s, gmlval) :: _ -> + match idval, gmlval with + | Gml.Int(idval), Gml.String(addr) -> + B.G.V.label Addr.(of_string addr) + | _ -> assert false + + let edge (labels : Gml.value_list) = () + end) +module Gml = struct + include GmlIn + include GmlOut + let parse gmlstr = + let pgraph = parse gmlstr in + let igraph = G.create () in + P.iter_edges (fun src target -> + let src = B.G.V.create src in + let target = B.G.V.create target in + G.add_edge igraph src target; + ) pgraph; + igraph +end + +let add ?superset_risg mem insn = + let superset_risg = + Option.value superset_risg ~default:(G.create ()) in + let src = Memory.min_addr mem in + let bad = Addr.of_int ~width:(Addr.bitwidth src) 0 in + match insn with + | Some(insn) -> + G.add_vertex superset_risg src; + | None -> G.add_edge superset_risg bad src + +let subgraph insn_risg subgraph = + let g = G.create () in + Hash_set.iter subgraph ~f:(fun addr -> + G.add_vertex g addr; + G.iter_succ + (fun s -> + if Hash_set.mem subgraph s then + G.add_edge g addr s + ) insn_risg addr; + G.iter_pred + (fun s -> + if Hash_set.mem subgraph s then + G.add_edge g s addr + ) insn_risg addr; + ); + g + +let exits_of_isg insn_isg component = + Set.fold component ~init:Addr.Set.empty ~f:(fun potential_exits addr -> + G.fold_pred (fun ancestor potential_exits -> + if not Set.(mem component ancestor) then + Set.add potential_exits ancestor + else potential_exits + ) insn_isg addr potential_exits + ) + + +let risg_of_raw_superset ?superset_risg raw_superset = + let superset_risg = Option.value superset_risg ~default:(G.create ()) in + List.iter raw_superset ~f:(fun (mem, insn) -> + add ~superset_risg mem insn + ); + superset_risg + +let conflicts_within_insn_at ?mem ?conflicts insn_map addr = + let mem = Option.value mem ~default:(Map.mem insn_map) in + let conflicts = Option.value conflicts ~default:Addr.Set.empty in + let rec within_insn conflicts insn_map cur_addr len = + if Addr.(cur_addr >= (addr ++ len)) then + conflicts + else + let conflicts = if mem cur_addr then + let conflicts = Set.add conflicts addr in + Set.add conflicts cur_addr + else conflicts in + within_insn conflicts insn_map Addr.(cur_addr ++ 1) len in + match Map.find insn_map addr with + | Some ((mem, _)) -> + (* look within the body for instructions *) + let len = (Memory.length mem) in + within_insn conflicts insn_map Addr.(addr ++ 1) len + | None -> conflicts + +let conflicts_within_insns insn_map keep = + Set.fold keep ~init:Addr.Set.empty + ~f:(fun conflicts addr -> + conflicts_within_insn_at + ~conflicts insn_map addr + ) + +let find_all_conflicts ?mem insn_map = + List.fold Map.(keys insn_map) ~init:Addr.Set.empty + ~f:(fun conflicts addr -> + conflicts_within_insn_at ?mem ~conflicts insn_map addr + ) + +let seq_of_addr_range addr len = + let open Seq.Generator in + let rec gen_next_addr cur_addr = + if Addr.(cur_addr >= (addr ++ len)) then + return () + else + yield cur_addr >>= fun () -> + let next_addr = Addr.succ cur_addr in + gen_next_addr next_addr + in run (gen_next_addr Addr.(succ addr)) + +let range_seq insn_map = + let map_seq = Addr.Map.to_sequence insn_map in + Seq.bind map_seq (fun (addr, (mem, _)) -> + seq_of_addr_range addr (Memory.length mem) + ) + +let range_seq_of_conflicts ~mem addr len = + let range_seq = seq_of_addr_range addr len in + Seq.filter range_seq ~f:mem + +(* TODO do not need to use insn_cfg. Could use superset type *) +let seq_of_all_conflicts insn_map insn_isg = + let insn_map_seq = Addr.Map.to_sequence insn_map in + let check_mem = Addr.Map.(mem insn_map) in + Seq.bind insn_map_seq (fun (addr, (mem, _)) -> + range_seq_of_conflicts ~mem:check_mem addr (Memory.length mem) + ) + +let conflict_seq_at insn_map addr = + let check_mem = Addr.Map.(mem insn_map) in + match Map.find insn_map addr with + | Some(mem, _) -> + let len = Memory.length mem in + range_seq_of_conflicts ~mem:check_mem addr len + | None -> Seq.empty + +let parent_conflict_at insn_risg insn_map addr = + let children = G.pred insn_risg addr in + List.fold children ~init:Addr.Set.empty ~f:(fun cparents child -> + let parents = G.succ insn_risg child in + List.fold parents ~init:cparents ~f:(fun cparents parent -> + if not Addr.(parent = addr) then + match Map.find insn_map parent with + | Some(mem, _) -> + let len = Memory.length mem in + if Addr.(parent < addr) && Addr.(addr < (parent ++ len)) then + Set.add cparents parent + else cparents + | None -> cparents + else cparents + ) + ) + +let mergers_of_isg insn_isg = + G.fold_vertex (fun addr mergers -> + if G.out_degree insn_isg addr > 1 then + Addr.Set.add mergers addr + else mergers) insn_isg Addr.Set.empty + +let is_entry insn_isg addr = + G.in_degree insn_isg addr = 0 && + G.out_degree insn_isg addr > 0 + +let entries_of_isg insn_isg = + G.fold_vertex (fun addr accu -> + if is_entry insn_isg addr then + (Hash_set.add accu addr; accu) + else accu) + insn_isg (Addr.Hash_set.create ()) + +let is_branch insn_risg addr = + G.in_degree insn_risg addr = 2 + +let get_branches insn_risg = + let branches = Addr.Hash_set.create () in + G.iter_vertex (fun vert -> + if is_branch insn_risg vert then + Hash_set.add branches vert; + ) insn_risg; + branches + +let get_loop_addrs insn_risg = + let loop_addrs = + StrongComponents.scc_list insn_risg in + List.fold_left loop_addrs ~init:Addr.Set.empty + ~f:(fun loop_addrs loop -> + List.fold_left ~init:loop_addrs loop ~f:(fun loop_addrs addr -> + Set.add loop_addrs addr + ) + ) + +let iter_component ?(terminator=(fun _ -> true)) + ?visited ?(pre=fun _ -> ()) ?(post=fun _ -> ()) g v = + let visited = Option.value visited + ~default:(Addr.Hash_set.create ()) in + let rec visit v = + Hash_set.add visited v; + pre v; + G.iter_succ + (fun w -> + if (not (Hash_set.mem visited w)) && (terminator w) then + visit w) g v; + post v + in visit v + +let fold_component ?visited ~pre ~post i g v0 = + let visited = Option.value visited + ~default:(Addr.Hash_set.create ()) in + let s = Stack.create () in + (* invariant: [h] contains exactly the vertices which have been pushed *) + let push v = + if not (Hash_set.mem visited v) then begin + Hash_set.add visited v; + Stack.push s v + end + in + push v0; + let rec loop acc = + match Stack.pop s with + | Some v -> + let acc = pre acc v in + G.iter_succ push g v; + loop @@ post acc v + | None -> acc + in + loop i + +let get_depth insn_risg x = + let depth = ref 0 in + let deepest = ref 0 in + let pre x = depth := !depth + 1 in + let post x = + deepest := max !deepest !depth; + depth := !depth - 1; in + iter_component insn_risg ~pre ~post x; + !deepest + +let collect_target_entries visited insn_risg insn_isg addr = + let target_entries = Addr.Hash_set.create () in + let pre t = + if is_entry insn_risg t then + Hash_set.add target_entries t in + let post _ = () in + iter_component ~visited ~pre ~post insn_isg addr; + target_entries + +let activate_descendants active insn_isg addr = + let pre _ = ()in + let post _ = () in + iter_component ~visited:active ~pre ~post insn_isg addr diff --git a/lib/bap_disasm/traverse.ml b/lib/bap_disasm/traverse.ml new file mode 100644 index 000000000..3bd86737f --- /dev/null +++ b/lib/bap_disasm/traverse.ml @@ -0,0 +1,66 @@ +open Core_kernel.Std +open Bap_types.Std +open Bap_image_std + +let visit ?visited ~pre ~post superset entries = + let visited = Option.value visited + ~default:(Addr.Hash_set.create ()) in + let pre addr = + Hash_set.add visited addr; + pre addr in + let insn_risg = Superset.get_graph superset in + Hash_set.iter entries ~f:(fun addr -> + if not (Hash_set.mem visited addr) then + Superset_risg.iter_component ~visited ~pre ~post insn_risg addr + ) + +let visit_with_deltas ?pre ?post ~is_option superset entries = + let pre = Option.value pre ~default:(fun _ _ -> ()) in + let post = Option.value post ~default:(fun _ _ -> ()) in + let deltas = ref (Decision_tree_set.calculate_deltas + superset ~entries is_option) in + let pre addr = + pre !deltas addr in + let post addr = + post !deltas addr; + deltas := Map.remove !deltas addr + in + visit ~pre ~post superset entries + +let visit_by_block superset + ?(pre=(fun _ _ _ -> ())) ?(post=(fun _ _ _ -> ())) entries = + let insn_risg = Superset.get_graph superset in + let (jmps,targets) = Superset_risg.G.fold_edges (fun src target (jmps,targets) -> + let is_branch = Superset_risg.is_branch insn_risg target in + let is_jmp_edge = not (Superset.is_fall_through superset src target) in + if is_branch && is_jmp_edge then + (Map.set jmps src target, Set.add targets target) + else (jmps, targets) + ) insn_risg (Addr.Map.empty,Addr.Set.empty) in + (*let loop_addrs = Superset_risg.get_loop_addrs insn_risg in + let jmps = Set.fold loop_addrs ~init:jmps ~f:(fun jmps addr -> + match Map.find jmps addr with + | Some(j) -> + if Set.mem loop_addrs j then + Map.remove jmps j + else jmps + | None -> jmps + ) in*) + Map.iteri jmps ~f:(fun ~key ~data -> + Superset_risg.G.remove_edge insn_risg key data; + ); + let entries = Superset_risg.entries_of_isg insn_risg in + let visited = Addr.Hash_set.create () in + let rec visit v = + Hash_set.add visited v; + pre jmps targets v; + Superset_risg.G.iter_succ + (fun w -> if not (Hash_set.mem visited w) then visit w else pre jmps targets w) + insn_risg v; + post jmps targets v; + in + Hash_set.iter entries ~f:visit; + Map.iteri jmps ~f:(fun ~key ~data -> + Superset_risg.G.add_edge insn_risg key data; + ) + diff --git a/lib/bap_disasm/trim.ml b/lib/bap_disasm/trim.ml new file mode 100644 index 000000000..0d244d2aa --- /dev/null +++ b/lib/bap_disasm/trim.ml @@ -0,0 +1,271 @@ +open Core_kernel.Std +open Bap_types.Std +open Bap_image_std +open Bap_disasm_target_factory + +module Insn = Bap_disasm_insn +module Brancher = Bap_disasm_brancher + +let static_successors brancher mem insn = + match insn with + | None -> [None, `Fall] + | Some insn -> + try + Brancher.resolve brancher mem insn + with _ -> ( + print_endline @@ + "Target resolve failed on memory at " ^ Memory.to_string mem; + [None, `Fall] + ) + +let find_non_mem_accesses superset = + let check_return_addr r addr = + match addr with + | Bil.Int(addr) -> + if Superset.contains_addr superset addr then + r + else r.return(Some(false)) + | _ -> r in + (object(self) + inherit [bool] Stmt.finder + method! enter_load ~mem ~addr _ _ r = + check_return_addr r addr + method! enter_store ~mem ~addr ~exp _ _ r = + check_return_addr r addr + end) + +let accesses_non_mem superset mem insn _ = + let arch = Superset.get_arch superset in + let module Target = (val target_of_arch arch) in + let lifter = Target.lift in + try + let bil = Superset.lift_insn lifter (mem, insn) in + let _, bil = Option.value ~default:(mem,[]) bil in + let status = List.fold bil ~init:None ~f:(fun status _stmt -> + Option.value_map status ~default:(Some(false)) ~f:(fun status -> + if not status then + Stmt.find (find_non_mem_accesses superset) _stmt + else Some(status) + ) + ) in + Option.value status ~default:false + with _ -> false + +(* TODO Does this belong in Superset? *) +let tag_with ~f (mem, insn) superset = + let open Superset in + let targets = static_successors superset.brancher mem insn in + f superset mem insn targets + +let tag_target_not_in_mem superset mem insn targets = + List.iter targets + ~f:(fun (target,_) -> + match target with + | Some(target) -> + if not (Superset.contains_addr superset target) then + Superset.mark_bad superset target + | None -> () + ); + superset + +let tag_target_is_bad superset mem insn targets = + let width = Addr.bitwidth @@ Memory.min_addr mem in + let z = Addr.zero width in + List.iter targets + ~f:(fun (target,_) -> + match target with + | Some(target) -> + if Addr.(target = z) then + Superset.mark_bad superset target + | None -> () + ); + superset + +(* TODO need to add a unit test *) +let tag_target_in_body superset mem insn targets = + let src = Memory.min_addr mem in + List.iter targets + ~f:(fun (target,_) -> + match target with + | Some(target) -> + if (Memory.contains mem target) && + not Addr.(src = target) then + Superset.mark_bad superset src + | None -> () + ); + superset + +let tag_invalid_targets superset mem insn targets = + let superset = tag_target_not_in_mem superset mem insn targets in + let superset = tag_target_is_bad superset mem insn targets in + let superset = tag_target_in_body superset mem insn targets in + superset + +let tag_non_mem_access superset mem insn targets = + let src = Memory.min_addr mem in + if accesses_non_mem superset mem insn targets then ( + (* The instruction reads or writes to memory that is not mapped *) + Superset.mark_bad superset src + ); + superset + +let tag_non_insn superset mem insn targets = + let src = Memory.min_addr mem in + if Option.is_none insn then ( + (* Wasn't a parseable instruction *) + Superset.mark_bad superset src + ); + superset + + +(* TODO This belongs in Superset *) +(* could merge add_to_map and tag_success *) +let tag_success superset mem insn targets = + let src = Memory.min_addr mem in + let insn_risg = Superset.get_graph superset in + (*let superset = Superset.add superset mem insn in*) + (* TODO perhaps the below should be merged with Superset.add *) + List.iter targets ~f:(fun (target,_) -> + match target with + | Some (target) -> + Superset_risg.G.add_edge insn_risg target src + | None -> ()); + superset + +let default_tags = ["Tag non insn", tag_non_insn; + "Tag non mem access", tag_non_mem_access; + "Tag target not in mem", tag_target_not_in_mem; + "Tag target is bad", tag_target_is_bad; + "Tag target in body", tag_target_in_body; + (*tag_success;*)] + +let default_funcs = [ + tag_non_insn; + tag_non_mem_access; + tag_target_not_in_mem; + tag_target_is_bad; + tag_target_in_body; + (*tag_success;*)] + +let tag ?invariants = + let invariants = Option.value invariants ~default:default_funcs in + let f superset mem insn targets = + List.fold_left invariants ~init:superset ~f:(fun superset f -> + (f superset mem insn targets)) in + tag_with ~f + +module type Reducer = sig + type acc + val accu : acc + val check_pre : 'a Superset.t -> acc -> addr -> acc + val check_elim : 'a Superset.t -> acc -> addr -> bool + val check_post : 'a Superset.t -> acc -> addr -> acc + val mark : 'a Superset.t -> acc -> addr -> unit +end + +module type ReducerInstance = sig + include Reducer + val post : 'a Superset.t -> acc -> addr -> acc +end + +module Reduction(R : Reducer) = struct + + let post superset accu addr = + let module G = Superset_risg.G in + let superset_risg = Superset.get_graph superset in + if R.check_elim superset accu addr then ( + R.mark superset accu addr; + G.remove_vertex superset_risg addr; + ); + R.check_post superset accu addr + + let trim superset = + print_endline "trimming..."; + let superset_risg = Superset.get_graph superset in + let module G = Superset_risg.G in + let superset = Superset.rebalance superset in + let orig_size = (G.nb_vertex superset_risg) in + let post = post superset in + let pre = R.check_pre superset in + let _ = Superset.with_bad superset ~pre ~post R.accu in + Superset.clear_all_bad superset; + let trimmed_size = (G.nb_vertex superset_risg) in + let num_removed = orig_size - trimmed_size in + printf "%d vertices after trimming, removing %d\n" + trimmed_size num_removed; + Superset.rebalance superset + +end + +module Disabled = struct + let post _ accu _ = accu + let trim superset = superset +end + +module DefaultReducer = Reduction(struct + type acc = unit + let accu = () + let check_pre _ accu _ = accu + let check_post _ accu _ = accu + let check_elim _ _ _ = true + let mark _ _ _ = () + end) + +module Default = DefaultReducer + +module DeadblockTolerantReducer : Reducer +(*with type acc = Superset.elem option*) = struct + type acc = Superset.elem option + let accu = None + let check_pre superset accu addr = + match accu with + | Some _ -> accu + | None -> ( + match Map.find Superset.(get_map superset) addr with + | Some (mem,insn) -> ( + match insn with + | Some i -> + let i = Insn.of_basic i in + if Insn.may Insn.affect_control_flow i then + Some (mem,insn) else None + | None -> None + ) + | None -> None + ) + + let check_post superset accu addr = + match accu with + | Some(mem,insn) -> + if Memory.(min_addr mem) = addr then + None + else accu + | None -> accu + + let check_elim superset accu addr = + Option.is_none accu + + let mark _ _ _ = () +end + +module DeadblockTolerant = Reduction(DeadblockTolerantReducer) + +let tag_superset ?invariants superset = + let invariants = Option.value invariants ~default:default_funcs in + let insn_map = Superset.get_map superset in + let f superset mem insn targets = + List.fold ~init:superset invariants + ~f:(fun superset invariant -> + invariant superset mem insn targets) in + Addr.Map.fold ~init:superset insn_map ~f:(fun ~key ~data superset -> + let mem, insn = data in + tag_with ~f (mem, insn) superset + ) + +let tagged_disasm_of_file ~data ?f ?invariants ~backend file = + let invariants = Option.value invariants ~default:default_funcs in + let f = Option.value f ~default:[] in + let invariants = Some(List.append f invariants) in + Superset.superset_disasm_of_file ~data ~backend file ~f:(tag ?invariants) + +let trimmed_disasm_of_file ~data ?f ~backend file = + Default.trim (tagged_disasm_of_file ~data ?f ~backend file) diff --git a/oasis/bap-std b/oasis/bap-std index c765d50ea..ae31e3670 100644 --- a/oasis/bap-std +++ b/oasis/bap-std @@ -16,7 +16,9 @@ Library bap bap.types, bap-future, cmdliner, - regular + regular, + graphlib, + ogre Modules: Bap InternalModules: Bap_event, Bap_log, Bap_project, Bap_self @@ -103,7 +105,7 @@ Library disasm bap-future, camlzip, ocamlgraph, - ogre + graphlib InternalModules: Bap_disasm, Bap_disasm_basic, @@ -124,7 +126,24 @@ Library disasm Bap_disasm_target_factory, Bap_disasm_target_intf, Bap_disasm_types, - Bap_insn_kind + Bap_insn_kind, + Superset_risg, + Superset, + Metrics, + Common, + Traverse, + Trim, + Invariants, + Grammar, + Insn_disasm_benchmark, + Sheathed, + Decision_tree_set, + Cfg_dot_layout, + Abstract_ssa, + Markup, + Features, + Report, + Bap_disasm_superset CCOpt: $cc_optimization CCLib: $cxxlibs CSources: disasm.h, disasm.c, disasm_stubs.c diff --git a/opam/opam b/opam/opam index 5a0224bfc..b2fecbed9 100644 --- a/opam/opam +++ b/opam/opam @@ -70,6 +70,7 @@ install: [ ["ocamlfind" "remove" "bap-primus"] ["ocamlfind" "remove" "bap-plugin-arm"] ["ocamlfind" "remove" "bap-plugin-api"] + ["ocamlfind" "remove" "bap-plugin-bir-to-llvm"] ["ocamlfind" "remove" "bap-plugin-ssa"] ["ocamlfind" "remove" "bap-plugin-optimization"] ["ocamlfind" "remove" "bap-plugin-beagle"]