From 4189f8f77a894875293467d9c635368f52ca92b9 Mon Sep 17 00:00:00 2001 From: ivg Date: Tue, 29 Sep 2020 12:43:00 -0400 Subject: [PATCH] overhauls the target/architecture abstraction (2/n) In the second patch of this series (#1225) we completely got rid of Arch.t dependency in the disassembler engine that finally opens the path for seamless integration of targets that are not representable with Arch.t. To achieve this, we introduced a proper dependency injection into the disassembler driver so that it is no longer responsible for creating the llvm MC disassembler. Instead a plugin that implements a target, aka the target support package, has to create a disassembler and is now in full control of all parameters and can choose backend, specify the CPU and other details of encoding. The encoding is a new abstraction in our knowledge base that breaks the tight connection between the target and the way how the program for that target is encoded. Unlike the target, which is a property of a unit of code, the encoding is associated with a program itself, i.e., it is a property of each instruction. That enables targets with context-dependent encodings such ARM's thumb mode and MIPS16e for binary encodings as well as paves the road for non-binary encodings for the same architecture, e.g., text assembly (which also may have several encodings on its own, cf. att vs intel syntax). We base this branch on the enable-interworking (#1188) and this branch fully superseeds and includes it, since encodings made it much more natural. It is still highlty untested how it will work with real thumb binaries but we will get back to it when we will merge #1178. Another big update, is that the disassembler backend (which is responsible for translating bits into machine instructions) is no longer required to be implemented in C++ and it is now possible to write your own backends/disassemblers in pure OCaml, e.g., to support PIC microcontrollers. The Backend interface is pretty low-level and we might provide higher-level interfaces later, see `Disasm_expert.Backend` for the interface and detailed comments. Finally, we rectify the interface introduced in the previous PR and flatten the hierarchy of newly introduced to the Core Theory abtractions, i.e., instead of `Theory.Target.Endiannes` we now have `Theory.Endianness` and so on. We also made the `Enum` module public which introduced enumerated types built on to of `Knowledge.Value`s. In the next episodes of this series we will gradually remove Arch.t from other bap components and further clean up the newly introduced interfaces. --- lib/arm/arm_target.ml | 226 ++++++++---- lib/bap/bap.mli | 380 ++++++++++++++++++++- lib/bap/bap_project.ml | 70 +--- lib/bap/bap_project.mli | 3 - lib/bap_core_theory/bap_core_theory.mli | 62 +++- lib/bap_disasm/bap_disasm_backend_types.ml | 56 +++ lib/bap_disasm/bap_disasm_basic.ml | 186 +++++++--- lib/bap_disasm/bap_disasm_basic.mli | 14 +- lib/bap_disasm/bap_disasm_driver.ml | 64 ++-- lib/bap_disasm/bap_disasm_driver.mli | 1 - lib/bap_disasm/bap_disasm_prim.ml | 114 ++----- lib/bap_disasm/bap_disasm_rec.ml | 2 +- lib/bap_disasm/bap_disasm_std.ml | 1 + lib/bap_image/bap_image.ml | 145 ++++---- lib/bap_image/bap_image.mli | 6 + lib/bap_mips/bap_mips_target.ml | 25 +- lib/bap_mips/bap_mips_target.mli | 6 + lib/bap_powerpc/bap_powerpc_target.ml | 23 +- lib/bap_powerpc/bap_powerpc_target.mli | 5 + lib/bap_types/bap_arch.ml | 4 +- lib/bap_types/bap_ogre.ml | 52 ++- lib/bap_types/bap_ogre.mli | 6 + lib/x86_cpu/x86_target.ml | 34 +- oasis/arm | 3 +- oasis/bap-std | 1 + plugins/arm/arm_main.ml | 51 +-- 26 files changed, 1073 insertions(+), 467 deletions(-) create mode 100644 lib/bap_disasm/bap_disasm_backend_types.ml create mode 100644 lib/bap_types/bap_ogre.mli diff --git a/lib/arm/arm_target.ml b/lib/arm/arm_target.ml index 94c9abb27b..4acc020d4f 100644 --- a/lib/arm/arm_target.ml +++ b/lib/arm/arm_target.ml @@ -1,34 +1,37 @@ +let package = "bap" + open Core_kernel open Bap_core_theory open Bap.Std +open KB.Syntax -let package = "bap" +module CT = Theory type r128 and r80 and r64 and r32 and r16 and r8 -type 'a bitv = 'a Theory.Bitv.t Theory.Value.sort +type 'a bitv = 'a CT.Bitv.t CT.Value.sort -let r128 : r128 bitv = Theory.Bitv.define 128 -let r80 : r80 bitv = Theory.Bitv.define 80 -let r64 : r64 bitv = Theory.Bitv.define 64 -let r32 : r32 bitv = Theory.Bitv.define 32 -let r16 : r16 bitv = Theory.Bitv.define 16 -let r8 : r8 bitv = Theory.Bitv.define 8 -let bool = Theory.Bool.t +let r128 : r128 bitv = CT.Bitv.define 128 +let r80 : r80 bitv = CT.Bitv.define 80 +let r64 : r64 bitv = CT.Bitv.define 64 +let r32 : r32 bitv = CT.Bitv.define 32 +let r16 : r16 bitv = CT.Bitv.define 16 +let r8 : r8 bitv = CT.Bitv.define 8 +let bool = CT.Bool.t -let reg t n = Theory.Var.define t n -let untyped = List.map ~f:Theory.Var.forget +let reg t n = CT.Var.define t n +let untyped = List.map ~f:CT.Var.forget let (@<) xs ys = untyped xs @ untyped ys let array ?(index=string_of_int) t pref size = List.init size ~f:(fun i -> reg t (pref ^ index i)) -let mems = Theory.Mem.define r32 r8 -let data = Theory.Var.define mems (Var.name Arm_env.mem) +let mems = CT.Mem.define r32 r8 +let data = CT.Var.define mems (Var.name Arm_env.mem) let of_bil v = - Theory.Var.define (Var.sort v) (Var.name v) + CT.Var.define (Var.sort v) (Var.name v) let vars32 = List.map ~f:of_bil Arm_env.[ r0; r1; r2; r3; r4; r5; r6; r7; r8; r9; @@ -43,8 +46,8 @@ let gp64 = array r64 "X" 30 let fp64 = array r128 "Q" 32 let sp64 = [reg r64 "SP"] let lr64 = [reg r64 "LR"] -let mems64 = Theory.Mem.define r64 r8 -let data64 = Theory.Var.define mems64 "mem" +let mems64 = CT.Mem.define r64 r8 +let data64 = CT.Var.define mems64 "mem" let flags64 = [ reg bool "NF"; reg bool "ZF"; @@ -54,59 +57,59 @@ let flags64 = [ let vars64 = gp64 @< fp64 @< sp64 @< lr64 @< flags64 @< [data64] -let parent = Theory.Target.declare "arm" +let parent = CT.Target.declare "arm" module type v4 = sig end module type ARM = sig - val endianness : Theory.endianness - val parent : Theory.Target.t - val v4 : Theory.Target.t - val v4t : Theory.Target.t - val v5 : Theory.Target.t - val v5t : Theory.Target.t - val v5te : Theory.Target.t - val v5tej : Theory.Target.t - val v6 : Theory.Target.t - val v6t2 : Theory.Target.t - val v6z : Theory.Target.t - val v6k : Theory.Target.t - val v6m : Theory.Target.t - val v7 : Theory.Target.t - val v7fp : Theory.Target.t - val v7a : Theory.Target.t - val v7afp : Theory.Target.t - val v8a : Theory.Target.t - val v81a : Theory.Target.t - val v82a : Theory.Target.t - val v83a : Theory.Target.t - val v84a : Theory.Target.t - val v85a : Theory.Target.t - val v86a : Theory.Target.t + val endianness : CT.endianness + val parent : CT.Target.t + val v4 : CT.Target.t + val v4t : CT.Target.t + val v5 : CT.Target.t + val v5t : CT.Target.t + val v5te : CT.Target.t + val v5tej : CT.Target.t + val v6 : CT.Target.t + val v6t2 : CT.Target.t + val v6z : CT.Target.t + val v6k : CT.Target.t + val v6m : CT.Target.t + val v7 : CT.Target.t + val v7fp : CT.Target.t + val v7a : CT.Target.t + val v7afp : CT.Target.t + val v8a : CT.Target.t + val v81a : CT.Target.t + val v82a : CT.Target.t + val v83a : CT.Target.t + val v84a : CT.Target.t + val v85a : CT.Target.t + val v86a : CT.Target.t end -module type Endianness = sig val endianness : Theory.endianness end +module type Endianness = sig val endianness : CT.endianness end module Family (Order : Endianness) = struct include Order let ordered name = - let order = Theory.Endianness.name endianness in + let order = CT.Endianness.name endianness in name ^ "+" ^ KB.Name.unqualified order let (<:) parent name = - if Theory.Target.is_unknown parent - then Theory.Target.unknown - else Theory.Target.declare ~package (ordered name) ~parent + if CT.Target.is_unknown parent + then CT.Target.unknown + else CT.Target.declare ~package (ordered name) ~parent ~nicknames:[name] - let is_bi_endian = Theory.Endianness.(equal bi) endianness + let is_bi_endian = CT.Endianness.(equal bi) endianness let v4 = if is_bi_endian - then Theory.Target.unknown - else Theory.Target.declare ~package (ordered "armv4") + then CT.Target.unknown + else CT.Target.declare ~package (ordered "armv4") ~parent ~nicknames:["armv4"] ~bits:32 @@ -127,8 +130,8 @@ module Family (Order : Endianness) = struct let v6k = v6z <: "armv6k" let v6m = v6 <: "armv6-m" - let v7 = if not is_bi_endian then v6t2 <: "armv7" - else Theory.Target.declare ~package (ordered "armv4") + let v7 = if not is_bi_endian then v6t2 <: "armv7" + else CT.Target.declare ~package (ordered "armv4") ~parent ~nicknames:["armv4"] ~bits:32 @@ -138,18 +141,18 @@ module Family (Order : Endianness) = struct ~data:data ~vars:vars32 - let v7fp = Theory.Target.declare ~package (ordered "armv7+fp") ~parent:v7 + let v7fp = CT.Target.declare ~package (ordered "armv7+fp") ~parent:v7 ~nicknames:["armv7+fp"] ~vars:vars32_fp let v7a = v7 <: "armv7-a" - let v7afp = Theory.Target.declare ~package (ordered "armv7-a+fp") + let v7afp = CT.Target.declare ~package (ordered "armv7-a+fp") ~nicknames:["armv7-a+fp"] ~parent:v7a ~vars:vars32_fp let v8a = - Theory.Target.declare ~package (ordered "armv8-a") ~parent:v7 + CT.Target.declare ~package (ordered "armv8-a") ~parent:v7 ~nicknames:["armv8-a"] ~bits:64 ~code:data64 @@ -166,9 +169,9 @@ module Family (Order : Endianness) = struct let parent = if is_bi_endian then v7 else v4 end -module LE = Family(struct let endianness = Theory.Endianness.le end) -module Bi = Family(struct let endianness = Theory.Endianness.bi end) -module EB = Family(struct let endianness = Theory.Endianness.eb end) +module LE = Family(struct let endianness = CT.Endianness.le end) +module Bi = Family(struct let endianness = CT.Endianness.bi end) +module EB = Family(struct let endianness = CT.Endianness.eb end) let family_of_endian is_little : (module ARM) = match is_little with | None -> (module Bi) @@ -203,8 +206,8 @@ let normalize arch sub = let enable_loader () = let open Bap.Std in KB.Rule.(declare ~package "arm-target" |> - require Project.specification_slot |> - provide Theory.Unit.target |> + require Image.Spec.slot |> + provide CT.Unit.target |> comment "computes target from the OGRE specification"); let open KB.Syntax in let request_info doc = @@ -217,10 +220,10 @@ let enable_loader () = match Ogre.eval request doc with | Error _ -> None,None,None | Ok info -> info in - KB.promise Theory.Unit.target @@ fun unit -> - KB.collect Project.specification_slot unit >>| + KB.promise CT.Unit.target @@ fun unit -> + KB.collect Image.Spec.slot unit >>| request_info >>| fun (arch,sub,is_little) -> - if not (in_family arch) then Theory.Target.unknown + if not (in_family arch) then CT.Target.unknown else let module Family = (val family_of_endian is_little) in match normalize arch sub with @@ -261,8 +264,8 @@ type arms = [ | Arch.aarch64 ] -let arms : arms Map.M(Theory.Target).t = - Map.of_alist_exn (module Theory.Target) [ +let arms : arms Map.M(CT.Target).t = + Map.of_alist_exn (module CT.Target) [ LE.v4, `armv4; LE.v4t, `armv4; LE.v5, `armv5; @@ -311,16 +314,105 @@ let arms : arms Map.M(Theory.Target).t = let enable_arch () = let open KB.Syntax in KB.Rule.(declare ~package "arm-arch" |> - require Theory.Unit.target |> + require CT.Unit.target |> provide Arch.unit_slot |> comment "computes Arch.t from the unit's target"); KB.promise Arch.unit_slot @@ fun unit -> - KB.collect Theory.Unit.target unit >>| fun t -> + KB.collect CT.Unit.target unit >>| fun t -> match Map.find arms t with | Some arch -> (arch :> Arch.t) | None -> `unknown +let llvm_a32 = CT.Language.declare ~package "llvm-A32" +let llvm_t32 = CT.Language.declare ~package "llvm-T32" +let llvm_a64 = CT.Language.declare ~package "llvm-A64" + +module Dis = Disasm_expert.Basic + +let register encoding triple = + Dis.register encoding @@ fun _ -> + Dis.create ~backend:"llvm" triple + +let symbol_values doc = + let field = Ogre.Query.(select (from Image.Scheme.symbol_value)) in + match Ogre.eval (Ogre.collect field) doc with + | Ok syms -> syms + | Error err -> + failwithf "Arm_target: broken file specification: %s" + (Error.to_string_hum err) () + +module Encodings = struct + let empty = Map.empty (module Bitvec_order) + + let symbols_encoding spec = + symbol_values spec |> + Seq.fold ~init:empty ~f:(fun symbols (addr,value) -> + let addr = Bitvec.M32.int64 addr in + Map.add_exn symbols ~key:addr + ~data:(match Int64.(value land 1L) with + | 0L -> llvm_a32 + | _ -> llvm_a64)) + + let slot = KB.Class.property CT.Unit.cls + ~package "symbols-encodings" @@ + KB.Domain.flat "encodings" + ~empty + ~equal:(Map.equal CT.Language.equal) + + let () = + let open KB.Syntax in + KB.promise slot @@ fun label -> + KB.collect Image.Spec.slot label >>| + symbols_encoding +end + +let (>>=?) x f = x >>= function + | None -> !!CT.Language.unknown + | Some x -> f x + +let compute_encoding_from_symbol_table default label = + KB.collect CT.Label.unit label >>=? fun unit -> + KB.collect CT.Label.addr label >>=? fun addr -> + KB.collect Encodings.slot unit >>= fun encodings -> + KB.return @@ match Map.find encodings addr with + | Some x -> x + | None -> default + +(* here less than means: was introduced before *) +let (<) t p = not (CT.Target.belongs p t) +let (<=) t p = CT.Target.equal t p || t < p +let (>=) = CT.Target.belongs + +let is_arm = CT.Target.belongs parent + +let before_thumb2 t = + t < LE.v6t2 || + t < EB.v6t2 + +let is_64bit t = + t >= LE.v8a || + t >= EB.v8a || + t >= Bi.v8a + +let guess_encoding label target = + if is_arm target then + if before_thumb2 target + then compute_encoding_from_symbol_table llvm_a32 label + else KB.return @@ + if is_64bit target then llvm_a64 else llvm_a32 + else KB.return CT.Language.unknown + + +let enable_decoder () = + let open KB.Syntax in + register llvm_a32 "armv7"; + register llvm_t32 "thumbv7"; + register llvm_a64 "aarch64"; + KB.promise CT.Label.encoding @@ fun label -> + CT.Label.target label >>= guess_encoding label + let load () = enable_loader (); - enable_arch () + enable_arch (); + enable_decoder () diff --git a/lib/bap/bap.mli b/lib/bap/bap.mli index 1206b75e62..2a490fe09b 100644 --- a/lib/bap/bap.mli +++ b/lib/bap/bap.mli @@ -5472,6 +5472,20 @@ module Std : sig (** [segment_of_symbol image sym] a segment to which [sym] belongs.*) val segment_of_symbol : t -> symbol -> segment + (** Interface to the image specification. + + @since 2.2.0 + *) + module Spec : sig + + (** [from_arch x] constructs a minimal specification + for the given architecture [x]. *) + val from_arch : arch -> Ogre.doc + + (** the slot to access the specification of the uni *) + val slot : (Theory.Unit.cls, Ogre.doc) KB.slot + end + (** Image Segments. Segment is a contiguous region of memory that has permissions. The same as segment in ELF. *) @@ -6005,9 +6019,308 @@ module Std : sig control flow graph, and represents the latter as a table of blocks. *) module Disasm_expert : sig + + + (** The interface for custom backends. + + This is an OCaml interface for defining custom disassembling + backends in pure OCaml. An alternative interface in C++ can + be found at disasm.hpp and disasm.h. + + The interface is pretty low-level and mimics one-to-one the + existing C interface between OCaml and the C/C++ disassemblers + backends, which, in turn, are optimized for performance. + + The [Basic.custom] function wraps the backend interface and + enables seamless integration with the existing [Basic.t] + interface. To make the custom [backend] available for the your + [encoding], use [Basic.register encoding] function to register + a constructor that uses [Basic.custom], e.g., + + {[ + let () = Basic.register encoding @@ fun target -> + let dis = create_custom target in + Ok (Basic.custom ?target encoding backend) + ]} + + where [create_custom] is a user function that creates + the custom backend and [target] contains the detailed + information about the target system. + + The [Basic.lookup] function could be used then to lazily + create the disassembler for the given [encoding], [target] + pair. The constructor will be called only once for each pair. + + @since 2.2.0 + *) + module Backend : sig + + (** possible semantic predicates for instructions *) + type predicate = + | Is_true + | Is_invalid + | Is_return + | Is_call + | Is_barrier + | Is_terminator + | Is_branch + | Is_indirect_branch + | Is_conditional_branch + | Is_unconditional_branch + | May_affect_control_flow + | May_store + | May_load + [@@deriving compare, sexp] + + + (** operand types *) + type op = + | Reg (** a register *) + | Imm (** an integer immediate *) + | Fmm (** a floating-point immediate *) + | Insn (** a sub-instruction *) + [@@deriving compare, sexp] + + + (** The backend interface. + + The backend is a simple automaton that disassembles + instructions and pushes them into the queue. It runs until it + either hits an instruction that matches with one of the + previously set predicates or if it either hits an invalid + instruction or runs out of the bounds of the specified + memory region. On the high level the algorithm of the [run] + function can be described with the following pseudocode. + + 1. disassemble instruction + 2. push result into the queue + 3. update the offset + 4. if exists predicate p such that p(insn) + or off >= length(data) + then stop + else goto 1. + + If it is impossible to decode the given sequence of bytes, + then an invalid instruction is pushed into the queue and + disassembling continues on the next offset. + + Predicates enables fine control over the behavior of the + disassembler. For example, the [Is_true] predicate is always + [true] disassembler will stop after each instruction. The + backend is not required to support all predicates, only + [Is_true] and [Is_invalid] are required. + + The state of the disassembler includes the queue of + disassembled instructions, the last disassembled + instruction, the set of predicates, and the current + offset. At the beginning the queue and the set of predicates + are empty, the offset is zero, and the last disassembled + instruction is invalid. + + To minimize allocations, opcodes and register names are + represented as offsets in the corresponding string + tables. + + *) + module type S = sig + + (** an abs *) + type t + + (** [delete d] is called when the disassembler is no longer needed. + + It is safe now to free any data related with the disassembler. + *) + val delete : t -> unit + + + (** [set_memory dis addr data ~off ~len] sets the current memory region. + + Sets the memory region accessible by disassembler to a + substring of [data] starting at the offset [off] and + having the length [len] with the first byte at [off] + having the address [addr]. + + Parameters [off] and [len] must be non-negative + numbers. The [offset dis] shall be equal to [0] after this + function is executed. + *) + val set_memory : t -> int64 -> Bigstring.t -> off:int -> len:int -> unit + + (** [store_predicates dis on_off] turns predicate storage on or off. + + When it is [off] it is not required to store semantic + predicates for each instruction in the queue, only for the + last disassembled one. + It is [off] by default. + *) + val store_predicates : t -> bool -> unit + + (** [store_asm_string dis on_off] turns assembly string + storage on or off. + + When it is [off] it is not required to store assembly + strings for each instruction in the queue, only for the + last disassembled one. + + It is [off] by default. *) + val store_asm_string : t -> bool -> unit + + (** [insn_table dis] returns a string table for opcodes. + + The table contains a sequence of null-terminated strings. + *) + val insn_table : t -> Bigstring.t + + (** [reg_table dis] returns a string table for register names. + + The table contains a sequence of null-terminated strings. + *) + val reg_table : t -> Bigstring.t + + (** [predicates_clear dis] clears the set of predicates. *) + val predicates_clear : t -> unit + + (** [predicates_push dis p] adds [p] to the set of predicates. + + precondition: [is_supported dis p]. + *) + val predicates_push : t -> predicate -> unit + + (** [is_supported dis p] is [true] if [dis] supports [p].*) + val is_supported : t -> predicate -> bool + + (** [set_offset dis off] sets the current offset to [off]. *) + val set_offset : t -> int -> unit + + (** [offset dis] is the current offset. *) + val offset : t -> int + + (** [run dis] runs the disassembler. + + The disassembler runs until it hits an instruction that + matches one of the predicates in the disassemblers current + set of predicates or it runs out of the boundaries of the + currently specified memory region. + + See the module description for the more detailed + description of the backend algorithm. + *) + val run : t -> unit + + + (** [insns_clear dis] clears the disassembler instructions queue. *) + val insns_clear : t -> unit + + (** [insns_size dis] the length of the instruction queue. *) + val insns_size : t -> int + + (** {3 Instructions} + + Each operation in this section takes a parameter labeled + with [insn] that designates the position of the + instruction in the queue, with [0] being the first + disassembled instruction and [insn_size dis - 1] being the + last disassembled. + *) + (** [insn_size dis ~insn:n] the [n]th instruction length. *) + val insn_size : t -> insn:int -> int + + (** [insn_name dis ~insn:n] the [n]th instruction name. *) + val insn_name : t -> insn:int -> int + + (** [insn_name dis ~insn:n] the [n]th instruction opcode. + + The opcode name is represented as an offset to the + [insn_table dis] string table in which each element is a + null-terminated string. + *) + val insn_code : t -> insn:int -> int + + (** [insn_offset dis ~insn:n] the offset of [n]th instruction. *) + val insn_offset : t -> insn:int -> int + + (** [insn_offset dis ~insn:n] the [n]th instruction assembly + string length. *) + val insn_asm_size : t -> insn:int -> int + + (** [insn_asm_copy dis ~insn:n data] copies the assembly + string of the [n]th instruction. *) + val insn_asm_copy : t -> insn:int -> Bytes.t -> unit + + (** [insn_satisfies dis ~insn:n p] is [true] if + the [n]th instruction satisfies the predicate [p]. *) + val insn_satisfies : t -> insn:int -> predicate -> bool + + (** [insn_ops_size dis ~insn:n] the number of operands. *) + val insn_ops_size : t -> insn:int -> int + + + (** {4 Instruction Operands} + + The following function accesses operands of [n]'th + instruction. Each operand is referenced by its position + [m] with [0] being the first operand (if such exists) and + [insn_ops_size dis - 1] being the last operand. + + The operand type is denoted with the [op] type. + *) + (** [insn_op_type dis ~insn:n ~oper:m] the [m]th operand type. *) + val insn_op_type : t -> insn:int -> oper:int -> op + + (** [insn_op_reg_name dis ~insn:n ~oper:m] the register name. + + Returns the register name of the operand [m]. The name is + represented as an offset to the [reg_table dis], which is + a string table of null-terminated strings. + + Precondition: [insn_op_type dis ~insn:n ~oper:m = Reg] + *) + val insn_op_reg_name : t -> insn:int -> oper:int -> int + + (** [insn_op_reg_name dis ~insn:n ~oper:m] the register code. + + Returns the register code of the operand [m]. The code is + a unique number identifying the register (could be the + same as [insn_op_reg_name]. + + Precondition: [insn_op_type dis ~insn:n ~oper:m = Reg] + *) + val insn_op_reg_code : t -> insn:int -> oper:int -> int + + + (** [insn_op_imm_value dis ~insn:n ~oper:m] the immediate value. + + Returns the value of the operand [m]. + + Precondition: [insn_op_type dis ~insn:n ~oper:m = Imm] + *) + val insn_op_imm_value : t -> insn:int -> oper:int -> int64 + + (** [insn_op_imm_small_value dis ~insn:n ~oper:m] the immediate value. + + If the value [v] of the operand [m] is strictly greater + than [Int.min_val] and is strictly less than [Int.max_val] + then returns [v] otherwise returns [Int.min_val] or [Int.max_val]. + + Precondition: [insn_op_type dis ~insn:n ~oper:m = Imm] + *) + val insn_op_imm_small_value : t -> insn:int -> oper:int -> int + + + (** [insn_op_fmm_value] the floating-point immediate value. + + Returns the value of the operand [m]. + + Precondition: [insn_op_type dis ~insn:n ~oper:m = Fmm] *) + val insn_op_fmm_value : t -> insn:int -> oper:int -> float + end + + end + (** Basic disassembler. - This is a target agnostic basic low-level disassembler. *) + This is a target agnostic basic low-level machine code disassembler. *) module Basic : sig (** predicate to drive the disassembler *) type pred = [ @@ -6080,6 +6393,55 @@ module Std : sig be made the same. *) type (+'a,+'k,'s,'r) state + + (** [register encoding constructor] registers a disassembler + [constructor] for the given [encoding]. + + The constructor receives the [target] value that + further specifies the details of the target system, e.g., + a cpu model, limitiations on the instruction set, etc. + + The constructor commonly uses {!create} and passes the + backend and target specific options to it. It can also use + the {!custom} function to create its own + backend. Alternatively, the {!lookup} function could be used + to delegate the decoding to another encoder. + *) + val register : Theory.language -> + (Theory.target -> (empty,empty) t Or_error.t) -> + unit + + (** [lookup target encoding] returns the disassembler for the + specified [target] and [encoding], creates one if necessary. + + Returns an error if there is no constructor for the given + encoding registered (via the {!register} function) or if the + constructor itself fails to create a disassembler. *) + val lookup : Theory.target -> Theory.language -> (empty,empty) t Or_error.t + + (** [create ?debug_level ?cpu ~backend target] creates the + disassmbler from one of the C-level backends. + + The parameters are backend-specific and are commonly + set by the target support plugins via the {!register} + function, therefore the [create] function should only be used + to register a new target. Use {!lookup} to get an appropriate + disassembler for your target/encoding. *) + val create : + ?debug_level:int -> + ?cpu:string -> + ?backend:string -> + string -> (empty, empty) t Or_error.t + + + (** [custom target encoding backend disassembler] creates a + custom backend for the given [target] and [encoding]. + + This function is commonly called by the constructor + function registered with the {!register} function. *) + val custom : Theory.target -> Theory.language -> + (module Backend.S with type t = 'a) -> 'a -> (empty,empty) t + (** [with_disasm ?debug_level ?cpu ~backend ~f target] creates a disassembler passing all options to [create] function and applies function [f] to it. Once [f] is evaluated the @@ -6088,18 +6450,6 @@ module Std : sig ?debug_level:int -> ?cpu:string -> ?backend:string -> string -> f:((empty, empty) t -> 'a Or_error.t) -> 'a Or_error.t - (** [create ?debug_level ?cpu ~backend target] creates a - disassembler for the specified [target]. All parameters are - backend specific, consult the concrete backend for more - information. In general, the greater [debug_level] is, the - more debug information will be outputted by a backend. To - silent backend set it [0]. This is a default value. Example: - - [create ~debug_level:3 ~backend:"llvm" "x86_64" ~f:process] - *) - val create : ?debug_level:int -> ?cpu:string -> ?backend:string -> string -> - (empty, empty) t Or_error.t - (** [close d] closes a disassembler [d]. *) val close : (_,_) t -> unit @@ -9435,10 +9785,6 @@ module Std : sig @since 2.2.0 *) val specification : t -> Ogre.doc - (** the slot to access the specification of a unit. - @since 2.2.0 *) - val specification_slot : (Theory.Unit.cls, Ogre.Doc.t) KB.slot - (** [state project] returns the core state of the [project]. @since 2.0.0 *) diff --git a/lib/bap/bap_project.ml b/lib/bap/bap_project.ml index 0726af517e..77dae3bc55 100644 --- a/lib/bap/bap_project.ml +++ b/lib/bap/bap_project.ml @@ -23,65 +23,10 @@ let query doc attr = (Error.to_string_hum err) () | Ok bias -> bias -let rec guess_arch t = - if Theory.Target.is_unknown t then `unknown - else - Theory.Target.name t |> - KB.Name.unqualified |> - Arch.of_string |> function - | Some arch -> arch - | None -> guess_arch (Theory.Target.parent t) - -module Spec = struct - module Fact = Ogre.Make(KB) - open Fact.Syntax - - type KB.Conflict.t += Spec_inconsistency of Error.t - - let domain = KB.Domain.flat "spec" - ~empty:Ogre.Doc.empty - ~inspect:Ogre.Doc.sexp_of_t - ~join:(fun d1 d2 -> match Ogre.Doc.merge d1 d2 with - | Ok d -> Ok d - | Error err -> - Error (Spec_inconsistency err)) - ~equal:(fun d1 d2 -> Ogre.Doc.compare d1 d2 = 0) - - let slot = KB.Class.property Theory.Unit.cls "unit-spec" domain - ~package - ~persistent:(KB.Persistent.of_binable (module struct - type t = Ogre.Doc.t [@@deriving bin_io] - end)) - - let () = KB.Conflict.register_printer @@ function - | Spec_inconsistency err -> - Some (Error.to_string_hum err) - | _ -> None - - let init arch = - let module Field = Image.Scheme in - let open Ogre.Syntax in - let bits = Int64.of_int (Size.in_bits (Arch.addr_size arch)) in - let statements = Ogre.all [ - Ogre.provide Field.arch (Arch.to_string arch); - Ogre.provide Field.bits bits; - Ogre.provide Field.format "raw"; - Ogre.provide Field.is_little_endian @@ - match Arch.endian arch with - | LittleEndian -> true - | BigEndian -> false - ] in - match Ogre.exec statements Ogre.Doc.empty with - | Error err -> - failwithf "got a malformed ogre document: %s" - (Error.to_string_hum err) (); - | Ok doc -> doc -end - let target_of_spec spec = let open KB.Syntax in KB.Object.scoped Theory.Unit.cls @@ fun unit -> - KB.provide Spec.slot unit spec >>= fun () -> + KB.provide Image.Spec.slot unit spec >>= fun () -> KB.collect Theory.Unit.target unit let with_filename spec target data code path = @@ -95,10 +40,10 @@ let with_filename spec target data code path = if Memmap.contains data addr || Memmap.contains code addr then Theory.Unit.for_file path >>= fun unit -> - KB.provide Spec.slot unit spec >>= fun () -> + KB.provide Image.Spec.slot unit spec >>= fun () -> KB.provide Theory.Unit.bias unit bias >>= fun () -> KB.provide Theory.Unit.target unit target >>= fun () -> - KB.provide Spec.slot unit spec >>= fun () -> + KB.provide Image.Spec.slot unit spec >>= fun () -> KB.provide Theory.Unit.path unit (Some path) >>| fun () -> Some unit else KB.return None) @@ -225,7 +170,7 @@ module Input = struct ?(filename="") ?(code=Memmap.empty) ?(data=Memmap.empty) target () = { - arch=guess_arch target; file=filename; code; data; finish; + arch=`unknown; file=filename; code; data; finish; target; spec = Ogre.Doc.empty } @@ -236,7 +181,7 @@ module Input = struct target = Theory.Target.unknown; spec = match arch with | #Arch.unknown -> Ogre.Doc.empty - | arch -> Spec.init arch; + | arch -> Image.Spec.from_arch arch; } let loaders = String.Table.create () @@ -321,7 +266,7 @@ module Input = struct let section = Value.create Image.section "bap.user" in let code = Memmap.add Memmap.empty mem section in let data = Memmap.empty in - let spec = Spec.init arch in + let spec = Image.Spec.from_arch arch in {arch; data; code; file = filename; finish = ident; spec; target} let binary ?base arch ~filename = @@ -392,7 +337,7 @@ let unused_options = alternative" name)) let empty target = { - arch = guess_arch target; + arch = `unknown; target; spec = Ogre.Doc.empty; state = State.empty; @@ -472,7 +417,6 @@ let create | exn -> Or_error.of_exn ~backtrace:`Get exn let specification = spec -let specification_slot = Spec.slot let symbols {symbols} = Lazy.force symbols let disasm {disasm} = Lazy.force disasm diff --git a/lib/bap/bap_project.mli b/lib/bap/bap_project.mli index 405e28e8ce..ac111fb525 100644 --- a/lib/bap/bap_project.mli +++ b/lib/bap/bap_project.mli @@ -32,8 +32,6 @@ val create : val arch : t -> arch val target : t -> Theory.Target.t val specification : t -> Ogre.doc -val specification_slot : (Theory.Unit.cls, Ogre.Doc.t) KB.slot - val program : t -> program term val with_program : t -> program term -> t val symbols : t -> symtab @@ -56,7 +54,6 @@ module State : sig type t = state val disassembly : t -> Bap_disasm_driver.state val subroutines : t -> Bap_disasm_calls.t - val slot : (Theory.Unit.cls, state) KB.slot end diff --git a/lib/bap_core_theory/bap_core_theory.mli b/lib/bap_core_theory/bap_core_theory.mli index 6b58ef2a83..a94afc904d 100644 --- a/lib/bap_core_theory/bap_core_theory.mli +++ b/lib/bap_core_theory/bap_core_theory.mli @@ -1070,28 +1070,36 @@ module Theory : sig (** label is an object of the program class. *) type label = program KB.Object.t - (** The target execution system. *) + (** The target execution system. + @since 2.2.0 *) type target - (** The ordering of the bytes. *) + (** The ordering of the bytes. + @since 2.2.0 *) type endianness - (** The operating system *) + (** The operating system. + @since 2.2.0 *) type system - (** The application binary interface *) + (** The application binary interface. + @since 2.2.0 *) type abi - (** The floating-point ABI *) + (** The floating-point ABI. + @since 2.2.0*) type fabi - (** The file type *) + (** The file type. + @since 2.2.0 *) type filetype - (** source to code transformer *) + (** source to code transformer. + @since 2.2.0*) type compiler - (** the name of the code encoding *) + (** the name of the code encoding. + @since 2.2.0 *) type language (** The semantics of programs. @@ -1100,12 +1108,14 @@ module Theory : sig program produces, so effectively [Program.Semantics = Effect], but we reexport it in a separate module here, to separate the concerns. + + @since 2.2.0 (was {!Program.Semantics} before that *) module Semantics : sig type cls = Effect.cls type t = unit Effect.t - (** the cl program semantics values. *) + (** the class of program semantics values. *) val cls : (cls, unit Effect.sort) Knowledge.cls (** the slot to store program semantics. *) @@ -1126,11 +1136,12 @@ module Theory : sig val cls : (program,unit) KB.cls module Semantics = Semantics + [@@deprecated "[since 2020-10] use [Semantics] (without Program)"] + include Knowledge.Value.S with type t := t end - (** The source code artifact of a compilation unit. Contains the information about the source code of a program @@ -1417,6 +1428,8 @@ module Theory : sig in the knowledge base is partitioned into units, so that each instruction belongs to at most one code unit, see the {!Label.unit} property. + + @since 2.2.0 *) module Unit : sig @@ -1528,7 +1541,10 @@ module Theory : sig (** the program encoding. - The language used to encode the program. *) + The language used to encode the program. + + @since 2.2.0 + *) val encoding : (program, language) KB.slot (** possible aliases under which the label might be known. @@ -1622,7 +1638,10 @@ module Theory : sig The enumerated type had to be declared before used and is commonly referenced as a module declared constant. It is possible, however to reference the enumerated type value using - its string representation, via the [read] function. *) + its string representation, via the [read] function. + + @since 2.2.0 + *) module Enum : sig (** The enumerated type interface *) @@ -1681,7 +1700,9 @@ module Theory : sig module Make() : S end - (** The source code language. *) + (** The source code language. + + @since 2.2.0 *) module Language : Enum.S with type t = language (** Defines how multibyte words are stored in the memory. @@ -1690,7 +1711,8 @@ module Theory : sig there is an infinite number of variants of the byte and word sizes, but the two orderings are the most common: little and big endian. More orderings could be declared when necessary. - *) + + @since 2.2.0 *) module Endianness : sig include Enum.S with type t = endianness (** In the big endian ordering the most significant byte of the @@ -1701,7 +1723,6 @@ module Theory : sig the word is stored at the largest address. *) val le : endianness - (** In the bi-endian order the endianness is essentially unspecified and depends on the execution context, e.g., on the status register or memory page descriptor. *) @@ -1709,13 +1730,16 @@ module Theory : sig end - (** The Operating System.*) + (** The Operating System. + @since 2.2.0 *) module System : Enum.S with type t = system - (** The Application Binary Interface name *) + (** The Application Binary Interface name. + @since 2.2.0 *) module Abi : Enum.S with type t = abi - (** The Application Floating-point Binary Interface name *) + (** The Application Floating-point Binary Interface name. + @since 2.2.0 *) module Fabi : Enum.S with type t = fabi (** Information about the compiler. @@ -1723,6 +1747,8 @@ module Theory : sig A compiler is a translator that was used to translate the code in this unit from the source to the target representation. + + @since 2.2.0 *) module Compiler : sig include Base.Comparable.S with type t = compiler diff --git a/lib/bap_disasm/bap_disasm_backend_types.ml b/lib/bap_disasm/bap_disasm_backend_types.ml new file mode 100644 index 0000000000..f892eb1fcd --- /dev/null +++ b/lib/bap_disasm/bap_disasm_backend_types.ml @@ -0,0 +1,56 @@ +open Core_kernel + +type predicate = + | Is_true + | Is_invalid + | Is_return + | Is_call + | Is_barrier + | Is_terminator + | Is_branch + | Is_indirect_branch + | Is_conditional_branch + | Is_unconditional_branch + | May_affect_control_flow + | May_store + | May_load +[@@deriving compare, sexp] + +type op = + | Reg + | Imm + | Fmm + | Insn +[@@deriving compare, sexp] + +module type S = sig + type t + val delete : t -> unit + val set_memory : t -> int64 -> Bigstring.t -> off:int -> len:int -> unit + val store_predicates : t -> bool -> unit + val store_asm_string : t -> bool -> unit + val insn_table : t -> Bigstring.t + val reg_table : t -> Bigstring.t + val predicates_clear : t -> unit + val predicates_push : t -> predicate -> unit + val is_supported : t -> predicate -> bool + val set_offset : t -> int -> unit + val offset : t -> int + val run : t -> unit + val insns_clear : t -> unit + val insns_size : t -> int + val insn_size : t -> insn:int -> int + val insn_name : t -> insn:int -> int + val insn_code : t -> insn:int -> int + val insn_offset : t -> insn:int -> int + val insn_asm_size : t -> insn:int -> int + val insn_asm_copy : t -> insn:int -> Bytes.t -> unit + val insn_satisfies : t -> insn:int -> predicate -> bool + val insn_ops_size : t -> insn:int -> int + val insn_op_type : t -> insn:int -> oper:int -> op + val insn_op_reg_name : t -> insn:int -> oper:int -> int + val insn_op_reg_code : t -> insn:int -> oper:int -> int + val insn_op_imm_value : t -> insn:int -> oper:int -> int64 + val insn_op_imm_small_value : t -> insn:int -> oper:int -> int + val insn_op_fmm_value : t -> insn:int -> oper:int -> float +end diff --git a/lib/bap_disasm/bap_disasm_basic.ml b/lib/bap_disasm/bap_disasm_basic.ml index f51e32a924..cc7cd7dd44 100644 --- a/lib/bap_disasm/bap_disasm_basic.ml +++ b/lib/bap_disasm/bap_disasm_basic.ml @@ -3,10 +3,11 @@ open Regular.Std open Bap_types.Std open Bap_core_theory open Or_error +open Bap_disasm_backend_types module Kind = Bap_insn_kind module Mem = Bap_memory -module C = Bap_disasm_prim +module Prim = Bap_disasm_prim type empty type asm @@ -42,6 +43,43 @@ type reg = reg_info oper [@@deriving bin_io, compare, sexp] type imm = imm_info oper [@@deriving bin_io, compare, sexp] type fmm = float oper [@@deriving bin_io, compare, sexp] +module C : sig + include S + val create : (module S with type t = 'a) -> 'a -> t +end = struct + type t = D : (module S with type t = 'd) * 'd -> t + let create c d = D (c,d) + let delete (D ((module C),d)) = C.delete d + let set_memory (D ((module C),d)) = C.set_memory d + let store_predicates (D ((module C),d)) = C.store_predicates d + let store_asm_string (D ((module C),d)) = C.store_asm_string d + let insn_table (D ((module C),d)) = C.insn_table d + let reg_table (D ((module C),d)) = C.reg_table d + let predicates_clear (D ((module C),d)) = C.predicates_clear d + let predicates_push (D ((module C),d)) = C.predicates_push d + let is_supported (D ((module C),d)) = C.is_supported d + let set_offset (D ((module C),d)) = C.set_offset d + let offset (D ((module C),d)) = C.offset d + let run (D ((module C),d)) = C.run d + let insns_clear (D ((module C),d)) = C.insns_clear d + let insns_size (D ((module C),d)) = C.insns_size d + let insn_size (D ((module C),d)) = C.insn_size d + let insn_name (D ((module C),d)) = C.insn_name d + let insn_code (D ((module C),d)) = C.insn_code d + let insn_offset (D ((module C),d)) = C.insn_offset d + let insn_asm_size (D ((module C),d)) = C.insn_asm_size d + let insn_asm_copy (D ((module C),d)) = C.insn_asm_copy d + let insn_satisfies (D ((module C),d)) = C.insn_satisfies d + let insn_ops_size (D ((module C),d)) = C.insn_ops_size d + let insn_op_type (D ((module C),d)) = C.insn_op_type d + let insn_op_reg_name (D ((module C),d)) = C.insn_op_reg_name d + let insn_op_reg_code (D ((module C),d)) = C.insn_op_reg_code d + let insn_op_imm_value (D ((module C),d)) = C.insn_op_imm_value d + let insn_op_imm_small_value (D ((module C),d)) = C.insn_op_imm_small_value d + let insn_op_fmm_value (D ((module C),d)) = C.insn_op_fmm_value d +end + + module Table = struct (* Bigstring.length is very slow... we should report a bug to the mantis. They need to add "noalloc" to it, otherwise on each call @@ -72,28 +110,34 @@ module Table = struct Bytes.to_string dst) end + type disassembler = { - dd : int; + dd : C.t; insn_table : Table.t; reg_table : Table.t; mutable users : int; } -let last_id = ref 0 -let disassemblers = Hashtbl.create (module String) - - - type dis = { name : string; + enc : string; asm : bool; kinds : bool; } +type ('a,'k) t = dis + +let last_id = ref 0 +let disassemblers = Hashtbl.create (module String) +type constructor = Theory.target -> (empty,empty) t Or_error.t +let constructors : (Theory.language, constructor) Hashtbl.t= + Hashtbl.create (module Theory.Language) + + let get {name} = match Hashtbl.find disassemblers name with - | None -> - failwith "Trying to access a closed disassembler" | Some d -> d + | None -> invalid_argf "Trying to access a closed disassembler %s" + name () let (!!) h = (get h).dd @@ -259,18 +303,18 @@ end type op = Op.t [@@deriving bin_io, compare, sexp] -let cpred_of_pred : pred -> C.pred = function - | `Valid -> C.Is_true - | `Conditional_branch -> C.Is_conditional_branch - | `Unconditional_branch -> C.Is_unconditional_branch - | `Indirect_branch -> C.Is_indirect_branch - | `Return -> C.Is_return - | `Call -> C.Is_call - | `Barrier -> C.Is_barrier - | `Terminator -> C.Is_terminator - | `May_affect_control_flow -> C.May_affect_control_flow - | `May_store -> C.May_store - | `May_load -> C.May_load +let cpred_of_pred = function + | `Valid -> Is_true + | `Conditional_branch -> Is_conditional_branch + | `Unconditional_branch -> Is_unconditional_branch + | `Indirect_branch -> Is_indirect_branch + | `Return -> Is_return + | `Call -> Is_call + | `Barrier -> Is_barrier + | `Terminator -> Is_terminator + | `May_affect_control_flow -> May_affect_control_flow + | `May_store -> May_store + | `May_load -> May_load module Insn = struct type ins_info = { @@ -300,7 +344,6 @@ module Insn = struct let equal x y = Kind.compare x y = 0 in List.mem ~equal op.kinds x - let create ~asm ~kinds dis ~insn = let code = C.insn_code !!dis ~insn in let name = @@ -323,11 +366,11 @@ module Insn = struct let opers = Array.init (C.insn_ops_size !!dis ~insn) ~f:(fun oper -> match C.insn_op_type !!dis ~insn ~oper with - | C.Reg -> Op.Reg Reg.(create dis ~insn ~oper) - | C.Imm -> Op.Imm Imm.(create dis ~insn ~oper) - | C.Fmm -> Op.Fmm Fmm.(create dis ~insn ~oper) - | C.Insn -> assert false) in - {code; name; asm; kinds; opers; encoding = dis.name } + | Reg -> Op.Reg Reg.(create dis ~insn ~oper) + | Imm -> Op.Imm Imm.(create dis ~insn ~oper) + | Fmm -> Op.Fmm Fmm.(create dis ~insn ~oper) + | Insn -> assert false) in + {code; name; asm; kinds; opers; encoding = dis.enc } let encoding x = x.encoding @@ -437,7 +480,7 @@ let with_preds s (ps : pred list) = else begin C.predicates_clear !!(s.dis); Preds.iter ps ~f:(add); - C.predicates_push !!(s.dis) C.Is_invalid; + C.predicates_push !!(s.dis) Is_invalid; end; {s with current = {s.current with preds = ps}} @@ -467,7 +510,7 @@ let step s data = let {asm; kinds} = s.dis in let insns = Array.init n ~f:(fun insn -> begin let is_valid = - not(C.insn_satisfies !!(s.dis) ~insn C.Is_invalid) in + not(C.insn_satisfies !!(s.dis) ~insn Is_invalid) in insn_mem s ~insn, Option.some_if is_valid (Insn.create ~asm ~kinds s.dis ~insn) @@ -476,7 +519,7 @@ let step s data = if stop then match s.stopped with | Some f -> f s data | None -> s.return data - else if C.insn_satisfies !!(s.dis) ~insn C.Is_invalid + else if C.insn_satisfies !!(s.dis) ~insn Is_invalid then match s.invalid with | Some f -> f s (insn_mem s ~insn) data | None -> loop s data @@ -502,28 +545,76 @@ let back s data = | x :: xs -> x,xs in step { s with current; history} data +let init dd = { + dd; + insn_table = Table.create (C.insn_table dd); + reg_table = Table.create (C.reg_table dd); + users = 1; +} + +let make_name target encoding = + sprintf "%s-%s" + (Theory.Language.to_string encoding) + (Theory.Target.to_string target) + +let register encoding construct = + if Hashtbl.mem constructors encoding + then invalid_argf "A disassembler backend for the encoding %s \ + is already provided. Please, disable the old \ + one before registering a new one." + (Theory.Language.to_string encoding) (); + Hashtbl.add_exn constructors ~key:encoding ~data:(fun target -> + match construct target with + | Error _ as err -> err + | Ok dis -> + let name = make_name target encoding in + if name <> dis.name + then begin + let d = Hashtbl.find_exn disassemblers dis.name in + d.users <- d.users + 1; + Hashtbl.add_exn disassemblers ~key:name ~data:d + end; + Ok dis) + +let encoding_name encoding = + KB.Name.unqualified @@ Theory.Language.name encoding + +let lookup target encoding = + let name = make_name target encoding in + match Hashtbl.find disassemblers name with + | Some d -> + d.users <- d.users + 1; + Ok {name; asm=false; kinds=false; enc=encoding_name encoding} + | None -> match Hashtbl.find constructors encoding with + | None -> errorf "no disassembler for encoding %s" + (Theory.Language.to_string encoding) + | Some create -> create target + let create ?(debug_level=0) ?(cpu="") ?(backend="llvm") triple = let name = sprintf "%s-%s%s" backend triple cpu in match Hashtbl.find disassemblers name with | Some d -> d.users <- d.users + 1; - Ok {name; asm=false; kinds=false} + Ok {name; asm=false; kinds=false; enc=name} + | None -> match Prim.create ~backend ~triple ~cpu ~debug_level with + | n when n >= 0 -> + let disassembler = init @@ C.create (module Prim) n in + Hashtbl.add_exn disassemblers name disassembler; + Ok {name; asm = false; kinds = false; enc=name} + | -2 -> errorf "Unknown backend: %s" backend + | -3 -> errorf "Unsupported target: %s %s" triple cpu + | n -> errorf "Disasm.Basic: Unknown error %d" n + +let custom target encoding backend t = + let name = make_name target encoding in + match Hashtbl.find disassemblers name with + | Some d -> + d.users <- d.users + 1; + {name; asm=false; kinds=false; enc=encoding_name encoding} | None -> - let dd = match C.create ~backend ~triple ~cpu ~debug_level with - | n when n >= 0 -> Ok n - | -2 -> errorf "Unknown backend: %s" backend - | -3 -> errorf "Unsupported target: %s %s" triple cpu - | n -> errorf "Disasm.Basic: Unknown error %d" n in - dd >>= fun dd -> - let disassembler = { - dd; - insn_table = Table.create (C.insn_table dd); - reg_table = Table.create (C.reg_table dd); - users = 1; - } in + let disassembler = init @@ C.create backend t in Hashtbl.add_exn disassemblers name disassembler; - Ok {name; asm = false; kinds = false} - + {name; asm=false; kinds=false; enc=encoding_name encoding} let close dis = let disassembler = get dis in @@ -534,13 +625,10 @@ let close dis = C.delete disassembler.dd; end - let with_disasm ?debug_level ?cpu ?backend triple ~f = create ?debug_level ?cpu ?backend triple >>= fun dis -> f dis >>| fun res -> close dis; res -type ('a,'k) t = dis - let switch : ('a,'k,'s,'r) state -> ('a,'k) t -> ('a,'k,'s,'r) state = fun s dis -> {s with dis} let run ?backlog ?(stop_on=[]) ?invalid ?stopped ?hit dis ~return ~init mem = @@ -572,7 +660,7 @@ let insn_of_mem dis mem = let available_backends () = - C.backends_size () |> List.init ~f:C.backend_name + Prim.backends_size () |> List.init ~f:Prim.backend_name module Trie = struct diff --git a/lib/bap_disasm/bap_disasm_basic.mli b/lib/bap_disasm/bap_disasm_basic.mli index 499ffa9533..744792a464 100644 --- a/lib/bap_disasm/bap_disasm_basic.mli +++ b/lib/bap_disasm/bap_disasm_basic.mli @@ -2,6 +2,8 @@ open Core_kernel open Regular.Std open Bap_types.Std open Bap_core_theory +open Bap_disasm_backend_types + type mem = Bap_memory.t [@@deriving sexp_of] type kind = Bap_insn_kind.t [@@deriving compare, sexp] @@ -24,12 +26,20 @@ type full_insn = (asm,kinds) insn [@@deriving compare, sexp_of] type ('a,'k) t type (+'a,+'k,'s,'r) state +val register : Theory.language -> + (Theory.target -> (empty, empty) t Or_error.t) -> unit + +val lookup : Theory.target -> Theory.language -> (empty,empty) t Or_error.t +val custom : Theory.target -> Theory.language -> + (module S with type t = 'a) -> 'a -> (empty, empty) t +val create : ?debug_level:int -> ?cpu:string -> ?backend:string -> string -> + (empty, empty) t Or_error.t + + val with_disasm : ?debug_level:int -> ?cpu:string -> ?backend:string -> string -> f:((empty, empty) t -> 'a Or_error.t) -> 'a Or_error.t -val create : ?debug_level:int -> ?cpu:string -> ?backend:string -> string -> - (empty, empty) t Or_error.t val close : (_,_) t -> unit diff --git a/lib/bap_disasm/bap_disasm_driver.ml b/lib/bap_disasm/bap_disasm_driver.ml index a8a7126563..d2f973bcd7 100644 --- a/lib/bap_disasm/bap_disasm_driver.ml +++ b/lib/bap_disasm/bap_disasm_driver.ml @@ -13,8 +13,15 @@ type full_insn = Dis.full_insn [@@deriving sexp_of] type insn = Insn.t [@@deriving sexp_of] type edge = [`Jump | `Cond | `Fall] [@@deriving compare] -type encoding = Theory.Language.t [@@deriving bin_io, compare, equal] -let unknown = Theory.Language.unknown +type encoding = { + coding : Theory.Language.t; + target : Theory.Target.t; +} [@@deriving bin_io, compare, equal] + +let unknown = { + coding = Theory.Language.unknown; + target = Theory.Target.unknown; +} type jump = { encoding : encoding; @@ -295,6 +302,12 @@ end = struct } mem end + +let pp_encoding ppf {target; coding} = + Format.fprintf ppf "%a-%a" + Theory.Target.pp target + Theory.Language.pp coding + let new_insn mem insn = let addr = Addr.to_bitvec (Memory.min_addr mem) in Theory.Label.for_addr addr >>= fun code -> @@ -302,10 +315,14 @@ let new_insn mem insn = KB.provide Dis.Insn.slot code (Some insn) >>| fun () -> code +let get_encoding label = + Theory.Label.target label >>= fun target -> + KB.collect Theory.Label.encoding label >>| fun coding -> + {coding; target} + let collect_dests code = - Theory.Label.target code >>= fun target -> - KB.collect Theory.Label.encoding code >>= fun encoding -> - KB.collect Theory.Program.Semantics.slot code >>= fun insn -> + KB.collect Theory.Semantics.slot code >>= fun insn -> + get_encoding code >>= fun encoding -> let init = { encoding; call = Insn.(is call insn); @@ -318,12 +335,14 @@ let collect_dests code = | Some dests -> Set.to_sequence dests |> KB.Seq.fold ~init ~f:(fun dest label -> - KB.collect Theory.Label.encoding label >>= fun encoding -> + get_encoding label >>= fun encoding -> KB.collect Theory.Label.addr label >>| function | Some d -> { dest with encoding; - resolved = Set.add dest.resolved (Word.code_addr target d) + resolved = + Set.add dest.resolved @@ + Word.code_addr encoding.target d } | None -> {dest with indirect=true; encoding}) >>= fun res -> @@ -335,7 +354,7 @@ let pp_addr_opt ppf = function let delay mem insn = new_insn mem insn >>= fun code -> - KB.collect Theory.Program.Semantics.slot code >>| fun insn -> + KB.collect Theory.Semantics.slot code >>| fun insn -> KB.Value.get Insn.Slot.delay insn |> function | None -> 0 | Some x -> x @@ -358,25 +377,22 @@ let classify_mem mem = | Some true -> (code,data,Set.add root addr) | _ -> (code,data,root)) -let create_disassembler language = - let name = Theory.Language.name language in - let backend = KB.Name.package name - and triple = KB.Name.unqualified name in - Dis.create ~backend triple +let create_disassembler {target; coding} = + Dis.lookup target coding -let switch lang s = - match create_disassembler lang with +let switch encoding s = + match create_disassembler encoding with | Error _ -> s | Ok dis -> Dis.switch s dis let rec next_encoding state current code = - if Theory.Language.equal unknown current + if Theory.Language.is_unknown current.coding then let addr = Memory.min_addr code in KB.Object.scoped Theory.Program.cls @@ fun obj -> KB.provide Theory.Label.addr obj (Some (Word.to_bitvec addr)) >>= fun () -> - KB.collect Theory.Label.encoding obj >>= fun encoding -> - if Theory.Language.equal unknown encoding + get_encoding obj >>= fun encoding -> + if Theory.Language.is_unknown encoding.coding then skip state addr code else KB.return encoding else KB.return current @@ -405,7 +421,7 @@ let scan_mem ~code ~data ~funs debt base : Machine.state KB.t = step d (Machine.stopped s encoding)) ~hit:(fun d mem insn s -> new_insn mem insn >>= fun label -> - KB.provide Theory.Label.encoding label encoding >>= fun () -> + KB.provide Theory.Label.encoding label encoding.coding >>= fun () -> collect_dests label >>= fun dests -> if Set.is_empty dests.resolved && not dests.indirect then @@ -517,21 +533,19 @@ let rec insert pos x xs = let execution_order stack = KB.List.fold stack ~init:[] ~f:(fun insns insn -> - KB.collect Theory.Program.Semantics.slot insn >>| fun s -> + KB.collect Theory.Semantics.slot insn >>| fun s -> match KB.Value.get Insn.Slot.delay s with | None -> insn::insns | Some d -> insert d insn insns) let always _ = KB.return true - - let with_disasm beg cfg f = Theory.Label.for_addr (Word.to_bitvec beg) >>= - KB.collect Theory.Label.encoding >>= fun language -> - match create_disassembler language with + get_encoding >>= fun encoding -> + match create_disassembler encoding with | Error _ -> KB.return (cfg,None) - | Ok dis -> f language dis + | Ok dis -> f encoding dis let explore ?entry:start ?(follow=always) ~block ~node ~edge ~init diff --git a/lib/bap_disasm/bap_disasm_driver.mli b/lib/bap_disasm/bap_disasm_driver.mli index a784750e42..e3d36d649c 100644 --- a/lib/bap_disasm/bap_disasm_driver.mli +++ b/lib/bap_disasm/bap_disasm_driver.mli @@ -3,7 +3,6 @@ open Bap_types.Std open Bap_image_std open Bap_knowledge open Bap_core_theory -module Dis = Bap_disasm_basic type state [@@deriving bin_io] type insns diff --git a/lib/bap_disasm/bap_disasm_prim.ml b/lib/bap_disasm/bap_disasm_prim.ml index 1ba0fa0781..596c7785aa 100644 --- a/lib/bap_disasm/bap_disasm_prim.ml +++ b/lib/bap_disasm/bap_disasm_prim.ml @@ -1,71 +1,29 @@ open Core_kernel - -(**/**) - -(** This interface mimics dism.h look there for a documentation. - - The only difference, is that interfaces adds new [imm_small_value] - that retrieves imm value of operand as an int (and thus is a - "noalloc" function). Values that don't fit, are represented as - [Int.max_val], and [Int.min_val]. (So this functions actually - narrows a bit a range of values representable by int type - min_val and - max_val are no longer included, since they're used as sentinels). - - All this functions are unsafe. They can cause segfaults or worse, - if used incorrectly. Don't use this interface, unless you're - perfectly sure what you're doing. -*) - +open Bap_disasm_backend_types type t = int -type pred = - | Is_true - | Is_invalid - | Is_return - | Is_call - | Is_barrier - | Is_terminator - | Is_branch - | Is_indirect_branch - | Is_conditional_branch - | Is_unconditional_branch - | May_affect_control_flow - | May_store - | May_load -[@@deriving compare, sexp] - -type op = - | Reg - | Imm - | Fmm - | Insn -[@@deriving compare, sexp] - -[@@@ocaml.warning "-3"] - -external create - : backend:string - -> triple:string - -> cpu:string - -> debug_level:int - -> t - = "bap_disasm_create_stub" "noalloc" +external create : + backend:string -> + triple:string -> + cpu:string -> + debug_level:int -> t = + "bap_disasm_create_stub" [@@noalloc] external backends_size : unit -> int = - "bap_disasm_backends_size_stub" "noalloc" + "bap_disasm_backends_size_stub" [@@noalloc] external backend_name : int -> string = "bap_disasm_backend_name_stub" -external delete : t -> unit = "bap_disasm_delete_stub" "noalloc" +external delete : t -> unit = "bap_disasm_delete_stub" [@@noalloc] external set_memory : t -> int64 -> Bigstring.t -> off:int -> len:int -> unit - = "bap_disasm_set_memory_stub" "noalloc" + = "bap_disasm_set_memory_stub" [@@noalloc] external store_predicates : t -> bool -> unit = - "bap_disasm_store_predicates_stub" "noalloc" + "bap_disasm_store_predicates_stub" [@@noalloc] external store_asm_string : t -> bool -> unit = - "bap_disasm_store_asm_strings_stub" "noalloc" + "bap_disasm_store_asm_strings_stub" [@@noalloc] external insn_table : t -> Bigstring.t = "bap_disasm_insn_table_stub" @@ -74,70 +32,68 @@ external reg_table : t -> Bigstring.t = "bap_disasm_reg_table_stub" external predicates_clear : t -> unit = - "bap_disasm_predicates_clear_stub" "noalloc" + "bap_disasm_predicates_clear_stub" [@@noalloc] -external predicates_push : t -> pred -> unit = - "bap_disasm_predicates_push_stub" "noalloc" +external predicates_push : t -> predicate -> unit = + "bap_disasm_predicates_push_stub" [@@noalloc] -external is_supported : t -> pred -> bool = - "bap_disasm_predicate_is_supported_stub" "noalloc" +external is_supported : t -> predicate -> bool = + "bap_disasm_predicate_is_supported_stub" [@@noalloc] external set_offset : t -> int -> unit = - "bap_disasm_set_offset_stub" "noalloc" + "bap_disasm_set_offset_stub" [@@noalloc] external offset : t -> int = - "bap_disasm_offset_stub" "noalloc" + "bap_disasm_offset_stub" [@@noalloc] external run : t -> unit = - "bap_disasm_run_stub" "noalloc" + "bap_disasm_run_stub" [@@noalloc] external insns_clear : t -> unit = - "bap_disasm_insns_clear_stub" "noalloc" + "bap_disasm_insns_clear_stub" [@@noalloc] external insns_size : t -> int = - "bap_disasm_insns_size_stub" "noalloc" + "bap_disasm_insns_size_stub" [@@noalloc] external insn_size : t -> insn:int -> int = - "bap_disasm_insn_size_stub" "noalloc" + "bap_disasm_insn_size_stub" [@@noalloc] external insn_name : t -> insn:int -> int = - "bap_disasm_insn_name_stub" "noalloc" + "bap_disasm_insn_name_stub" [@@noalloc] external insn_code : t -> insn:int -> int = - "bap_disasm_insn_code_stub" "noalloc" + "bap_disasm_insn_code_stub" [@@noalloc] external insn_offset : t -> insn:int -> int = - "bap_disasm_insn_offset_stub" "noalloc" + "bap_disasm_insn_offset_stub" [@@noalloc] external insn_asm_size : t -> insn:int -> int = - "bap_disasm_insn_asm_size_stub" "noalloc" + "bap_disasm_insn_asm_size_stub" [@@noalloc] external insn_asm_copy : t -> insn:int -> Bytes.t -> unit = - "bap_disasm_insn_asm_copy_stub" "noalloc" + "bap_disasm_insn_asm_copy_stub" [@@noalloc] -external insn_satisfies : t -> insn:int -> pred -> bool = - "bap_disasm_insn_satisfies_stub" "noalloc" +external insn_satisfies : t -> insn:int -> predicate -> bool = + "bap_disasm_insn_satisfies_stub" [@@noalloc] external insn_ops_size : t -> insn:int -> int = - "bap_disasm_insn_ops_size_stub" "noalloc" + "bap_disasm_insn_ops_size_stub" [@@noalloc] external insn_op_type : t -> insn:int -> oper:int -> op = - "bap_disasm_insn_op_type_stub" "noalloc" + "bap_disasm_insn_op_type_stub" [@@noalloc] external insn_op_reg_name : t -> insn:int -> oper:int -> int = - "bap_disasm_insn_op_reg_name_stub" "noalloc" + "bap_disasm_insn_op_reg_name_stub" [@@noalloc] external insn_op_reg_code : t -> insn:int -> oper:int -> int = - "bap_disasm_insn_op_reg_code_stub" "noalloc" + "bap_disasm_insn_op_reg_code_stub" [@@noalloc] external insn_op_imm_value : t -> insn:int -> oper:int -> int64 = "bap_disasm_insn_op_imm_value_stub" external insn_op_imm_small_value : t -> insn:int -> oper:int -> int = - "bap_disasm_insn_op_imm_small_value_stub" "noalloc" + "bap_disasm_insn_op_imm_small_value_stub" [@@noalloc] external insn_op_fmm_value : t -> insn:int -> oper:int -> float = "bap_disasm_insn_op_fmm_value_stub" - -(**/**) diff --git a/lib/bap_disasm/bap_disasm_rec.ml b/lib/bap_disasm/bap_disasm_rec.ml index 9f92bcd6c3..a468eaa3d4 100644 --- a/lib/bap_disasm/bap_disasm_rec.ml +++ b/lib/bap_disasm/bap_disasm_rec.ml @@ -119,7 +119,7 @@ let with_unit = KB.collect Theory.Label.addr label >>= function | Some p when Memory.contains mem @@ Word.create p width -> Theory.Unit.for_region ~lower ~upper >>= fun unit -> - KB.provide Arch.unit_slot unit arch >>| fun () -> + KB.provide Image.Spec.slot unit (Image.Spec.from_arch arch) >>| fun () -> Some unit | _ -> KB.return None) diff --git a/lib/bap_disasm/bap_disasm_std.ml b/lib/bap_disasm/bap_disasm_std.ml index 26ec320752..68bae23031 100644 --- a/lib/bap_disasm/bap_disasm_std.ml +++ b/lib/bap_disasm/bap_disasm_std.ml @@ -16,6 +16,7 @@ module Disasm_expert = struct module Linear = Bap_disasm_linear_sweep module Kind = Bap_insn_kind module Insn = Bap_disasm_basic.Insn + module Backend = Bap_disasm_backend_types end module Cfg = Bap_disasm_rec.Cfg diff --git a/lib/bap_image/bap_image.ml b/lib/bap_image/bap_image.ml index 2a7cef794e..4317f9262d 100644 --- a/lib/bap_image/bap_image.ml +++ b/lib/bap_image/bap_image.ml @@ -92,6 +92,66 @@ end type segment = Segment.t [@@deriving bin_io, compare, sexp] type symbol = Symbol.t [@@deriving bin_io,compare, sexp] +module Scheme = struct + open Ogre.Type + type addr = int64 + type size = int64 + type off = int64 + type value = int64 + type 'a region = {addr : int64; size: int64; info : 'a} + let region addr size info = {addr; size; info} + let void_region addr size = {addr; size; info = ()} + + let off = "off" %: int + let size = "size" %: int + let addr = "addr" %: int + let value = "value" %: int + let name = "name" %: str + let root = "root" %: int + let readable = "r" %: bool + let writable = "w" %: bool + let executable = "x" %: bool + let flag = "flag" %: bool + let fixup = "fixup" %: int + + let location () = scheme addr $ size + let declare name scheme f = Ogre.declare ~name scheme f + let named n scheme f = declare n (scheme $ name) f + let arch () = declare "arch" (scheme name) ident + let subarch () = declare "subarch" (scheme name) ident + let vendor () = declare "vendor" (scheme name) ident + let system () = declare "system" (scheme name) ident + let format () = declare "format" (scheme name) ident + let abi () = declare "abi" (scheme name) ident + let bits () = declare "bits" (scheme size) ident + let is_little_endian () = declare "is-little-endian" (scheme flag) ident + let is_executable () = declare "is-executable" (scheme flag) ident + let bias () = declare "bias" (scheme off) ident + let section () = declare "section" (location ()) void_region + let code_start () = declare "code-start" (scheme addr) ident + let entry_point () = declare "entry-point" (scheme addr) ident + let symbol_chunk () = declare "symbol-chunk" (location () $ root) region + let named_region () = named "named-region" (location ()) region + let named_symbol () = named "named-symbol" (scheme addr) (fun x y -> x,y) + let rwx scheme = scheme $ readable $ writable $ executable + let segment () = declare "segment" (location () |> rwx) + (fun addr size r w x -> {addr; size; info=(r,w,x)}) + let mapped () = declare "mapped" (location () $off) + (fun addr size off -> region addr size off) + + let code_region () = + declare "code-region" (scheme addr $ size $ off) Tuple.T3.create + + let relocation () = + declare "relocation" (scheme fixup $ addr) Tuple.T2.create + let external_reference () = + declare "external-reference" (scheme addr $ name) Tuple.T2.create + let base_address () = declare "base-address" (scheme addr) ident + + let symbol_value () = + declare "symbol-value" (scheme addr $ value) Tuple.T2.create +end + module Spec = struct type t = { arch : arch; @@ -101,6 +161,27 @@ module Spec = struct sections : unit region list; code : mapped region list; } [@@deriving bin_io, compare, sexp] + + let slot = Bap_ogre.slot + + let from_arch arch = + let module Field = Scheme in + let open Ogre.Syntax in + let bits = Int64.of_int (Size.in_bits (Arch.addr_size arch)) in + let statements = Ogre.all [ + Ogre.provide Field.arch (Arch.to_string arch); + Ogre.provide Field.bits bits; + Ogre.provide Field.format "raw"; + Ogre.provide Field.is_little_endian @@ + match Arch.endian arch with + | LittleEndian -> true + | BigEndian -> false + ] in + match Ogre.exec statements Ogre.Doc.empty with + | Error err -> + failwithf "got a malformed ogre document: %s" + (Error.to_string_hum err) (); + | Ok doc -> doc end @@ -162,10 +243,11 @@ let file = Value.Tag.register (module String) ~name:"file" ~uuid:"c119f700-4069-47ad-ba99-fc29791e0d47" -let specification = Value.Tag.register (module Bap_ogre.Doc) +let specification = Value.Tag.register (module Bap_ogre) ~name:"image-specification" ~uuid:"a0c98f1f-3693-412a-a11a-2b6c3f6935a7" + let mem_of_locn mem {addr;size} : mem Or_error.t = match Memory.view ~from:addr ~words:size mem with | Error err -> @@ -330,67 +412,6 @@ let register_loader ~name backend = let find_loader = Hashtbl.find backends -module Scheme = struct - open Ogre.Type - type addr = int64 - type size = int64 - type off = int64 - type value = int64 - type 'a region = {addr : int64; size: int64; info : 'a} - let region addr size info = {addr; size; info} - let void_region addr size = {addr; size; info = ()} - - let off = "off" %: int - let size = "size" %: int - let addr = "addr" %: int - let value = "value" %: int - let name = "name" %: str - let root = "root" %: int - let readable = "r" %: bool - let writable = "w" %: bool - let executable = "x" %: bool - let flag = "flag" %: bool - let fixup = "fixup" %: int - - let location () = scheme addr $ size - let declare name scheme f = Ogre.declare ~name scheme f - let named n scheme f = declare n (scheme $ name) f - let arch () = declare "arch" (scheme name) ident - let subarch () = declare "subarch" (scheme name) ident - let vendor () = declare "vendor" (scheme name) ident - let system () = declare "system" (scheme name) ident - let format () = declare "format" (scheme name) ident - let abi () = declare "abi" (scheme name) ident - let bits () = declare "bits" (scheme size) ident - let is_little_endian () = declare "is-little-endian" (scheme flag) ident - let is_executable () = declare "is-executable" (scheme flag) ident - let bias () = declare "bias" (scheme off) ident - let section () = declare "section" (location ()) void_region - let code_start () = declare "code-start" (scheme addr) ident - let entry_point () = declare "entry-point" (scheme addr) ident - let symbol_chunk () = declare "symbol-chunk" (location () $ root) region - let named_region () = named "named-region" (location ()) region - let named_symbol () = named "named-symbol" (scheme addr) (fun x y -> x,y) - let rwx scheme = scheme $ readable $ writable $ executable - let segment () = declare "segment" (location () |> rwx) - (fun addr size r w x -> {addr; size; info=(r,w,x)}) - let mapped () = declare "mapped" (location () $off) - (fun addr size off -> region addr size off) - - let code_region () = - declare "code-region" (scheme addr $ size $ off) Tuple.T3.create - - let relocation () = - declare "relocation" (scheme fixup $ addr) Tuple.T2.create - let external_reference () = - declare "external-reference" (scheme addr $ name) Tuple.T2.create - let base_address () = declare "base-address" (scheme addr) ident - - let symbol_value () = - declare "symbol-value" (scheme addr $ value) Tuple.T2.create -end - - module Derive = struct open Fact.Syntax open Scheme diff --git a/lib/bap_image/bap_image.mli b/lib/bap_image/bap_image.mli index 0c5f3edefb..669a488326 100644 --- a/lib/bap_image/bap_image.mli +++ b/lib/bap_image/bap_image.mli @@ -1,5 +1,6 @@ (** A loadable memory image of an executable. *) +open Bap_core_theory open Core_kernel open Regular.Std open Bap_types.Std @@ -44,6 +45,11 @@ val memory_of_symbol : t -> symbol -> mem * mem seq val symbols_of_segment : t -> segment -> symbol seq val segment_of_symbol : t -> symbol -> segment +module Spec : sig + val from_arch : arch -> Ogre.doc + val slot : (Theory.Unit.cls, Ogre.doc) KB.slot +end + module Segment : sig type t = segment include Regular.S with type t := t diff --git a/lib/bap_mips/bap_mips_target.ml b/lib/bap_mips/bap_mips_target.ml index baccec8b47..b5a51ae5e9 100644 --- a/lib/bap_mips/bap_mips_target.ml +++ b/lib/bap_mips/bap_mips_target.ml @@ -67,7 +67,7 @@ let mips64eb = define r64 Theory.Endianness.eb ~parent:mips64bi let enable_loader () = KB.Rule.(declare ~package "mips-target" |> - require Project.specification_slot |> + require Image.Spec.slot |> provide Theory.Unit.target |> comment "computes target from the OGRE specification"); let open KB.Syntax in @@ -81,7 +81,7 @@ let enable_loader () = | Error _ -> None,None | Ok info -> info in KB.promise Theory.Unit.target @@ fun unit -> - KB.collect Project.specification_slot unit >>| + KB.collect Image.Spec.slot unit >>| request_info >>| function | Some "mips", None -> mips32bi | Some "mips64",None -> mips64bi @@ -111,7 +111,28 @@ let map_mips () = | Some arch -> arch | None -> `unknown +module Dis = Disasm_expert.Basic + +let llvm_mips32 = Theory.Language.declare ~package "llvm-mips32" +let llvm_mips64 = Theory.Language.declare ~package "llvm-mips64" + +let register encoding triple = + Dis.register encoding @@ fun _ -> + Dis.create ~backend:"llvm" triple + +let enable_decoder () = + let open KB.Syntax in + register llvm_mips32 "mips"; + register llvm_mips64 "mips64"; + KB.promise Theory.Label.encoding @@ fun label -> + Theory.Label.target label >>| fun t -> + if Theory.Target.belongs parent t then + if Theory.Target.belongs mips32bi t + then llvm_mips32 + else llvm_mips64 + else Theory.Language.unknown let load () = enable_loader (); + enable_decoder (); map_mips () diff --git a/lib/bap_mips/bap_mips_target.mli b/lib/bap_mips/bap_mips_target.mli index bac8e5eebc..997a70ca89 100644 --- a/lib/bap_mips/bap_mips_target.mli +++ b/lib/bap_mips/bap_mips_target.mli @@ -26,6 +26,12 @@ val mips64eb : Theory.Target.t (** The big endian MIPS64 *) val mips64le : Theory.Target.t (** The little endian MIPS64 *) + +(** {2 MIPS encodings} *) + +val llvm_mips32 : Theory.language +val llvm_mips64 : Theory.language + (** [load ()] loads the knowledge base rules for the MIPS targets. This includes parsing the loader output and enabling backward diff --git a/lib/bap_powerpc/bap_powerpc_target.ml b/lib/bap_powerpc/bap_powerpc_target.ml index ab168ebff3..c000a7f6fd 100644 --- a/lib/bap_powerpc/bap_powerpc_target.ml +++ b/lib/bap_powerpc/bap_powerpc_target.ml @@ -81,7 +81,7 @@ let enable_loader () = | Error _ -> None,None | Ok info -> info in KB.promise Theory.Unit.target @@ fun unit -> - KB.collect Project.specification_slot unit >>| + KB.collect Image.Spec.slot unit >>| request_info >>| function | Some "powerpc", None -> powerpc32bi | Some "powerpc64",None -> powerpc64bi @@ -106,7 +106,28 @@ let map_powerpc () = | Some arch -> arch | None -> `unknown +module Dis = Disasm_expert.Basic + +let llvm_powerpc32 = Theory.Language.declare ~package "llvm-powerpc32" +let llvm_powerpc64 = Theory.Language.declare ~package "llvm-powerpc64" + +let register encoding triple = + Dis.register encoding @@ fun _ -> + Dis.create ~backend:"llvm" triple + +let enable_decoder () = + let open KB.Syntax in + register llvm_powerpc32 "powerpc"; + register llvm_powerpc64 "powerpc64"; + KB.promise Theory.Label.encoding @@ fun label -> + Theory.Label.target label >>| fun t -> + if Theory.Target.belongs parent t then + if Theory.Target.belongs powerpc32bi t + then llvm_powerpc32 + else llvm_powerpc64 + else Theory.Language.unknown let load () = enable_loader (); + enable_decoder (); map_powerpc () diff --git a/lib/bap_powerpc/bap_powerpc_target.mli b/lib/bap_powerpc/bap_powerpc_target.mli index a3c6edb7b6..a39c444573 100644 --- a/lib/bap_powerpc/bap_powerpc_target.mli +++ b/lib/bap_powerpc/bap_powerpc_target.mli @@ -26,6 +26,11 @@ val powerpc64eb : Theory.Target.t (** The big endian PowerPC64 *) val powerpc64le : Theory.Target.t (** The little endian PowerPC64 *) +(** {2 The PowerPC encodings} *) + +val llvm_powerpc32 : Theory.language +val llvm_powerpc64 : Theory.language + (** [load ()] loads the knowledge base rules for the PowerPC targets. This includes parsing the loader output and enabling backward diff --git a/lib/bap_types/bap_arch.ml b/lib/bap_types/bap_arch.ml index 6971a380da..3ee98b8f5f 100644 --- a/lib/bap_types/bap_arch.ml +++ b/lib/bap_types/bap_arch.ml @@ -93,9 +93,7 @@ module T = struct KB.promise slot @@ fun obj -> KB.collect Theory.Label.unit obj >>= function | None -> KB.return `unknown - | Some unit -> KB.collect unit_slot unit >>| function - | #arm -> `unknown - | arch -> arch + | Some unit -> KB.collect unit_slot unit end include T diff --git a/lib/bap_types/bap_ogre.ml b/lib/bap_types/bap_ogre.ml index 83cc842134..580e45faf5 100644 --- a/lib/bap_types/bap_ogre.ml +++ b/lib/bap_types/bap_ogre.ml @@ -1,19 +1,41 @@ +let package = "bap" +open Bap_core_theory open Core_kernel -module Doc = struct - type t = Ogre.Doc.t [@@deriving compare] +type t = Ogre.Doc.t [@@deriving compare] - module Stringable = struct - type t = Ogre.doc - let to_string = Ogre.Doc.to_string - let of_string data = match Ogre.Doc.from_string data with - | Ok doc -> doc - | Error err -> - failwithf "can't deserialize Ogre doc - %s" - (Error.to_string_hum err) () - end - - let pp = Ogre.Doc.pp - include Sexpable.Of_stringable(Stringable) - include Binable.Of_stringable(Stringable) +module Stringable = struct + type t = Ogre.doc + let to_string = Ogre.Doc.to_string + let of_string data = match Ogre.Doc.from_string data with + | Ok doc -> doc + | Error err -> + failwithf "can't deserialize Ogre doc - %s" + (Error.to_string_hum err) () end + +let pp = Ogre.Doc.pp +include Sexpable.Of_stringable(Stringable) +include Binable.Of_stringable(Stringable) + +type KB.Conflict.t += Spec_inconsistency of Error.t + +let domain = KB.Domain.flat "spec" + ~empty:Ogre.Doc.empty + ~inspect:Ogre.Doc.sexp_of_t + ~join:(fun d1 d2 -> match Ogre.Doc.merge d1 d2 with + | Ok d -> Ok d + | Error err -> + Error (Spec_inconsistency err)) + ~equal:(fun d1 d2 -> Ogre.Doc.compare d1 d2 = 0) + +let slot = KB.Class.property Theory.Unit.cls "unit-spec" domain + ~package + ~persistent:(KB.Persistent.of_binable (module struct + type t = Ogre.Doc.t [@@deriving bin_io] + end)) + +let () = KB.Conflict.register_printer @@ function + | Spec_inconsistency err -> + Some (Error.to_string_hum err) + | _ -> None diff --git a/lib/bap_types/bap_ogre.mli b/lib/bap_types/bap_ogre.mli new file mode 100644 index 0000000000..50731be819 --- /dev/null +++ b/lib/bap_types/bap_ogre.mli @@ -0,0 +1,6 @@ +open Bap_core_theory +open Core_kernel + +type t = Ogre.doc [@@deriving bin_io, compare, sexp] +val pp : Format.formatter -> t -> unit +val slot : (Theory.Unit.cls, t) KB.slot diff --git a/lib/x86_cpu/x86_target.ml b/lib/x86_cpu/x86_target.ml index 8059cc6f98..a610f9115e 100644 --- a/lib/x86_cpu/x86_target.ml +++ b/lib/x86_cpu/x86_target.ml @@ -1,5 +1,6 @@ open Bap_core_theory open Core_kernel +open Bap.Std let package = "bap" @@ -194,10 +195,9 @@ let amd64 = Theory.Target.declare ~package "amd64" let family = [amd64; i686; i586; i486; i386; i86] let enable_loader () = - let open Bap.Std in let open KB.Syntax in KB.Rule.(declare ~package "x86-target" |> - require Project.specification_slot |> + require Image.Spec.slot |> provide Theory.Unit.target |> comment "computes target from the OGRE specification"); let request_arch doc = @@ -205,14 +205,20 @@ let enable_loader () = | Error _ -> None | Ok arch -> arch in KB.promise Theory.Unit.target @@ fun unit -> - KB.collect Project.specification_slot unit >>| + KB.Object.repr Theory.Unit.cls unit >>= fun str -> + Format.eprintf "requesting specification for unit %s@\n%!" str; + KB.collect Image.Spec.slot unit >>| request_arch >>| function | Some ("amd64"|"x86-64"|"x86_64") -> amd64 | Some ("x86"|"i386"|"i486"|"i586"|"i686") -> i686 - | _ -> Theory.Target.unknown + | None -> + Format.eprintf "No arch in spec or no spec@\n%!"; + Theory.Target.unknown + | Some arch -> + Format.eprintf "Not an x86 arch - %s@\n%!" arch; + Theory.Target.unknown let enable_arch () = - let open Bap.Std in let open KB.Syntax in KB.Rule.(declare ~package "x86-arch" |> require Theory.Unit.target |> @@ -220,22 +226,34 @@ let enable_arch () = comment "computes Arch.t from the unit's target"); KB.promise Arch.unit_slot @@ fun unit -> KB.collect Theory.Unit.target unit >>| fun t -> + Format.eprintf "deducing Arch.t for %a@\n%!" Theory.Target.pp t; if Theory.Target.belongs amd64 t then `x86_64 else if Theory.Target.belongs i386 t then `x86 else `unknown - let llvm_x86_encoding = - Theory.Language.declare ~package:"llvm" "x86" + Theory.Language.declare ~package "llvm-x86" + let llvm_x86_64_encoding = - Theory.Language.declare ~package:"llvm" "x86-64" + Theory.Language.declare ~package "llvm-x86_64" + +let register_x86_llvm_disassembler () = + Disasm_expert.Basic.register llvm_x86_encoding @@ fun _ -> + Disasm_expert.Basic.create ~backend:"llvm" "x86" + +let register_x86_64_llvm_disassembler () = + Disasm_expert.Basic.register llvm_x86_64_encoding @@ fun _ -> + Disasm_expert.Basic.create ~backend:"llvm" "x86_64" let enable_decoder () = let open KB.Syntax in + register_x86_llvm_disassembler (); + register_x86_64_llvm_disassembler (); KB.promise Theory.Label.encoding @@ fun label -> Theory.Label.target label >>| fun t -> + Format.eprintf "deducing encoding for %a@\n%!" Theory.Target.pp t; if Theory.Target.belongs amd64 t then llvm_x86_64_encoding else if Theory.Target.belongs parent t diff --git a/oasis/arm b/oasis/arm index bfc2f3fbcc..17442a38ef 100644 --- a/oasis/arm +++ b/oasis/arm @@ -7,7 +7,8 @@ Library "bap-arm" Path: lib/arm Build$: flag(everything) || flag(arm) BuildDepends: bap, core_kernel, ppx_jane, regular, - bap-core-theory, bap-knowledge, ogre + bap-core-theory, bap-knowledge, ogre, + bitvec, bitvec-order FindlibName: bap-arm Modules: ARM, diff --git a/oasis/bap-std b/oasis/bap-std index 37bddef607..cf8c74023f 100644 --- a/oasis/bap-std +++ b/oasis/bap-std @@ -151,6 +151,7 @@ Library disasm bap-main InternalModules: Bap_disasm, + Bap_disasm_backend_types, Bap_disasm_basic, Bap_disasm_block, Bap_disasm_brancher, diff --git a/plugins/arm/arm_main.ml b/plugins/arm/arm_main.ml index 0d36184a7a..fe2bec6791 100644 --- a/plugins/arm/arm_main.ml +++ b/plugins/arm/arm_main.ml @@ -34,58 +34,9 @@ module ARM = struct Error (Error.of_string "type error") end -let symbol_values doc = - let field = Ogre.Query.(select (from Image.Scheme.symbol_value)) in - match Ogre.eval (Ogre.collect field) doc with - | Ok syms -> syms - | Error err -> error "the file specification is ill-formed: %a" - Error.pp err; - failwith "broken file specification" - - - -(* let compute_arch_from_symbol_table file spec = - * let open KB.Syntax in - * let init = Map.empty (module Bitvec_order) in - * let (>>=?) x f = x >>= function - * | None -> KB.return `unknown - * | Some x -> f x in - * let require_arm unit f = - * KB.collect Theory.Unit.target unit >>= fun t -> - * if Theory.Target.belongs Arm_target.parent t - * then f t - * else KB.return `unknown in - * let symbols = - * symbol_values spec |> - * Seq.fold ~init ~f:(fun symbols (addr,value) -> - * let arch = match Int64.(value land 1L) with - * | 0L -> `armv7 - * | _ -> `thumbv7 in - * let addr = Bitvec.M32.int64 addr in - * Map.add_exn symbols addr arch) in - * KB.promise Arch.slot @@ fun label -> - * KB.collect Theory.Label.unit label >>=? fun unit -> - * require_arm unit @@ fun target -> - * KB.collect Theory.Unit.path unit >>= function - * | None -> KB.return arch - * | Some path when path <> file -> KB.return `unknown - * | Some _ -> - * KB.collect Theory.Label.addr label >>=? fun addr -> - * KB.return @@ match Map.find symbols addr with - * | Some arch -> arch - * | None -> - * if Map.is_empty symbols then arch else `unknown *) - - - let () = Config.when_ready @@ fun _ -> Arm_target.load (); List.iter Arch.all_of_arm ~f:(fun arch -> register_target (arch :> arch) (module ARM); - Arm_gnueabi.setup ()); - (* let inputs = Stream.merge ~f:Tuple.T2.create - * Project.Info.file Project.Info.spec in - * Stream.observe inputs @@ fun (file,spec) -> - * info "computing arch from symbol table"; - * compute_arch_from_symbol_table file spec *) + Arm_gnueabi.setup ())