diff --git a/.merlin b/.merlin index ac80d5fe3..c22c7b637 100644 --- a/.merlin +++ b/.merlin @@ -19,6 +19,7 @@ S lib/bap_types PKG core_kernel PKG ocamlbuild +PKG jsonm EXT ounit EXT here EXT nonrec diff --git a/.travis-ci.sh b/.travis-ci.sh index f75b93ae3..2f17773d9 100644 --- a/.travis-ci.sh +++ b/.travis-ci.sh @@ -1,20 +1,25 @@ -OPAM_DEPENDS="core_kernel oasis piqi zarith bitstring utop cmdliner" -SYS_DEPENDS="libgmp-dev time llvm-3.4-dev" +OPAM_DEPENDS="core_kernel.111.28.00 oasis zarith bitstring utop cmdliner faillib ezjsonm lwt-zmq uri.1.7.2 re cohttp.0.15.0" +SYS_DEPENDS="libgmp-dev time llvm-3.4-dev libzmq3-dev aspcud" case "$OCAML_VERSION,$OPAM_VERSION" in -4.02.0,1.2.0) ppa=avsm/ocaml42+opam12 ;; -4.02.0,1.1.0) ppa=avsm/ocaml42+opam11 ;; +4.02.1,1.2.0) ppa=avsm/ocaml42+opam12 ;; +4.02.1,1.1.0) ppa=avsm/ocaml42+opam11 ;; 4.01.0,1.2.0) ppa=avsm/ocaml41+opam12 ;; 4.01.0,1.1.0) ppa=avsm/ocaml41+opam11 ;; *) echo Unknown $OCAML_VERSION,$OPAM_VERSION; exit 1 ;; esac - install_on_linux () { - echo "yes" | sudo add-apt-repository ppa:$ppa + echo 'yes' | sudo add-apt-repository ppa:$ppa + echo 'yes' | sudo apt-add-repository ppa:chris-lea/zeromq sudo apt-get update -qq sudo apt-get install -qq ocaml ocaml-native-compilers camlp4-extra opam $SYS_DEPENDS - opam init + + if [ $OPAM_VERSION = "1.1.0" ]; then + opam init default https://opam.ocaml.org/1.1 + else + opam init + fi } install_on_osx () { @@ -40,13 +45,13 @@ ocaml -version echo OPAM versions opam --version opam --git-version +opam repository list -opam init opam install ${OPAM_DEPENDS} eval `opam config env` oasis setup -./configure --prefix=$(opam config var prefix) --enable-tests --enable-serialization --with-cxx=`which $CXX` +./configure --prefix=$(opam config var prefix) --enable-tests --with-cxx=`which $CXX` make make install make test diff --git a/.travis.yml b/.travis.yml index 779d07fc7..52f55b104 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,6 @@ compiler: env: - OCAML_VERSION=4.01.0 OPAM_VERSION=1.1.0 - OCAML_VERSION=4.01.0 OPAM_VERSION=1.2.0 - - OCAML_VERSION=4.02.0 OPAM_VERSION=1.2.0 TEST_TARGETS=arm - - OCAML_VERSION=4.02.0 OPAM_VERSION=1.2.0 TEST_TARGETS=x86 - - OCAML_VERSION=4.02.0 OPAM_VERSION=1.2.0 TEST_TARGETS=x86_64 + - OCAML_VERSION=4.02.1 OPAM_VERSION=1.2.0 TEST_TARGETS=arm + - OCAML_VERSION=4.02.1 OPAM_VERSION=1.2.0 TEST_TARGETS=x86 + - OCAML_VERSION=4.02.1 OPAM_VERSION=1.2.0 TEST_TARGETS=x86_64 diff --git a/_oasis b/_oasis index 1444dd562..270caa201 100644 --- a/_oasis +++ b/_oasis @@ -16,11 +16,9 @@ BuildDepends: comparelib.syntax, core_kernel, enumerate.syntax, + faillib.syntax, fieldslib.syntax, herelib.syntax, - herelib.syntax, - pa_bench.syntax, - pa_ounit.syntax, pa_ounit.syntax, sexplib.syntax, variantslib.syntax @@ -53,13 +51,21 @@ Flag benchmarks Default: false Flag disassemblers - Description: Builds C++ interface to dissassemblers backend + Description: Build C++ interface to dissassemblers backend + Default: true + +Flag server + Description: Build BAP server Default: true Flag llvm Description: Build with llvm backend Default: true +Flag server + Description: Build BAP server + Default: true + Library bap Path: lib/bap FindLibName: bap @@ -129,7 +135,7 @@ Library serialization BuildTools: ocamlbuild, piqi DataFiles: *.piqi CompiledObject: best - BuildDepends: piqirun.pb, piqirun.ext, bap.types + BuildDepends: piqirun, bap.types Modules: Bil_piqi, Stmt_piqi, Stmt_piqi_ext @@ -250,6 +256,7 @@ Library benchmarks Install: false Modules: Bench_image + Library dwarf_test Path: lib_test/bap_dwarf Build$: flag(tests) @@ -266,6 +273,29 @@ Library disasm_test Install: false Modules: Test_disasm +Library core_lwt + Path: lwt + Build$: flag(server) + CompiledObject: best + BuildDepends: lwt, lwt.unix, lwt.log + Modules: Core_lwt, + Core_lwt_basic, + Core_lwt_container, + Core_lwt_container_intf, + Core_lwt_extra, + Core_lwt_or_error, + Core_lwt_pool, + Core_lwt_stream + + +Executable "bap-server" + Path: src/server + Build$: flag(server) + CompiledObject: best + BuildDepends: bap, lwt-zmq, ezjsonm, uri, cohttp.lwt, core_lwt + Install: true + MainIs: start_server.ml + Executable readbin Path: src/readbin MainIs: readbin.ml diff --git a/configure b/configure index 3af98c08f..c58eb6c73 100755 --- a/configure +++ b/configure @@ -22,5 +22,14 @@ for i in "$@"; do done [ -f setup.ml -a setup.ml -nt _oasis ] || oasis -quiet setup + + ocaml preconfig.ml + +case `ocaml -vnum` in + 4.01.*) + sed -i 's/mark_tag_used/ignore/' myocamlbuild.ml + ;; +esac + ocaml setup.ml -configure "$@" diff --git a/lib/bap_disasm/bap_disasm.ml b/lib/bap_disasm/bap_disasm.ml index b71b17539..16b071922 100644 --- a/lib/bap_disasm/bap_disasm.ml +++ b/lib/bap_disasm/bap_disasm.ml @@ -15,6 +15,7 @@ type reg = Reg.t with bin_io, compare, sexp type imm = Imm.t with bin_io, compare, sexp type fmm = Fmm.t with bin_io, compare, sexp type (+'a,+'k) insn +type kind = Insn.Kind.t with bin_io, compare, sexp (** ARM instruction set *) diff --git a/lib/bap_disasm/bap_disasm_arm_env.ml b/lib/bap_disasm/bap_disasm_arm_env.ml index 7f7721053..7f677db9a 100644 --- a/lib/bap_disasm/bap_disasm_arm_env.ml +++ b/lib/bap_disasm/bap_disasm_arm_env.ml @@ -6,7 +6,7 @@ module Arm = Bap_disasm_arm let (%:) name typ = Var.create name typ -let nil = Arm.Reg.to_string `nil %: reg32_t +let nil = Arm.Reg.to_string `Nil %: reg32_t let make_register reg ty = Arm.Reg.to_string reg %: ty let reg32 reg = make_register reg reg32_t @@ -80,7 +80,7 @@ let var_of_ccr : Arm.Reg.ccr -> var = function | `ITSTATE -> itstate let of_reg : Arm.Reg.t -> var = function - | `nil -> nil + | `Nil -> nil | #Arm.Reg.gpr as reg -> var_of_gpr reg | #Arm.Reg.ccr as reg -> var_of_ccr reg diff --git a/lib/bap_disasm/bap_disasm_arm_lifter.ml b/lib/bap_disasm/bap_disasm_arm_lifter.ml index 54f300951..fa510cc30 100644 --- a/lib/bap_disasm/bap_disasm_arm_lifter.ml +++ b/lib/bap_disasm/bap_disasm_arm_lifter.ml @@ -661,7 +661,7 @@ let lift_mem ops insn = (* POS_SIGN_BIT *) | `LDRSHTi, [|dest1; _unknown; base; Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_pos (Op.Reg `nil) imm_off in + let offset = Mem_shift.mem_offset_reg_or_imm_pos (Op.Reg `Nil) imm_off in let insns = Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Signed H Ld in diff --git a/lib/bap_disasm/bap_disasm_arm_types.ml b/lib/bap_disasm/bap_disasm_arm_types.ml index 0319fe747..9481dee1b 100644 --- a/lib/bap_disasm/bap_disasm_arm_types.ml +++ b/lib/bap_disasm/bap_disasm_arm_types.ml @@ -36,7 +36,7 @@ with bin_io, compare, sexp, enumerate module Reg = struct - type nil = [ `nil ] + type nil = [ `Nil ] with bin_io, compare, sexp, enumerate (** General purpose registers *) diff --git a/lib/bap_disasm/bap_disasm_basic.ml b/lib/bap_disasm/bap_disasm_basic.ml index edcd282da..3590a1cdc 100644 --- a/lib/bap_disasm/bap_disasm_basic.ml +++ b/lib/bap_disasm/bap_disasm_basic.ml @@ -14,7 +14,7 @@ type mem = Mem.t with sexp_of type kind = Kind.t with compare, sexp type pred = [ - | `valid + | `Valid | kind ] with sexp,compare @@ -88,7 +88,7 @@ module Reg = struct let data = let reg_code = C.insn_op_reg_code dis.id ~insn ~oper in let reg_name = - if reg_code = 0 then lazy "nil" + if reg_code = 0 then lazy "Nil" else let off = C.insn_op_reg_name dis.id ~insn ~oper in lazy (Table.lookup dis.reg_table off) in @@ -215,17 +215,17 @@ with bin_io, compare, sexp let cpred_of_pred : pred -> C.pred = function - | `valid -> C.Is_true - | `conditional_branch -> C.Is_conditional_branch - | `unconditional_branch -> C.Is_unconditional_branch - | `indirect_branch -> C.Is_indirect_branch - | `return -> C.Is_return - | `call -> C.Is_call - | `barrier -> C.Is_barrier - | `terminator -> C.Is_terminator - | `may_affect_control_flow -> C.May_affect_control_flow - | `may_store -> C.May_store - | `may_load -> C.May_load + | `Valid -> C.Is_true + | `Conditional_branch -> C.Is_conditional_branch + | `Unconditional_branch -> C.Is_unconditional_branch + | `Indirect_branch -> C.Is_indirect_branch + | `Return -> C.Is_return + | `Call -> C.Is_call + | `Barrier -> C.Is_barrier + | `Terminator -> C.Is_terminator + | `May_affect_control_flow -> C.May_affect_control_flow + | `May_store -> C.May_store + | `May_load -> C.May_load module Insn = struct type ins_info = { @@ -291,6 +291,13 @@ let compare_insn (i1 : ('a,'b) insn) (i2 : ('a,'b) insn) = Insn.compare i1 i2 let sexp_of_insn : ('a,'b) insn -> Sexp.t = Insn.sexp_of_t + +type full_insn = (asm,kinds) insn + +let sexp_of_full_insn = sexp_of_insn + + + type (+'a,+'k) insns = (mem * ('a,'k) insn option) list module Pred = struct @@ -473,7 +480,7 @@ let insn_of_mem dis mem = if Mem.(max_addr mem' = max_addr mem) then Ok `finished else Mem.view mem ~from:Addr.(Mem.max_addr mem' ++ 1) >>| fun r -> `left r in - run ~stop_on:[`valid] dis mem ~return ~init + run ~stop_on:[`Valid] dis mem ~return ~init ~hit:(fun s mem' insn _ -> split mem' >>= fun r -> stop s (mem',Some insn,r)) ~invalid:(fun s mem' _ -> diff --git a/lib/bap_disasm/bap_disasm_basic.mli b/lib/bap_disasm/bap_disasm_basic.mli index 4a7b38ca0..7ba7a0b70 100644 --- a/lib/bap_disasm/bap_disasm_basic.mli +++ b/lib/bap_disasm/bap_disasm_basic.mli @@ -12,7 +12,7 @@ type kind = Bap_insn_kind.t with compare, sexp (** predicate to drive the disassembler *) type pred = [ - | `valid (** stop on first valid insn *) + | `Valid (** stop on first valid insn *) | kind (** stop on first insn of the specified kind *) ] with sexp @@ -22,6 +22,11 @@ type imm with bin_io, compare, sexp type fmm with bin_io, compare, sexp type (+'a,+'k) insn type (+'a,+'k) insns = (mem * ('a,'k) insn option) list +type empty (** set when information is not stored *) +type asm (** set when assembler information is stored *) +type kinds (** set when instruction kind information is stored *) + +type full_insn = (asm,kinds) insn with sexp_of @@ -43,13 +48,10 @@ type (+'a,+'k) insns = (mem * ('a,'k) insn option) list store extra information about instruction kind. Note: at some points you can have an access to this information - even if you don't enable it explicitely. + even if you don't enable it explicitly. *) type ('a,'k) t -type empty (** set when information is not stored *) -type asm (** set when assembler information is stored *) -type kinds (** set when instruction kind information is stored *) (** Disassembler state. @@ -109,15 +111,14 @@ val store_kinds : ('a,_) t -> ('a,kinds) t monad, like [Or_error], or [Lwt]. Otherwise, just use [ident] function and assume that ['s == 'r]. - In a process of disassembly three user provided callbacks are - invoked by the engine. To each callback at least two parameters - are passed: [state] and [user_data]. [user_data] is arbitrary data - of type ['s] with which the folding over the memory is actually + In a process of disassembly user provided callbacks are invoked by + the engine. To each callback at least two parameters are passed: + [state] and [user_data]. [user_data] is arbitrary data of type ['s] + with which the folding over the memory is actually performed. [state] incapsulates the current state of the disassembler, and provides continuation functions, namely [stop], - [next] and [back], that drives the process of - disassembly. This functions are used to pass control back to the - disassembler. + [next] and [back], that drives the process of disassembly. This + functions are used to pass control back to the disassembler. [stopped state user_data] is called when there is no more data to disassemble. This handler is optional and defaults to [stop]. diff --git a/lib/bap_disasm/bap_insn_kind.ml b/lib/bap_disasm/bap_insn_kind.ml index 994180a0c..683f88c9b 100644 --- a/lib/bap_disasm/bap_insn_kind.ml +++ b/lib/bap_disasm/bap_insn_kind.ml @@ -1,24 +1,24 @@ type branch = [ - | `conditional_branch - | `unconditional_branch - | `indirect_branch -] with compare, enumerate, sexp + | `Conditional_branch + | `Unconditional_branch + | `Indirect_branch +] with bin_io, compare, enumerate, sexp type affecting_control = [ | branch - | `return - | `call - | `barrier - | `terminator - | `may_affect_control_flow -] with compare, enumerate, sexp + | `Return + | `Call + | `Barrier + | `Terminator + | `May_affect_control_flow +] with bin_io, compare, enumerate, sexp type having_side_effect = [ - | `may_load - | `may_store -] with compare, enumerate, sexp + | `May_load + | `May_store +] with bin_io, compare, enumerate, sexp type t = [ | affecting_control | having_side_effect -] with compare, enumerate, sexp +] with bin_io, compare, enumerate, sexp diff --git a/lib/bap_image/bap_image.ml b/lib/bap_image/bap_image.ml index 44a20bcec..93753bfb4 100644 --- a/lib/bap_image/bap_image.ml +++ b/lib/bap_image/bap_image.ml @@ -7,8 +7,7 @@ open Image_internal_std open Backend type 'a m = 'a Or_error.t -type img = Backend.Img.t -type mem = Memory.t +type img = Backend.Img.t with sexp_of type path = string let backends : Backend.t String.Table.t = @@ -71,16 +70,17 @@ type words = { } type t = { - img : img; - name : string; + img : img ; + name : string option; + data : Bigstring.t; symbols : sym table; sections : sec table; - words : words; - memory_of_section : sec -> mem; - memory_of_symbol : (sym -> mem * mem seq) Lazy.t; - symbols_of_section : (sec -> sym seq) Lazy.t; - section_of_symbol : (sym -> sec) Lazy.t; -} + words : words sexp_opaque; + memory_of_section : sec -> mem sexp_opaque; + memory_of_symbol : (sym -> mem * mem seq) Lazy.t sexp_opaque; + symbols_of_section : (sec -> sym seq) Lazy.t sexp_opaque; + section_of_symbol : (sym -> sec) Lazy.t sexp_opaque; +} with sexp_of type result = (t * Error.t list) Or_error.t @@ -209,20 +209,23 @@ let of_img img data name = let section_of_symbol () : sym -> sec = Table.(link ~one_to:one Sym.hashable syms secs) in return ({ - img; name; symbols = syms; sections = secs; words; + img; name; data; symbols = syms; sections = secs; words; memory_of_section; memory_of_symbol = Lazy.from_fun memory_of_symbol; symbols_of_section = Lazy.from_fun symbols_of_section; section_of_symbol = Lazy.from_fun section_of_symbol; }, errs) +let data t = t.data + let of_backend backend data path : result = match String.Table.find backends backend with | None -> errorf "no such backend: '%s'" backend | Some load -> match load data with - | None -> errorf "%s: failed to read file «%s»" backend path | Some img -> of_img img data path + | None -> error "create image" (backend,`path path) + <:sexp_of> let autoload data path = let bs = String.Table.data backends in @@ -237,7 +240,7 @@ let create_image path ?backend data : result = | Some backend -> of_backend backend data path let of_bigstring ?backend data = - create_image "memory" ?backend data + create_image None ?backend data let of_string ?backend data = of_bigstring ?backend (Bigstring.of_string data) @@ -261,8 +264,8 @@ let readfile path : Bigstring.t = let create ?backend path : result = try_with (fun () -> readfile path) >>= fun data -> match backend with - | None -> autoload data path - | Some backend -> of_backend backend data path + | None -> autoload data (Some path) + | Some backend -> of_backend backend data (Some path) let entry_point t = Img.entry t.img let filename t = t.name diff --git a/lib/bap_image/bap_image.mli b/lib/bap_image/bap_image.mli index da646aee5..d4e3426db 100644 --- a/lib/bap_image/bap_image.mli +++ b/lib/bap_image/bap_image.mli @@ -8,7 +8,7 @@ open Image_internal_std (** {2 Type definitions} *) -type t (** image *) +type t with sexp_of (** image *) (** section *) type sec with bin_io, compare, sexp (** symbol *) @@ -39,14 +39,17 @@ val of_string : ?backend:string -> string -> result [data]. See {!create} for [backend] parameter. *) val of_bigstring : ?backend:string -> Bigstring.t -> result + (** {2 Attributes} *) val entry_point : t -> addr -val filename : t -> string +val filename : t -> string option val arch: t -> arch -val addr_size : t -> Word_size.t +val addr_size : t -> addr_size val endian : t -> endian +val data : t -> Bigstring.t + (** {2 Tables } *) val words : t -> size -> word table val sections : t -> sec table @@ -71,8 +74,9 @@ end module Sym : sig type t = sym include Regular with type t := t - val is_function : t -> bool val name : t -> string + val is_function : t -> bool + val is_debug : t -> bool end (** {2 Backend Interface} *) diff --git a/lib/bap_image/bap_memory.ml b/lib/bap_image/bap_memory.ml index 37e2ce6fb..156d5d210 100644 --- a/lib/bap_image/bap_memory.ml +++ b/lib/bap_image/bap_memory.ml @@ -52,6 +52,8 @@ let to_repr mem = { let sexp_of_t mem = Repr.sexp_of_t (to_repr mem) +let endian t = t.endian + (** [create_getters endian addr offset size data] creates a getters class according to the specified parameters. All parameters will be encapsulated inside a closure (cf, [getter] type). diff --git a/lib/bap_image/bap_memory.mli b/lib/bap_image/bap_memory.mli index e6bcc348e..22b630472 100644 --- a/lib/bap_image/bap_memory.mli +++ b/lib/bap_image/bap_memory.mli @@ -27,6 +27,9 @@ val first_byte : t -> t (** [last_byte m] returns last byte of [m] as a memory *) val last_byte : t -> t +(** returns the order of bytes in a word *) +val endian : t -> endian + (** [get word_size mem addr] reads memory value from the specified address. [word_size] default to [`r8] *) val get : ?disp:int -> ?index:int -> ?scale:size -> ?addr:addr -> t -> word Or_error.t diff --git a/lib/bap_image/bap_table.ml b/lib/bap_image/bap_table.ml index 945791d3a..f545079de 100644 --- a/lib/bap_image/bap_table.ml +++ b/lib/bap_image/bap_table.ml @@ -29,13 +29,13 @@ module Map = Mem.Map type mem = Mem.t with sexp_of type 'a cache = 'a Cache.t -type 'a map = 'a Map.t +type 'a map = 'a Map.t with sexp_of type 'a hashable = 'a Hashtbl.Hashable.t type 'a t = { - cache : 'a map -> 'a cache Lazy.t; + cache : 'a map -> 'a cache Lazy.t sexp_opaque; map : 'a map; -} +} with sexp_of type 'a ranged = ?start:mem (** defaults to the lowest mapped region *) diff --git a/lib/bap_image/bap_table.mli b/lib/bap_image/bap_table.mli index 80e710a39..d22163af9 100644 --- a/lib/bap_image/bap_table.mli +++ b/lib/bap_image/bap_table.mli @@ -1,7 +1,7 @@ open Core_kernel.Std open Bap_types.Std -type 'a t +type 'a t with sexp_of type mem = Bap_memory.t type 'a hashable = 'a Hashtbl.Hashable.t diff --git a/lib/bap_image/image_backend.ml b/lib/bap_image/image_backend.ml index 91ffaa088..6a6fa9b22 100644 --- a/lib/bap_image/image_backend.ml +++ b/lib/bap_image/image_backend.ml @@ -27,15 +27,17 @@ module Sym = struct } with bin_io, compare, fields, sexp end + + module Img = struct type t = { arch : arch; - addr_size: Word_size.t; + addr_size: addr_size; endian : endian; entry : addr; sections : Section.t * Section.t list; symbols : Sym.t list; - } with fields + } with bin_io, compare, fields, sexp end type t = Bigstring.t -> Img.t option diff --git a/lib/bap_image/image_elf.ml b/lib/bap_image/image_elf.ml index 34b6383a9..5129b5743 100644 --- a/lib/bap_image/image_elf.ml +++ b/lib/bap_image/image_elf.ml @@ -103,16 +103,16 @@ let create_section make_addr i es : Section.t Or_error.t option = | ok -> Some ok let addr_maker = function - | Word_size.W32 -> fun x -> Addr.of_int32 (Int64.to_int32_exn x) - | Word_size.W64 -> Addr.of_int64 + | `r32 -> Addr.of_int64 ~width:32 + | `r64 -> Addr.of_int64 ~width:64 let img_of_elf data elf : Img.t Or_error.t = let endian = match elf.e_data with | ELFDATA2LSB -> LittleEndian | ELFDATA2MSB -> BigEndian in let addr_size = match elf.e_class with - | ELFCLASS32 -> Word_size.W32 - | ELFCLASS64 -> Word_size.W64 in + | ELFCLASS32 -> `r32 + | ELFCLASS64 -> `r64 in let addr = addr_maker addr_size in let entry = addr elf.e_entry in let arch = match elf.e_machine with diff --git a/lib/bap_image/image_internal_std.ml b/lib/bap_image/image_internal_std.ml index c7c203666..197b8ddc9 100644 --- a/lib/bap_image/image_internal_std.ml +++ b/lib/bap_image/image_internal_std.ml @@ -4,10 +4,10 @@ open Bap_types.Std open Image_common module Table = Bap_table -type 'a table = 'a Table.t +type 'a table = 'a Table.t with sexp_of module Backend = Image_backend type backend = Backend.t module Memory = Bap_memory -type mem = Memory.t +type mem = Memory.t with sexp_of diff --git a/lib/bap_types/bap_addr.ml b/lib/bap_types/bap_addr.ml index 755d6b2f7..326d64573 100644 --- a/lib/bap_types/bap_addr.ml +++ b/lib/bap_types/bap_addr.ml @@ -58,14 +58,14 @@ module Make(Int : Core_int) = struct end module I32 = struct - let of_bv = Bitvector.to_int32 - let to_bv = Bitvector.of_int32 + let of_bv x = Bitvector.to_int32 x + let to_bv x = Bitvector.of_int32 x include Int32 end module I64 = struct - let of_bv = Bitvector.to_int64 - let to_bv = Bitvector.of_int64 + let of_bv x = Bitvector.to_int64 x + let to_bv x = Bitvector.of_int64 x include Int64 end diff --git a/lib/bap_types/bap_arch.ml b/lib/bap_types/bap_arch.ml index e92bc808b..5077162c6 100644 --- a/lib/bap_types/bap_arch.ml +++ b/lib/bap_types/bap_arch.ml @@ -15,14 +15,16 @@ module T = struct | X86_64 -> "X86_64" | ARM -> "ARM" + let pp ch arch = Format.fprintf ch "%s" (to_string arch) + let of_string s = match Fn.compose String.uppercase String.strip s with - | "X86" | "X86-32" | "X86_32" | "IA32" | "IA-32" | "I386" -> X86_32 - | "X86-64" | "X86_64" | "AMD64" | "x64" -> X86_64 - | "arm" | "ARM" -> ARM - | s -> failwithf "Arch.of_string: Unknown arch '%s'" s () + | "X86" | "X86-32" | "X86_32" | "IA32" | "IA-32" | "I386" -> Some X86_32 + | "X86-64" | "X86_64" | "AMD64" | "x64" -> Some X86_64 + | "arm" | "ARM" -> Some ARM + | s -> None end (* derive Identifiable interface from Core *) include T -include Identifiable.Make(T) +include Regular.Make(T) diff --git a/lib/bap_types/bap_arch.mli b/lib/bap_types/bap_arch.mli index c0693ccc7..523e37fa3 100644 --- a/lib/bap_types/bap_arch.mli +++ b/lib/bap_types/bap_arch.mli @@ -2,5 +2,7 @@ open Core_kernel.Std open Bap_common +val of_string : string -> arch option + (** [arch] type implements [Idenfifiable] interface *) -include Identifiable with type t := arch +include Regular with type t := arch diff --git a/lib/bap_types/bap_bitvector.ml b/lib/bap_types/bap_bitvector.ml index 7f106eed0..f61591aac 100644 --- a/lib/bap_types/bap_bitvector.ml +++ b/lib/bap_types/bap_bitvector.ml @@ -153,8 +153,8 @@ let b0 = create (Bignum.of_int 0) 1 let b1 = create (Bignum.of_int 1) 1 let of_bool v = if v then b1 else b0 -let of_int32 n = create (Bignum.of_int32 n) 32 -let of_int64 n = create (Bignum.of_int64 n) 64 +let of_int32 ?(width=32) n = create (Bignum.of_int32 n) width +let of_int64 ?(width=64) n = create (Bignum.of_int64 n) width let of_int ~width v = create (Bignum.of_int v) width let ones n = of_int (-1) ~width:n let zeros n = of_int (0) ~width:n @@ -168,6 +168,11 @@ let to_int = unop (safe Bignum.to_int) let to_int32 = unop (safe Bignum.to_int32) let to_int64 = unop (safe Bignum.to_int64) +let string_of_value ?(hex=true) = + unop (Bignum.format (if hex then "0x%x" else "%d")) + + + let of_binary ?width endian num = let num = match endian with | LittleEndian -> num diff --git a/lib/bap_types/bap_bitvector.mli b/lib/bap_types/bap_bitvector.mli index 908010054..3dfa696af 100644 --- a/lib/bap_types/bap_bitvector.mli +++ b/lib/bap_types/bap_bitvector.mli @@ -149,8 +149,8 @@ module Mono : Comparable with type t := t *) val of_bool : bool -> t val of_int : width:int -> int -> t -val of_int32 : int32 -> t -val of_int64 : int64 -> t +val of_int32 : ?width:int -> int32 -> t +val of_int64 : ?width:int -> int64 -> t (** { 3 Some predefined constant constructors } *) @@ -190,6 +190,7 @@ val of_binary : ?width:int -> endian -> string -> t val to_int : t -> int Or_error.t val to_int32 : t -> int32 Or_error.t val to_int64 : t -> int64 Or_error.t +val string_of_value : ?hex:bool -> t -> string (** [signed t] casts t to a signed type, so that any operations applied on [t] will be signed *) diff --git a/lib/bap_types/bap_common.ml b/lib/bap_types/bap_common.ml index c31b740c8..2975f0e35 100644 --- a/lib/bap_types/bap_common.ml +++ b/lib/bap_types/bap_common.ml @@ -121,7 +121,7 @@ module Arch = struct | X86_32 | X86_64 | ARM - with bin_io, compare, sexp, variants + with bin_io, compare, enumerate, sexp, variants end diff --git a/lib/bap_types/bap_regular.ml b/lib/bap_types/bap_regular.ml index 0dc2b3f80..3ec16de68 100644 --- a/lib/bap_types/bap_regular.ml +++ b/lib/bap_types/bap_regular.ml @@ -3,6 +3,10 @@ open Core_kernel.Std module type S = sig type t with bin_io, sexp, compare val to_string: t -> string + val str : unit -> t -> string + val pps : unit -> t -> string + val ppo : out_channel -> t -> unit + val ppb : Buffer.t -> t -> unit include Comparable.S_binable with type t := t include Hashable.S_binable with type t := t include Pretty_printer.S with type t := t @@ -21,6 +25,20 @@ module Make(M : sig pp str_formatter t; flush_str_formatter () + let pps () t = + to_string t + + let str = pps + + let ppo out x : unit = + pp Format.(formatter_of_out_channel out) x + + let ppb buf x : unit = + pp Format.(formatter_of_buffer buf) x + + + + let () = Pretty_printer.register (M.module_name ^ ".pp") include Comparable.Make_binable(M) include Hashable.Make_binable(M) diff --git a/lib/bap_types/bap_regular.mli b/lib/bap_types/bap_regular.mli index 5fe6c1a6b..44a493bbf 100644 --- a/lib/bap_types/bap_regular.mli +++ b/lib/bap_types/bap_regular.mli @@ -9,11 +9,37 @@ open Core_kernel.Std but doesn't require [of_string] function, that is usually much harder to implement in comparison with [to_string] function. Also, instead of [to_string] it requires [pp] function that can be - implemented much more efficiently and elegantly. + implemented much more efficiently and elegantly. From the [pp] + function the whole plethora of printing functions are derived: + [str], [pps], [ppo], [ppb] + *) module type S = sig type t with bin_io, sexp, compare val to_string : t -> string + + (** [str () t] is formatted output function that matches "%a" + conversion format specifier in functions, that prints to string, + e.g., [sprintf], [failwithf], [errorf] and, suprisingly all + [Lwt] printing function, including [Lwt_io.printf] and logging + (or any other function with type ('a,unit,string,...) + formatN`. Example: + + [Or_error.errorf "type %a is not valid for %a" + Type.str ty Exp.str exp] + *) + val str : unit -> t -> string + + (** synonym for [str] *) + val pps : unit -> t -> string + + (** will print to a standard [output_channel], useful for using in + [printf], [fprintf], etc. *) + val ppo : out_channel -> t -> unit + + (** will output to [Buffer], useful for [bprintf] *) + val ppb : Buffer.t -> t -> unit + include Comparable.S_binable with type t := t include Hashable.S_binable with type t := t include Pretty_printer.S with type t := t diff --git a/lib/bap_types/bap_types.ml b/lib/bap_types/bap_types.ml index aedf69539..7c970bcb2 100644 --- a/lib/bap_types/bap_types.ml +++ b/lib/bap_types/bap_types.ml @@ -26,8 +26,9 @@ - each type provides a [Set] with values of type [t]; - for those, courage enough there is also an AVL tree; - hastable is exposed via [Table] module; - - [to_string] and [pp] functions for pretty-printing; - - sexpable and binable interface. + - sexpable and binable interface; + - [to_string], [str],[pp], [ppo], [pps], [ppb] functions + for pretty-printing. And most types usually provides much more. @@ -35,7 +36,9 @@ You should start any code relying on [bap-types] library with a - [open Bap_types.Std] + [open Bap.Std] + + (or open [Bap_types.Std] if you're developing inside bap) It is a good idea, to open [Core_kenel.Std] before. You should never use modules that are not exposed by the [Std] module directly, diff --git a/lib_test/bap_disasm/test_disasm.ml b/lib_test/bap_disasm/test_disasm.ml index c14536db9..79803956b 100644 --- a/lib_test/bap_disasm/test_disasm.ml +++ b/lib_test/bap_disasm/test_disasm.ml @@ -14,18 +14,18 @@ let x86_64 = "x86_64", [ (* callq 942040 *) "\xe8\x47\xee\xff\xff", ["CALL64pcrel32"; "-0x11b9";], - [`call; `may_affect_control_flow]; + [`Call; `May_affect_control_flow]; (* mov 0x10(%rax),%eax *) - "\x8b\x40\x10", ["MOV32rm"; "EAX"; "RAX"; "0x1"; "nil"; "0x10"; "nil"], - [`may_load]; + "\x8b\x40\x10", ["MOV32rm"; "EAX"; "RAX"; "0x1"; "Nil"; "0x10"; "Nil"], + [`May_load]; (* add $0x8, %rsp *) "\x48\x83\xc4\x08", ["ADD64ri8"; "RSP"; "RSP"; "0x8"], []; (* "retq" *) "\xc3", ["RET"], - [`return; `barrier; `terminator; `may_affect_control_flow] + [`Return; `Barrier; `Terminator; `May_affect_control_flow] ] let memory_of_string data = diff --git a/lib_test/bap_image/test_image.ml b/lib_test/bap_image/test_image.ml index a38502b4d..56ea82619 100644 --- a/lib_test/bap_image/test_image.ml +++ b/lib_test/bap_image/test_image.ml @@ -8,8 +8,8 @@ open Image_common open Image_backend let create_addr = function - | W32 -> Addr.of_int ~width:32 - | W64 -> Addr.of_int ~width:64 + | `r32 -> Addr.of_int ~width:32 + | `r64 -> Addr.of_int ~width:64 let create_section ?(name=".test") @@ -53,7 +53,7 @@ let nonempty = function | [] -> invalid_arg "list should be non empty" | x :: xs -> x, xs -let create ?(addr_size=W32) ?(endian=LittleEndian) ~syms ss name = +let create ?(addr_size=`r32) ?(endian=LittleEndian) ~syms ss name = let sections = nonempty (ss addr_size name) in let symbols = syms in let arch = Arch.ARM in @@ -66,10 +66,10 @@ let backends = let le = LittleEndian and be = BigEndian in List.fold ~init:[] ~f:(fun acc (n,syms,secs) -> - (n^"_32LE", create ~addr_size:W32 ~endian:le ~syms secs) :: - (n^"_32BE", create ~addr_size:W32 ~endian:be ~syms secs) :: - (n^"_64LE", create ~addr_size:W64 ~endian:le ~syms secs) :: - (n^"_64BE", create ~addr_size:W64 ~endian:be ~syms secs) :: + (n^"_32LE", create ~addr_size:`r32 ~endian:le ~syms secs) :: + (n^"_32BE", create ~addr_size:`r32 ~endian:be ~syms secs) :: + (n^"_64LE", create ~addr_size:`r64 ~endian:le ~syms secs) :: + (n^"_64BE", create ~addr_size:`r64 ~endian:be ~syms secs) :: acc) [ "0-15", [], data seq [0, 15]; "16x4", [], data seq [0, 15; 16,31; 32,47; 48,63]; diff --git a/lwt/core_lwt.ml b/lwt/core_lwt.ml new file mode 100644 index 000000000..a0f88f6d5 --- /dev/null +++ b/lwt/core_lwt.ml @@ -0,0 +1,30 @@ +open Core_kernel.Std +open Core_lwt_container + + +module Std = struct + module Lwt = struct + include Lwt + include (Core_lwt_basic : Monad with type 'a t := 'a t) + include Core_lwt_extra + module Main = Lwt_main + module Unix = Lwt_unix + module Chan = Lwt_chan + module Mutex = Lwt_mutex + module IO = Lwt_io + module Pool = Core_lwt_pool + module Or_error = Core_lwt_or_error + module Stream = Core_lwt_stream + module Seq = Lift_sequence(Core_lwt_basic) + module List = struct + include Lift_list(Core_lwt_basic) + let partition_tf ?(how = `Sequential) t ~f = + match how with + | `Sequential -> Lwt_list.partition_s f t + | `Parallel -> Lwt_list.partition_p f t + end + end + include Core_lwt_basic + let (>>=?) = Lwt.Or_error.(>>=) + let (>>|?) = Lwt.Or_error.(>>|) +end diff --git a/lwt/core_lwt_basic.ml b/lwt/core_lwt_basic.ml new file mode 100644 index 000000000..4583faccf --- /dev/null +++ b/lwt/core_lwt_basic.ml @@ -0,0 +1,10 @@ +open Core_kernel.Std + +type 'a t = 'a Lwt.t +include Monad.Make(struct + type 'a t = 'a Lwt.t + let return = Lwt.return + let bind = Lwt.bind + let map m ~f = Lwt.map f m + let map = `Custom map + end) diff --git a/lwt/core_lwt_basic.mli b/lwt/core_lwt_basic.mli new file mode 100644 index 000000000..68a7799bb --- /dev/null +++ b/lwt/core_lwt_basic.mli @@ -0,0 +1,3 @@ +open Core_kernel.Std +type 'a t = 'a Lwt.t +include Monad with type 'a t := 'a Lwt.t diff --git a/lwt/core_lwt_container.ml b/lwt/core_lwt_container.ml new file mode 100644 index 000000000..937f6b864 --- /dev/null +++ b/lwt/core_lwt_container.ml @@ -0,0 +1,156 @@ +open Core_kernel.Std + +module Lift_sequence(M : Monad) = struct + open M + type 'a t = 'a Sequence.t + + let of_list = Sequence.of_list + + let foldi t ~init ~f = + Sequence.delayed_fold t ~init:(0, init) + ~f:(fun (i, b) a ~k -> f i b a >>= fun b -> k (i + 1, b)) + ~finish:(fun (_, b) -> return b) + + let fold t ~init ~f = + Sequence.delayed_fold t ~init + ~f:(fun b a ~k -> f b a >>= k) + ~finish:return + + + let all t = + fold t ~init:[] ~f:(fun accum d -> d >>| fun a -> a :: accum) + >>| fun res -> + Sequence.of_list (List.rev res) + + let all_unit t = fold t ~init:() ~f:(fun () v -> v) + + let rec find_map t ~f = + match Sequence.next t with + | None -> return None + | Some (v, rest) -> + f v >>= function + | None -> find_map rest ~f + | Some _ as some -> return some + + let find t ~f = + find_map t ~f:(fun elt -> f elt >>| fun b -> if b then Some elt else None) + + let maybe_force ?(how = `Sequential) t = + match how with + | `Parallel -> Sequence.force_eagerly t + | `Sequential -> t + + let iteri ?how t ~f = all_unit (maybe_force ?how (Sequence.mapi t ~f)) + + let iter ?how t ~f = iteri ?how t ~f:(fun _ a -> f a) + + let map ?how t ~f = all (maybe_force ?how (Sequence.map t ~f)) + + (* [filter_map] is implemented separately from [map] so that we never need to keep a + long stream of intermediate [None] results in the accumulator, only to later filter + them all out. *) + let filter_map ?how t ~f = + fold (maybe_force ?how (Sequence.map t ~f)) ~init:[] ~f:(fun acc maybe_v -> + maybe_v + >>| function + | None -> acc + | Some v -> v :: acc) + >>| fun s -> + Sequence.of_list (List.rev s) + + let filter ?how t ~f = + filter_map ?how t ~f:(fun a -> + f a + >>| function + | true -> Some a + | false -> None) + + let init ?how n ~f = map ?how (Sequence.init n ~f:Fn.id) ~f +end + +module Lift_list(M : Monad) = struct + module Sequence = Lift_sequence(M) + open M + + type 'a t = 'a List.t + + let foldi t ~init ~f = + Sequence.foldi (Sequence.of_list t) ~init ~f + + let fold t ~init ~f = foldi t ~init ~f:(fun _ a -> f a) + + let seqmap t ~f = + fold t ~init:[] ~f:(fun bs a -> f a >>| fun b -> b :: bs) + >>| List.rev + + let all ds = seqmap ds ~f:Fn.id + + let all_unit ds = ignore (fold ds ~init:() ~f:(fun () d -> d)) + + let iteri ?(how = `Sequential) t ~f = + match how with + | `Parallel -> all_unit (List.mapi t ~f) + | `Sequential -> foldi t ~init:() ~f:(fun i () x -> f i x) + + let iter ?how t ~f = iteri ?how t ~f:(fun _ a -> f a) + + let map ?(how = `Sequential) t ~f = + match how with + | `Parallel -> all (List.map t ~f) + | `Sequential -> seqmap t ~f + + let init ?how n ~f = map ?how (List.init n ~f:Fn.id) ~f + + let filter ?how t ~f = + map t ?how ~f + >>| fun bools -> + List.rev (List.fold2_exn t bools ~init:[] + ~f:(fun ac x b -> if b then x :: ac else ac)) + + let filter_map ?how t ~f = map t ?how ~f >>| List.filter_opt + + let rec find_map t ~f = + match t with + | [] -> return None + | hd :: tl -> + f hd >>= function + | None -> find_map tl ~f + | Some _ as some -> return some + + let find t ~f = + find_map t ~f:(fun elt -> f elt >>| fun b -> if b then Some elt + else None) + +end + +module Lift(M:Monad)(T : sig + type 'a t + val to_sequence : 'a t -> 'a Sequence.t + val of_sequence : 'a Sequence.t -> 'a t + + end) = +struct + module Seq = Lift_sequence(M) + open Seq + open T + open M + + type 'a monad = 'a M.t + type 'a t = 'a T.t + + let foldi t ~init ~f = foldi ~init (to_sequence t) ~f + let fold t ~init ~f = fold ~init (to_sequence t) ~f + let all t = Seq.all (to_sequence t) >>| of_sequence + let all_unit t = all_unit (to_sequence t) + let iter ?how t ~f = iter ?how (to_sequence t) ~f + let iteri ?how t ~f = iteri ?how (to_sequence t) ~f + let map ?how t ~f = Seq.map ?how (to_sequence t) ~f >>| of_sequence + + let init ?how n ~f = Seq.init ?how n ~f >>| of_sequence + let filter ?how t ~f = + filter ?how (to_sequence t) ~f >>| of_sequence + let filter_map ?how t ~f = + filter_map ?how (to_sequence t) ~f >>| of_sequence + let find_map t ~f = find_map (to_sequence t) ~f + let find t ~f = find (to_sequence t) ~f +end diff --git a/lwt/core_lwt_container.mli b/lwt/core_lwt_container.mli new file mode 100644 index 000000000..4e431b6dd --- /dev/null +++ b/lwt/core_lwt_container.mli @@ -0,0 +1,19 @@ +open Core_kernel.Std +open Core_lwt_container_intf + +module Lift_sequence(M : Monad) + : Monad_sequence with type 'a monad := 'a M.t + and type 'a t = 'a Sequence.t + +module Lift_list(M : Monad) + : Monad_sequence with type 'a monad := 'a M.t + and type 'a t := 'a list + + +module Lift(M:Monad)(T : sig + type 'a t + val to_sequence : 'a t -> 'a Sequence.t + val of_sequence : 'a Sequence.t -> 'a t + end) + : Monad_sequence with type 'a monad := 'a M.t + and type 'a t = 'a T.t diff --git a/lwt/core_lwt_container_intf.ml b/lwt/core_lwt_container_intf.ml new file mode 100644 index 000000000..34d5b7196 --- /dev/null +++ b/lwt/core_lwt_container_intf.ml @@ -0,0 +1,23 @@ +type how = [ `Parallel | `Sequential ] with sexp_of + +module type Monad_sequence = sig + type 'a monad + type 'a t + + val foldi : 'a t -> init:'b -> f:(int -> 'b -> 'a -> 'b monad) -> 'b monad + val fold : 'a t -> init:'b -> f:( 'b -> 'a -> 'b monad) -> 'b monad + + (** default [how] is [`Sequential] *) + val init : ?how:how -> int -> f:(int -> 'a monad) -> 'a t monad + val iter : ?how:how -> 'a t -> f:( 'a -> unit monad) -> unit monad + val iteri : ?how:how -> 'a t -> f:(int -> 'a -> unit monad) -> unit monad + val map : ?how:how -> 'a t -> f:( 'a -> 'b monad) -> 'b t monad + val filter : ?how:how -> 'a t -> f:( 'a -> bool monad) -> 'a t monad + val filter_map : ?how:how -> 'a t -> f:( 'a -> 'b option monad) -> 'b t monad + + val find : 'a t -> f:('a -> bool monad) -> 'a option monad + val find_map : 'a t -> f:('a -> 'b option monad) -> 'b option monad + + val all : 'a monad t -> 'a t monad + val all_unit : unit monad t -> unit monad +end diff --git a/lwt/core_lwt_extra.ml b/lwt/core_lwt_extra.ml new file mode 100644 index 000000000..d9514755a --- /dev/null +++ b/lwt/core_lwt_extra.ml @@ -0,0 +1,8 @@ +let catch t ~exn = Lwt.catch t exn +let try_bind t ~ok ~exn = Lwt.try_bind t ok exn +let protect t ~finally = Lwt.finalize t finally +let don't_wait = Lwt.ignore_result +let failwith msg = Lwt.fail (Failure msg) +let failwithf fmt = Printf.ksprintf failwith fmt +let invalid_arg msg = Lwt.fail (Invalid_argument msg) +let invalid_argf fmt = Printf.ksprintf invalid_arg fmt diff --git a/lwt/core_lwt_extra.mli b/lwt/core_lwt_extra.mli new file mode 100644 index 000000000..5ee0cd74f --- /dev/null +++ b/lwt/core_lwt_extra.mli @@ -0,0 +1,18 @@ +(** Overlay over some Lwt functions *) +open Lwt + +val catch : (unit -> 'a t) -> exn:(exn -> 'a t) -> 'a t + +val try_bind : (unit -> 'a t) -> ok:('a -> 'b t) -> exn:(exn -> 'b t) -> 'b t + +val protect : (unit -> 'a t) -> finally:(unit -> unit t) -> 'a t + +val don't_wait : 'a t -> unit + +val failwith : string -> 'a t + +val failwithf : ('a, unit, string, 'b Lwt.t) format4 -> 'a + +val invalid_arg : string -> 'a t + +val invalid_argf : ('a, unit, string, 'b Lwt.t) format4 -> 'a diff --git a/lwt/core_lwt_or_error.ml b/lwt/core_lwt_or_error.ml new file mode 100644 index 000000000..29c968a06 --- /dev/null +++ b/lwt/core_lwt_or_error.ml @@ -0,0 +1,69 @@ +open Core_kernel.Std +open Core_lwt_container + + +module Basic = struct + type 'a t = 'a Or_error.t Lwt.t + let bind m f = + Lwt.bind m (function + | Ok r -> f r + | Error _ as err -> Lwt.return err) + let map m ~f = Lwt.map (fun r -> Result.map r ~f) m + let map = `Custom map + let return x = Lwt.return (Ok x) +end + +module M = struct + type 'a t = 'a Or_error.t Lwt.t + include Monad.Make(Basic) +end + +type 'a t = 'a Or_error.t Lwt.t + +let fail err : _ t = Lwt.return (Error err) + +let errorf fmt = + Printf.ksprintf (fun msg -> Lwt.return (Or_error.error_string msg)) fmt + +let error msg x sox : _ t = + Lwt.return Or_error.(error msg x sox) + +let error_string msg : _ t = + Lwt.return Or_error.(error_string msg) + +let unimplemented msg : _ t = + Lwt.return Or_error.(unimplemented msg) + +let combine_errors errs : 'a list t = + let open Lwt in (* in Async they use sequential map *) + Lwt_list.map_p ident errs >>= fun errs -> + return Or_error.(combine_errors errs) + +let combine_errors_unit errs : unit t = + let open Lwt in + combine_errors errs >>= function + | Ok _ -> return (Ok ()) + | Error err -> Lwt.return (Error err) + +let ok_unit = Lwt.return (Ok ()) +let ok_true = Lwt.return (Ok true) +let ok_false = Lwt.return (Ok false) +let ok_nil = Lwt.return (Ok []) + +let try_with ?(backtrace=false) f : 'a t = + Lwt.try_bind f + (fun r -> Lwt.return (Ok r)) + (fun exn -> + let backtrace = if backtrace then Some `Get else None in + fail (Error.of_exn ?backtrace exn)) + +let of_exn ?backtrace exn = + fail (Error.of_exn ?backtrace exn) + +let try_with_join ?backtrace (f : unit -> 'a t) : 'a t = + let open Lwt in + try_with ?backtrace f >>= fun err -> return (Or_error.join err) + +module Seq = Lift_sequence(M) +module List = Lift_list(M) +include Monad.Make(Basic) diff --git a/lwt/core_lwt_or_error.mli b/lwt/core_lwt_or_error.mli new file mode 100644 index 000000000..1f31ce215 --- /dev/null +++ b/lwt/core_lwt_or_error.mli @@ -0,0 +1,36 @@ +open Core_kernel.Std +open Core_lwt_container_intf + +type 'a t = 'a Or_error.t Lwt.t + +include Monad with type 'a t := 'a t + +val fail : Error.t -> _ t +val of_exn : ?backtrace:[`Get | `This of string] -> exn -> _ t +val errorf : ('a, unit, string, _ t) format4 -> 'a +val error : string -> 'a -> ('a -> Sexp.t) -> _ t +val error_string : string -> _ t +val unimplemented : string -> _ t +val combine_errors : 'a t list -> 'a list t +val combine_errors_unit : unit t list -> unit t + +val ok_unit : unit t +val ok_true : bool t +val ok_false : bool t +val ok_nil : 'a list t + + +val try_with + : ?backtrace:bool (** default is [false] *) + -> (unit -> 'a Lwt.t) + -> 'a t + +val try_with_join + : ?backtrace:bool (** default is [false] *) + -> (unit -> 'a t) + -> 'a t + +module List : Monad_sequence with type 'a monad := 'a t + and type 'a t := 'a list +module Seq : Monad_sequence with type 'a monad := 'a t + and type 'a t := 'a Sequence.t diff --git a/lwt/core_lwt_pool.ml b/lwt/core_lwt_pool.ml new file mode 100644 index 000000000..2b11cacb5 --- /dev/null +++ b/lwt/core_lwt_pool.ml @@ -0,0 +1,2 @@ +include Lwt_pool +let use pool ~f = use pool f diff --git a/lwt/core_lwt_pool.mli b/lwt/core_lwt_pool.mli new file mode 100644 index 000000000..7e2812a73 --- /dev/null +++ b/lwt/core_lwt_pool.mli @@ -0,0 +1,28 @@ +(** Creating pools (for example pools of connections to a database). *) + +(** Instead of creating a new connection each time you need one, + keep a pool of opened connections and reuse opened connections + that are free. +*) + +type 'a t +(** [create n ?check ?validate f] creates a new pool with at most + [n] members. [f] is the function to use to create a new pool + member. + + An element of the pool is validated by the optional [validate] + function before its {!use}. Invalid elements are re-created. + + The optional function [check] is called after a [use] of an + element failed. It must call its argument exactly once with + [true] if the pool member is still valid and [false] + otherwise. *) +val create : + int -> + ?check : ('a -> (bool -> unit) -> unit) -> + ?validate : ('a -> bool Lwt.t) -> + (unit -> 'a Lwt.t) -> 'a t + +val use : 'a t -> f:('a -> 'b Lwt.t) -> 'b Lwt.t +(** [use p ~f] takes one free member of the pool [p] and gives it to + the function [f]. *) diff --git a/lwt/core_lwt_stream.ml b/lwt/core_lwt_stream.ml new file mode 100644 index 000000000..af91f3d57 --- /dev/null +++ b/lwt/core_lwt_stream.ml @@ -0,0 +1,68 @@ +open Core_kernel.Std +open Core_lwt_container_intf +open Lwt + + +module Lwt_or_error = Core_lwt_or_error + +include Lwt_stream + + +let wrap_push push = + (); fun msg -> Or_error.try_with (fun () -> push msg) + + +let create () = + let stream, push = create () in + stream, wrap_push push + +let create_with_reference () = + let stream, push, refer = create_with_reference () in + stream, wrap_push push, refer + + +let next s = Lwt_or_error.try_with (fun () -> next s) +let last_new s = Lwt_or_error.try_with (fun () -> last_new s) +let junk ?(n=1) s = njunk n s + +let map ~f = map f +let map_s ~f = map_s f +let filter ~f = filter f +let filter_s ~f = filter_s f +let filter_map ~f = filter_map f +let filter_map_s ~f = filter_map_s f +let map_list ~f = map_list f +let map_list_s ~f = map_list_s f +let fold s ~f ~init = fold (fun x z -> f z x) s init +let fold_s s ~f ~init = fold_s (fun x z -> f z x) s init +let iter ~f = iter f +let iter_s ~f = iter_s f +let iter_p ~f = iter_p f + +module Push_queue = struct + type 'a t = 'a bounded_push + let size q = q#size + let resize q = q#resize + let push q x = + Lwt_or_error.try_with (fun () -> q#push x) + + let push_all q xs = + Lwt_or_error.List.iter xs ~f:(push q) + + let wrap (q : 'a t) ~f : 'b t = object + method size = q#size + method resize n = q#resize n + method push x = q#push (f x) + method close = q#close + method count = q#count + method closed = q#closed + method blocked = q#blocked + method set_reference : 'a. 'a -> unit = q#set_reference + end + + let close q = q#close + let length q = q#size + let blocked q = q#blocked + let closed q = q#closed + let set_reference (q : 'a t) (x : 'b) = q#set_reference x +end diff --git a/lwt/core_lwt_stream.mli b/lwt/core_lwt_stream.mli new file mode 100644 index 000000000..865c81aad --- /dev/null +++ b/lwt/core_lwt_stream.mli @@ -0,0 +1,271 @@ +open Core_kernel.Std +open Core_lwt_container_intf +open Lwt_stream + +(** Data streams *) + +type 'a t = 'a Lwt_stream.t +(** Type of a stream holding values of type ['a] *) + +(** Type of sources for bounded push-streams. *) +type 'a bounded_push + + +(** Naming convention: in this module all function taking a function + which is applied to all element of the streams are suffixed by: + + - [_s] when the function returns a thread and calls are serialised + - [_p] when the function returns a thread and calls are parallelised +*) + +(** {2 Construction} *) + +(** [from f] creates an stream from the given input function. [f] is + called each time more input is needed, and the stream ends when + [f] returns [None]. *) +val from : (unit -> 'a option Lwt.t) -> 'a t + +(** [from_direct f] does the same as {!from} but with a function + that does not return a thread. It is better than wrapping [f] + into a function which return a thread. *) +val from_direct : (unit -> 'a option) -> 'a t + +val create : unit -> 'a t * ('a option -> unit Or_error.t) +(** [create ()] returns a new stream and a push function. *) + +val create_with_reference : unit -> 'a t * ('a option -> unit Or_error.t) * ('b -> unit) +(** [create_with_reference ()] returns a new stream and a push + function. The last function allows to set a reference to an + external source. This prevent the external source from being + garbage collected. + + For example, to convert a reactive event to a stream: + + {[ + let stream, push, set_ref = Lwt_stream.create_with_reference () in + set_ref (map_event push event) + ]} +*) + +(** [create_bounded size] returns a new stream and a bounded push + source. The stream can hold a maximum of [size] elements. When + this limit is reached, pushing a new element will block until + one is consumed. + + Note that you cannot clone or parse (with {!parse}) a bounded + stream. These functions will raise [Invalid_argument] if you try + to do so. + + It raises [Invalid_argument] if [size < 0]. *) +val create_bounded : int -> 'a t * 'a bounded_push + +(** [of_list l] creates a stream returning all elements of [l] *) +val of_list : 'a list -> 'a t + +(** [of_array a] creates a stream returning all elements of [a] *) +val of_array : 'a array -> 'a t + +(** [of_string str] creates a stream returning all characters of + [str] *) +val of_string : string -> char t + + +(** [clone st] clone the given stream. Operations on each stream + will not affect the other. + + For example: + + {[ + # let st1 = Lwt_stream.of_list [1; 2; 3];; + val st1 : int Lwt_stream.t = + # let st2 = Lwt_stream.clone st1;; + val st2 : int Lwt_stream.t = + # lwt x = Lwt_stream.next st1;; + val x : int = 1 + # lwt y = Lwt_stream.next st2;; + val y : int = 1 + ]} + + It raises [Invalid_argument] if [st] is a bounded + push-stream. *) +val clone : 'a t -> 'a t + +(** {2 Destruction} *) + +(** Returns the list of elements of the given stream. + Returns an empty list if the stream is closed, *) +val to_list : 'a t -> 'a list Lwt.t + + +(** Returns the word composed of all characters of the given + stream. Returns an empty string if the stream is closed. *) +val to_string : char t -> string Lwt.t + +(** {2 Data retreival} *) + +val peek : 'a t -> 'a option Lwt.t +(** [peek st] returns the first element of the stream, if any, + without removing it. *) + +val npeek : int -> 'a t -> 'a list Lwt.t +(** [npeek n st] returns at most the first [n] elements of [st], + without removing them. *) + +val get : 'a t -> 'a option Lwt.t +(** [get st] remove and returns the first element of the stream, if + any. *) + +val nget : int -> 'a t -> 'a list Lwt.t +(** [nget n st] remove and returns at most the first [n] elements of + [st]. *) + +val get_while : ('a -> bool) -> 'a t -> 'a list Lwt.t +val get_while_s : ('a -> bool Lwt.t) -> 'a t -> 'a list Lwt.t +(** [get_while f st] returns the longest prefix of [st] where all + elements satisfy [f]. *) + +(** [next st] remove and returns the next element of the stream, of + fails if the stream is empty. *) +val next : 'a t -> 'a Or_error.t Lwt.t + +(** [last_new st] returns the last element that can be obtained + without sleepping, or wait for one if no one is already + available. + + If fails if the stream has no more elements *) +val last_new : 'a t -> 'a Or_error.t Lwt.t + +(** [junk st] remove the first element of [st]. *) +val junk : ?n:int -> 'a t -> unit Lwt.t + +(** [junk_while f st] removes all elements at the beginning of the + streams which satisfy [f]. *) +val junk_while : ('a -> bool) -> 'a t -> unit Lwt.t +val junk_while_s : ('a -> bool Lwt.t) -> 'a t -> unit Lwt.t + +(** [junk_old st] removes all elements that are ready to be read + without yeilding from [st]. + + For example the [read_password] function of [Lwt_read_line] use + that to junk key previously typed by the user. +*) +val junk_old : 'a t -> unit Lwt.t + +(** [get_available st] returns all available elements of [l] without + blocking *) +val get_available : 'a t -> 'a list + +(** [get_available_up_to n st] returns up to [n] elements of [l] + without blocking *) +val get_available_up_to : int -> 'a t -> 'a list + +(** [is_empty st] returns whether the given stream is empty *) +val is_empty : 'a t -> bool Lwt.t + +val on_terminate : 'a t -> (unit -> unit) -> unit +(** [on_terminate st f] executes [f] when the end of the stream [st] + is reached. Note that the stream may still contains elements if + {!peek} or similar was used. *) + +(** {2 Stream transversal} *) + +(** Note: all the following functions are destructive. + + For example: + + {[ + # let st1 = Lwt_stream.of_list [1; 2; 3];; + val st1 : int Lwt_stream.t = + # let st2 = Lwt_stream.map string_of_int st1;; + val st2 : string Lwt_stream.t = + # lwt x = Lwt_stream.next st1;; + val x : int = 1 + # lwt y = Lwt_stream.next st2;; + val y : string = "2" + ]} +*) + + + +(** [choose l] creates an stream from a list of streams. The + resulting stream will returns elements returned by any stream of + [l] in an unspecified order. *) +val choose : 'a t list -> 'a t + +(** [combine s1 s2] combine two streams. The stream will ends when + the first stream ends. *) +val combine : 'a t -> 'b t -> ('a * 'b) t + +(** [append s1 s2] returns a stream which returns all elements of + [s1], then all elements of [s2] *) +val append : 'a t -> 'a t -> 'a t + +(** [concat st] returns the concatenation of all streams of [st]. *) +val concat : 'a t t -> 'a t + +(** [flatten st = map_list (fun l -> l) st] *) +val flatten : 'a list t -> 'a t + +(** [map f st] maps the value returned by [st] with [f] *) +val map : f:('a -> 'b) -> 'a t -> 'b t +val map_s : f:('a -> 'b Lwt.t) -> 'a t -> 'b t + +val filter : f:('a -> bool) -> 'a t -> 'a t +val filter_s : f:('a -> bool Lwt.t) -> 'a t -> 'a t + +val filter_map : f:('a -> 'b option) -> 'a t -> 'b t +val filter_map_s : f:('a -> 'b option Lwt.t) -> 'a t -> 'b t + +(** [map_list f st] applies [f] on each element of [st] and flattens + the lists returned *) +val map_list : f:('a -> 'b list) -> 'a t -> 'b t +val map_list_s : f:('a -> 'b list Lwt.t) -> 'a t -> 'b t + +val fold : 'a t -> f:('b -> 'a -> 'b) -> init:'b -> 'b Lwt.t +val fold_s : 'a t -> f:('b -> 'a -> 'b Lwt.t) -> init:'b -> 'b Lwt.t +(** [fold f s x] fold_like function for streams. *) + +(** [iter f s] iterates over all elements of the stream *) +val iter : f:('a -> unit) -> 'a t -> unit Lwt.t +val iter_p : f:('a -> unit Lwt.t) -> 'a t -> unit Lwt.t +val iter_s : f:('a -> unit Lwt.t) -> 'a t -> unit Lwt.t + + +module Push_queue : sig + + type 'a t = 'a bounded_push + + val wrap : 'a t -> f:('b -> 'a) -> 'b t + + (** Size of the stream. *) + val size : _ t -> int + + (** Change the size of the stream queue. Note that the new size + can smaller than the current stream queue size. + + It raises [Invalid_argument] if [size < 0]. *) + val resize : _ t -> int -> unit + + (** Pushes a new element to the stream. If the stream is full then + it will block until one element is consumed. Fails if another thread + is already blocked on {!push}. *) + val push : 'a t -> 'a -> unit Or_error.t Lwt.t + + val push_all : 'a t -> 'a list -> unit Or_error.t Lwt.t + + (** Closes the stream. Any thread currently blocked on {!push} + will fail with {!Closed}. *) + val close : _ t -> unit + + (** Number of elements in the stream queue. *) + val length : _ t -> int + + (** Is a thread is blocked on {!push} ? *) + val blocked : _ t -> bool + + (** Is the stream closed ? *) + val closed : _ t -> bool + + (** Set the reference to an external source. *) + val set_reference : _ t -> 'b -> unit +end diff --git a/myocamlbuild.ml.in b/myocamlbuild.ml.in index 99bc08485..1784542d0 100644 --- a/myocamlbuild.ml.in +++ b/myocamlbuild.ml.in @@ -41,5 +41,15 @@ let dispatch = function ["ocamldep"; "compile"; "doc"]; | _ -> () -let () = Ocamlbuild_plugin.dispatch (fun hook -> - dispatch hook; dispatch_default hook) + +let mark_tags () = + let open Ocamlbuild_plugin in + mark_tag_used "pkg_core_bench"; + mark_tag_used "tests"; + mark_tag_used "pkg_piqirun" + + +let () = + mark_tags (); + Ocamlbuild_plugin.dispatch (fun hook -> + dispatch hook; dispatch_default hook) diff --git a/preconfig.ml b/preconfig.ml index d66db9b07..1794932ca 100644 --- a/preconfig.ml +++ b/preconfig.ml @@ -21,12 +21,12 @@ let search_stop dst = let patch (src_file,dst_file) = let src = open_in_bin src_file in let patch_len = in_channel_length src in - let patch = String.create patch_len in + let patch = String.make patch_len '\x00' in really_input src patch 0 patch_len; close_in src; let dst = open_in_bin dst_file in let dst_len = in_channel_length dst in - let buf = String.create (patch_len + dst_len) in + let buf = String.make (patch_len + dst_len) '\x00' in really_input dst buf 0 dst_len; close_in dst; let pos = try search_stop buf with End_of_file -> dst_len in diff --git a/python/__init__.py b/python/__init__.py new file mode 100644 index 000000000..a287215af --- /dev/null +++ b/python/__init__.py @@ -0,0 +1,104 @@ +r"""Python inteface to BAP. + +In a few keystrokes: + + >>> import bap + >>> print '\n'.join(insn.asm for insn in bap.disasm("\x48\x83\xec\x08")) + decl %eax + subl $0x8, %esp + +A more complex example: + + >>> img = bap.image('coreutils_O0_ls') + >>> sym = img.get_symbol('main') + >>> print '\n'.join(insn.asm for insn in bap.disasm(sym)) + push {r11, lr} + add r11, sp, #0x4 + sub sp, sp, #0xc8 + ... ... + +Bap package exposes two functions: + +#. ``disasm`` returns a disassembly of the given object +#. ``image`` loads given file + +Disassembling things +==================== + +``disasm`` is a swiss knife for disassembling things. It takes either a +string object, or something returned by an ``image`` function, e.g., +images, sections and symbols. + +``disasm`` function returns a generator yielding instances of class +``Insn`` defined in module :mod:`asm`. It has the following attributes: + +* name - instruction name, as undelying backend names it +* addr - address of the first byte of instruction +* size - overall size of the instruction +* operands - list of instances of class ``Op`` +* asm - assembler string, in native assembler +* kinds - instruction meta properties, see :mod:`asm` +* target - instruction lifter to a target platform, e.g., see :mod:`arm` +* bil - a list of BIL statements, describing instruction semantics. + +``disasm`` function also accepts a bunch of keyword arguments, to name a few: + +* server - either an url to a bap server or a dictionay containing port + and/or executable name +* arch +* endian (instance of ``bil.Endian``) +* addr (should be an instance of type ``bil.Int``) +* backend +* stop_conditions + +All attributes are self-describing I hope. ``stop_conditions`` is a list of +``Kind`` instances defined in :mod:`asm`. If disassembler meets instruction +that is instance of one of this kind, it will stop. + +Reading files +============= + +To read and analyze file one should load it with ``image`` +function. This function returns an instance of class ``Image`` that +allows one to discover information about the file, and perform different +queries. It has function ``get_symbol`` function to lookup symbol in +file by name, and the following set of attributes (self describing): + +* arch +* entry_point +* addr_size +* endian +* file (file name) +* sections + +Sections is a list of instances of ``Section`` class, that also has a +``get_symbol`` function and the following attributes: + +* name +* perm (a list of ['r', 'w', 'x']) +* addr +* size +* memory +* symbols + +Symbols is a list of, you get it, ``Symbol`` class, each having the +following attributes: + +* name +* is_function +* is_debug +* addr +* chunks + +Where chunks is a list of instances of ``Memory`` class, each having the +following attributes: + +* addr +* size +* data + +Where data is actual string of bytes. +""" +__all__ = ['disasm', 'image'] + +from .bap import disasm, image diff --git a/python/adt.py b/python/adt.py new file mode 100644 index 000000000..97b0ddf17 --- /dev/null +++ b/python/adt.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python +""" +Algebraic Data Types (ADT) is used to represent two kinds of things: + +1. A discrimintated union of types, called sum +2. A combination of some types, called product. + +# Sum types + +Sum types represents a concept of generalizing. For example, +on ARM R0 and R1 are all general purpose registers (GPR). Also on ARM +we have Condition Code registers (CCR) : + + class Reg(ADT) : pass + class GPR(Reg) : pass + class CCR(Reg) : pass + class R0(GPR) : pass + class R1(GPR) : pass + + +That states that a register can be either R0 or R1, but not both. + +# Product types + +Product types represent a combination of other types. For example, +mov instruction has two arguments, and the arguments are also ADT's by +itself: + + def Insn(ADT) : pass + def Mov(Insn) : pass + + Mov(R0(), R1()) + + +# Comparison + +ADT objects are compared structurally: if they have the same class and +and their values are structurally the same, then they are equal, i.e., + + assert(R0() == R0()) + assert(R1() != R0()) + +""" + +from collections import Iterable + +class ADT(object): + """ Algebraic Data Type. + + This is a base class for all ADTs. ADT represented by a tuple of arguments, + stored in a val field. Arguments should be instances of ADT class, or numbers, + or strings. Empty set of arguments is permitted. + A one-tuple is automatically untupled, i.e., `Int(12)` has value `12`, not `(12,)`. + For convenience, a name of the constructor is provided in `name` field. + + A structural comparison is provided. + + """ + def __init__(self, *args): + self.name = self.__class__.__name__ + self.val = args if len(args) != 1 else args[0] + + def __cmp__(self,other): + return self.__dict__.__cmp__(other.__dict__) + + def __repr__(self): + def qstr(x): + if isinstance(x, int) or isinstance(x, ADT): + return str(x) + else: + return '"{0}"'.format(x) + def args(): + if isinstance(self.val, tuple): + return ", ".join(qstr(x) for x in self.val) + else: + return qstr(self.val) + + return "{0}({1})".format(self.name, args()) + + +class Visitor(object): + """ ADT Visitor. + This class helps to perform iterations over arbitrary ADTs. + + This visitor supports, subtyping, i.e. you can match not only on + leaf constructors, but also on their bases. For example, with + the `Exp` hierarchy, provided below, you can visit all binary operators, + by overriding `visit_BinOp` method. See `run` method description for + more infromation. + """ + + def visit_ADT(self, adt): + """Default visitor. + + This method will be called for those data types that has + no specific visitors. It will recursively descent into all + ADT values. + """ + if isinstance(adt.val, tuple): + for e in adt.val: + self.run(e) + + def run(self, adt): + """ADT.run(adt-or-iterable) -> None + + if adt is iterable, the run is called recursively for each member + of adt. + + Otherwise, for an ADT of type C the method `visit_C` is looked up in the + visitors methods dictionary. If it doesn't exist, then `visit_B` is + looked up, where `D` is the base class of `C`. The process continues, + until the method is found. This is guaranteed to terminate, + since visit_ADT method is defined. + + Note: Non ADTs will be silently ignored. + + Once the method is found it is called. It is the method's responsiblity + to recurse into sub-elements, e.g., call run method. + + For example, suppose that we want to count negative values in a given + BIL expression: + + class CountNegatives(Visitor): + def __init__(self): + self.neg = False + self.count = 0 + + def visit_Int(self, int): + if int.val < 0 and not self.neg \ + or int.val > 0 and self.neg: + self.count += 1 + + def visit_NEG(self, op): + was = self.neg + self.neg = not was + self.run(op.val) + self.neg = was + + We need to keep track on the unary negation operator, and, of + course, we need to look for immediates, so we override two methods: + visit_Int for Int constructor and visit_NEG for counting unary minuses. + (Actually we should count for bitwise NOT operation also, since it will + change the sign bit also, but lets forget about it for the matter of the + excercise (and it can be easily fixed just by matching visit_UnOp)). + + When we hit visit_NEG we toggle current sign, storing its previous value + and recurse into the operand. After we return from the recursion, we restore + the sign. + """ + if isinstance(adt, Iterable): + for s in adt: + self.run(s) + if isinstance(adt, ADT): + for c in adt.__class__.mro(): + name = ("visit_%s" % c.__name__) + fn = getattr(self, name, None) + if fn is not None: + return fn(adt) + + +if __name__ == "__main__": + class Fruit(ADT) : pass + class Bannana(Fruit) : pass + class Apple(Fruit) : pass + + assert(Bannana() == Bannana()) + assert(Bannana() != Apple()) + assert( Apple() < Bannana()) diff --git a/python/arm.py b/python/arm.py new file mode 100644 index 000000000..a3d7b1e08 --- /dev/null +++ b/python/arm.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python + +"""Lifted ARM instruction""" + +from adt import * +from asm import * +from bil import * + +class Reg(ADT) : pass +class Nil(Reg) : pass +class GPR(Reg) : pass +class CCR(Reg) : pass + +class R0(GPR) : pass +class R1(GPR) : pass +class R2(GPR) : pass +class R3(GPR) : pass +class R4(GPR) : pass +class R5(GPR) : pass +class R6(GPR) : pass +class R7(GPR) : pass +class R8(GPR) : pass +class R9(GPR) : pass +class R10(GPR) : pass +class R11(GPR) : pass +class R12(GPR) : pass +class LR(GPR) : pass +class PC(GPR) : pass +class SP(GPR) : pass + +class CPSR(CCR) : pass +class SPSR(CCR) : pass +class ITSTATE(CCR) : pass + +class Insn(ADT) : pass +class Move(Insn) : pass +class Bits(Insn) : pass +class Mult(Insn) : pass +class Mem(Insn) : pass +class Branch(Insn) : pass +class Special(Insn) : pass + +class ADCri(Move) : pass +class ADCrr(Move) : pass +class ADCrsi(Move) : pass +class ADCrsr(Move) : pass +class ADDri(Move) : pass +class ADDrr(Move) : pass +class ADDrsi(Move) : pass +class ADDrsr(Move) : pass +class ANDri(Move) : pass +class ANDrr(Move) : pass +class ANDrsi(Move) : pass +class ANDrsr(Move) : pass +class BICri(Move) : pass +class BICrr(Move) : pass +class BICrsi(Move) : pass +class BICrsr(Move) : pass +class CMNri(Move) : pass +class CMNzrr(Move) : pass +class CMNzrsi(Move) : pass +class CMNzrsr(Move) : pass +class CMPri(Move) : pass +class CMPrr(Move) : pass +class CMPrsi(Move) : pass +class CMPrsr(Move) : pass +class EORri(Move) : pass +class EORrr(Move) : pass +class EORrsi(Move) : pass +class EORrsr(Move) : pass +class MOVTi16(Move) : pass +class MOVi(Move) : pass +class MOVi16(Move) : pass +class MOVr(Move) : pass +class MOVsi(Move) : pass +class MOVsr(Move) : pass +class MVNi(Move) : pass +class MVNr(Move) : pass +class MVNsi(Move) : pass +class MVNsr(Move) : pass +class ORRri(Move) : pass +class ORRrr(Move) : pass +class ORRrsi(Move) : pass +class ORRrsr(Move) : pass +class RSBri(Move) : pass +class RSBrr(Move) : pass +class RSBrsi(Move) : pass +class RSBrsr(Move) : pass +class RSCri(Move) : pass +class RSCrr(Move) : pass +class RSCrsi(Move) : pass +class RSCrsr(Move) : pass +class SBCri(Move) : pass +class SBCrr(Move) : pass +class SBCrsi(Move) : pass +class SBCrsr(Move) : pass +class SUBri(Move) : pass +class SUBrr(Move) : pass +class SUBrsi(Move) : pass +class SUBrsr(Move) : pass +class TEQri(Move) : pass +class TEQrr(Move) : pass +class TEQrsi(Move) : pass +class TEQrsr(Move) : pass +class TSTri(Move) : pass +class TSTrr(Move) : pass +class TSTrsi(Move) : pass +class TSTrsr(Move) : pass + +class BFC(Bits) : pass +class BFI(Bits) : pass +class PKHTB(Bits) : pass +class RBIT(Bits) : pass +class SBFX(Bits) : pass +class SWPB(Bits) : pass +class SXTAB(Bits) : pass +class SXTAH(Bits) : pass +class SXTB(Bits) : pass +class SXTH(Bits) : pass +class UBFX(Bits) : pass +class UXTAB(Bits) : pass +class UXTAH(Bits) : pass +class UXTB(Bits) : pass +class UXTH(Bits) : pass +class REV(Bits) : pass +class REV16(Bits) : pass +class CLZ(Bits) : pass + + +class MLA(Mult) : pass +class MLS(Mult) : pass +class MUL(Mult) : pass +class SMLABB(Mult) : pass +class SMLAD(Mult) : pass +class SMLAL(Mult) : pass +class SMLALBT(Mult) : pass +class SMLAWB(Mult) : pass +class SMUAD(Mult) : pass +class SMULBB(Mult) : pass +class SMULL(Mult) : pass +class SMULTB(Mult) : pass +class UMLAL(Mult) : pass +class UMULL(Mult) : pass + +class LDMDA(Mem) : pass +class LDMDA_UPD(Mem) : pass +class LDMDB(Mem) : pass +class LDMDB_UPD(Mem) : pass +class LDMIA(Mem) : pass +class LDMIA_UPD(Mem) : pass +class LDMIB(Mem) : pass +class LDMIB_UPD(Mem) : pass +class STMDA(Mem) : pass +class STMDA_UPD(Mem) : pass +class STMDB(Mem) : pass +class STMDB_UPD(Mem) : pass +class STMIA(Mem) : pass +class STMIA_UPD(Mem) : pass +class STMIB(Mem) : pass +class STMIB_UPD(Mem) : pass +class LDRBT_POST_IMM(Mem) : pass +class LDRBT_POST_REG(Mem) : pass +class LDRB_POST_IMM(Mem) : pass +class LDRB_POST_REG(Mem) : pass +class LDRB_PRE_IMM(Mem) : pass +class LDRB_PRE_REG(Mem) : pass +class LDRBi12(Mem) : pass +class LDRBrs(Mem) : pass +class LDRD(Mem) : pass +class LDRD_POST(Mem) : pass +class LDRD_PRE(Mem) : pass +class LDREX(Mem) : pass +class LDREXB(Mem) : pass +class LDREXD(Mem) : pass +class LDREXH(Mem) : pass +class LDRH(Mem) : pass +class LDRHTr(Mem) : pass +class LDRH_POST(Mem) : pass +class LDRH_PRE(Mem) : pass +class LDRSB(Mem) : pass +class LDRSBTr(Mem) : pass +class LDRSB_POST(Mem) : pass +class LDRSB_PRE(Mem) : pass +class LDRSH(Mem) : pass +class LDRSHTi(Mem) : pass +class LDRSHTr(Mem) : pass +class LDRSH_POST(Mem) : pass +class LDRSH_PRE(Mem) : pass +class LDRT_POST_REG(Mem) : pass +class LDR_POST_IMM(Mem) : pass +class LDR_POST_REG(Mem) : pass +class LDR_PRE_IMM(Mem) : pass +class LDR_PRE_REG(Mem) : pass +class LDRi12(Mem) : pass +class LDRrs(Mem) : pass +class STRBT_POST_IMM(Mem) : pass +class STRBT_POST_REG(Mem) : pass +class STRB_POST_IMM(Mem) : pass +class STRB_POST_REG(Mem) : pass +class STRB_PRE_IMM(Mem) : pass +class STRB_PRE_REG(Mem) : pass +class STRBi12(Mem) : pass +class STRBrs(Mem) : pass +class STRD(Mem) : pass +class STRD_POST(Mem) : pass +class STRD_PRE(Mem) : pass +class STREX(Mem) : pass +class STREXB(Mem) : pass +class STREXD(Mem) : pass +class STREXH(Mem) : pass +class STRH(Mem) : pass +class STRHTr(Mem) : pass +class STRH_POST(Mem) : pass +class STRH_PRE(Mem) : pass +class STRT_POST_REG(Mem) : pass +class STR_POST_IMM(Mem) : pass +class STR_POST_REG(Mem) : pass +class STR_PRE_IMM(Mem) : pass +class STR_PRE_REG(Mem) : pass +class STRi12(Mem) : pass +class STRrs(Mem) : pass + +class BL(Branch) : pass +class BLX(Branch) : pass +class BLX_pred(Branch) : pass +class BLXi(Branch) : pass +class BL_pred(Branch) : pass +class BX(Branch) : pass +class BX_RET(Branch) : pass +class BX_pred(Branch) : pass +class Bcc(Branch) : pass + +class CPS2p(Special) : pass +class DMB(Special) : pass +class DSB(Special) : pass +class HINT(Special) : pass +class MRS(Special) : pass +class MSR(Special) : pass +class PLDi12(Special) : pass +class SVC(Special) : pass + + +def loads(s): + return eval(s) diff --git a/python/asm.py b/python/asm.py new file mode 100644 index 000000000..20afc5037 --- /dev/null +++ b/python/asm.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python + +"""Disassembled instuctions""" + +from adt import ADT + +class Kind(ADT) : pass +class Having_side_effects(Kind) : pass +class Affecting_control(Kind) : pass +class Branch(Affecting_control) : pass +class Conditional_branch(Branch) : pass +class Unconditional_branch(Branch) : pass +class Indirect_branch(Branch) : pass +class Return(Affecting_control) : pass +class Call(Affecting_control) : pass +class Barrier(Affecting_control) : pass +class Terminator(Affecting_control): pass +class May_affect_control_flow(Affecting_control) : pass +class May_load(Having_side_effects) : pass +class May_store(Having_side_effects) : pass +class Valid(Kind) : pass + + +def eval_if_not_adt(s): + if isinstance(s, ADT): + return s + else: + return eval(s) + + +def map_eval(ss): + return [eval_if_not_adt(s) for s in ss] + + + +class Insn(object) : + def __init__(self, name, addr, size, asm, kinds, operands, target=None, bil=[], **kw): + self.name = name + self.addr = int(addr) + self.size = int(size) + self.operands = map_eval(operands) + self.asm = str(asm) + self.kinds = map_eval(kinds) + self.target = target + self.bil = bil + self.__dict__.update(kw) + + def __repr__(self): + return 'Insn("{name}", {addr:#010x}, {size}, "{asm}", {kinds}, {operands}, {target}, {bil})'.\ + format(**self.__dict__) + +class Op(ADT) : pass +class Reg(Op) : pass +class Imm(Op) : pass +class Fmm(Op) : pass + + +if __name__ == "__main__": + print Reg('R0') + for insn in ["Reg(\"R0\")", "Imm(5)", "Imm(14)", "Reg(\"Nil\")", "Reg(\"Nil\")"]: + print eval(insn) diff --git a/python/bap.py b/python/bap.py new file mode 100644 index 000000000..f7359ea45 --- /dev/null +++ b/python/bap.py @@ -0,0 +1,342 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os, time, atexit +import requests +from subprocess import Popen +from mmap import mmap +from urlparse import urlparse, parse_qs +from tempfile import NamedTemporaryFile +import json +import adt, arm, asm, bil + + +__all__ = ["disasm", "image"] + +DEBUG_LEVEL = ["Critical", "Error"] + +instance = None + +def del_instance(): + if instance is not None: + instance.close() + +def get_instance(**kwargs): + global instance + if 'server' in kwargs or instance is None: + if instance is not None: + instance.close() + args = kwargs.get('server', {}) + instance = Bap(args) + return instance + +atexit.register(del_instance) + +def disasm(obj, **kwargs): + r""" disasm(obj) disassembles provided object. + Returns a generator object yield instructions. + + """ + def ret(obj): + return get_instance(**kwargs).insns(obj) + if isinstance(obj, Id): + return ret(obj) + elif isinstance(obj, Resource): + return ret(obj.ident) + else: + return ret(load_chunk(obj, **kwargs)) + +def image(f, **kwargs): + bap = get_instance(**kwargs) + if os.path.isfile(f) and not os.path.isabs(f): + f = os.path.abspath(f) + return Image(bap.load_file(f), bap) + +def load_chunk(s, **kwargs): + return get_instance(**kwargs).load_chunk(s, **kwargs) + + +class Resource(object): + def __init__(self, name, ident, bap): + self.ident = Id(ident) + self.bap = bap + self.msg = None + self._name = name + + def load(self): + if self.msg is None: + self.msg = self.bap.get_resource(self.ident) + if not self._name in self.msg: + if 'error' in msg: + raise ServerError(response) + else: + msg = "Expected {0} msg but got {1}".format( + self._name, msg) + raise RuntimeError(msg) + + def get(self, child): + self.load() + return self.msg[self._name].get(child) + + +class Image(Resource): + def __init__(self, ident, bap): + super(Image,self).__init__('image', ident, bap) + + def load_sections(self): + ss = self.get('sections') + self.sections = [Section(s, self) for s in ss] + + def get_symbol(self, name, d=None): + for sec in self.sections: + sym = sec.get_symbol(name, d) + if sym is not d: + return sym + return d + + def __getattr__(self, name): + if name == 'sections': + self.load_sections() + return self.sections + else: + return self.get(name) + +class Section(Resource): + def __init__(self, ident, parent): + super(Section, self).__init__('section', ident, parent.bap) + self.parent = parent + + def load_symbols(self): + self.symbols = [Symbol(s, self) for s in self.get('symbols')] + + def get_symbol(self, name, d=None): + try: + return (s for s in self.symbols if s.name == name).next() + except StopIteration: + return d + + def __getattr__(self, name): + if name == 'symbols': + self.load_symbols() + return self.symbols + elif name == 'addr' or name == 'size': + return self.get('memory')[name] + elif name == 'memory': + self.memory = Memory(self.get('memory'), self) + return self.memory + else: + return self.get(name) + +class Symbol(Resource): + def __init__(self, ident, parent): + super(Symbol, self).__init__('symbol', ident, parent.bap) + self.parent = parent + + def load_chunks(self): + self.chunks = [Memory(s, self) for s in self.get('chunks')] + + def __getattr__(self, name): + if name == 'chunks': + self.load_chunks() + return self.chunks + elif name == 'addr': + self.load_chunks() + return self.chunks[0].addr + else: + return self.get(name) + +class Memory(object): + def __init__(self, mem, parent): + self.parent = parent + self.size = int(mem['size']) + self.addr = int(mem['addr']) + self.links = mem['links'] + + def load_data(self): + try: + url = (urlparse(url) for url in self.links + if urlparse(url).scheme == 'mmap').next() + qs = parse_qs(url.query) + offset = int(qs['offset'][0]) + with open(url.path, "rw+b") as f: + mm = mmap(f.fileno(), length=0) + mm.seek(offset) + self.data = mm.read(self.size) + mm.close() + except StopIteration: + self.data = None + + def __getattr__(self, name): + if name == 'data': + self.load_data() + return self.data + raise AttributeError(name) + + +class ServerError(Exception): + def __init__(self, err): + self.msg = str(Error(err)) + + def __str__(self): + return self.msg + +class Error(object): + def __init__(self, err): + self.__dict__.update(err) + self.__dict__.update(err['error']) + + def __str__(self): + return "{severity}: {description}".format(**self.error) + +class Id(object): + def __init__(self, r): + self.value = r + def __str__(self): + return str(self.value) + +RETRIES = 10 + +class Bap(object): + def __init__(self, server={}): + if isinstance(server, dict): + self.__dict__.update(spawn_server(**server)) + else: + self.url = server + + self.last_id = 0 + for attempt in range(RETRIES): + try: + self.capabilities = self.call({'init' : { + 'version' : '0.1'}}) + except Exception: + if attempt + 1 == RETRIES: + raise + else: + time.sleep(0.1 * attempt) + + if not "capabilities" in self.__dict__: + raise RuntimeError("Failed to connect to BAP server") + + self.data = {} + self.temp = NamedTemporaryFile('rw+b') + + def insns(self, src): + res = self.call({'get_insns' : {'resource' : src}}) + for msg in res: + if 'error' in msg: + err = Error(msg) + if err.severity in DEBUG_LEVEL: + print err + else: + return (parse_insn(js) for js in msg['insns']) + + def close(self): + self.__exit__() + + def load_file(self, name): + return self._load_resource({'load_file' : { + 'url' : 'file://' + name}}) + + def get_resource(self, name): + return self.call({'get_resource' : name}).next() + + def load_chunk(self, data, **kwargs): + kwargs.setdefault('url', self.mmap(data)) + kwargs.setdefault('arch', 'x86_32') + kwargs.setdefault('address', bil.Int(0,32)) + kwargs.setdefault('endian', bil.LittleEndian()) + return self._load_resource({'load_memory_chunk' : kwargs}) + + def __exit__(self): + if 'server' in self.__dict__: + self.server.terminate() + self.temp.close() + + def call(self, data): + def dumps(dic): + self.last_id += 1 + dic['id'] = Id(self.last_id) + return json.dumps(dic, default=str) + + if isinstance(data, dict): + method = requests.post + if 'get_insns' or 'get_resource' in data: + method = requests.get + return jsons(method(self.url, data=dumps(data))) + else: + gen = (dumps(msg) for msg in data) + return jsons(requests.post(self.uri, data=gen)) + + def mmap(self, data): + url = "mmap://{0}?offset=0&length={1}".format( + self.temp.name, len(data)) + os.ftruncate(self.temp.fileno(), len(data)) + mm = mmap(self.temp.fileno(), len(data)) + mm.write(data) + mm.close() + return url + + def _load_resource(self, res): + rep = self.call(res).next() + if 'error' in rep: + raise ServerError(rep) + return Id(rep['resource']) + +def spawn_server(**kwargs): + port = kwargs.get('port', 8080) + name = kwargs.get('name', 'bap-server') + server = Popen([name, '--port=' + str(port)]) + return { + 'server' : server, + 'url' : "http://127.0.0.1:{0}".format(port) + } + +def jsons(r, p=0): + dec = json.JSONDecoder(encoding='utf-8') + while True: + obj,p = dec.scan_once(r.text,p) + yield obj + +def parse_target(js): + if 'target' in js: + return arm.loads(js['target']) + else: + return None + +def parse_bil(js): + if 'bil' in js: + return [bil.loads(s) for s in js['bil']] + else: + return None + +def parse_insn(js): + js.update(js['memory'], bil=parse_bil(js), target=parse_target(js)) + return asm.Insn(**js) + +def hexs(data): + return ' '.join(x.encode('hex') for x in data) + +##### Examples + +def demo_chunk(): + data = b"\x48\x83\xec\x08" + insns = disasm(data, arch="x86_64") + print list(insns) + +def demo_image(): + img = image("coreutils_O0_ls") + print "Arch: {1}".format(img.name, img.arch) + print "Sections:" + for sec in img.sections: + print "\t{0}\t{1}".format(sec.name, "".join(sec.perm)) + print "\t\tSymbols:" + for sym in sec.symbols: + print "\t\t\t{0}".format(sym.name) + sym = img.get_symbol('to_uchar') + print "Disassembly of the `{0}` function:".format(sym.name) + for insn in disasm(sym): + print insn.asm + +if "__main__" == __name__: + demo_chunk() + demo_image() diff --git a/python/bil.py b/python/bil.py new file mode 100644 index 000000000..fb13acd09 --- /dev/null +++ b/python/bil.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python + +"""BAP BIL Python representation""" + +from adt import * + + +class Exp(ADT) : pass # Abstract base for all expressions +class Load(Exp) : pass # Load(mem,idx,endian,size) +class Store(Exp): pass # Store(mem,idx,val,endian,size) +class BinOp(Exp): pass # Abstract base for all binary operators +class UnOp(Exp) : pass # Abstract base for all unary operators +class Var(Exp) : pass # Var(name,type) +class Int(Exp) : pass # Int(int,size) +class Cast(Exp) : pass # Abstract base for all cast operations +class Let(Exp) : pass # Let(var,val,body) +class Unknown(Exp): pass # Unknown(string,type) +class Ite(Exp): pass # Ite (cond,if_true,if_false) +class Extract(Exp): pass # Extract(hb,lb, exp) +class Concat(Exp): pass # Concat(lhs,rhs) + +class Stmt(ADT) : pass # Abstract base for all statements + +class Move(Stmt) : pass # Move(var,exp) +class Jmp(Stmt) : pass # Jmp(exp) +class Special(Stmt): pass # Special (string) +class While(Stmt) : pass # While (cond, exps) +class If(Stmt) : pass # If(cond, yes-exprs, no-exprs) +class CpuExn(Stmt) : pass # CpuExn(n) + +# All BinOps have two operands of type exp +class PLUS (BinOp) : pass +class MINUS (BinOp) : pass +class TIMES (BinOp) : pass +class DIVIDE (BinOp) : pass +class SDIVIDE (BinOp) : pass +class MOD (BinOp) : pass +class SMOD (BinOp) : pass +class LSHIFT (BinOp) : pass +class RSHIFT (BinOp) : pass +class ARSHIFT (BinOp) : pass +class AND (BinOp) : pass +class OR (BinOp) : pass +class XOR (BinOp) : pass +class EQ (BinOp) : pass +class NEQ (BinOp) : pass +class LT (BinOp) : pass +class LE (BinOp) : pass +class SLT (BinOp) : pass +class SLE (BinOp) : pass + +# All UnOps have one operand of type exp +class NEG (UnOp) : pass +class NOT (UnOp) : pass + +# All Casts have two operands: (Int(size),exp) +class UNSIGNED(Cast) : pass +class SIGNED(Cast) : pass +class HIGH(Cast) : pass +class LOW(Cast) : pass + +# Endians doesn't have values +class Endian(ADT) : pass +class LittleEndian(Endian) : pass +class BigEndian(Endian) : pass + +class Type(ADT) : pass # Abstract base for expression type +class Imm(Type) : pass # Imm(size) - immediate value +class Mem(Type) : pass # Mem(addr_size, value_size) + +def loads(s): + return eval(s) + +# A playground. + +if __name__ == "__main__": + + exp = Load(Int(12),Int(14), LittleEndian()) + print exp + exp = Load(exp, exp, BigEndian()) + + + class CountEvens(Visitor): + def __init__(self): + self.count = 0 + + + def visit_Int(self, int): + self.count += 1 + + class CountNegatives(Visitor): + def __init__(self): + self.neg = False + self.count = 0 + + def visit_Int(self, int): + if int.val < 0 and not self.neg \ + or int.val > 0 and self.neg: + self.count += 1 + + def visit_NEG(self, op): + was = self.neg + self.neg = not was + self.run(op.val) + self.neg = was + + print "%s" % exp + counter = CountEvens() + counter.run(exp) + print counter.count + exp = eval("%s" % exp) + print "%s" % exp + counter = CountEvens() + counter.run(exp) + print counter.count + + zero = PLUS((NEG(NEG(Int(-1))), NEG(NEG(Int(1))))) + print zero + + nc = CountNegatives() + nc.run(zero) + print nc.count diff --git a/python/setup.py b/python/setup.py new file mode 100644 index 000000000..6b004a90f --- /dev/null +++ b/python/setup.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python2.7 + +from setuptools import setup + +setup( + name='bap', + version='0.9', + package_dir = {'bap' : '.'}, + packages = ['bap'], + install_requires = ['requests'] +) diff --git a/src/.merlin b/src/.merlin new file mode 100644 index 000000000..a30d9fd73 --- /dev/null +++ b/src/.merlin @@ -0,0 +1,2 @@ +REC +PKG cmdliner \ No newline at end of file diff --git a/src/bap_mc/bap_mc.ml b/src/bap_mc/bap_mc.ml index 335efafec..8298aea53 100644 --- a/src/bap_mc/bap_mc.ml +++ b/src/bap_mc/bap_mc.ml @@ -15,8 +15,8 @@ let create_memory addr s = | Error _ -> raise Create_mem_exn let print_kinds insn = - let output = Insn.kinds insn - |> List.map ~f:(fun kind -> + let output = Insn.kinds insn + |> List.map ~f:(fun kind -> Sexp.to_string_hum (Insn.Kind.sexp_of_t kind)) |> String.concat ~sep:", " in printf "%-4s;; %s\n" " " output @@ -24,21 +24,21 @@ let print_kinds insn = let print_insn insn width o_reg_format o_imm_format = let open Op in let init = [Sexp.Atom (Insn.name insn)] in - let res = + let res = Insn.ops insn |> Array.fold ~init ~f:(fun l x -> match x with - | Reg reg -> + | Reg reg -> if String.(o_reg_format = "code") then Sexp.Atom ("r:" ^ Int.to_string (Reg.code reg)) :: l else Sexp.Atom (Reg.name reg) :: l - | Imm imm -> + | Imm imm -> if String.(o_imm_format = "dec") then let v = match Imm.to_int imm with | Some x -> x | None -> raise @@ Convert_imm_exn (Imm.to_string imm) in Sexp.Atom (Printf.sprintf "%d" v) :: l else Sexp.Atom (Imm.to_string imm) :: l - | Fmm fmm -> + | Fmm fmm -> Sexp.Atom (Fmm.to_string fmm) :: l) in let s = Sexp.to_string @@ Sexp.List (List.rev res) in printf "%-4s%-*s" " " width s @@ -47,7 +47,7 @@ let print_insn insn width o_reg_format o_imm_format = * things consistent *) let print_asm insn f_inst = let s = String.strip @@ Insn.asm insn in - if f_inst + if f_inst then printf "; %s" s else printf "%-4s%s" " " s @@ -72,7 +72,7 @@ let disasm s o_arch f_asm f_inst f_kinds o_reg_format o_imm_format = Disasm.Basic.create ~backend:"llvm" o_arch >>= fun dis -> let input = match input_src with | Some input -> - begin match String.prefix input 2 with + begin match String.prefix input 2 with | "" | "\n" -> exit 0 | "\\x" -> let f = ident in to_bin_str input f | "0x" -> let f = String.substr_replace_all ~pattern:"0x" ~with_:"\\x" @@ -86,8 +86,8 @@ let disasm s o_arch f_asm f_inst f_kinds o_reg_format o_imm_format = let hit = print_disasm width f_asm f_inst f_kinds o_reg_format o_imm_format in let invalid state disasm = raise No_disassembly in - Disasm.Basic.run dis ~return:ident ~stop_on:[`valid] ~invalid ~hit ~init:() - mem_of_input; + Disasm.Basic.run dis ~return:ident ~stop_on:[`Valid] ~invalid ~hit ~init:() + mem_of_input; return () open Cmdliner @@ -136,28 +136,28 @@ let cmd = "echo \"0x31 0xd2 0x48 0xf7 0xf3\" | bap-mc --show-inst --show-asm"); `S "SEE ALSO"; `P "$(llvm-mc)"] in - Term.(pure disasm $ hex_str $ o_arch $ f_asm $ f_inst $ f_kinds + Term.(pure disasm $ hex_str $ o_arch $ f_asm $ f_inst $ f_kinds $ o_reg_format $ o_imm_format), Term.info "bap-mc" ~doc ~man ~version:"1.0" let () = - Plugins.load (); + Plugins.load (); let err = Format.std_formatter in try match Term.eval cmd ~catch:false ~err with - | `Error `Parse -> exit 64 + | `Error `Parse -> exit 64 | `Error _ -> exit 1 | _ -> exit 0 - with e -> - let fin s n = print_endline s; exit n in - match e with - | Bad_user_input -> + with e -> + let fin s n = print_endline s; exit n in + match e with + | Bad_user_input -> fin "Could not parse: malformed input" 65 - | Convert_imm_exn imm -> + | Convert_imm_exn imm -> fin (sprintf "Unable to convert Imm hex value [%s] to int" imm) 1 - | Create_mem_exn -> + | Create_mem_exn -> fin "Internal error: cannot create memory for dissasembly backend" 1 - | Stdin_exn -> + | Stdin_exn -> fin "Could not read from stdin" 1 - | No_disassembly -> + | No_disassembly -> fin (sprintf "Invalid instruction encountered, disassembly stopped") 1 | _ -> fin "Could not disassemble input" 1 diff --git a/src/readbin/readbin.ml b/src/readbin/readbin.ml index fd69a0162..2f5d9d6ec 100644 --- a/src/readbin/readbin.ml +++ b/src/readbin/readbin.ml @@ -35,14 +35,15 @@ let main () = let open Image in let arch = arch img in let bits = match addr_size img with - | Word_size.W32 -> 32 - | Word_size.W64 -> 64 in + | `r32 -> 32 + | `r64 -> 64 in let target = match arch with | Arch.ARM -> "arm" | Arch.X86_32 -> "i386" | Arch.X86_64 -> "x86_64" in Disasm.Basic.create ~backend:"llvm" target >>= fun dis -> - printf "# File name: %s\n" @@ filename img; + let name = Option.value (filename img) ~default:"" in + printf "# File name: %s\n" @@ name; printf "# Architecture: %s\n" @@ Arch.to_string arch; printf "# Address size: %d\n" bits; printf "# Entry point: %s\n" @@ Addr.to_string (entry_point img); @@ -50,7 +51,7 @@ let main () = Table.iteri (symbols img) ~f:(fun mem s -> printf "\n# Symbol name: %s\n" (Sym.name s); printf "# Symbol data:\n%a\n" Memory.pp mem; - Disasm.Basic.run dis ~stop_on:[`valid] + Disasm.Basic.run dis ~stop_on:[`Valid] ~hit:(print_disasm arch) ~return:ident ~init:() mem); printf "# Loadable sections: %d\n" @@ Table.length (sections img); diff --git a/src/server/.merlin b/src/server/.merlin new file mode 100644 index 000000000..dad8e8619 --- /dev/null +++ b/src/server/.merlin @@ -0,0 +1,11 @@ +B ../../_build/src/server +REC +PKG ezjsonm +PKG lwt +PKG lwt-zmq +PKG ZMQ +PKG uri +PKG cohttp.lwt +PKG cohttp + +S . diff --git a/src/server/adt.ml b/src/server/adt.ml new file mode 100644 index 000000000..c4af8bfac --- /dev/null +++ b/src/server/adt.ml @@ -0,0 +1,175 @@ +open Core_kernel.Std +open Bap.Std + +let pr ch fms = Format.fprintf ch fms + +let pp_word ch word = + pr ch "Int(%s,%d)" + (Word.string_of_value ~hex:false word) + (Word.bitwidth word) + +let pp_endian ch = function + | BigEndian -> pr ch "BigEndian()" + | LittleEndian -> pr ch "LittleEndian()" + +let pp_size ch size = + pr ch "%d" (Size.to_bits size) + +let pp_sexp sexp ch x = + pr ch "%a" Sexp.pp (sexp x) + +module Var = struct + let pp_ty ch = function + | Type.Imm n -> pr ch "Imm(%d)" n + | Type.Mem (n,m) -> pr ch "Mem(%a,%a)" pp_size n pp_size m + + let pp_var ch v = + pr ch "Var(\"%s\",%a)" Var.(name v) pp_ty Var.(typ v) + +end + +module Exp = struct + open Exp + open Var + + + + let rec pp ch = function + | Load (x,y,e,s) -> + pr ch "Load(%a,%a,%a,%a)" pp x pp y pp_endian e pp_size s + | Store (x,y,z,e,s) -> + pr ch "Store(%a,%a,%a,%a,%a)" pp x pp y pp z pp_endian e pp_size s + | BinOp (op,x,y) -> + pr ch "%a(%a,%a)" (pp_sexp sexp_of_binop) op pp x pp y + | UnOp (op,x) -> + pr ch "%a(%a)" (pp_sexp sexp_of_unop) op pp x + | Var v -> pp_var ch v + | Int w -> pp_word ch w + | Cast (ct,sz,ex) -> + pr ch "%a(%d,%a)" (pp_sexp sexp_of_cast) ct sz pp ex + | Let (v,e1,e2) -> pr ch "Let(%a,%a,%a)" pp_var v pp e1 pp e2 + | Unknown (s,t) -> pr ch "Unknown(%s,%a)" s pp_ty t + | Ite (e1,e2,e3) -> pr ch "Ite(%a,%a,%a)" pp e1 pp e2 pp e3 + | Extract (n,m,e) -> pr ch "Extract(%d,%d,%a)" n m pp e + | Concat (e1,e2) -> pr ch "Concat(%a,%a)" pp e1 pp e2 +end + +module Stmt = struct + open Stmt + open Var + let rec pp ch = function + | Move (v,e) -> pr ch "Move(%a,%a)" pp_var v Exp.pp e + | Jmp e -> pr ch "Jmp(%a)" Exp.pp e + | Special s -> pr ch "Special(\"%s\")" s + | While (e,ss) -> pr ch "While(%a, (%a))" Exp.pp e pps ss + | If (e,xs,ys) -> pr ch "If(%a, (%a), (%a))" Exp.pp e pps xs pps ys + | CpuExn n -> pr ch "CpuExn(%d)" n + and pps ch = function + | [] -> () + | [s] -> pp ch s + | s :: ss -> pr ch "%a, %a" pp s pps ss +end + + +module Asm = struct + open Disasm + let pp_pred ch kind = + pr ch "%a()" (pp_sexp Basic.sexp_of_pred) kind + + let pp_op ch = function + | Op.Imm imm -> pr ch "Imm(0x%Lx)" (Imm.to_int64 imm) + | Op.Fmm fmm -> pr ch "Fmm(%g)" (Fmm.to_float fmm) + | Op.Reg reg -> pr ch "Reg(\"%a\")" Reg.pp reg +end + +module Arm = struct + open Disasm + let pp_op ch = function + | Arm.Op.Imm imm -> pr ch "Imm(%a)" pp_word imm + | Arm.Op.Reg reg -> pr ch "Reg(%a())" Arm.Reg.pp reg + + let rec pp_ops ch = function + | [] -> () + | [x] -> pr ch "%a" pp_op x + | x :: xs -> pr ch "%a, %a" pp_op x pp_ops xs + + let pp_insn ch (insn,ops) = + pr ch "%a(%a)" (pp_sexp Arm.Insn.sexp_of_t) insn pp_ops ops +end + +let to_string pp x = Format.asprintf "%a" pp x +let to_strings pp lst = List.map ~f:(fun x -> Format.asprintf "%a" pp x) lst + +let strings_of_bil = to_strings Stmt.pp +let strings_of_ops = to_strings Asm.pp_op +let strings_of_preds = to_strings Asm.pp_pred +let string_of_arm insn ops = to_string Arm.pp_insn (insn,ops) +let string_of_endian = to_string pp_endian +let strings_of_kinds ks = + strings_of_preds (ks :> Disasm.Basic.pred list) + + + +module Lex = struct + open Re + + let spaces = rep space + let constr = seq [upper; rep alpha] + let open_p = seq [spaces; char '('; spaces] + let close_p = seq [spaces; char ')'; spaces] + let integer = rep digit + let quotes = set "\"" + let no_args = seq [open_p; spaces; close_p] + let comma = seq [spaces; char ','; spaces] + let size = alt [str "32"; str "64"] + let parens expr = seq [open_p; expr; close_p] + + let adt = compile @@ seq [ + group constr; + parens @@ group @@ rep any; + ] + + + let nullary = compile @@ seq [spaces; group constr; spaces; no_args] + + let word = compile @@ seq [ + group constr; + parens @@ seq [ + group integer; + comma; + group size; + ] + ] + +end + +module Parse = struct + open Re + open Or_error + + type 'a t = string -> 'a Or_error.t + + let parse re str = try get_all @@ exec re str with Not_found -> [| |] + + let sexp conv v = + try Ok (conv @@ Sexp.of_string v) + with exn -> errorf "Unknown constructor: %s" v + + let nullary conv str = match parse Lex.nullary str with + | [| _; constr|] -> sexp conv constr + | _ -> errorf "Malformed nullary constructor '%s'" str + + let int64 str = + try Ok (Int64.of_string str) + with exn -> errorf "'%s' is expected to be a number" str + + let word str = match parse Lex.word str with + | [| _; "Int"; v; "32" |] -> int64 v >>| Word.of_int64 ~width:32 + | [| _; "Int"; v; "64" |] -> int64 v >>| Word.of_int64 ~width:64 + | _ -> errorf "'%s' doesn't match 'Int(num,size)'" str + + + let kind = nullary Disasm.Basic.kind_of_sexp + let pred = nullary Disasm.Basic.pred_of_sexp + let endian = nullary endian_of_sexp +end diff --git a/src/server/adt.mli b/src/server/adt.mli new file mode 100644 index 000000000..805f09a46 --- /dev/null +++ b/src/server/adt.mli @@ -0,0 +1,18 @@ +open Core_kernel.Std +open Bap.Std +open Disasm + +val strings_of_bil : stmt list -> string list +val strings_of_ops : Basic.op list -> string list +val strings_of_kinds : Basic.kind list -> string list +val strings_of_preds : Basic.pred list -> string list +val string_of_arm : Arm.Insn.t -> Arm.Op.t list -> string +val string_of_endian : endian -> string + +module Parse : sig + type 'a t = string -> 'a Or_error.t + val word : word t + val kind : Basic.kind t + val pred : Basic.pred t + val endian : endian t +end diff --git a/src/server/file_fetcher.ml b/src/server/file_fetcher.ml new file mode 100644 index 000000000..fb9908d41 --- /dev/null +++ b/src/server/file_fetcher.ml @@ -0,0 +1,17 @@ +open Core_kernel.Std +open Core_lwt.Std + + +let fetch uri = + match Uri.path uri with + | "" -> Lwt.Or_error.errorf + "Uri with a file scheme have empty path: %s" + (Uri.to_string uri) + | path -> + Lwt.Or_error.try_with (fun () -> + Lwt_unix.(openfile path [O_RDONLY] 0o400 >>| unix_file_descr)) + >>=? fun fd -> + let data = Lwt_bytes.map_file ~fd ~shared:false () in + Lwt.Or_error.return (Bigsubstring.create data) + +let () = Transport.register_resource_fetcher ~scheme:"file" fetch diff --git a/src/server/file_fetcher.mli b/src/server/file_fetcher.mli new file mode 100644 index 000000000..e69de29bb diff --git a/src/server/http_service.ml b/src/server/http_service.ml new file mode 100644 index 000000000..55ee9fbaf --- /dev/null +++ b/src/server/http_service.ml @@ -0,0 +1,29 @@ +open Core_kernel.Std +open Core_lwt.Std +open Cohttp +open Lwt_log + +let section = Section.make "Http_transport" +let max_messages = 1000 + +module Http = Cohttp_lwt_unix +module Body = Cohttp_lwt_body + +let headers = Header.of_list [ + "allow", "GET, POST"; + "server", "BAP/0.1"; + ] + + +let start ~new_connection = + let callback conn req body = + let to_client,queue = Lwt.Stream.create_bounded max_messages in + let of_client = Lwt.Stream.clone (Body.to_stream body) in + new_connection (of_client, queue); + let body = Body.of_stream to_client in + Http.Server.respond ~flush:false ~headers ~status:`OK ~body () in + let srv = Http.Server.make ~callback () in + Http.Server.create srv >>= Lwt.Or_error.return + + +let () = Transport.register_service ~name:"http" ~start diff --git a/src/server/http_service.mli b/src/server/http_service.mli new file mode 100644 index 000000000..e1881ac6c --- /dev/null +++ b/src/server/http_service.mli @@ -0,0 +1,55 @@ +(** Implements transport layers on top of HTTP. + + {2 Service provider} + + {3 Data formats} + + This section describes HTTP protocol specific details of the + service provider. + + Requests starting with prefix [get] MUST use method [GET]. All other + requests MUST use method [POST]. The [Content-Type] header MUST be + present and MUST be set to "application/json". The presence and + content of all other header are regulated by RFC2616. + + The JSON request MUST be contained in a message-body part of the + request. + The response MUST contain message-body if the reponse status is + "200 OK", otherwise it MAY not contain message-body. + + Note: status "OK" indicates only that transport layer has + succesfully served the request and provided the response. But the + response by itself can still indicate an error. + + + {3 Session} + + In order to implement streaming ontop of HTTP the Client/Server + interaction should convey to the following rules. + + 1. Session MUST be started by the client side with a [PUT] + request, containing [Pragma] header with value [start-session]. + + 2. Client MUST wait for the response on the initial request, in + order to obtain session id. The session id MUST be a positive + number lesser then max_sessions. If server cannot allocate such + unique number it MUST respond with an error. + + 3. Server MUST respond on the initial request with one HTTP + response, that MUST contain [Pragma] header with a unique + identifier of the session, and chunked body-message. The response + MUST contain [Transfer-Encoding] header with value equal to + ["chunked"]. + + 4. Client MAY send further requests to the same connection + (established by the initial request) or create new connections to + send new requests. All requests MUST contain [Pragma] header with + value either equal to session id, or [start-session], to create + new session. + + 5. Server MUST respond to all request, other than the requests + containing [Pragma : start-session] header, to the stream, + + + +*) diff --git a/src/server/list1.ml b/src/server/list1.ml new file mode 100644 index 000000000..77004353f --- /dev/null +++ b/src/server/list1.ml @@ -0,0 +1,16 @@ +open Core_kernel.Std + +include List + +let create x xs = x::xs +let singleton x = [x] +let of_list = function + | [] -> None + | x -> Some x + +external to_list : 'a t -> 'a list = "%identity" + +let hd = hd_exn +let tl = tl_exn +let last = last_exn +let reduce = reduce_exn diff --git a/src/server/list1.mli b/src/server/list1.mli new file mode 100644 index 000000000..9131dc132 --- /dev/null +++ b/src/server/list1.mli @@ -0,0 +1,15 @@ +open Core_kernel.Std + +type +'a t + +include Container.S1 with type 'a t := 'a t +include Monad.S with type 'a t := 'a t + +val create : 'a -> 'a list -> 'a t +val singleton : 'a -> 'a t +val of_list : 'a list -> 'a t option + +val hd : 'a t -> 'a +val tl : 'a t -> 'a t +val last : 'a t -> 'a +val reduce : 'a t -> f:('a -> 'a -> 'a) -> 'a diff --git a/src/server/main.ml b/src/server/main.ml new file mode 100644 index 000000000..e69de29bb diff --git a/src/server/manager.ml b/src/server/manager.ml new file mode 100644 index 000000000..4a721c35d --- /dev/null +++ b/src/server/manager.ml @@ -0,0 +1,350 @@ +open Core_kernel.Std +open Core_lwt.Std + +open Bap.Std + +module Id = struct + type t = int64 + let of_string s : t Or_error.t = + try Ok (Int64.of_string s) with + exn -> Or_error.errorf "Bad ID format: '%s'" s + include Regular.Make(struct + include Int64 + let module_name = "Manager.Id" + end) +end + +module Ids = Id.Table + +type id = Id.t +type 'a list1 = 'a List1.t + +type server = Uri.t + +type 'a hashed = + | Available of 'a Lwt.Or_error.t + | Evicted of (unit -> 'a Lwt.Or_error.t) + +type meta = { + refs : server list1; + arch : Arch.t; + addr : Addr.t; + endian : endian; + id : id; +} + +type 'a resource = { + meta : meta; + mutable data : 'a hashed; +} with fields + + +type context = { + images : image resource Ids.t; + chunks : mem resource Ids.t; + sections : Section.t Ids.t; + symbols : Symbol.t Ids.t; + sections_of_image : id list Ids.t; + symbols_of_section : id list Ids.t; + memory_of_symbol : id list Ids.t; + symbol_of_memory : id Ids.t; + section_of_symbol : id Ids.t; + image_of_section : id Ids.t; +} with fields + + + +let t = + let empty = Ids.create in + { + images = empty (); + chunks = empty (); + sections = empty (); + symbols = empty (); + sections_of_image = empty (); + symbols_of_section = empty (); + memory_of_symbol = empty (); + symbol_of_memory = empty (); + section_of_symbol = empty (); + image_of_section = empty (); + } + + +let next_id : unit -> id = + let last = ref 0L in + fun () -> + let was = !last in + Int64.incr last; + assert Int64.(was < last.contents); + !last + +let string_of_id = Int64.to_string + +let provide_memory ?file arch id mem = + let buffer = Memory.to_buffer mem in + let query = string_of_id id in + let addr = Memory.min_addr mem in + let endian = Memory.endian mem in + let data = Available (Lwt.Or_error.return mem) in + Transport.serve_resource ~query ?file buffer >>|? fun refs -> + {meta = {endian; addr; arch; refs; id}; data} + + +let seq_is_empty seq = + Seq.length_is_bounded_by ~max:0 seq + +let add_image ?file img = + let img_id = next_id () in + let file = Option.first_some file (Image.filename img) in + let arch = Image.arch img in + let sections = Image.sections img |> Table.to_sequence in + Lwt.Or_error.Seq.iter sections ~f:(fun (mem,sec) -> + let sec_id = next_id () in + Ids.add_multi t.sections_of_image ~key:img_id ~data:sec_id; + Ids.add_exn t.image_of_section ~key:sec_id ~data:img_id; + provide_memory arch ?file sec_id mem >>=? fun section -> + Ids.add_exn t.chunks ~key:sec_id ~data:section; + Ids.add_exn t.sections ~key:sec_id ~data:sec; + let syms = Image.symbols_of_section img sec in + Lwt.Or_error.Seq.iter syms ~f:(fun sym -> + let sym_id = next_id () in + let (mem,mems) = Image.memory_of_symbol img sym in + Ids.add_exn t.symbols ~key:sym_id ~data:sym; + Ids.add_multi t.symbols_of_section ~key:sec_id ~data:sym_id; + Ids.add_exn t.section_of_symbol ~key:sym_id ~data:sec_id; + let all_mems = mem :: Seq.to_list mems in + Lwt.Or_error.List.iter all_mems ~f:(fun mem -> + let mem_id = next_id () in + Ids.add_multi t.memory_of_symbol ~key:sym_id ~data:mem_id; + Ids.add_exn t.symbol_of_memory ~key:mem_id ~data:sym_id; + provide_memory arch ?file mem_id mem >>|? fun data -> + Ids.add_exn t.chunks ~key:mem_id ~data) >>=? fun () -> + if seq_is_empty mems then + provide_memory arch ?file sym_id mem >>|? fun data -> + Ids.add_exn t.chunks ~key:sym_id ~data + else + Lwt.Or_error.ok_unit)) >>=? fun () -> + let buffer = Bigsubstring.create (Image.data img) in + let query = string_of_id img_id in + Transport.serve_resource ~query ?file buffer >>=? fun refs -> + let addr = Image.entry_point img in + let data = Available (Lwt.Or_error.return img) in + let endian = Image.endian img in + let resource = {meta={addr; arch; refs; endian; id=img_id}; data} in + Ids.add_exn t.images ~key:img_id ~data:resource; + Lwt.return (Or_error.return img_id) + +let add_memory arch endian addr uri : id Lwt.Or_error.t = + Transport.fetch_resource uri >>=? fun data -> + let pos = Bigsubstring.pos data in + let len = Bigsubstring.length data in + let data = Bigsubstring.base data in + match Memory.create ~pos ~len endian addr data with + | Error err -> + Error.tag_arg err "fetching chunk from" uri Uri.sexp_of_t |> + Lwt.Or_error.fail + | Ok mem -> + let id = next_id () in + provide_memory arch id mem >>=? fun data -> + Ids.add_exn t.chunks ~key:id ~data; + Lwt.Or_error.return id + +let string_of_id = Id.to_string + + +let id_of_string s = + let open Or_error in + Id.of_string s >>= fun id -> + let f = Ids.mem in + let tables = [f t.images; f t.chunks; f t.symbols; f t.sections] in + match List.exists tables ~f:(fun f -> f id) with + | true -> Ok id + | false -> errorf "Id %s is not known" s + +let symbol_of_memory = Ids.find t.symbol_of_memory +let section_of_symbol = Ids.find t.section_of_symbol +let image_of_section = Ids.find t.image_of_section + +let find_list tab id = match Ids.find tab id with + | Some lst -> lst + | None -> [] + +let sections_of_image = find_list t.sections_of_image +let symbols_of_section = find_list t.symbols_of_section +let memory_of_symbol = find_list t.memory_of_symbol + +type 'a served = { + links : server list1; + fetch : unit -> 'a Lwt.Or_error.t; +} + +type nil = Nil +type mem = Memory.t served +type sym = Symbol.t (** symbol *) +type sec = Section.t (** section *) +type img = Image.t served + +type ('mem, 'img, 'sec, 'sym) res = { + mem : 'mem; + img : 'img; + sec : 'sec; + sym : 'sym; + res : meta; +} + +type ('mem,'img,'sec,'sym,'a) visitor = + ('mem,'img,'sec,'sym) res -> 'a Lwt.Or_error.t + +let memory r = r.mem +let image r = r.img +let section r = r.sec +let symbol r = r.sym +let endian r = r.res.endian +let addr r = r.res.addr +let arch r = r.res.arch +let links r = r.res.refs +let id r = r.res.id + + +(* we assume that resource references are sorted in the order of their + availability, i.e. at the beginining of the list we have most fast + and robust sources, while at the end we have last resorts. +*) +let try_all (s,ss) = + let open Transport in + let open Lwt in + fetch_resource s >>= fun data -> + List.fold ss ~init:data ~f:(fun fetched next_src -> match fetched with + | Ok data -> return (Ok data) + | Error err -> fetch_resource next_src >>| + Result.map_error ~f:(fun err' -> Error.of_list [err;err'])) + +let fetch resource = match resource.data with + | Available r -> r + | Evicted load -> + let r = load () in + resource.data <- Available r; + r + +let print_warnings warnings = Lwt.return_unit + +let image_of_bigsubstring ?backend substr = + let base = Bigsubstring.base substr in + if Bigsubstring.length substr <> Bigstring.length base || + Bigsubstring.pos substr <> 0 then + Or_error.errorf "Unsupported: creating images from substring" + else Image.of_bigstring ?backend base + +let create_image ?backend data = + match image_of_bigsubstring ?backend data with + | Ok (img,warns) -> print_warnings warns >>= fun () -> return (Ok img) + | Error err -> return (Error err) + + +let add_file ?backend uri = + let file = Option.(Uri.scheme uri >>= function + |"file" -> return (Uri.path uri) | _ -> None) in + Transport.fetch_resource uri >>=? + create_image ?backend >>=? + add_image ?file + +let nothing () = Lwt.Or_error.return Nil + +let init r : (nil, nil, nil, nil) res = { + mem = Nil; + img = Nil; + sec = Nil; + sym = Nil; + res = r.meta; +} + +let find_in field id : 'a Or_error.t = + match Ids.find (Fieldslib.Field.get field t) id with + | Some x -> Ok x + | None -> + Or_error.errorf "Failed to find id %a in table %s" + Id.str id (Fieldslib.Field.name field) + +let serve res = { + links = res.meta.refs; + fetch = fun () -> fetch res; +} + +let links_of_memory r = r.links +let links_of_image r = r.links +let fetch_memory r = r.fetch () +let fetch_image r = r.fetch () + +let of_image res : (nil,img,nil,nil) res = + { (init res) with img = serve res; } + +let of_section sec id : (mem,img,sec,nil) res Or_error.t = + let open Or_error in + let open Fields_of_context in + find_in image_of_section id >>= fun img_id -> + find_in images img_id >>= fun img -> + find_in chunks id >>= fun mem -> + let r = init mem in + return { r with + img = serve img; + mem = serve mem; + sec} + +let of_symbol sym id : (mem list1,img,sec,sym) res Or_error.t = + let open Fields_of_context in + let open Or_error in + find_in section_of_symbol id >>= fun sec_id -> + find_in sections sec_id >>= fun sec -> + find_in image_of_section sec_id >>= fun img_id -> + find_in images img_id >>= fun img -> + find_in memory_of_symbol id >>= fun mems -> + List.map mems ~f:(find_in chunks) |> all >>= function + | [] -> errorf "Symbol without memory" + | m::ms -> + let res = init m in + let mem = List1.create m ms |> List1.map ~f:serve in + return { res with img = serve img; sym; sec; mem} + +module Return = struct + let unit res = Lwt.Or_error.ok_unit + let none res = Lwt.Or_error.return None + let null res = Lwt.Or_error.return 0 + let nil res = Lwt.Or_error.ok_nil + + let errorf fmt = + Printf.ksprintf (fun msg -> fun _ -> Lwt.Or_error.error_string msg) fmt + + let error msg data sexp _res = Lwt.Or_error.error msg data sexp +end + +let with_resource ~chunk ~symbol ~section ~image (id : id) = + let open Fields_of_context in + match Ids.find t.images id with + | Some img -> image (of_image img) + | None -> match Ids.find t.sections id with + | Some sec -> return (of_section sec id) >>=? section + | None -> match Ids.find t.symbols id with + | Some sym -> return (of_symbol sym id) >>=? symbol + | None -> match Ids.find t.chunks id with + | Some mem -> chunk {(init mem) with mem = serve mem} + | None -> Lwt.Or_error.errorf "unknown id: %a" Id.str id + +let servers_of_id id = + match Ids.find t.images id with + | Some {meta = {refs}} -> List1.to_list refs + | None -> match Ids.find t.chunks id with + | Some {meta = {refs}} -> List1.to_list refs + | None -> [] + +let links_of_id id = servers_of_id id + + +include struct + let export field = Fieldslib.Field.get field t |> Ids.keys + open Fields_of_context + + let images = export images + let sections = export sections + let symbols = export symbols + let chunks = export chunks +end diff --git a/src/server/manager.mli b/src/server/manager.mli new file mode 100644 index 000000000..eed71aa20 --- /dev/null +++ b/src/server/manager.mli @@ -0,0 +1,116 @@ +(** Resource Manager. + +*) +open Core_kernel.Std +open Core_lwt.Std +open Bap.Std + +(** Abstract identifier. + This types implements [Regular] interface see [Id] + module. *) +type id + +type 'a list1 = 'a List1.t + + +(** Resource. + + Resources can have different data assosiated with + it, depending on its nature. +*) +type ('mem,'img,'sec,'sym) res + +(** Types of resource data *) +type nil (** data not available *) +type mem +type sym = Symbol.t (** symbol *) +type sec = Section.t (** section *) +type img + +(** Unique Identifer *) +module Id : Regular with type t = id + + +(** adds file image to the resources data base *) +val add_image : ?file:string -> image -> id Lwt.Or_error.t + +(** fetches memory chunk from a given [uri], and adds it to the + resource pool *) +val add_memory : arch -> endian -> addr -> Uri.t -> id Lwt.Or_error.t + +(** fetched image from a given [url], and stores it among other + resources *) +val add_file : ?backend:string -> Uri.t -> id Lwt.Or_error.t + +(** [string_of_id] is a synonym for [Id.to_string] *) +val string_of_id : id -> string +val id_of_string : string -> id Or_error.t + +val symbol_of_memory : id -> id option +val section_of_symbol : id -> id option +val image_of_section : id -> id option + +val sections_of_image : id -> id list +val symbols_of_section : id -> id list +val memory_of_symbol : id -> id list + +val images : id list +val sections : id list +val symbols : id list +val chunks : id list + +(** Access to resource *) + +val memory : ('a,_,_,_) res -> 'a +val image : (_,'a,_,_) res -> 'a +val section : (_,_,'a,_) res -> 'a +val symbol : (_,_,_,'a) res -> 'a +val endian : (_,_,_,_) res -> endian +val arch : (_,_,_,_) res -> arch +val addr : (_,_,_,_) res -> addr +val links : (_,_,_,_) res -> Uri.t list1 +val id : (_,_,_,_) res -> id + +val links_of_memory : mem -> Uri.t list1 +val links_of_image : img -> Uri.t list1 +val fetch_memory : mem -> Memory.t Lwt.Or_error.t +val fetch_image : img -> Image.t Lwt.Or_error.t + +(** Resource Visitor. + Visitor is a function that accepts a resource and returns a value + of type ['a Lwt.Or_error.t] +*) +type ('mem,'img,'sec,'sym,'a) visitor = + ('mem,'img,'sec,'sym) res -> 'a Lwt.Or_error.t + + +(** Predefined visitors. + + This visitors are fully polymorphic over resource type, i.e., they + can accept resource of any type. Most of the visitors do restrict + you in a return type, e.g., if you're using [Return.none] visitor, + then all you other visitors should also return ['a option] +*) +module Return : sig + val unit : (_,_,_,_,unit) visitor + val none : (_,_,_,_,'a option) visitor + val null : (_,_,_,_,int) visitor + val nil : (_,_,_,_,'a list) visitor + val error : string -> 'a -> ('a -> Sexp.t) -> (_,_,_,_,_) visitor + + (** [errorf fmt args...] creates a visitor that will yield an error + with a message created from format string [fmt] and arguments + [args], e.g., [errorf "id %d has wrong type" id] + *) + val errorf : ('a, unit, string, (_,_,_,_,_) visitor) format4 -> 'a +end + +(** retrieve resource with a specified [id] and apply corresponding + function. +*) +val with_resource : + chunk:(mem,nil,nil,nil,'a) visitor -> + symbol:(mem list1, img,sec,sym,'a) visitor -> + section:(mem,img,sec,nil,'a) visitor -> + image:(nil,img,nil,nil,'a) visitor -> + id -> 'a Lwt.Or_error.t diff --git a/src/server/mmap_client.ml b/src/server/mmap_client.ml new file mode 100644 index 000000000..5b0e4abbf --- /dev/null +++ b/src/server/mmap_client.ml @@ -0,0 +1,70 @@ +open Core_kernel.Std +open Core_lwt.Std +open Lwt_log + +let section = Section.make "mmap_client" +let scheme = "mmap" + +let err fmt = Lwt.Or_error.errorf fmt + +type entry = { + path : string; + data : Bigstring.t; +} + +let empty = { + path = ""; + data = Bigstring.create 0; +} + +let entry path = { empty with path} + +module Weak_set = struct + include Caml.Weak.Make(struct + type t = entry + let equal t1 t2 = + String.equal t1.path t2.path + let hash t = String.hash t.path + end) + let find_exn = find + let find t elt = + try Some (find_exn t elt) with Not_found -> None +end + +let int_of_string str = + try Some (Int.of_string str) with exn -> None + +let int_of_param uri name = + let str = Uri.to_string uri in + match Uri.get_query_param uri name with + | None -> err "uri '%s' doesn't contain %s" str name + | Some off -> match int_of_string off with + | None -> err "%s value '%s' is not a valid integer" name off + | Some n -> Lwt.Or_error.return n + +let substring_of_entry uri entry = + int_of_param uri "length" >>=? fun len -> + int_of_param uri "offset" >>|? fun pos -> + Bigsubstring.create ~len ~pos entry.data + +let openfile path = + Lwt.Or_error.try_with (fun () -> + Lwt_unix.(openfile path [O_RDONLY] 0o400 >>| unix_file_descr)) + +let main () = + let files = Weak_set.create 256 in + let fetch uri = + let str = Uri.to_string uri in + match Uri.path uri with + | "" -> err "url '%s' has an empty path" str + | path -> match Weak_set.find files (entry path) with + | Some entry -> substring_of_entry uri entry + | None -> + openfile path >>=? fun fd -> + let data = Lwt_bytes.map_file ~fd ~shared:false () in + let entry = {data; path} in + Weak_set.add files entry; + substring_of_entry uri entry in + Transport.register_resource_fetcher ~scheme fetch + +let () = main () diff --git a/src/server/mmap_client.mli b/src/server/mmap_client.mli new file mode 100644 index 000000000..eb1f111fd --- /dev/null +++ b/src/server/mmap_client.mli @@ -0,0 +1 @@ +(* See mmap_server for the definition of the protocol *) diff --git a/src/server/mmap_server.ml b/src/server/mmap_server.ml new file mode 100644 index 000000000..e5439c552 --- /dev/null +++ b/src/server/mmap_server.ml @@ -0,0 +1,71 @@ +open Core_kernel.Std +open Core_lwt.Std +open Lwt_log + +let section = Section.make "mmap_server" + + +(* the data field should be mmaped otherwise we're risking to run out + fd resources. If a user provides us with a data that is mapped, + but doesn't give us a filename of the mapped file, then we won't + save the data in our bag, and issue a debug warning +*) + +type file = { + data : Bigstring.t; + path : string; +} + +type files = file Bag.t + +let scheme = "mmap" + + +let url_of_file ?query sub path : Uri.t = + let length = Bigsubstring.length sub in + let offset = Bigsubstring.pos sub in + let url = Uri.make ~scheme ~path ~query:[ + "length", [Int.to_string length]; + "offset", [Int.to_string offset]; + ] () in + match query with + | None -> url + | Some v -> + ("q", [v]) :: Uri.query url |> + Uri.with_query url + +let save_to_file data : string = + let path = Filename.temp_file "bap_" ".mmap" in + let fd = Unix.(openfile path [Unix.O_RDWR] 0o600) in + let size = Bigstring.length data in + let dst = Lwt_bytes.map_file ~fd ~size ~shared:true () in + Bigstring.blito ~src:data ~dst (); + Bigstring.unsafe_destroy dst; + Unix.close fd; + path + +let add_and_forget files file = + let _ : file Bag.Elt.t = Bag.add files file in + () + +let main () = + let files : files = Bag.create () in + let create ?query ?file data = + match file with + | Some file -> Lwt.Or_error.return (url_of_file ?query data file) + | None -> + let base = Bigsubstring.base data in + match Bag.find_elt files ~f:(fun f -> phys_equal f.data base) with + | Some file -> let path = Bag.Elt.(value file).path in + Lwt.Or_error.return (url_of_file ?query data path) + | None -> + let path = save_to_file base in + if Bigstring.is_mmapped base then + ign_debug ~section + "user provided a mmaped file without a name" + else + add_and_forget files {data=base; path}; + Lwt.Or_error.return (url_of_file ?query data path) in + Transport.register_resource_server ~scheme ~create + +let () = main () diff --git a/src/server/mmap_server.mli b/src/server/mmap_server.mli new file mode 100644 index 000000000..bc01d2f6d --- /dev/null +++ b/src/server/mmap_server.mli @@ -0,0 +1,32 @@ +(** shares files using mmaping. + + + mmap server and clients doesn't actually transfer data between, + but share the data using URL to address a piece of data in the + filesystem. + + The server responsibility is to create files for memory chunks + if they are unowned, i.e., doesn't have a file. + + The URI format is used in a following way: + + - [scheme] must be [mmap] + - [path] must point to an existing file + - [query] should contain two fields: + - [offset] an offset in bytes from the start of the file + - [length] length of the memory chunk + + + When server accepts a new memory chunk with the [field] parameter + pointing to a valid mmapable file it will assume, that + [Bigsubstring.base] refers to a file data, and return a correct + url pointing to the memory chunk in the specified data. If [file] + parameter is left uspecified, then server will look at the base of + a bigstring and if it is already mapped it before, the it will + reuse it, otherwise it will create a file and return an url + pointing to it. If the query parameter is not None, then it will be + appended to the query with the key [q]. + + [mmap:///bin/ls?offset=16&length=1024&q=user] + +*) diff --git a/src/server/resource.mli b/src/server/resource.mli new file mode 100644 index 000000000..ec2948938 --- /dev/null +++ b/src/server/resource.mli @@ -0,0 +1,20 @@ +(** Server resource. + + Resource is an abstration of a binary data, that can be shared + between client and server. +*) + +open Core_kernel.Std +open Bap.Std +open Rpc + + +type t with sexp_of + + +val of_image : image -> t +val of_section : image:t -> Section.t -> t +val of_symbol : section:t -> Symbol.t -> t +val of_memory : ?section:t -> ?symbol:t -> mem -> t + +val data : t -> Bigsubstring.t diff --git a/src/server/rpc.ml b/src/server/rpc.ml new file mode 100644 index 000000000..f67f48166 --- /dev/null +++ b/src/server/rpc.ml @@ -0,0 +1,308 @@ +open Core_kernel.Std +open Or_error +open Bap.Std +open Ezjsonm + +type response = Ezjsonm.t +type request = Ezjsonm.t +type target = string with sexp_of +type uri = Uri.t +type links = Uri.t List1.t +type 'a resource = links * 'a +type id = string with sexp_of +type res_id = string with sexp_of +type res_ids = res_id list with sexp_of + +type value = [ + | `Null + | `Bool of bool + | `Float of float + | `String of string + | `A of value list + | `O of (string * value) list +] with sexp_of + + +let minify = false + + +let sexp_of_response = Fn.compose Ezjsonm.to_sexp Ezjsonm.value +let sexp_of_request = sexp_of_response + +module Id = String + +type severity = [ + | `Critical + | `Error + | `Warning +] with bin_io, compare, sexp + +let string_of_severity s = + Sexp.to_string @@ sexp_of_severity s + + +module Response = struct + type t = response + type msg = (id * value) list + type loader = value + type disassembler = value + type transport = string + type insn = value + + let to_string r = to_string ~minify r + + let create id (msg : msg) : t = `O ([ + "id", string id; + ] @ msg) + + let error sev desc : msg = [ + "error", dict [ + "severity", string (string_of_severity sev); + "description", string desc + ] + ] + + let capabilities ~version ts ls ds : msg = [ + "capabilities", dict [ + "version", string version; + "loaders", `A ls; + "disassemblers", `A ds; + "transports", strings ts; + ] + ] + + let list_of_uris uris = + List1.map uris ~f:Uri.to_string |> + List1.to_list + + let disassembler + ~name ~arch ~kinds ~has_name ~has_ops ~has_target + ~has_bil : disassembler = + dict [ + "name", string name; + "architecture", string (Arch.to_string arch); + "kinds", strings @@ Adt.strings_of_kinds kinds; + "has_name", bool has_name; + "has_ops", bool has_ops; + "has_target", bool has_target; + "has_bil", bool has_bil + ] + + let string_of_sym s = + Sexp.to_string (<:sexp_of<[`debug | `symtab]>> s) + + let strings_of_syms syms = + List.intersperse ~sep:"," @@ List.map syms ~f:string_of_sym + + let loader ~name ~arch ~format syms : loader = + dict [ + "name", string name; + "architecture", string (Arch.to_string arch); + "format", string format; + "symbols", strings (strings_of_syms syms) + ] + + let transport = ident + + + let optional_field name json_of_value = function + | None -> [] + | Some value -> [name, json_of_value value] + + let enum map x = strings (map x) + + let string_of_addr = Addr.string_of_value ~hex:false + + let memory_parameters m : msg = [ + "addr", string @@ string_of_addr @@ Memory.min_addr m; + "size", string @@ Int.to_string @@ Memory.size m; + ] + + + let resource links name props : msg = [ + name, dict @@ [ + "links", strings @@ list_of_uris links + ] @ props + ] + + let memory (links, m) : msg = + resource links "memory" @@ memory_parameters m + + let bil_value = Fn.compose strings Adt.strings_of_bil + + let insn ?target ?bil mem insn : insn = + let module Insn = Disasm.Basic.Insn in + dict @@ [ + "name", string @@ Insn.name insn; + "asm", string @@ Insn.asm insn; + "kinds", strings @@ Adt.strings_of_kinds @@ Insn.kinds insn; + "operands", strings @@ Adt.strings_of_ops + @@ Array.to_list @@ Insn.ops insn; + ] @ memory mem + @ optional_field "target" string target + @ optional_field "bil" bil_value bil + + let insns (insns : insn list) : msg = [ + "insns", `A insns; + ] + + let list_of_perm sec = + let (:=) v f = Option.some_if (f sec) v in + List.filter_opt Section.([ + "r" := is_readable; + "w" := is_writable; + "x" := is_executable; + ]) + + + let image ~secs (links,image) : msg = + let open Image in + let (/) = Fn.compose in + resource links "image" @@ + List.map ~f:(fun (r,v) -> r, v image) [ + "arch", string / Arch.to_string / arch; + "entry_point", string / string_of_addr / entry_point; + "addr_size", string / Int.to_string / Size.to_bits / addr_size; + "endian", string / Adt.string_of_endian / endian; + ] @ optional_field "file" string (filename image) @ [ + "sections", strings secs; + ] + + + + let symbol s mems : msg = + let open Symbol in [ + "symbol", dict @@ [ + "name", string @@ name s; + "is_function", bool @@ is_function s; + "is_debug", bool @@ is_debug s; + "chunks", list (fun (links,m) -> dict @@ [ + "links", strings @@ list_of_uris links; + ] @ memory_parameters m) + (List1.to_list mems); + ] + ] + + let section ~syms s mem : msg = [ + "section", dict @@ [ + "name", string @@ Section.name s; + "perm", strings @@ list_of_perm s; + "symbols", strings syms; + ] @ memory mem + ] + + let resources name rs : msg = [name, strings rs] + let sections = resources "sections" + let symbols = resources "symbols" + let images = resources "images" + let chunks = resources "chunks" + + let added id : msg = ["resource", string id] + +end + + +module Target = struct + type t = target + let arm insn ops : t = Adt.string_of_arm insn ops +end + +module Request = struct + type t = request with sexp_of + let (/) = Fn.compose + + let of_string s = + Or_error.try_with (fun () -> from_string s) |> function + | Ok s -> Ok s + | Error err -> errorf "Error: %s when parsing:\n%s\n" + (Error.to_string_hum err) s + + let pp_obj () obj = + Sexp.to_string_hum @@ sexp_of_value obj + + let pp_path () path = + String.concat ~sep:"." path + + let no_value path v = + errorf "Path '%a' not found in object %a" pp_path path pp_obj v + let protocol path msg v = + errorf "Failed to parse path %a : %s. Object: %a" + pp_path path msg pp_obj v + + + let parse pro v path = + try Ok (find v path |> pro) with + | Not_found -> no_value path v + | Parse_error (v,msg) -> protocol path msg v + | exn -> protocol path Exn.(to_string exn) v + + + let value = parse ident + let string = parse get_string + let string_opt = parse @@ Option.some / get_string + let arch = parse @@ uw / Arch.of_string / get_string + let addr = parse @@ ok_exn / Adt.Parse.word / get_string + let endian = parse @@ ok_exn / Adt.Parse.endian / get_string + let url = parse @@ Uri.of_string / get_string + + let nulls constr = + parse @@ ok_exn / all / get_list (constr / get_string) + + let kinds = nulls Adt.Parse.kind + let preds = nulls Adt.Parse.pred + + let accept_load_file f obj = + url obj ["url"] >>= fun uri -> + if mem obj ["loader"] + then string obj ["loader"] >>= fun loader -> + return (f ?loader:(Some loader) uri) + else return (f ?loader:None uri) + + let accept_load_chunk f obj = + url obj ["url"] >>= fun url -> + addr obj ["address"] >>= fun addr -> + arch obj ["arch"] >>= fun arch -> + endian obj ["endian"] >>= fun endian -> + return (f addr arch endian url) + + let optional obj name ~default get = + if mem obj [name] then get obj [name] else return default + + let accept_get_insns f obj = + string obj ["resource"] >>= fun id -> + optional obj "stop_conditions" preds ~default:[] >>= fun ks -> + optional obj "backend" string_opt ~default:None >>= fun backend -> + return (f ?backend ks id) + + let accept_init f obj = string obj ["version"] >>| f + + let accept_get_resource f obj = + try_with (fun () -> get_string obj) >>| f + + let accept obj + ~init ~load_file ~load_chunk ~get_insns ~get_resource = + let obj = Ezjsonm.value obj in + let init = accept_init init in + let load_file = accept_load_file load_file in + let load_chunk = accept_load_chunk load_chunk in + let get_insns = accept_get_insns get_insns in + let get_resource = accept_get_resource get_resource in + let (>>) path fn = if mem obj [path] + then Some (value obj [path] >>= fn) + else None in + let (||) = Option.merge ~f:(fun x y -> x) in + let chain = + "init" >> init || + "load_file" >> load_file || + "load_memory_chunk" >> load_chunk || + "get_insns" >> get_insns || + "get_resource" >> get_resource in + match chain with + | Some r -> r + | None -> errorf "One of the required properties is not found: %a" + pp_obj obj + + let id obj = + string (Ezjsonm.value obj) ["id"] + +end diff --git a/src/server/rpc.mli b/src/server/rpc.mli new file mode 100644 index 000000000..738ac1460 --- /dev/null +++ b/src/server/rpc.mli @@ -0,0 +1,86 @@ +open Core_kernel.Std +open Bap.Std + +type request +type response +type target +type id with sexp_of +type links = Uri.t List1.t +type 'a resource = links * 'a +type res_id = string +type res_ids = res_id list with sexp_of + + +module Id : Identifiable with type t := id + + +module Target : sig + type t = target + + val arm : Disasm.Arm.Insn.t -> Disasm.Arm.Op.t list -> t + +end + +module Request : sig + type t = request + + val of_string : string -> t Or_error.t + + val id : t -> id Or_error.t + + val accept : t -> + init:(string -> 'a) -> + load_file:(?loader:string -> Uri.t -> 'a) -> + load_chunk:(addr -> arch -> endian -> Uri.t -> 'a) -> + get_insns:(?backend:res_id -> Disasm.Basic.pred list -> res_id -> 'a) -> + get_resource:(res_id -> 'a) -> 'a Or_error.t +end + +module Response : sig + type t = response + type msg + type insn + type loader + type disassembler + type transport + + val to_string : t -> string + + + (** creates a response to the request with the [id] *) + val create : id -> msg -> t + + val error : [`Critical | `Error | `Warning] -> string -> msg + + val capabilities : version:string -> + transport list -> loader list -> disassembler list -> msg + + val image : secs:res_ids -> Image.t resource -> msg + + val section : syms:res_ids -> Section.t -> mem resource -> msg + + val symbol : Symbol.t -> mem resource List1.t -> msg + + val memory : mem resource -> msg + + val insn : ?target:target -> ?bil:stmt list -> + mem resource -> Disasm.Basic.full_insn -> insn + + val insns : insn list -> msg + + val images : res_id list -> msg + val sections : res_id list -> msg + val symbols : res_id list -> msg + val chunks : res_id list -> msg + val added : res_id -> msg + + val loader : name:string -> arch:arch -> format:string -> + [`symtab | `debug] list -> loader + + val disassembler : name:string -> arch:arch -> + kinds:Disasm.kind list -> has_name:bool -> has_ops:bool -> + has_target:bool -> has_bil:bool -> disassembler + + val transport : string -> transport + +end diff --git a/src/server/server.ml b/src/server/server.ml new file mode 100644 index 000000000..92787afc8 --- /dev/null +++ b/src/server/server.ml @@ -0,0 +1,252 @@ +open Core_kernel.Std +open Core_lwt.Std +open Lwt_log +open Bap.Std +open Rpc + +module Res = Manager +module Dis = Disasm.Basic + +let version = "0.1" + +module type Disasms = sig + val get + : ?cpu:string -> backend:string -> string + -> f:((Dis.asm, Dis.kinds) Dis.t -> 'a Lwt.Or_error.t) + -> 'a Lwt.Or_error.t +end + +module Disasms : Disasms = struct + type spec = { + target : string; + backend : string; + cpu : string option; + } with compare, sexp, fields + + let disasms = 8 + + module Spec = Hashable.Make(struct + type t = spec with compare, sexp + let hash = Hashtbl.hash + end) + module Pools = Spec.Table + + let ds = Pools.create () + + let rec get ?cpu ~backend target ~f = + let spec = {target; backend; cpu} in + match Pools.find ds spec with + | Some pool -> Lwt.Pool.use pool ~f:(fun dis -> Lwt.return dis >>=? f) + | None -> + let pool = Lwt.Pool.create disasms (fun () -> + Lwt.return @@ Dis.create ?cpu ~backend target >>=? fun dis -> + let dis = Dis.(dis |> store_asm |> store_kinds) in + Lwt.Or_error.return dis) in + Pools.add_exn ds ~key:spec ~data:pool; + get ?cpu ~backend target ~f +end + +let section = Lwt_log.Section.make "bap_server" + +let stub name = Lwt.Or_error.unimplemented name + +module Handlers(Ctxt : sig + val reply : Response.msg -> unit Lwt.t + end) = struct + open Ctxt + + let reply_error sev fmt = + Printf.ksprintf (fun msg -> Response.error sev msg |> reply) fmt + let error fmt = reply_error `Error fmt + let warning fmt = reply_error `Warning fmt + + let init version = + let ts = List.(Transport.registered_fetchers >>| + Response.transport) in + let kinds = Disasm.Insn.Kind.all in + let ds = + Response.disassembler + ~name:"llvm" ~arch:Arch.ARM ~kinds + ~has_name:true ~has_bil:true ~has_ops:true ~has_target:true :: + List.map [Arch.X86_32; Arch.X86_64] ~f:(fun arch -> + Response.disassembler ~name:"llvm" ~arch + ~has_name:true ~has_ops:true ~kinds + ~has_target:false ~has_bil:false) in + let ls = + List.map Arch.all ~f:(fun arch -> + Response.loader ~name:"bap_elf" ~arch + ~format:"ELF" [`debug]) in + let capabilities = Response.capabilities ~version ts ls ds in + let (%) x f = List.map x ~f:Manager.string_of_id |> f in + let images = Manager.images % Response.images in + let sections = Manager.sections % Response.sections in + let symbols = Manager.symbols % Response.symbols in + let chunks = Manager.chunks % Response.chunks in + Lwt.List.iter ~f:reply + [capabilities; images; sections; symbols; chunks] >>= + Lwt.Or_error.return + + + let reply_resource uri res = + res >>|? Res.string_of_id >>|? Response.added >>= function + | Ok msg -> reply msg >>= Lwt.Or_error.return + | Error err -> error "Failed to add resource from %s: %s" + (Uri.to_string uri) + (Error.to_string_hum err) >>= + Lwt.Or_error.return + + + let load_file ?loader uri = + Res.add_file ?backend:loader uri |> reply_resource uri + + let load_chunk addr arch endian uri : unit Lwt.Or_error.t = + Res.add_memory arch endian addr uri |> reply_resource uri + + + let get_mem mem : 'a Rpc.resource Lwt.Or_error.t = + Res.fetch_memory mem >>|? fun m -> + Res.links_of_memory mem, m + + + (** Runs disassembler on the specified memory *) + let disasm_mem lift dis ~stop_on (links,mem) : unit Lwt.t= + let invalid_mem mem = + warning "%s doesn't contain a valid instruction" + (Sexp.to_string (sexp_of_mem mem)) in + let emit_insns = Lwt.List.filter_map ~f:(function + | mem,None -> invalid_mem mem >>= fun () -> return None + | mem,Some insn -> + lift mem insn >>= fun (target,bil) -> + let resp = Response.insn ?target ?bil (links,mem) insn in + return (Some resp)) in + Dis.run dis mem ~return ~init:[] ~stop_on + ~stopped:(fun s _ -> + (if stop_on <> [] + then warning "Hit end of data before the stop condition" + else Lwt.return_unit) >>= fun () -> emit_insns (Dis.insns s)) + ~invalid:(fun s mem _ -> + invalid_mem mem >>= fun () -> emit_insns (Dis.insns s)) + ~hit:(fun s _ _ _ -> emit_insns (Dis.insns s)) + >>| Response.insns >>= reply + + type ('a,'b) lifter = + mem -> ('a,'b) Dis.insn -> (Target.t option * stmt list option) Lwt.t + + let arm_lifter : ('a,'b) lifter = fun mem insn -> + let open Disasm in + let arm = Arm.Insn.create insn in + let ops = Array.map (Dis.Insn.ops insn) ~f:Arm.Op.create |> + Array.to_list |> Option.all in + let target = Option.both arm ops |> + Option.map ~f:(Tuple2.uncurry Target.arm) in + match Arm.Lift.insn mem insn with + | Ok bil -> return (target, Some bil) + | Error err -> + warning "Failed to raise insn %s to BIL: %s" + (Sexp.to_string (Dis.Insn.sexp_of_t insn)) + (Error.to_string_hum err) >>= fun () -> + return (target, None) + + let no_lifter : ('a,'b) lifter = fun _ _ -> return (None,None) + + let lifter_of_arch : arch -> ('a,'b) lifter = function + | Arch.ARM -> arm_lifter + | _ -> no_lifter + + let get_insns ?(backend="llvm") stop_on res_id = + Lwt.return @@ Res.id_of_string res_id >>=? fun id -> + let mems_of_img img = + Image.sections img |> Table.to_sequence |> + Seq.map ~f:fst |> Seq.to_list in + let chunk r = Res.memory r |> get_mem >>|? List.return in + let section = chunk in + let symbol r = + Res.memory r |> List1.to_list |> + Lwt.Or_error.List.map ~f:get_mem in + let image r = Res.image r |> Res.fetch_image >>|? mems_of_img >>|? + List.map ~f:(fun mem -> Res.links r, mem) in + Res.with_resource id ~chunk ~symbol ~section ~image + >>=? fun ms -> + let get_arch r = Lwt.Or_error.return (Res.arch r) in + Res.with_resource id + ~chunk:get_arch ~symbol:get_arch + ~section:get_arch ~image:get_arch >>=? fun arch -> + let lifter = lifter_of_arch arch in + let target = Arch.(match backend, arch with + | "llvm", ARM -> Ok "arm" + | "llvm", X86_32 -> Ok "i386" + | "llvm", X86_64 -> Ok "x86_64" + | backend, arch -> + Or_error.errorf "Unsupported backend+arch combination: %s+%a" + backend Arch.pps arch) in + Lwt.return target >>=? Disasms.get ~backend ~f:(fun dis -> + Lwt.List.iter ms ~f:(disasm_mem lifter dis ~stop_on) >>= fun () -> + Lwt.Or_error.return ()) + + let get_resource res_id : 'a Lwt.Or_error.t = + Lwt.return @@ Res.id_of_string res_id >>=? fun id -> + Res.with_resource id + ~chunk:(fun r -> + get_mem (Res.memory r) >>|? Response.memory) + ~symbol:(fun r -> + let sym = Res.symbol r in + let mem = Res.memory r in + let m,ms = List1.hd mem, List1.tl mem |> List1.to_list in + get_mem m >>=? fun m -> + Lwt.Or_error.List.map ms ~how:`Parallel ~f:get_mem >>|? fun ms -> + Response.symbol sym (List1.create m ms)) + ~image:(fun r -> + let img = Res.image r in + let img_id = Res.id r in + let links = Res.links_of_image img in + let secs = Res.sections_of_image img_id |> + List.map ~f:Res.string_of_id in + Res.fetch_image img >>|? Tuple2.create links >>|? + Response.image ~secs) + ~section:(fun r -> + let sec = Res.section r in + let sec_id = Res.id r in + let syms = Res.symbols_of_section sec_id |> + List.map ~f:Res.string_of_id in + let mem = Res.memory r in + get_mem mem >>|? Response.section ~syms sec) >>=? fun msg -> + reply msg >>= Lwt.Or_error.return +end + +let accept reply (req : request) : unit Lwt.Or_error.t = + let module H = Handlers(struct let reply = reply end) in + let open H in + Request.accept req ~init ~load_file ~load_chunk + ~get_insns ~get_resource |> Lwt.return |> Lwt.Or_error.join + +exception Stopped + +let run_exn (requests, replies) : unit Lwt.t = + let reply x = + Lwt.Stream.Push_queue.push replies x >>= function + | Ok () -> Lwt.return_unit + | Error err -> + error_f "Service has finished with error: %s" + (Error.to_string_hum err) >>= fun () -> + Lwt.fail Stopped in + let handle_request req = + Request.id req |> Lwt.return >>=? fun id -> + let reply msg = reply (Response.create id msg) in + accept reply req in + Lwt.Stream.iter_s requests ~f:(fun req -> + handle_request req >>= function + | Ok () -> Lwt.return () + | Error err -> match Request.id req with + | Ok id -> + let str = Error.to_string_hum err in + let msg = Response.error `Warning str in + Response.create id msg |> reply + | Error err' -> + let err = Error.of_list [err; err'] in + let msg = Error.to_string_hum err in + warning_f ~section "Ignoring junk request: %s" msg) + + +let run (pipe : (request, response) Transport.pipe) + : unit Lwt.Or_error.t = + Lwt.Or_error.try_with (fun () -> run_exn pipe) diff --git a/src/server/server.mli b/src/server/server.mli new file mode 100644 index 000000000..66d5ea353 --- /dev/null +++ b/src/server/server.mli @@ -0,0 +1,7 @@ +open Core_kernel.Std +open Core_lwt.Std +open Bap.Std +open Rpc + +(** runs an instance of BAP server. *) +val run : (request,response) Transport.pipe -> unit Lwt.Or_error.t diff --git a/src/server/start_server.ml b/src/server/start_server.ml new file mode 100644 index 000000000..fd3ed4071 --- /dev/null +++ b/src/server/start_server.ml @@ -0,0 +1,45 @@ +open Core_kernel.Std +open Core_lwt.Std +open Lwt_log +open Bap.Std + +let section = Lwt_log.Section.make "bap_server" + +let handle_session (of_client,to_client) = + let to_client = Lwt.Stream.Push_queue.wrap + to_client ~f:Rpc.Response.to_string in + let of_client = Lwt.Stream.filter_map of_client ~f:(fun msg -> + match Rpc.Request.of_string msg with + | Ok req -> Some req + | Error err -> + ign_warning_f ~section "Failed with %s, when parsing '%s'" + (Error.to_string_hum err) msg; + None) in + Lwt.protect (fun () -> + Server.run (of_client,to_client) >>= function + | Ok () -> return () + | Error err -> error_f ~section "Session failed: %s" + (Error.to_string_hum err)) + ~finally:(fun () -> + Lwt.Stream.Push_queue.close to_client; + return ()) + + +let main () = + let new_connection connection = + Lwt.async (fun () -> handle_session connection) in + Transport.start_service ~new_connection () >>= function + | Ok () -> return () + | Error err -> + error_f ~section "Failed to start server: %s" + Error.(to_string_hum err) + +let () = + let module H = Http_service in + let module F = File_fetcher in + let module M = Mmap_client in + let module N = Mmap_server in + let module Z = Zmq_server in + let module C = Zmq_client in + let () = Plugins.load () in + Lwt.Main.run @@ main () diff --git a/src/server/start_server.mli b/src/server/start_server.mli new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/src/server/start_server.mli @@ -0,0 +1 @@ + diff --git a/src/server/transport.ml b/src/server/transport.ml new file mode 100644 index 000000000..49e176371 --- /dev/null +++ b/src/server/transport.ml @@ -0,0 +1,100 @@ +open Core_kernel.Std +open Core_lwt.Std +open Lwt_log + +let section = Section.make "Transport" + +type data = Bigsubstring.t +type ('a,'b) pipe = 'a Lwt.Stream.t * ('b Lwt.Stream.bounded_push) +type 'a list1 = 'a List1.t + + +type provider = + ?query:string -> ?file:string -> data -> Uri.t Lwt.Or_error.t + +type fetcher = Uri.t -> data Lwt.Or_error.t + +type connection = (string,string) pipe +type service = new_connection:(connection -> unit) -> unit Lwt.Or_error.t + + +type t = { + served : Uri.t Lwt_sequence.t; + providers : provider String.Table.t; + fetchers : fetcher String.Table.t; + services : service String.Table.t; +} with fields + +open Fields + + +let t = { + served = Lwt_sequence.create (); + providers = String.Table.create (); + fetchers = String.Table.create (); + services = String.Table.create (); +} + +let log level fmt err = + log_f ~level ~section (fmt ^^ ": %s") Error.(to_string_hum err) + + +let combine r = + let servers,failures = + List.partition_map r + ~f:(function Ok s -> `Fst s | Error e -> `Snd e ) in + match servers,failures with + | [],[] -> Lwt.Or_error.errorf "No providers are registered" + | [],errs -> + Error.(Error.tag (of_list errs) "All providers failed") |> + Lwt.Or_error.fail + | s::ss, errs -> + Lwt.List.iter errs ~f:(log Warning "provider failed") >>= fun () -> + List1.create s ss |> + Lwt.Or_error.return + + +let serve_resource ?query ?file data = + String.Table.data t.providers |> + Lwt.List.map ~how:`Parallel ~f:(fun create -> create ?query ?file data) + >>= combine + +let fetch_resource uri = + match Uri.scheme uri with + | None -> Lwt.Or_error.errorf "url '%s' doesn't contain scheme" + Uri.(to_string uri) + | Some scheme -> match String.Table.find t.fetchers scheme with + | None -> + Lwt.Or_error.errorf "Don't know how to fetch '%s' scheme" scheme + | Some fetch -> fetch uri + +let register what ~key ~data = + String.Table.add (Fieldslib.Field.get what t) ~key ~data |> function + | `Ok -> () + | `Duplicate -> + ign_warning_f ~section "Can register %s in %s: Duplicate entry." + key (Fieldslib.Field.name what) + +let register_resource_fetcher ~scheme fetcher = + register fetchers ~key:scheme ~data:fetcher + +let register_resource_server ~scheme ~create = + register providers ~key:scheme ~data:create + +let register_service ~name ~start = + register services ~key:name ~data:start + +let start_service ?name ~new_connection () = + match name with + | None -> + String.Table.data t.services |> + Lwt.List.map ~how:`Parallel ~f:(fun start -> start ~new_connection) + >>= combine >>=? fun (results : unit list1) -> + notice "All services finished" >>= + Lwt.Or_error.return + | Some name -> match String.Table.find t.services name with + | Some start -> start ~new_connection + | None -> Lwt.Or_error.errorf "Unknown service protocol: %s" name + +let registered_fetchers : string list = + String.Table.keys t.fetchers diff --git a/src/server/transport.mli b/src/server/transport.mli new file mode 100644 index 000000000..97de90e2e --- /dev/null +++ b/src/server/transport.mli @@ -0,0 +1,61 @@ +(** Provides transporting + + Hides underlying transporting protocols + +*) +open Core_kernel.Std +open Core_lwt.Std + +type ('a,'b) pipe = 'a Lwt.Stream.t * ('b Lwt.Stream.bounded_push) +type connection = (string,string) pipe +type 'a list1 = 'a List1.t + +(** if [name] is not provided then starts all services in parallel, + otherwise only the specified service will be started. + + To all started service the callback [new_connection] is passed. + It will be called any time a new connection is established. + + The connection contains of a stream of requests, paired with a + [reply] function. +*) +val start_service : ?name:string -> + new_connection:(connection -> unit) -> unit -> unit Lwt.Or_error.t + +val serve_resource : ?query:string -> ?file:string -> + Bigsubstring.t -> (Uri.t list1) Lwt.Or_error.t + +val fetch_resource : Uri.t -> Bigsubstring.t Lwt.Or_error.t + +val register_service : + name:string -> + start:(new_connection:(connection -> unit) -> unit Lwt.Or_error.t) -> unit + +val register_resource_fetcher : + scheme:string -> + (Uri.t -> Bigsubstring.t Lwt.Or_error.t) -> unit + +(** [register ~scheme ~create] a service provider. + Each time the resource with the specified [scheme] is going to be + served the provided function will be called with two parameters: + + [query] a suggested query string, to distinguish the served + resource from others. The [query] string is only a hint and it may + even doesn't required to be unique, so that it is completely a + server responsibility to provide a unique uri for the given data. + Also ther query is not expected to be properly encoded. + + [file] a name of a local file that is mapped to + [Bigsubstring.base]. The service provider may rely on the fact, + that if the filename provided, then it is mapped. + +*) + +val register_resource_server : + scheme:string -> + create:(?query:string -> + ?file: string -> + Bigsubstring.t -> Uri.t Lwt.Or_error.t) -> unit + + +val registered_fetchers : string list diff --git a/src/server/zmq_client.ml b/src/server/zmq_client.ml new file mode 100644 index 000000000..cdda210a7 --- /dev/null +++ b/src/server/zmq_client.ml @@ -0,0 +1,82 @@ +open Core_kernel.Std +open Core_lwt.Std +open Lwt_log + +module Chan = Lwt_zmq.Socket + +let section = Section.make "Zmq_client" + +let max_connections = 16 + +let ok = Lwt.Or_error.return +let err fmt = Lwt.Or_error.errorf fmt +let fail exn = Lwt.Or_error.fail (Error.of_exn exn) + +let host_of_uri uri = match Uri.scheme uri, Uri.host uri with + | Some "zmq+tcp", Some host -> ok ("tcp://"^host) + | Some "zmq+ipc", None -> ok ("ipc://" ^ Uri.path uri) + | Some "zmq+tcp", None -> err "zmq+tcp scheme expects a host" + | Some "zmq+ipc", Some _ -> err "zmq_ipc shouldn't have a host" + | Some scheme, _ -> err "wrong scheme: '%s'" scheme + | None, _ -> err "expected scheme" + +let query_of_uri uri = + match Uri.get_query_param uri "chunk" with + | Some query -> ok query + | None -> err "url '%s' should contain 'chunk' query" @@ + Uri.to_string uri + +let noraise f a = + try Lwt.Or_error.return (f a) + with exn -> fail exn + +let connect uri sock = + host_of_uri uri >>=? + noraise (ZMQ.Socket.connect sock) + +let send_request uri chan () = + query_of_uri uri >>=? fun q -> + Lwt.catch (fun () -> Chan.send chan q >>= ok) ~exn:fail + +let data_of_chunks chunks = + let len = List.fold chunks ~init:0 + ~f:(fun sum s -> sum + String.length s) in + match len with + | 0 -> err "empty chunk" + | len -> + let dst = Bigstring.create len in + let _n : int = List.fold chunks ~init:0 ~f:(fun pos src -> + let len = String.length src in + Bigstring.From_string.blit + ~src ~src_pos:0 + ~dst ~dst_pos:pos ~len; + pos+len) in + Bigsubstring.of_bigstring dst |> ok + +let recv chan () : Bigsubstring.t Lwt.Or_error.t = + Lwt.try_bind (fun () -> Chan.recv_all chan) + ~ok:data_of_chunks ~exn:fail + +let disconnect uri sock = + host_of_uri uri >>=? + noraise (ZMQ.Socket.disconnect sock) + +let fetch uri sock = + let chan = Chan.of_socket sock in + connect uri sock >>=? + send_request uri chan >>=? + recv chan >>=? fun data -> + disconnect uri sock >>=? + fun () -> ok data + + +let main () = + let ctxt = ZMQ.Context.create () in + let socks = Lwt_pool.create max_connections + (fun () -> Lwt.return ZMQ.Socket.(create ctxt req)) in + List.iter ["zmq+tcp"; "zmq+ipc"] ~f:(fun scheme -> + Transport.register_resource_fetcher ~scheme + (fun uri -> Lwt_pool.use socks (fetch uri))) + + +let () = main () diff --git a/src/server/zmq_client.mli b/src/server/zmq_client.mli new file mode 100644 index 000000000..978bbea31 --- /dev/null +++ b/src/server/zmq_client.mli @@ -0,0 +1 @@ +(* implements zmq client *) diff --git a/src/server/zmq_server.ml b/src/server/zmq_server.ml new file mode 100644 index 000000000..6d934cba4 --- /dev/null +++ b/src/server/zmq_server.ml @@ -0,0 +1,134 @@ +open Core_kernel.Std +open Core_lwt.Std +open Lwt_log + +module Channel = Lwt_zmq.Socket + +let section = Section.make "Zmq_server" + +(* We split big blobs of data into smaller chunks *) +let max_chunk_size = 1 lsl 20 + + +(** data that is served *) +type chunks = string list String.Table.t + + +(* the fun part with 0MQ is that we can start from simple REQ/REP + pattern and then extend it with DEALER/PROXY/ROUTER pattern + transparently. *) + +(** server socket *) +type socket = { + chan : [`Rep] Lwt_zmq.Socket.t ; (** comm channel *) + uri : Uri.t; (** url that we serve *) + scheme : string; (** scheme that we serve *) +} + +type supported_transport = [`tcp | `ipc] +with enumerate, sexp + +let endpoint_of_proto = function + | `tcp -> "tcp://*:*" + | `ipc -> "ipc://*" + +(* for tcp transport we need to substitute local specicific + address, like [0.0.0.0] with externally visible address. +*) +let prepare_endpoint_for_export proto endpoint = match proto with + | `ipc -> endpoint + | `tcp -> (* this assumes that host is properly configured *) + let host = Unix.gethostname () in + Uri.with_host endpoint (Some host) + +let string_of_proto = function + | `tcp -> "tcp" + | `ipc -> "ipc" + +let create_socket context proto : socket = + let socket = ZMQ.Socket.(create context rep) in + let endpoint = endpoint_of_proto proto in + ZMQ.Socket.bind socket endpoint; + let endpoint = ZMQ.Socket.get_last_endpoint socket in + let uri = Uri.of_string endpoint in + let scheme = Uri.scheme uri |> Option.map ~f:(sprintf "zmq+%s") in + let uri = Uri.with_scheme uri scheme |> + prepare_endpoint_for_export proto in + let chan = Channel.of_socket socket in + {chan; uri; scheme = uw scheme} + + +(* REQ/REP sockets should interleave requests and replies, and two + requests or two replies in a row will provoke an EFSM error. The + problem is that we cannot know for sure the current state of the + socket in a case of a failure, e.g., if we have failed on waiting + for request, do we still in a request state, or we moved somehow to + the reply state? + + The idea to recover is always send empty message in a case of + error. If we already replied, then nothing will happen and we will + remain in the same state, i.e. the process_request will proceed + normally, otherwise, we will send an empty message, indicating that + something goes wrong, and proceed to process_request. *) +let serve (chunks : chunks) sock : unit = + let chan = sock.chan in + let process_request () = + Channel.recv chan >>= fun req -> + match String.Table.find chunks req with + | None -> Lwt.failwithf "Bad query '%s'" req + | Some chunks -> Channel.send_all chan chunks in + let reply_nothing () = + Lwt.catch (fun () -> Channel.send chan "") + ~exn:(fun exn -> warning ~exn ~section "Failing on resync") in + let rec run () = + Lwt.try_bind process_request ~ok:run ~exn:(fun exn -> + warning ~exn ~section "Failed to serve request" >>= + reply_nothing >>= run) in + Lwt.async run + +let insert_chunk next table (key : string) (data) : string = + let rec loop key = + match String.Table.add table ~key ~data with + | `Ok -> key + | `Duplicate -> loop (key ^ next ()) in + loop key + +let chunk_of_data data : string list = + let pos = Bigsubstring.pos data in + let len = Bigsubstring.length data in + let data = Bigsubstring.base data in + let size = min len max_chunk_size in + let n = (size + len - 1) / size in + List.init n ~f:(fun i -> + let src_pos = pos + i * size in + let left = pos + len - src_pos in + let size = min size left in + let buff = String.create size in + Bigstring.To_string.blit ~len:size + ~src:data ~src_pos + ~dst:buff ~dst_pos:0; + buff) + +let main () = + let id = ref 0L in + let next () = Int64.incr id; Int64.to_string !id in + let context = ZMQ.Context.create () in + let chunks : chunks = String.Table.create () in + let insert_chunk = insert_chunk next chunks in + let sockets = all_of_supported_transport |> + List.map ~f:(create_socket context) in + List.iter sockets ~f:(serve chunks); + + let create uri ?query ?file data = + let chunk = chunk_of_data data in + let query = Option.value query ~default:"" in + let query = insert_chunk query chunk in + Lwt.Or_error.return (Uri.with_query' uri ["chunk", query]) in + + + List.iter sockets ~f:(fun socket -> + Transport.register_resource_server + ~scheme:socket.scheme + ~create:(create socket.uri)) + +let () = main () diff --git a/src/server/zmq_server.mli b/src/server/zmq_server.mli new file mode 100644 index 000000000..9f0c763f8 --- /dev/null +++ b/src/server/zmq_server.mli @@ -0,0 +1 @@ +(* Implements ZMQ Server transporting *)