# NOTE(review): disasm_match() also lives in this part of the file, but its
# body is not visible here, so it is intentionally left untouched.

#
# Type define the argument data type;
#   S -> string
#   I -> num
#
def get_arg(ea, arg_num, type):
    """Resolve the arg_num-th argument pushed before the instruction at ea.

    Walks backwards from ea one instruction head at a time and counts "push"
    instructions; the first push found is argument 1.

    ea      -- address of the call/jmp instruction to inspect.
    arg_num -- 1-based index of the wanted push, nearest push first.
    type    -- 'S' to follow the push's first data reference and read a
               string, 'I' to parse the pushed operand as a number.

    Returns the string or number, or BADADDR when the value cannot be
    resolved statically (no data reference, operand is not an immediate,
    unknown type selector, or no more instructions to walk back over).
    """
    arg_index = 1

    while True:
        ea = PrevNotTail(ea)

        # nothing left to walk back over; without this check the loop would
        # spin forever when fewer than arg_num pushes precede the call.
        if ea == BADADDR:
            return BADADDR

        if GetMnem(ea) != "push":
            continue

        if arg_index != arg_num:
            arg_index += 1
            continue

        # string parameter
        if type == 'S':
            dref = Dfirst(ea)
            if dref == BADADDR:
                return dref
            return read_string(dref)

        # number parameter
        if type == 'I':
            return read_size(ea)

        # unknown selector: report "unresolved" instead of looping forever.
        return BADADDR


def instruction_match(ea, mnem=None, op1=None, op2=None, op3=None):
    """Return True when the instruction at ea matches every given field.

    mnem is compared with GetMnem(ea); op1..op3 are compared with the text of
    operands 0..2. Fields left as None (or empty) are not checked.
    """
    if mnem and mnem != GetMnem(ea):
        return False

    for index, expected in enumerate((op1, op2, op3)):
        if expected and expected != GetOpnd(ea, index):
            return False

    return True


def read_size(ea):
    """Parse operand 0 of the instruction at ea as an immediate number.

    Accepts the "<hex digits>h" form as well as a bare run of decimal digits
    (small immediates carry no "h" suffix). Returns the value as an int, or
    BADADDR when the operand text is not a plain number (register, memory
    reference, symbol name, ...).
    """
    s = GetOpnd(ea, 0)

    if s.endswith("h"):
        # a symbol such as "offset path" also ends in "h"; treat anything
        # that is not pure hex as unresolved rather than raising.
        try:
            return int(s[:-1], 16)
        except ValueError:
            return BADADDR

    if s.isdigit():
        return int(s)

    return BADADDR


def read_string(ea):
    """Read the string item at ea according to the type reported for it.

    ASCSTR_C       -- bytes up to the NUL terminator, with CR and LF dropped.
    ASCSTR_UNICODE -- 16-bit units up to a zero unit, prefixed with
                      "unicode string: "; units outside 7-bit ASCII are
                      rendered as a literal \\uXXXX escape so the result
                      stays a plain str.
    ASCSTR_PASCAL  -- a length byte followed by that many characters,
                      prefixed with "pascal string: ".

    Returns BADADDR for any other string type, which callers already treat
    as "must be resolved at runtime".
    """
    string_type = GetStringType(ea)

    if string_type == ASCSTR_C:
        chars = []
        while True:
            byte = Byte(ea)
            if byte == 0:
                break
            # line breaks are stripped so a finding stays on one output line.
            if byte not in (0x0d, 0x0a):
                chars.append(chr(byte))
            ea += 1
        return "".join(chars)

    if string_type == ASCSTR_UNICODE:
        chars = []
        while True:
            word = Word(ea)
            if word == 0:
                break
            if word < 0x80:
                chars.append(chr(word))
            else:
                chars.append("\\u%04x" % word)
            ea += 2
        return "unicode string: " + "".join(chars)

    if string_type == ASCSTR_PASCAL:
        length = Byte(ea)
        # the characters start right after the length byte.
        chars = [chr(Byte(ea + i)) for i in range(1, length + 1)]
        return "pascal string: " + "".join(chars)

    return BADADDR


def token_count(format_string):
    """Return the number of '%' characters minus the number of "%%" pairs.

    Callers pass a string whose '%' characters were already doubled for
    logging, so the result equals the '%' count of the original string.
    """
    return format_string.count("%") - format_string.count("%%")
peek_file  = open(peek_filename, "w+")

#ida_log   = lambda x: None
ida_log    = lambda x: sys.stdout.write("RECON> " + x + "\n")
write_line = lambda x: peek_file.write("%s\n" % x)

window = state = found_ea = processed = 0

# format of functions dictionary is function name: {argument kind: argument number}
#   "fs_arg"   -> number of the format string argument
#   "cmd_name" -> number of the command string argument
#   "size"     -> number of the size argument
# an empty dictionary means the call is reported without inspecting arguments.
# every name is also looked up with each entry of 'prefixes' and 'suffixes'.
# fill this from google search: +run-time.library +security.note site:msdn.microsoft.com
# [cm] my own google dork: "Security Warning" intitle:function site:msdn.microsoft.com
#                          "Security note" crt site:msdn.microsoft.com
functions = \
{
    # insecure by default
    "gets"                   : {},
    "getws"                  : {},

    # exec functions
    "execl"                  : {"cmd_name": 1},
    "wexecl"                 : {"cmd_name": 1},
    "execv"                  : {"cmd_name": 1},
    "wexecv"                 : {"cmd_name": 1},
    "WinExec"                : {"cmd_name": 1},
    "ShellExecute"           : {},
    "ShellExecuteEx"         : {},
    "CreateProcess"          : {"cmd_name": 2},
    "CreateProcessAsUser"    : {"cmd_name": 2},
    "CreateProcessWithLogon" : {"cmd_name": 2},

    # memory copy functions
    "memcpy"                 : {"size": 3},
    "wmemcpy"                : {"size": 3},
    "VirtualAllocEx"         : {"size": 3},
    "VirtualAlloc"           : {"size": 2},
    # NOTE(review): VirtualAllocEx above uses 3 for what looks like the same
    # parameter layout - confirm this index against the API documentation.
    "VirtualAllocExNuma"     : {"size": 2},
    "LocalAlloc"             : {"size": 2},
    "HeapAlloc"              : {"size": 3},
    "CopyMemory"             : {"size": 3},

    # string functions
    "lstrcat"                : {},
    "lstrcpy"                : {},
    "lstrlen"                : {},
    "mbscat"                 : {},
    "mbscpy"                 : {},
    "mbsncpy"                : {"size": 3},
    "mbsnbcpy"               : {"size": 3},
    "mbsncat"                : {},
    "mbsstr_l"               : {},
    "RtlInitString"          : {},
    "SHAnsiToAnsi"           : {"size": 3},
    "SHAnsiToUnicode"        : {"size": 3},
    "SHUnicodeToUnicode"     : {"size": 3},
    "SHUnicodeToAnsi"        : {"size": 3},
    "strcpy"                 : {},
    "strncpy"                : {"size": 3},
    "strcat"                 : {},
    "StrCatBuf"              : {},
    "StrCatChain"            : {},
    "StrCpyN"                : {},
    "strncat"                : {"size": 3},
    "strstr"                 : {},
    "wcscat"                 : {},
    "wcsstr"                 : {},
    "wcsncat"                : {},
    "wcscpy"                 : {},
    "wcsncpy"                : {"size": 3},
    "CompareStringWrapW"     : {},
    "MultiByteToWideChar"    : {},
    "WideCharToMultiByte"    : {},

    # format strings
    "printf"                 : {"fs_arg": 1},
    "wprintf"                : {"fs_arg": 1},
    "snprintf"               : {"fs_arg": 3},
    "snwprintf"              : {"fs_arg": 3},
    "scanf"                  : {"fs_arg": 1},
    "sprintf"                : {"fs_arg": 2},
    "sscanf"                 : {"fs_arg": 2},
    "swprintf"               : {"fs_arg": 2},
    "swscanf"                : {"fs_arg": 2},
    "vfprintf"               : {"fs_arg": 2},
    "vfwprintf"              : {"fs_arg": 2},
    "vprintf"                : {"fs_arg": 1},
    "vwprintf"               : {"fs_arg": 1},
    "vsprintf"               : {"fs_arg": 2},
    "vsnprintf"              : {"fs_arg": 3},
    "vsnwprintf"             : {"fs_arg": 3},
    "vswprintf"              : {"fs_arg": 2},
    "wsprintf"               : {"fs_arg": 2},
    "wvsprintf"              : {"fs_arg": 2},
    "wvnsprintf"             : {"fs_arg": 3},
    "wnsprintf"              : {"fs_arg": 3},
}

prefixes = ["", "_", "__imp_", "__imp__"]
suffixes = ["", "A", "W"]

# the output file is closed even when the scan below raises.
try:
    ida_log("searching for inline memcpy()'s and sign extended moves (movsx).")
    for ea in Heads(MinEA(), MaxEA()):
        processed += 1

        # rep movsd : rep movsd [edi], [esi] : eax = memcpy(edi, esi, ecx)
        # rep stosd : rep stosd [edi], eax   : eax = memset(edi, eax, ecx)
        # rep scasd : rep scasd [edi]        : eax = strchr(edi, eax)

        # we don't care about instructions within known library routines.
        # NOTE(review): presumably GetFunctionFlags() yields -1 for heads that
        # belong to no function, which would also be skipped here - confirm.
        if GetFunctionFlags(ea) & FUNC_LIB:
            continue

        # the inline memcpy pattern is tracked as a small state machine:
        #   shr ecx, 2 -> rep movsd -> and ecx, 3 -> rep movsb
        if disasm_match(ea, "movsx"):
            ida_log("%08x: found sign extended move" % ea)
            set_cmt(ea, "sign extended move", False)
            write_line("%08x:sign extended move" % ea)
        elif state == 0 and instruction_match(ea, "shr", "ecx", "2"):
            state  = 1
            window = 0
        elif state == 1 and disasm_match(ea, "rep movsd"):
            state    = 2
            window   = 0
            found_ea = ea
        elif state == 2 and instruction_match(ea, "and", "ecx", "3"):
            state  = 3
            window = 0
        elif state == 3 and disasm_match(ea, "rep movsb"):
            ida_log("%08x: found memcpy" % found_ea)
            set_cmt(ea, "inline memcpy()", False)
            write_line("%08x: inline memcpy" % found_ea)
            found_ea = state = window = 0

        # give up on a partial match once more than 15 heads have gone by
        # since the last state transition.
        if window > 15:
            state = window = 0

        if state != 0:
            window += 1

    ida_log("done. looked at %d heads." % processed)
    ida_log("looking for potentially interesting API calls now.")

    # for every function we are interested in.
    for func in functions:
        spec = functions[func]

        # enumerate all possible prefixes.
        for prefix in prefixes:

            # enumerate all possible suffixes.
            for suffix in suffixes:
                full_name = prefix + func + suffix
                location  = LocByName(full_name)

                if location == BADADDR:
                    continue

                ida_log("enumerating xrefs to %s" % full_name)

                for xref in list(CodeRefsTo(location, True)) + list(DataRefsTo(location)):
                    if GetMnem(xref) not in ("call", "jmp"):
                        continue

                    ###
                    ### peek a call with format string arguments
                    ###
                    if "fs_arg" in spec:
                        fs_arg = spec["fs_arg"]

                        format_string = get_arg(xref, fs_arg, 'S')

                        # format string must be resolved at runtime.
                        if format_string == BADADDR:
                            ida_log("%08x '%s' format string must be resolved at runtime" % (xref, full_name))
                            write_line("%08x: %s" % (xref, full_name))

                        # XXX - we have to escape '%' chars here otherwise 'print', which wraps around 'Message()' will
                        #       incorrectly dereference from the stack and potentially crash the script.
                        else:
                            format_string = str(format_string).replace("%", "%%")

                            # format string found.
                            if format_string.find("%s") != -1:
                                format_string = format_string.replace("\n", "")
                                ida_log("%08x favorable format string found '%s'" % (xref, format_string))
                                write_line("%08x:%d:%s %s" % (xref, token_count(format_string), full_name, format_string))

                    #
                    # get cmd_name string
                    #
                    elif "cmd_name" in spec:
                        cmd_name = spec["cmd_name"]

                        cmd = get_arg(xref, cmd_name, 'S')

                        if cmd == BADADDR:
                            ida_log("%08x '%s' command must be resolved at runtime" % (xref, full_name))
                            write_line("%08x: %s" % (xref, full_name))
                        else:
                            # the command is a string, so it is formatted with %s.
                            ida_log("%08x found call to '%s' with static command: %s" % (xref, full_name, cmd))
                            write_line("%08x: %s command: %s" % (xref, full_name, cmd))

                    #
                    # get static size value
                    #
                    elif "size" in spec:
                        size_arg = spec["size"]

                        size = get_arg(xref, size_arg, 'I')

                        if size == BADADDR:
                            ida_log("%08x '%s' size must be resolved at runtime" % (xref, full_name))
                            write_line("%08x: %s" % (xref, full_name))
                        else:
                            ida_log("%08x found call to '%s' with static size: %d" % (xref, full_name, size))
                            write_line("%08x: %s size: %d" % (xref, full_name, size))

                    ###
                    ### peek a call without inspected arguments
                    ###
                    else:
                        ida_log("%08x found call to '%s'" % (xref, full_name))
                        write_line("%08x: %s" % (xref, full_name))
finally:
    peek_file.close()

ida_log("done.")
# NOTE(review): disasm_match() also lives in this part of the file, but its
# body is not visible here, so it is intentionally left untouched.

#
# Type define the argument data type;
#   S -> string
#   I -> num
#
def get_arg(ea, arg_num, type):
    """Resolve the arg_num-th argument pushed before the instruction at ea.

    Walks backwards from ea one instruction head at a time and counts "push"
    instructions; the first push found is argument 1.

    ea      -- address of the call/jmp instruction to inspect.
    arg_num -- 1-based index of the wanted push, nearest push first.
    type    -- 'S' to follow the push's first data reference and read a
               string, 'I' to parse the pushed operand as a number.

    Returns the string or number, or BADADDR when the value cannot be
    resolved statically (no data reference, operand is not an immediate,
    unknown type selector, or no more instructions to walk back over).
    """
    arg_index = 1

    while True:
        ea = PrevNotTail(ea)

        # nothing left to walk back over; without this check the loop would
        # spin forever when fewer than arg_num pushes precede the call.
        if ea == BADADDR:
            return BADADDR

        if GetMnem(ea) != "push":
            continue

        if arg_index != arg_num:
            arg_index += 1
            continue

        # string parameter
        if type == 'S':
            dref = Dfirst(ea)
            if dref == BADADDR:
                return dref
            return read_string(dref)

        # number parameter
        if type == 'I':
            return read_size(ea)

        # unknown selector: report "unresolved" instead of looping forever.
        return BADADDR


def instruction_match(ea, mnem=None, op1=None, op2=None, op3=None):
    """Return True when the instruction at ea matches every given field.

    mnem is compared with GetMnem(ea); op1..op3 are compared with the text of
    operands 0..2. Fields left as None (or empty) are not checked.
    """
    if mnem and mnem != GetMnem(ea):
        return False

    for index, expected in enumerate((op1, op2, op3)):
        if expected and expected != GetOpnd(ea, index):
            return False

    return True


def read_size(ea):
    """Parse operand 0 of the instruction at ea as an immediate number.

    Accepts the "<hex digits>h" form as well as a bare run of decimal digits
    (small immediates carry no "h" suffix). Returns the value as an int, or
    BADADDR when the operand text is not a plain number (register, memory
    reference, symbol name, ...).
    """
    s = GetOpnd(ea, 0)

    if s.endswith("h"):
        # a symbol such as "offset path" also ends in "h"; treat anything
        # that is not pure hex as unresolved rather than raising.
        try:
            return int(s[:-1], 16)
        except ValueError:
            return BADADDR

    if s.isdigit():
        return int(s)

    return BADADDR


def read_string(ea):
    """Read the string item at ea according to the type reported for it.

    ASCSTR_C       -- bytes up to the NUL terminator, with CR and LF dropped.
    ASCSTR_UNICODE -- 16-bit units up to a zero unit, prefixed with
                      "unicode string: "; units outside 7-bit ASCII are
                      rendered as a literal \\uXXXX escape so the result
                      stays a plain str.
    ASCSTR_PASCAL  -- a length byte followed by that many characters,
                      prefixed with "pascal string: ".

    Returns BADADDR for any other string type, which callers already treat
    as "must be resolved at runtime".
    """
    string_type = GetStringType(ea)

    if string_type == ASCSTR_C:
        chars = []
        while True:
            byte = Byte(ea)
            if byte == 0:
                break
            # line breaks are stripped so a finding stays on one output line.
            if byte not in (0x0d, 0x0a):
                chars.append(chr(byte))
            ea += 1
        return "".join(chars)

    if string_type == ASCSTR_UNICODE:
        chars = []
        while True:
            word = Word(ea)
            if word == 0:
                break
            if word < 0x80:
                chars.append(chr(word))
            else:
                chars.append("\\u%04x" % word)
            ea += 2
        return "unicode string: " + "".join(chars)

    if string_type == ASCSTR_PASCAL:
        length = Byte(ea)
        # the characters start right after the length byte.
        chars = [chr(Byte(ea + i)) for i in range(1, length + 1)]
        return "pascal string: " + "".join(chars)

    return BADADDR


def token_count(format_string):
    """Return the number of '%' characters minus the number of "%%" pairs.

    Callers pass a string whose '%' characters were already doubled for
    logging, so the result equals the '%' count of the original string.
    """
    return format_string.count("%") - format_string.count("%%")


def ida_log(message):
    """Print message to the output window with a "RECON> " prefix."""
    # a parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("RECON> " + message)
def add_recon(mysql, module_id, offset, stack_depth, reason, status):
    """Insert one recon point into the pp_recon table.

    mysql       -- open database connection (must provide cursor()).
    module_id   -- id of the pp_modules row the point belongs to.
    offset      -- offset of the point from the module base address.
    stack_depth -- value stored in the stack_depth column.
    reason      -- free-form description; may contain text read from the
                   analysed binary.
    status      -- value stored in the status column.

    Returns True on success. On a MySQLdb error the error and the statement
    are logged and False is returned. The cursor is closed on both paths.
    """
    # values are bound by the driver instead of being pasted into the SQL
    # text, so quotes and backslashes in 'reason' need no manual escaping.
    sql = ("INSERT INTO pp_recon"
           " SET module_id = %s,"
           " offset = %s,"
           " stack_depth = %s,"
           " reason = %s,"
           " status = %s,"
           " notes = ''")
    params = (module_id, offset, stack_depth, reason, status)

    cursor = mysql.cursor()

    try:
        cursor.execute(sql, params)
    except MySQLdb.Error as e:
        ida_log("MySQL error %d: %s" % (e.args[0], e.args[1]))
        # '%' is doubled because the logged text may be interpreted as a
        # format string further down (see the XXX note in _peek_call).
        ida_log(("%s <- %r" % (sql, params)).replace("%", "%%"))
        return False
    finally:
        cursor.close()

    return True


########################################################################################################################
### Meat and Potatoes
###

def _recon_functions():
    """Return the table of interesting API names and the argument to inspect.

    Maps function name -> {argument kind: argument number}, where the kind is
    "fs_arg" (format string), "cmd_name" (command string) or "size". An empty
    dictionary means the call is recorded without inspecting arguments.
    """
    # fill this from google search: +run-time.library +security.note site:msdn.microsoft.com
    # [cm] my own google dork: "Security Warning" intitle:function site:msdn.microsoft.com
    #                          "Security note" crt site:msdn.microsoft.com
    return {
        # insecure by default
        "gets"                   : {},
        "getws"                  : {},

        # exec functions
        "execl"                  : {"cmd_name": 1},
        "wexecl"                 : {"cmd_name": 1},
        "execv"                  : {"cmd_name": 1},
        "wexecv"                 : {"cmd_name": 1},
        "WinExec"                : {"cmd_name": 1},
        "ShellExecute"           : {},
        "ShellExecuteEx"         : {},
        "CreateProcess"          : {"cmd_name": 2},
        "CreateProcessAsUser"    : {"cmd_name": 2},
        "CreateProcessWithLogon" : {"cmd_name": 2},

        # memory copy functions
        "memcpy"                 : {"size": 3},
        "wmemcpy"                : {"size": 3},
        "VirtualAllocEx"         : {"size": 3},
        "VirtualAlloc"           : {"size": 2},
        # NOTE(review): VirtualAllocEx above uses 3 for what looks like the
        # same parameter layout - confirm this index against the API docs.
        "VirtualAllocExNuma"     : {"size": 2},
        "LocalAlloc"             : {"size": 2},
        "HeapAlloc"              : {"size": 3},
        "CopyMemory"             : {"size": 3},

        # string functions
        "lstrcat"                : {},
        "lstrcpy"                : {},
        "lstrlen"                : {},
        "mbscat"                 : {},
        "mbscpy"                 : {},
        "mbsncpy"                : {"size": 3},
        "mbsnbcpy"               : {"size": 3},
        "mbsncat"                : {},
        "mbsstr_l"               : {},
        "RtlInitString"          : {},
        "SHAnsiToAnsi"           : {"size": 3},
        "SHAnsiToUnicode"        : {"size": 3},
        "SHUnicodeToUnicode"     : {"size": 3},
        "SHUnicodeToAnsi"        : {"size": 3},
        "strcpy"                 : {},
        "strncpy"                : {"size": 3},
        "strcat"                 : {},
        "StrCatBuf"              : {},
        "StrCatChain"            : {},
        "StrCpyN"                : {},
        "strncat"                : {"size": 3},
        "strstr"                 : {},
        "wcscat"                 : {},
        "wcsstr"                 : {},
        "wcsncat"                : {},
        "wcscpy"                 : {},
        "wcsncpy"                : {"size": 3},
        "CompareStringWrapW"     : {},
        "MultiByteToWideChar"    : {},
        "WideCharToMultiByte"    : {},

        # format strings
        "printf"                 : {"fs_arg": 1},
        "wprintf"                : {"fs_arg": 1},
        "snprintf"               : {"fs_arg": 3},
        "snwprintf"              : {"fs_arg": 3},
        "scanf"                  : {"fs_arg": 1},
        "sprintf"                : {"fs_arg": 2},
        "sscanf"                 : {"fs_arg": 2},
        "swprintf"               : {"fs_arg": 2},
        "swscanf"                : {"fs_arg": 2},
        "vfprintf"               : {"fs_arg": 2},
        "vfwprintf"              : {"fs_arg": 2},
        "vprintf"                : {"fs_arg": 1},
        "vwprintf"               : {"fs_arg": 1},
        "vsprintf"               : {"fs_arg": 2},
        "vsnprintf"              : {"fs_arg": 3},
        "vsnwprintf"             : {"fs_arg": 3},
        "vswprintf"              : {"fs_arg": 2},
        "wsprintf"               : {"fs_arg": 2},
        "wvsprintf"              : {"fs_arg": 2},
        "wvnsprintf"             : {"fs_arg": 3},
        "wnsprintf"              : {"fs_arg": 3},
    }


def _peek_call(xref, full_name, spec):
    """Log the call at xref and return the (stack_depth, reason) to record.

    spec is the table entry for the called function. Returns None when the
    call should not be recorded (a resolved format string without "%s").
    """
    ###
    ### peek a call with format string arguments
    ###
    if "fs_arg" in spec:
        fs_arg = spec["fs_arg"]

        format_string = get_arg(xref, fs_arg, 'S')

        # format string must be resolved at runtime.
        if format_string == BADADDR:
            ida_log("%08x '%s' format string must be resolved at runtime" % (xref, full_name))
            return 0, full_name

        # XXX - we have to escape '%' chars here otherwise 'print', which wraps around 'Message()' will
        #       incorrectly dereference from the stack and potentially crash the script.
        format_string = str(format_string).replace("%", "%%")

        if format_string.find("%s") == -1:
            return None

        # format string found.
        format_string = format_string.replace("\n", "")
        ida_log("%08x favorable format string found '%s'" % (xref, format_string))
        return token_count(format_string) + fs_arg, "%s - fs: %s" % (full_name, format_string)

    #
    # get cmd_name string
    #
    if "cmd_name" in spec:
        cmd_name = spec["cmd_name"]

        cmd = get_arg(xref, cmd_name, 'S')

        if cmd == BADADDR:
            ida_log("%08x '%s' command must be resolved at runtime" % (xref, full_name))
            return cmd_name, full_name

        # the command is a string: log it with %s and record the command
        # text itself rather than its argument number.
        ida_log("%08x found call to '%s' with static command: %s" % (xref, full_name, cmd))
        return cmd_name, "%s - cmd: %s" % (full_name, cmd)

    #
    # get static size value
    #
    if "size" in spec:
        size_arg = spec["size"]

        size = get_arg(xref, size_arg, 'I')

        if size == BADADDR:
            ida_log("%08x '%s' size must be resolved at runtime" % (xref, full_name))
            return size_arg, full_name

        ida_log("%08x found call to '%s' with static size: %d" % (xref, full_name, size))
        return size_arg, "%s - size: %d" % (full_name, size)

    ###
    ### peek a call without inspected arguments
    ###
    ida_log("%08x found call to '%s'" % (xref, full_name))
    return 0, "%s" % (full_name)


def meat_and_potatoes(mysql):
    """Scan the current database and store every finding through mysql.

    Creates a pp_modules row for the input file, then records sign extended
    moves, inline memcpy() sequences and calls to the functions listed by
    _recon_functions(). Returns None; the scan stops at the first database
    error.
    """
    # init some local vars.
    window = state = found_ea = processed = 0

    # calculate the current modules base address.
    # XXX - cheap hack, the subtraction is for the PE header size.
    base_address = MinEA() - 0x1000

    # create a database entry for the current module.
    sql    = "INSERT INTO pp_modules SET name = %s, base = %s, notes = ''"
    params = (GetInputFile(), base_address)
    cursor = mysql.cursor()

    try:
        cursor.execute(sql, params)

        # save the module ID we just created.
        module_id = cursor.lastrowid
    except MySQLdb.Error as e:
        ida_log("MySQL error %d: %s" % (e.args[0], e.args[1]))
        ida_log(("%s <- %r" % (sql, params)).replace("%", "%%"))
        return
    finally:
        cursor.close()

    ida_log("searching for inline memcpy()'s and sign extended moves (movsx).")
    for ea in Heads(MinEA(), MaxEA()):
        processed += 1

        # we don't care about instructions within known library routines.
        # NOTE(review): presumably GetFunctionFlags() yields -1 for heads that
        # belong to no function, which would also be skipped here - confirm.
        if GetFunctionFlags(ea) & FUNC_LIB:
            continue

        if disasm_match(ea, "movsx"):
            ida_log("%08x: found sign extended move" % ea)

            if not add_recon(mysql, module_id, ea - base_address, 3, "sign extended mov", "new"):
                return

        # the inline memcpy pattern is tracked as a small state machine:
        #   shr ecx, 2 -> rep movsd -> and ecx, 3 -> rep movsb
        if state == 0 and instruction_match(ea, "shr", "ecx", "2"):
            # this is a good place to watch the inline strcpy since it gets executed only once and we can see the
            # original size value prior to division by 4.
            state    = 1
            window   = 0
            found_ea = ea

        elif state == 1 and disasm_match(ea, "rep movsd"):
            state  = 2
            window = 0

        elif state == 2 and instruction_match(ea, "and", "ecx", "3"):
            state  = 3
            window = 0

        elif state == 3 and disasm_match(ea, "rep movsb"):
            ida_log("%08x: found memcpy" % found_ea)
            set_cmt(found_ea, "inline memcpy()", False)

            if not add_recon(mysql, module_id, found_ea - base_address, 5, "inline memcpy", "new"):
                return

            found_ea = state = window = 0

        # give up on a partial match once more than 15 heads have gone by
        # since the last state transition.
        if window > 15:
            state = window = 0

        if state != 0:
            window += 1

    ida_log("done. looked at %d heads." % processed)
    ida_log("looking for potentially interesting API calls now.")

    functions = _recon_functions()
    prefixes  = ["", "_", "__imp_", "__imp__"]
    suffixes  = ["", "A", "W"]

    # for every function we are interested in.
    for func in functions:
        spec = functions[func]

        # enumerate all possible prefixes.
        for prefix in prefixes:

            # enumerate all possible suffixes.
            for suffix in suffixes:
                full_name = prefix + func + suffix
                location  = LocByName(full_name)

                if location == BADADDR:
                    continue

                ida_log("enumerating xrefs to %s" % full_name)

                for xref in list(CodeRefsTo(location, True)) + list(DataRefsTo(location)):
                    if GetMnem(xref) not in ("call", "jmp"):
                        continue

                    finding = _peek_call(xref, full_name, spec)

                    if finding is None:
                        continue

                    stack_depth, reason = finding

                    if not add_recon(mysql, module_id, xref - base_address, stack_depth, reason, "new"):
                        ida_log("error")
                        return

    ida_log("done.")
########################################################################################################################
### MySQL Connectivity
###

def mysql_connect():
    """Prompt for MySQL credentials and connect to the "paimei" database.

    Returns the open connection, -1 when one of the prompts was cancelled
    (or the host / user name was left empty), or None when the connection
    attempt failed (the failure is logged here).
    """
    # fill these in to skip the corresponding prompt.
    mysql_host = None
    mysql_user = None
    mysql_pass = None

    if not mysql_host:
        mysql_host = AskStr("localhost", "MySQL IP address or hostname:")

    if not mysql_host:
        return -1

    if not mysql_user:
        mysql_user = AskStr("root", "MySQL username:")

    if not mysql_user:
        return -1

    if mysql_pass is None:
        mysql_pass = AskStr("", "MySQL password:")

    # the prompt defaults to an empty password, so an empty answer is a
    # valid value here.
    # NOTE(review): assumes AskStr() returns None on cancel - confirm.
    if mysql_pass is None:
        return -1

    # connect to mysql
    try:
        mysql = MySQLdb.connect(host=mysql_host, user=mysql_user, passwd=mysql_pass, db="paimei")
    except MySQLdb.OperationalError as err:
        ida_log("failed connecting to MySQL server: %s" % err.args[1])
        mysql = None

    return mysql


########################################################################################################################
### main()
###

def main():
    """Connect to MySQL, run the recon scan and close the connection."""
    mysql = mysql_connect()

    if mysql == -1:
        ida_log("cancelled by user.")
        return

    if mysql is None:
        # error message already printed.
        return

    # close() is only reached with a real connection, and is reached even
    # when the scan raises.
    try:
        meat_and_potatoes(mysql)
    finally:
        mysql.close()

main()