Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AFL filename formats #11

Merged
merged 9 commits into from
Oct 1, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions Docker/build_savior.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ function dir_check {
cd $SOFTWARE_DIR/llvm-3.6
mkdir build
cd $SOFTWARE_DIR/llvm-3.6/build
cmake -DLLVM_ENABLE_RTTI:BOOL=ON ..
make install -j$(nproc)
cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_RTTI:BOOL=ON ..
make install -j4

PROG=KLEE
apt-get install -y build-essential curl libcap-dev libncurses5-dev python-minimal unzip
Expand Down Expand Up @@ -160,7 +160,7 @@ function dir_check {
#TODO: open source KLEE concolic executor separately
#installing klee-3.6
rm -rf /root/savior/KLEE/klee-build
cd ~/work/savior/KLEE
cd $WORK_DIR/savior/KLEE
mkdir klee-build
cd klee-build
echo "NOTE: you might need to rebuild libboost for C++ ABI compatibility on Ubuntu 16.04"
Expand Down Expand Up @@ -195,12 +195,12 @@ function dir_check {
mkdir build
mkdir install
cd $SOFTWARE_DIR/llvm-4.0/build
cmake -DLLVM_ENABLE_RTTI:BOOL=ON -DCMAKE_INSTALL_PREFIX=$SOFTWARE_DIR/llvm-4.0/install ..
make install -j$(nproc)
cmake -DLLVM_LINK_LLVM_DYLIB=ON -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_RTTI:BOOL=ON -DCMAKE_INSTALL_PREFIX=$SOFTWARE_DIR/llvm-4.0/install ..
make install -j4

#install svf
PROG=svf

#build insertbug pass with llvm-3.6 first
cd $WORK_DIR/$PROJ/svf/InsertBugPotential
mkdir build && cd build && cmake .. && make -j$(nproc)
Expand All @@ -214,4 +214,3 @@ function dir_check {
cmake ..
make -j$(nproc)
cd .. && ln -sf $(pwd)/Release-build/bin/dma_wrapper.py dma_wrapper.py

9 changes: 6 additions & 3 deletions coordinator/EdgeOracles/avg_bug_potential_oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,10 @@ def get_fuzzer_queue_dir(self, raw_config, target_bin):
fuzzer_dir = os.path.join(sync_dir, "slave_000001", "queue")
return fuzzer_dir

##### PR: filename mismatch
def read_queue(self):
return [f for f in os.listdir(self.fuzzer_input_dir) if os.path.isfile(os.path.join(self.fuzzer_input_dir, f))]
return [utils.from_afl_name_to_simple(f) for f in os.listdir(self.fuzzer_input_dir) if os.path.isfile(os.path.join(self.fuzzer_input_dir, f))]
##### PR: filename mismatch

def get_oracle_config(self):
config = ConfigParser.ConfigParser()
Expand Down Expand Up @@ -141,7 +143,7 @@ def build_input_to_score_cache(self, dummy_all_edges, inputs):
stat['score'] = 0.0
stat['first_seen'] = seed
stat['interesting_edges'] = []
stat['size'] = os.path.getsize(seed)
stat['size'] = os.path.getsize(utils.from_simple_to_afl_name(seed)) ##### PR: filename mismatch
contributing_edge_counter = 0
for e in set(edges):
is_interesting_edge = False
Expand Down Expand Up @@ -188,13 +190,14 @@ def get_score(self, testcase):
# even though it contains new coverage
score2 = "orig:" in testcase
# Smaller size is better
score3 = -os.path.getsize(testcase)
score3 = -os.path.getsize(utils.from_simple_to_afl_name(testcase)) ##### PR: filename mismatch
# Shorter path is better
score4 = -self.get_path_length(testcase)
# Since name contains id, so later generated one will be chosen earlier
score5 = testcase
return (score1, score2, score3, score4, score5)


def testcase_compare(self, a, b):
a_score = self.get_score(a)
b_score = self.get_score(b)
Expand Down
4 changes: 3 additions & 1 deletion coordinator/EdgeOracles/bug_potential_oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ def get_fuzzer_queue_dir(self, raw_config, target_bin):
fuzzer_dir = os.path.join(sync_dir, "slave_000001", "queue")
return fuzzer_dir

##### PR: filename mismatch
def read_queue(self):
return [f for f in os.listdir(self.fuzzer_input_dir) if os.path.isfile(os.path.join(self.fuzzer_input_dir, f))]
return [utils.from_afl_name_to_simple(f) for f in os.listdir(self.fuzzer_input_dir) if os.path.isfile(os.path.join(self.fuzzer_input_dir, f))]
##### PR: filename mismatch

def get_oracle_config(self):
config = ConfigParser.ConfigParser()
Expand Down
6 changes: 4 additions & 2 deletions coordinator/EdgeOracles/sequential_oracle.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
import sys
import os
import utils
import ConfigParser
from utils import bcolors
from operator import itemgetter
Expand All @@ -23,9 +24,10 @@ def get_fuzzer_queue_dir(self, raw_config, target_bin):
sync_dir = config.get("moriarty", "sync_dir").replace("@target", target_dir)
return os.path.join(sync_dir, "master", "queue")


##### PR: filename mismatch
def read_queue(self):
return [f for f in os.listdir(self.fuzzer_input_dir) if os.path.isfile(os.path.join(self.fuzzer_input_dir, f))]
return [utils.from_afl_name_to_simple(f) for f in os.listdir(self.fuzzer_input_dir) if os.path.isfile(os.path.join(self.fuzzer_input_dir, f))]
##### PR: filename mismatch

def get_result(self, raw_data, max_results, edge_threshold=0.8):
stats = []
Expand Down
4 changes: 3 additions & 1 deletion coordinator/SEs/klee_conc_explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,9 @@ def run(self, input_id_map_list, cov_file):
for input_id_map in input_id_map_list:
#--generate klee seed ktest
# print input_id_map
afl_input = input_id_map['input']
afl_input = utils.from_simple_to_afl_name(input_id_map['input'])
if not afl_input:
continue
if max_input_size < os.path.getsize(afl_input):
max_input_size = os.path.getsize(afl_input)
klee_seed = klee_seed_dir+"/"+str(input_counter).zfill(6)+".ktest"
Expand Down
4 changes: 3 additions & 1 deletion coordinator/SEs/klee_sym_explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,9 @@ def run(self, input_id_map_list, cov_file_list):

#--generate klee seed ktest
# print input_id_map
afl_input = input_id_map['input']
afl_input = utils.from_simple_to_afl_name(input_id_map['input'])
if not afl_input:
continue
klee_seed = self.seed_dir+"/klee_instance_sym_"+str(pid).zfill(6)+".ktest"
# print "before calling converter"
# print afl_input
Expand Down
21 changes: 17 additions & 4 deletions coordinator/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import shutil
import datetime
import random
import glob

def error_msg(s):
print bcolors.FAIL+"[ERROR] {0}".format(s)+bcolors.ENDC
Expand Down Expand Up @@ -210,6 +211,18 @@ def merge_coverage_files(data_files, output_name, ftype='branch-only'):
error_msg("can not merge coverage file {0}".format(output_name))
return False

##### PR: filename mismatch
def from_simple_to_afl_name(simple_path):
    """Resolve a coordinator "simple" seed path to the real AFL queue file.

    The basename of ``simple_path`` has every ``_`` turned into ``:``
    (e.g. ``id_000001`` -> ``id:000001``) and is then used as a literal
    filename prefix inside the same directory.

    Args:
        simple_path: path whose basename is the simple seed name.

    Returns:
        The path of the matching file (directory part joined exactly as it
        was given), or ``""`` when nothing matches. Callers treat ``""`` as
        "seed no longer available" and skip it, so a miss must not raise.
    """
    directory, simple_name = os.path.split(simple_path)
    prefix = simple_name.replace("_", ":")
    if not prefix:
        # No seed name to look for; never hand back an arbitrary file.
        return ""
    try:
        # Plain listing + prefix test instead of a glob pattern, so that
        # characters such as '[' or '*' in the path are matched literally.
        entries = os.listdir(directory or os.curdir)
    except OSError:
        # Directory missing or unreadable: same outcome as "no match".
        return ""
    candidates = [entry for entry in entries if entry.startswith(prefix)]
    if not candidates:
        return ""
    # Directory listings are unordered; take the smallest name so the
    # result is reproducible when several files share the prefix.
    return os.path.join(directory, min(candidates))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why would there be a case where glob returns multiple entries for a unique name?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, never mind, glob.glob returns a list.
Can we add an assert here to ensure the length of the list is 1?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think an assert is too strong since a file can be removed by AFL on the fly.
From time to time AFL calls a routine to polish the queue (a cmin-like function, if you want); this is briefly mentioned here as part of the afl-fuzz algorithm.
Unfortunately, it may raise the assertion if the file savior wants to read has been removed by AFL.
I preferred the way you chose here and simply continue if a problem occurred.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is the first_seen values in AFL. As AFL is currently implemented, you cannot easily retrieve filenames from ids. This is the reason why I kept the simple file format inside the coordinator, which requires the format conversions.

It's been a few years since I worked on this code. Can you elaborate a bit more on why we can't keep the naming scheme consistent (i.e., use the standard naming across all modules)? The first_seen values in AFL can be modified here:

fprintf(f, "%u\t%s/queue/id_%06u\n", i, out_dir, edge_san_first_seen[i]);

Would that help make things cleaner?

The problem here is the use of edge_san_first_seen[i] storing only the id of the first testcase covering a branch. I have not seen a simple way to print back the full filename in the standard format. A solution would be to store the full name but it does not sound like a simpler way.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm, I got your point, reading the code again we use the seed names in the input_id_map for SE converter, so it needs to be a full match.

Thanks for the discussion, by the way. My concern was that keeping a mixed scheme will make the code logic more convoluted; being able to modify KLEE seems like a more straightforward approach, but we don't have the source.

@DanielGuoVT maybe you could consider releasing another KLEE version before fully open-sourcing it. But until then we can use the solution in this PR.


def from_afl_name_to_simple(afl_name):
    """Shorten an AFL-style file path to the coordinator's "simple" form.

    Only the first nine characters of the basename are kept, with every
    ``:`` in them replaced by ``_``; the directory part is left untouched.
    For example ``q/id:000001,src:000000`` becomes ``q/id_000001``.
    """
    directory, filename = os.path.split(afl_name)
    simple_name = filename[:9].replace(':', '_')
    return os.path.join(directory, simple_name)
##### PR: filename mismatch

def expand_stack_limit():
"""Klee requires ulimit to set stack as unlimited"""
Expand Down Expand Up @@ -283,9 +296,9 @@ def gen_loctrace_file(prog, inp, input_mode,outfile=None, timeout=1):
os.unlink(target_file)
myenv['AFL_LOC_TRACE_FILE'] = target_file
if input_mode == "symfile":
prog_cmd = prog.replace("INPUT_FILE", inp)
prog_cmd = prog.replace("INPUT_FILE", from_simple_to_afl_name(inp))
elif input_mode == "stdin":
prog_cmd = " ".join([prog + " < " + inp])
prog_cmd = " ".join([prog + " < " + from_simple_to_afl_name(inp)])
prog_cmd = "timeout " + str(timeout)+"s " + prog_cmd
prog_cmd = prog_cmd + " > /dev/null 2> /dev/null"
p = subprocess.Popen(prog_cmd, shell=True, env=myenv)
Expand Down Expand Up @@ -326,7 +339,7 @@ def log_recommend_edges(lst, log, loc_map, find_loc_script, prog, cur_heu):
return

def _get_src_loc(e, s):
cmd = [find_loc_script, e, loc_map, prog + " < " + s]
cmd = [find_loc_script, e, loc_map, prog + " < " + from_simple_to_afl_name(s)]
cmd = " ".join(cmd)
#we call the find script twice, first to get loctrace, second to get src
subprocess.call(cmd, shell=True)
Expand Down Expand Up @@ -375,7 +388,7 @@ def save_inputs(seed_list, target_dir):
error_msg("{0} is not a valid directory".format(target_dir))
return
for seed in seed_list:
shutil.copy2(seed['input'], target_dir)
shutil.copy2(from_simple_to_afl_name(seed['input']), target_dir)

def pack_klee_errors(search_dir, target_dir):
"""
Expand Down
Loading