From 5b08b6d7b488c8b99e052f6a33115ba1e0be9531 Mon Sep 17 00:00:00 2001
From: Joao Paulo Magalhaes
Date: Sat, 8 Jun 2024 17:50:51 +0100
Subject: [PATCH] add unit tests for known-problematic fuzz inputs

---
Reviewer notes (free text placed after the `---` line; it is not part of the
commit message):

* test/CMakeLists.txt: fetches the rapidyaml-data project and stops with
  c4_err() when fuzz/yaml.dict is missing from it. ryml_add_fuzz_test(<name>)
  builds the executable ryml-test-fuzz-<name> from test_fuzz_<name>.cpp plus
  test_fuzz_main.cpp, and registers one test per file globbed from
  fuzz/yaml_artifacts.
* test/test_fuzz/test_fuzz_common.hpp: errcallback() first calls
  report_error() (which prints only while report_errors is true, i.e. the
  RYML_DBG default) and then either throws std::runtime_error or longjmps to
  jmp_env, whichever branch C4_IF_EXCEPTIONS selects. fuzztest_parse_emit()
  and fuzztest_yaml_events() return 1 when that error path is taken and 0
  otherwise.
* test/test_fuzz/test_fuzz_main.cpp: passes the contents of the file named by
  argv[1] to LLVMFuzzerTestOneInput() and discards the result; it returns 1
  only when the argument is missing or the file does not exist.
* NOTE(review): in this copy the bare `#include` lines have no target, and
  several casts/templates (static_cast, reinterpret_cast, std::atomic,
  emitrs_yaml, ParseEngine) have no `<...>` argument -- presumably stripped
  during extraction, like the author e-mail. Confirm against the upstream
  commit before applying.

 test/CMakeLists.txt                     |  50 +++++++++
 test/test_fuzz/test_fuzz_common.hpp     | 134 ++++++++++++++++++++++++
 test/test_fuzz/test_fuzz_events.cpp     |   9 ++
 test/test_fuzz/test_fuzz_main.cpp       |  16 +++
 test/test_fuzz/test_fuzz_parse_emit.cpp |   9 ++
 5 files changed, 218 insertions(+)
 create mode 100644 test/test_fuzz/test_fuzz_common.hpp
 create mode 100644 test/test_fuzz/test_fuzz_events.cpp
 create mode 100644 test/test_fuzz/test_fuzz_main.cpp
 create mode 100644 test/test_fuzz/test_fuzz_parse_emit.cpp

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 905292ce..818d8867 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -265,3 +265,53 @@ if(RYML_TEST_SUITE)
         ryml_add_test_from_suite(${case})
     endforeach()
 endif(RYML_TEST_SUITE)
+
+
+#------------------------------------------------------------------------------
+#------------------------------------------------------------------------------
+#------------------------------------------------------------------------------
+
+c4_download_remote_proj(rapidyaml-data rapidyaml_data_dir
+    GIT_REPOSITORY https://github.com/biojppm/rapidyaml-data
+    GIT_TAG master)
+if(NOT EXISTS ${rapidyaml_data_dir}/fuzz/yaml.dict)
+    c4_err("cannot find rapidyaml-data at ${rapidyaml_data_dir} -- was there an error downloading the project?")
+endif()
+
+set(corpus_suite_dir ${rapidyaml_data_dir}/fuzz/yaml_test_suite)
+set(corpus_generated_dir ${rapidyaml_data_dir}/fuzz/yaml_generated)
+set(corpus_artifacts_dir ${rapidyaml_data_dir}/fuzz/yaml_artifacts)
+set(corpus_merged_dir ${rapidyaml_data_dir}/fuzz/yaml_merged)
+set(yaml_dict ${rapidyaml_data_dir}/fuzz/yaml.dict)
+file(GLOB_RECURSE fuzz_files RELATIVE "${corpus_artifacts_dir}" "${corpus_artifacts_dir}/*")
+file(GLOB_RECURSE suite_files RELATIVE "${corpus_suite_dir}" "${corpus_suite_dir}/*") # NOTE(review): suite_files is not referenced elsewhere in this patch
+
+# add individual tests for problematic fuzz files
+function(ryml_add_fuzz_test name)
+    c4_add_executable(ryml-test-fuzz-${name}
+        SOURCES
+            test_fuzz/test_fuzz_common.hpp
+            test_fuzz/test_fuzz_${name}.cpp
+            test_fuzz/test_fuzz_main.cpp
+            ${ARGN}
+        INC_DIRS ${CMAKE_CURRENT_LIST_DIR}
+        LIBS ryml c4fs
+        FOLDER test/fuzz)
+    if(RYML_DBG)
+        target_compile_definitions(ryml-test-fuzz-${name} PUBLIC RYML_DBG)
+    endif()
+    add_dependencies(ryml-test-build ryml-test-fuzz-${name})
+    ryml_get_target_exe(ryml-test-fuzz-${name} tgtexe)
+    function(ryml_add_fuzz_test_file name_ dir file) # nested helper: uses tgtexe obtained on the line above
+        string(REPLACE "/" "_" fuzz_name "${file}") # flatten the relative path into a test-name suffix
+        add_test(NAME ryml-test-fuzz-${name_}-${fuzz_name}
+            COMMAND ${tgtexe} ${dir}/${file})
+    endfunction()
+    foreach(fuzz_file ${fuzz_files}) # one test per globbed artifact file
+        ryml_add_fuzz_test_file(${name} ${corpus_artifacts_dir} ${fuzz_file})
+    endforeach()
+endfunction()
+ryml_add_fuzz_test(parse_emit)
+ryml_add_fuzz_test(events
+    ../test/test_suite/test_suite_event_handler.hpp
+    ../test/test_suite/test_suite_event_handler.cpp)
diff --git a/test/test_fuzz/test_fuzz_common.hpp b/test/test_fuzz/test_fuzz_common.hpp
new file mode 100644
index 00000000..53857acd
--- /dev/null
+++ b/test/test_fuzz/test_fuzz_common.hpp
@@ -0,0 +1,134 @@
+#pragma once
+#ifndef TEST_FUZZ_COMMON_H
+#define TEST_FUZZ_COMMON_H
+
+#ifdef RYML_SINGLE_HEADER
+#include
+#else
+#include
+#include
+#include
+#include
+#include
+#endif
+#include
+#include
+#include
+
+#ifdef C4_EXCEPTIONS
+#include
+#else
+#include
+std::jmp_buf jmp_env = {}; // longjmp target: set by setjmp() in the fuzztest_* functions, jumped to from errcallback()
+c4::csubstr jmp_msg = {}; // error message recorded by errcallback() just before it longjmps
+#endif
+
+
+#ifdef RYML_DBG
+#define _if_dbg(...) __VA_ARGS__
+bool report_errors = true;
+#else
+#define _if_dbg(...)
+bool report_errors = false;
+#endif
+
+inline void report_error(const char* msg, size_t length, c4::yml::Location loc, FILE *f) // writes "[name:]line:[col:][ (offsetB):] msg" to f
+{
+    if(!report_errors) // silent unless report_errors is set (true by default only with RYML_DBG)
+        return;
+    if(!loc.name.empty())
+    {
+        fwrite(loc.name.str, 1, loc.name.len, f);
+        fputc(':', f);
+    }
+    fprintf(f, "%zu:", loc.line);
+    if(loc.col)
+        fprintf(f, "%zu:", loc.col);
+    if(loc.offset)
+        fprintf(f, " (%zuB):", loc.offset);
+    fputc(' ', f);
+    fprintf(f, "%.*s\n", static_cast(length), msg);
+    fflush(f);
+}
+
+inline C4_NORETURN void errcallback(const char *msg, size_t msg_len, c4::yml::Location location, void *) // reports to stderr, then throws or longjmps; the void* argument is unused
+{
+    report_error(msg, msg_len, location, stderr);
+    C4_IF_EXCEPTIONS(
+        throw std::runtime_error({msg, msg_len});
+        ,
+        jmp_msg.assign(msg, msg_len);
+        std::longjmp(jmp_env, 1);
+    );
+}
+
+inline c4::yml::Callbacks create_custom_callbacks() // also routes c4's global error callback to errcallback()
+{
+    c4::set_error_flags(c4::ON_ERROR_CALLBACK);
+    c4::set_error_callback([](const char *msg, size_t msg_len){
+        errcallback(msg, msg_len, {}, nullptr);
+    });
+    c4::yml::Callbacks callbacks = {};
+    callbacks.m_error = errcallback;
+    return callbacks;
+}
+
+namespace c4 {
+namespace yml {
+
+inline int fuzztest_parse_emit(uint32_t case_number, csubstr src) // parse src into a tree, then emit it; returns 1 if the error path was taken, else 0
+{
+    C4_UNUSED(case_number);
+    set_callbacks(create_custom_callbacks());
+    Tree tree(create_custom_callbacks());
+    bool parse_success = false;
+    C4_IF_EXCEPTIONS_(try, if(setjmp(jmp_env) == 0))
+    {
+        RYML_ASSERT(tree.empty());
+        _if_dbg(_dbg_printf("in[{}]: [{}]~~~\n{}\n~~~\n", case_number, src.len, src); fflush(NULL));
+        parse_in_arena(src, &tree);
+        parse_success = true;
+        _if_dbg(print_tree("parsed tree", tree));
+        _if_dbg(_dbg_printf("in[{}]: [{}]~~~\n{}\n~~~\n", case_number, src.len, src); fflush(NULL));
+        std::string dst = emitrs_yaml(tree);
+        _if_dbg(_dbg_printf("emitted[{}]: [{}]~~~\n{}\n~~~\n", case_number, dst.size(), to_csubstr(dst)); fflush(NULL));
+        C4_DONT_OPTIMIZE(dst);
+        C4_DONT_OPTIMIZE(parse_success);
+    }
+    C4_IF_EXCEPTIONS_(catch(std::exception const&), else)
+    {
+        // if an exception leaks from here, it is likely because of a greedy noexcept
+        _if_dbg(if(parse_success) print_tree("parsed tree", tree));
+        return 1;
+    }
+    return 0;
+}
+
+inline int fuzztest_yaml_events(uint32_t case_number, csubstr src) // run the event parser over a private copy of src; returns 1 if the error path was taken, else 0
+{
+    C4_UNUSED(case_number);
+    set_callbacks(create_custom_callbacks());
+    EventHandlerYamlStd::EventSink sink = {};
+    EventHandlerYamlStd handler(&sink, create_custom_callbacks());
+    ParseEngine parser(&handler);
+    std::string str(src.begin(), src.end()); // mutable copy handed to parse_in_place_ev() below
+    C4_IF_EXCEPTIONS_(try, if(setjmp(jmp_env) == 0))
+    {
+        _if_dbg(_dbg_printf("in[{}]: [{}]~~~\n{}\n~~~\n", case_number, src.len, src); fflush(NULL));
+        parser.parse_in_place_ev("input", c4::to_substr(str));
+        _if_dbg(_dbg_printf("evts[{}]: ~~~\n{}\n~~~\n", case_number, sink.get()); fflush(NULL));
+        C4_DONT_OPTIMIZE(sink);
+    }
+    C4_IF_EXCEPTIONS_(catch(std::exception const&), else)
+    {
+        // if an exception leaks from here, it is likely because of a greedy noexcept
+        _if_dbg(fprintf(stdout, "err\n"); fflush(NULL));
+        return 1;
+    }
+    return 0;
+}
+
+} // namespace yml
+} // namespace c4
+
+#endif /* TEST_FUZZ_COMMON_H */
diff --git a/test/test_fuzz/test_fuzz_events.cpp b/test/test_fuzz/test_fuzz_events.cpp
new file mode 100644
index 00000000..a857a2fe
--- /dev/null
+++ b/test/test_fuzz/test_fuzz_events.cpp
@@ -0,0 +1,9 @@
+#include "./test_fuzz_common.hpp"
+#include
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *str, size_t len) // entry point: forwards the raw bytes to fuzztest_yaml_events()
+{
+    static std::atomic case_number{0}; // incremented once per call
+    c4::csubstr src = {reinterpret_cast(str), len};
+    return c4::yml::fuzztest_yaml_events(case_number++, src);
+}
diff --git a/test/test_fuzz/test_fuzz_main.cpp b/test/test_fuzz/test_fuzz_main.cpp
new file mode 100644
index 00000000..45f21b28
--- /dev/null
+++ b/test/test_fuzz/test_fuzz_main.cpp
@@ -0,0 +1,16 @@
+#include
+#include
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *str, size_t len);
+
+int main(int argc, const char *argv[]) // usage: <exe> <input-file>
+{
+    if(argc < 2)
+        return 1;
+    const char *filename = argv[1];
+    if(!c4::fs::file_exists(filename))
+        return 1;
+    std::string file = c4::fs::file_get_contents(filename);
+    (void)LLVMFuzzerTestOneInput(reinterpret_cast(&file[0]), file.size()); // result deliberately discarded
+    return 0;
+}
diff --git a/test/test_fuzz/test_fuzz_parse_emit.cpp b/test/test_fuzz/test_fuzz_parse_emit.cpp
new file mode 100644
index 00000000..dc4c0805
--- /dev/null
+++ b/test/test_fuzz/test_fuzz_parse_emit.cpp
@@ -0,0 +1,9 @@
+#include "./test_fuzz_common.hpp"
+#include
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *str, size_t len) // entry point: forwards the raw bytes to fuzztest_parse_emit()
+{
+    static std::atomic case_number{0}; // incremented once per call
+    c4::csubstr src = {reinterpret_cast(str), len};
+    return c4::yml::fuzztest_parse_emit(case_number++, src);
+}