From 60fa2a789fcb674e11a5c08dea6afb75138314e0 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 14 Feb 2019 14:28:54 +0100 Subject: [PATCH] Add location information to decoded JSON objects Aid users in printing semantic errors in JSON input by providing them with location data for each parsed JSON element. An example usage of such data is demonstrated by nftables' error messages: | # nft add chain mytable mychain | Error: No such file or directory | add chain mytable mychain | ^^^^^^^ To reduce overhead for library users not interested in such data, store it only if `JSON_STORE_LOCATION' flag was passed to json_load*() functions. It may then be retrieved using the new API function json_get_location(). Signed-off-by: Phil Sutter --- CMakeLists.txt | 1 + doc/apiref.rst | 33 +++++++ src/jansson.def | 1 + src/jansson.h | 4 + src/jansson_private.h | 2 + src/load.c | 24 +++++ src/value.c | 99 ++++++++++++++++++++ test/.gitignore | 1 + test/suites/api/Makefile.am | 2 + test/suites/api/test_location.c | 158 ++++++++++++++++++++++++++++++++ 10 files changed, 325 insertions(+) create mode 100644 test/suites/api/test_location.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 8788afe4..6b26d89c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -490,6 +490,7 @@ if (NOT JANSSON_WITHOUT_TESTS) test_load test_load_callback test_loadb + test_location test_number test_object test_pack diff --git a/doc/apiref.rst b/doc/apiref.rst index 14187724..2254face 100644 --- a/doc/apiref.rst +++ b/doc/apiref.rst @@ -2068,3 +2068,36 @@ And usage:: json_decref(obj); } + +.. _apiref-location-information: + +Location Information +==================== + +Jansson supports storing decoded objects' locations in input for better +reporting of semantic errors in applications. Since this comes with a certain +overhead, location information is stored only if ``JSON_STORE_LOCATION`` flag +was specified during decoding. + +.. 
function:: int json_get_location(json_t *json, int *line, int *column, int *position, int *length); + + Retrieve the location of *json*, writing it to memory locations pointed to by + *line*, *column*, *position* and *length* if not *NULL*. Returns 0 on success + or -1 if no location information is available for *json*. + +``line`` + The line number on which the object occurred. + +``column`` + The column on which the object occurred. Note that this is the *character + column*, not the byte column, i.e. a multibyte UTF-8 character counts as one + column. + +``position`` + The position in bytes from the start of the input. This is useful for + debugging Unicode encoding problems. + +``length`` + The length of the object in bytes. For arrays and objects, length is always + 1. For all other types, the value equals the actual length as it appears + in the input. Note that for strings, this includes the quotes. diff --git a/src/jansson.def b/src/jansson.def index 5c76c2f6..28ecbdaa 100644 --- a/src/jansson.def +++ b/src/jansson.def @@ -80,4 +80,5 @@ EXPORTS json_get_alloc_funcs jansson_version_str jansson_version_cmp + json_get_location diff --git a/src/jansson.h b/src/jansson.h index ee99d488..38b0c09d 100644 --- a/src/jansson.h +++ b/src/jansson.h @@ -413,6 +413,10 @@ void json_get_alloc_funcs(json_malloc_t *malloc_fn, json_free_t *free_fn); const char *jansson_version_str(void); int jansson_version_cmp(int major, int minor, int micro); +/* location information */ + +int json_get_location(json_t *json, int *line, int *column, int *position, int *length); + #ifdef __cplusplus } #endif diff --git a/src/jansson_private.h b/src/jansson_private.h index b7dbdf92..deb10569 100644 --- a/src/jansson_private.h +++ b/src/jansson_private.h @@ -101,6 +101,8 @@ int jsonp_loop_check(hashtable_t *parents, const json_t *json, char *key, size_t /* Helpers for location information */ json_t *jsonp_simple(json_t *json, size_t flags); +void jsonp_store_location(json_t *json, int line, int 
column, + int position, int length); /* Windows compatibility */ #if defined(_WIN32) || defined(WIN32) diff --git a/src/load.c b/src/load.c index da8cd030..0fa10b50 100644 --- a/src/load.c +++ b/src/load.c @@ -655,6 +655,22 @@ static void lex_close(lex_t *lex) { strbuffer_close(&lex->saved_text); } +static void store_location_from_lex(json_t *json, size_t flags, const lex_t *lex) +{ + int tlen = lex->saved_text.length; + /* Record the location of the lexer's current saved text as the location of json. */ + if (!json || !(flags & JSON_STORE_LOCATION)) /* NULL json: tolerate a failed allocation in the caller */ + return; + + if (tlen) + tlen--; /* length - 1: offset subtracted below from the current stream location */ + /* NOTE(review): tlen is in bytes, column in characters; check tokens with raw multibyte UTF-8 */ + jsonp_store_location(json, lex->stream.line, + lex->stream.column - tlen, + lex->stream.position - tlen, + tlen + 1); +} + /*** parser ***/ static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error); @@ -664,6 +680,7 @@ static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error) { if (!object) return NULL; + store_location_from_lex(object, flags, lex); lex_scan(lex, error); if (lex->token == '}') return object; @@ -741,6 +758,7 @@ static json_t *parse_array(lex_t *lex, size_t flags, json_error_t *error) { if (!array) return NULL; + store_location_from_lex(array, flags, lex); lex_scan(lex, error); if (lex->token == ']') return array; @@ -796,6 +814,7 @@ static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error) { } json = jsonp_stringn_nocheck_own(value, len); + store_location_from_lex(json, flags, lex); lex->value.string.val = NULL; lex->value.string.len = 0; break; @@ -803,24 +822,29 @@ static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error) { case TOKEN_INTEGER: { json = json_integer(lex->value.integer); + store_location_from_lex(json, flags, lex); break; } case TOKEN_REAL: { json = json_real(lex->value.real); + store_location_from_lex(json, flags, lex); break; } case TOKEN_TRUE: json = jsonp_simple(json_true(), flags); + store_location_from_lex(json, flags, lex); break; case TOKEN_FALSE: json = jsonp_simple(json_false(), flags); + store_location_from_lex(json, flags, lex); break; case TOKEN_NULL: json = 
jsonp_simple(json_null(), flags); + store_location_from_lex(json, flags, lex); break; case '{': diff --git a/src/value.c b/src/value.c index e151191e..43e19ae3 100644 --- a/src/value.c +++ b/src/value.c @@ -1026,10 +1026,13 @@ static void json_delete_simple(json_simple_t *simple) /*** deletion ***/ +static void delete_location(json_t *json); + void json_delete(json_t *json) { if (!json) return; + delete_location(json); switch (json_typeof(json)) { case JSON_OBJECT: json_delete_object(json_to_object(json)); @@ -1148,3 +1151,99 @@ json_t *do_deep_copy(const json_t *json, hashtable_t *parents) { return NULL; } } + +/*** location information ***/ + +typedef struct { + json_t json; /* just to integrate with hashtable */ + int line; + int column; + int position; + int length; +} json_location_t; + +static hashtable_t location_hash; +static int location_hash_initialized; + +static void location_atexit(void) +{ + if (location_hash_initialized) + hashtable_close(&location_hash); +} + +void jsonp_store_location(json_t *json, int line, int column, + int position, int length) +{ + json_location_t *loc = NULL; + /* Store (or overwrite) the location of json in location_hash, keyed by the json pointer value. */ + /* not possible to store location for the singleton primitives + * as one can't distinguish them by their memory location */ + if (!json || json->refcount == (size_t)-1) /* NULL json: tolerate a failed allocation in the caller */ + return; + + if (!location_hash_initialized) { + if (!hashtable_seed) { + /* Autoseed */ + json_object_seed(0); + } + if (hashtable_init(&location_hash)) + return; + + atexit(location_atexit); + location_hash_initialized = 1; + } else { + loc = hashtable_get(&location_hash, (void *)&json, sizeof(json)); + } + if (!loc) { + loc = jsonp_malloc(sizeof(*loc)); + if (!loc) + return; + loc->json.refcount = (size_t)-1; + if (hashtable_set(&location_hash, + (void *)&json, sizeof(json), (void *)loc)) { + jsonp_free(loc); /* not inserted, so nothing else would free it */ + return; + } + } + + loc->line = line; + loc->column = column; + loc->position = position; + loc->length = length; +} + +int json_get_location(json_t *json, int *line, int *column, + int *position, int *length) +{ + 
json_location_t *loc = NULL; + + if (location_hash_initialized) + loc = hashtable_get(&location_hash, (void *)&json, sizeof(json)); + + if (!loc) + return -1; /* no location stored for json */ + + if (line) + *line = loc->line; + if (column) + *column = loc->column; + if (position) + *position = loc->position; + if (length) + *length = loc->length; + + return 0; +} + +static void delete_location(json_t *json) +{ + json_location_t *loc; /* the typedef name; there is no struct tag json_location_t */ + + if (!location_hash_initialized) + return; + + loc = hashtable_get(&location_hash, (void *)&json, sizeof(json)); /* fetch before removal so the entry can be freed */ + hashtable_del(&location_hash, (void *)&json, sizeof(json)); + if (loc) + jsonp_free(loc); +} diff --git a/test/.gitignore b/test/.gitignore index 93cc8da4..57701539 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -11,6 +11,7 @@ suites/api/test_fixed_size suites/api/test_load suites/api/test_load_callback suites/api/test_loadb +suites/api/test_location suites/api/test_memory_funcs suites/api/test_number suites/api/test_object diff --git a/test/suites/api/Makefile.am b/test/suites/api/Makefile.am index 2bc638b8..9ef422ba 100644 --- a/test/suites/api/Makefile.am +++ b/test/suites/api/Makefile.am @@ -11,6 +11,7 @@ check_PROGRAMS = \ test_load \ test_load_callback \ test_loadb \ + test_location \ test_memory_funcs \ test_number \ test_object \ @@ -28,6 +29,7 @@ test_dump_callback_SOURCES = test_dump_callback.c util.h test_fixed_size_SOURCES = test_fixed_size.c util.h test_load_SOURCES = test_load.c util.h test_loadb_SOURCES = test_loadb.c util.h +test_location_SOURCES = test_location.c util.h test_memory_funcs_SOURCES = test_memory_funcs.c util.h test_number_SOURCES = test_number.c util.h test_object_SOURCES = test_object.c util.h diff --git a/test/suites/api/test_location.c b/test/suites/api/test_location.c new file mode 100644 index 00000000..00d528dd --- /dev/null +++ b/test/suites/api/test_location.c @@ -0,0 +1,158 @@ +#include <jansson.h> +#include <stdarg.h> +#include <string.h> +#include "util.h" + + +#define INPUT "{ \"testkey\": [\"testvalue1\", \"testvalue2\"] }" 
+#define MULTILINE_INPUT \ +"{\n" \ +" \"root key 1\": \"root key 1 value 1\",\n" \ +" \"root key 2\": [\n" \ +" \"root key 2 array value 1\",\n" \ +" \"root key 2 array value 2\"\n" \ +" ],\n" \ +" \"root key 3\": [ true, false ],\n" \ +" \"root key 4\": null,\n" \ +" \"root key 5\": {\n" \ +" \"root key 5 object key 1\": 23,\n" \ +" \"root key 5 object key 2\": 3.1415926536\n" \ +" },\n" \ +" \"root key emoji\": \"\\uD83D\\uDE02\"\n" \ +"}\n" + +#define OUTPUT \ +"object: 1, 1, 1, 1\n" \ +"object key \"testkey\" value: 1, 14, 14, 1\n" \ +"array: 1, 14, 14, 1\n" \ +"array item 0: 1, 15, 15, 12\n" \ +"string: \"testvalue1\": 1, 15, 15, 12\n" \ +"array item 1: 1, 29, 29, 12\n" \ +"string: \"testvalue2\": 1, 29, 29, 12\n" + +#define MULTILINE_OUTPUT \ +"object: 1, 1, 1, 1\n" \ +"object key \"root key 1\" value: 2, 16, 18, 20\n" \ +"string: \"root key 1 value 1\": 2, 16, 18, 20\n" \ +"object key \"root key 2\" value: 3, 16, 55, 1\n" \ +"array: 3, 16, 55, 1\n" \ +"array item 0: 4, 3, 59, 26\n" \ +"string: \"root key 2 array value 1\": 4, 3, 59, 26\n" \ +"array item 1: 5, 3, 89, 26\n" \ +"string: \"root key 2 array value 2\": 5, 3, 89, 26\n" \ +"object key \"root key 3\" value: 7, 16, 135, 1\n" \ +"array: 7, 16, 135, 1\n" \ +"array item 0: 7, 18, 137, 4\n" \ +"true: 7, 18, 137, 4\n" \ +"array item 1: 7, 24, 143, 5\n" \ +"false: 7, 24, 143, 5\n" \ +"object key \"root key 4\" value: 8, 16, 167, 4\n" \ +"null: 8, 16, 167, 4\n" \ +"object key \"root key 5\" value: 9, 16, 188, 1\n" \ +"object: 9, 16, 188, 1\n" \ +"object key \"root key 5 object key 1\" value: 10, 30, 219, 2\n" \ +"integer: 23: 10, 30, 219, 2\n" \ +"object key \"root key 5 object key 2\" value: 11, 30, 252, 12\n" \ +"real: 3.141593: 11, 30, 252, 12\n" \ +"object key \"root key emoji\" value: 13, 20, 288, 14\n" \ +"string: \"😂\": 13, 20, 288, 14\n" + +static void print_location(char *outbuf, ssize_t *outbufspace, + json_t *root, const char *fmt, ...) 
+{ + va_list ap; + char buf[1024]; + int line, column, position, length; + /* Append "<formatted label>: line, column, position, length\n" for root to outbuf; nothing if root has no stored location. */ + if (*outbufspace <= 0) /* no room left in outbuf */ + return; + + if (json_get_location(root, &line, &column, &position, &length)) + return; + + va_start(ap, fmt); + vsnprintf(buf, 1024, fmt, ap); + va_end(ap); + strncat(outbuf, buf, *outbufspace); + *outbufspace -= strlen(buf); /* may go negative when strncat had to cut buf short */ + + if (*outbufspace <= 0) + return; + + sprintf(buf, ": %d, %d, %d, %d\n", line, column, position, length); + strncat(outbuf, buf, *outbufspace); + *outbufspace -= strlen(buf); +} + +static void parse(char *outbuf, ssize_t *outbufspace, json_t *root) +{ + unsigned int index; + const char *key; + json_t *tmp; + /* Append a location record for root to outbuf, recursing into object members and array items. */ + switch(json_typeof(root)) { + case JSON_OBJECT: + print_location(outbuf, outbufspace, root, "object"); + json_object_foreach(root, key, tmp) { + print_location(outbuf, outbufspace, tmp, + "object key \"%s\" value", key); + parse(outbuf, outbufspace, tmp); /* prints tmp a second time, labelled by its own type */ + } + break; + case JSON_ARRAY: + print_location(outbuf, outbufspace, root, "array"); + json_array_foreach(root, index, tmp) { + print_location(outbuf, outbufspace, tmp, + "array item %u", index); + parse(outbuf, outbufspace, tmp); + } + break; + case JSON_STRING: + print_location(outbuf, outbufspace, root, + "string: \"%s\"", json_string_value(root)); + break; + case JSON_INTEGER: + print_location(outbuf, outbufspace, root, + "integer: %" JSON_INTEGER_FORMAT, + json_integer_value(root)); + break; + case JSON_REAL: + print_location(outbuf, outbufspace, root, + "real: %lf", json_real_value(root)); + break; + case JSON_TRUE: + print_location(outbuf, outbufspace, root, "true"); + break; + case JSON_FALSE: + print_location(outbuf, outbufspace, root, "false"); + break; + case JSON_NULL: + print_location(outbuf, outbufspace, root, "null"); + break; + } +} + +static void run_test(const char *input, const char *output, int flags) +{ + ssize_t bufspace; + char buf[8192]; + /* Decode input with flags, collect every recorded location as text and compare it with output. */ + json_t *root = json_loads(input, flags, NULL); + if (!root) + fail("loading input failed"); + *buf = '\0'; /* print_location() appends with strncat, so start from an empty string */ + bufspace = 
sizeof(buf) - 1; /* keep one byte for the terminating NUL */ + parse(buf, &bufspace, root); + json_decref(root); + + if (strcmp(buf, output)) + fail_args("output doesn't match:\nexpect:\n%s\ngot:\n%s\n", output, buf); +} + +static void run_tests(void) +{ + run_test(INPUT, "", 0); /* flag not set: no location is available, so no output is expected */ + run_test(MULTILINE_INPUT, "", 0); + run_test(INPUT, OUTPUT, JSON_STORE_LOCATION); + run_test(MULTILINE_INPUT, MULTILINE_OUTPUT, JSON_STORE_LOCATION); +}