diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8788afe4..6b26d89c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -490,6 +490,7 @@ if (NOT JANSSON_WITHOUT_TESTS)
          test_load
          test_load_callback
          test_loadb
+         test_location
          test_number
          test_object
          test_pack
diff --git a/doc/apiref.rst b/doc/apiref.rst
index 14187724..2254face 100644
--- a/doc/apiref.rst
+++ b/doc/apiref.rst
@@ -2068,3 +2068,36 @@ And usage::
 
         json_decref(obj);
     }
+
+.. _apiref-location-information:
+
+Location Information
+====================
+
+Jansson can record where each decoded value was located in the input, which
+helps applications report semantic errors precisely. Since this comes with a
+certain overhead, location information is stored only if the
+``JSON_STORE_LOCATION`` flag was specified during decoding.
+
+.. function:: int json_get_location(json_t *json, int *line, int *column, int *position, int *length);
+
+   Retrieve the location of *json*, writing each component to the memory
+   pointed to by *line*, *column*, *position* and *length* unless that pointer
+   is *NULL*. Returns 0 on success or -1 if no location is available for *json*.
+
+``line``
+  The line number on which the value starts.
+
+``column``
+  The column at which the value starts. Note that this is the *character
+  column*, not the byte column, i.e. a multibyte UTF-8 character counts as one
+  column.
+
+``position``
+  The position in bytes from the start of the input. This is useful for
+  debugging Unicode encoding problems.
+
+``length``
+  The length of the value in bytes. For arrays and objects, length is always
+  1. For all other types, it is the length of the value exactly as it appears
+  in the input. Note that for strings, this includes the quotes.
diff --git a/src/jansson.def b/src/jansson.def
index 5c76c2f6..28ecbdaa 100644
--- a/src/jansson.def
+++ b/src/jansson.def
@@ -80,4 +80,5 @@ EXPORTS
     json_get_alloc_funcs
     jansson_version_str
     jansson_version_cmp
+    json_get_location
 
diff --git a/src/jansson.h b/src/jansson.h
index ee99d488..38b0c09d 100644
--- a/src/jansson.h
+++ b/src/jansson.h
@@ -413,6 +413,10 @@ void json_get_alloc_funcs(json_malloc_t *malloc_fn, json_free_t *free_fn);
 const char *jansson_version_str(void);
 int jansson_version_cmp(int major, int minor, int micro);
 
+/* location information */
+
+int json_get_location(json_t *json, int *line, int *column, int *position, int *length);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/jansson_private.h b/src/jansson_private.h
index b7dbdf92..deb10569 100644
--- a/src/jansson_private.h
+++ b/src/jansson_private.h
@@ -101,6 +101,8 @@ int jsonp_loop_check(hashtable_t *parents, const json_t *json, char *key, size_t
 
 /* Helpers for location information */
 json_t *jsonp_simple(json_t *json, size_t flags);
+void jsonp_store_location(json_t *json, int line, int column,
+                          int position, int length);
 
 /* Windows compatibility */
 #if defined(_WIN32) || defined(WIN32)
diff --git a/src/load.c b/src/load.c
index da8cd030..0d67f6a4 100644
--- a/src/load.c
+++ b/src/load.c
@@ -655,6 +655,35 @@ static void lex_close(lex_t *lex) {
     strbuffer_close(&lex->saved_text);
 }
 
+/*
+ * Record the input location of the token currently held by lex for json.
+ * Does nothing unless JSON_STORE_LOCATION is set in flags, or when json is
+ * NULL (the caller's allocation failed).
+ *
+ * The stream's column/position are stepped back by the token length minus
+ * one, i.e. they are taken to refer to the token's last byte.
+ *
+ * NOTE(review): saved_text.length is a byte count, while the documented
+ * column counts characters; a token holding raw multibyte UTF-8 would
+ * presumably get too small a column - confirm against the lexer.
+ */
+static void store_location_from_lex(json_t *json, size_t flags, const lex_t *lex)
+{
+    int length, back;
+
+    if (!json || !(flags & JSON_STORE_LOCATION))
+        return;
+
+    /* standard C in place of the GNU "?:" shorthand; empty text counts as 1 */
+    length = lex->saved_text.length ? (int)lex->saved_text.length : 1;
+    back = length - 1;
+
+    jsonp_store_location(json, lex->stream.line,
+                         lex->stream.column - back,
+                         lex->stream.position - back,
+                         length);
+}
+
 /*** parser ***/
 
 static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error);
@@ -664,6 +693,7 @@ static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error) {
     if (!object)
         return NULL;
 
+    store_location_from_lex(object, flags, lex);
     lex_scan(lex, error);
     if (lex->token == '}')
         return object;
@@ -741,6 +771,7 @@ static json_t *parse_array(lex_t *lex, size_t flags, json_error_t *error) {
     if (!array)
         return NULL;
 
+    store_location_from_lex(array, flags, lex);
     lex_scan(lex, error);
     if (lex->token == ']')
         return array;
@@ -796,6 +827,7 @@ static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error) {
             }
 
             json = jsonp_stringn_nocheck_own(value, len);
+            store_location_from_lex(json, flags, lex);
             lex->value.string.val = NULL;
             lex->value.string.len = 0;
             break;
@@ -803,24 +835,29 @@ static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error) {
 
         case TOKEN_INTEGER: {
             json = json_integer(lex->value.integer);
+            store_location_from_lex(json, flags, lex);
             break;
         }
 
         case TOKEN_REAL: {
             json = json_real(lex->value.real);
+            store_location_from_lex(json, flags, lex);
             break;
         }
 
         case TOKEN_TRUE:
             json = jsonp_simple(json_true(), flags);
+            store_location_from_lex(json, flags, lex);
             break;
 
         case TOKEN_FALSE:
             json = jsonp_simple(json_false(), flags);
+            store_location_from_lex(json, flags, lex);
             break;
 
         case TOKEN_NULL:
             json = jsonp_simple(json_null(), flags);
+            store_location_from_lex(json, flags, lex);
             break;
 
         case '{':
diff --git a/src/value.c b/src/value.c
index e151191e..43e19ae3 100644
--- a/src/value.c
+++ b/src/value.c
@@ -1026,10 +1026,13 @@ static void json_delete_simple(json_simple_t *simple)
 
 /*** deletion ***/
 
+static void delete_location(json_t *json);
+
 void json_delete(json_t *json) {
     if (!json)
         return;
 
+    delete_location(json);
     switch (json_typeof(json)) {
         case JSON_OBJECT:
             json_delete_object(json_to_object(json));
@@ -1148,3 +1151,134 @@ json_t *do_deep_copy(const json_t *json, hashtable_t *parents) {
             return NULL;
     }
 }
+
+/*** location information ***/
+
+/*
+ * Location record for one decoded value. The leading json_t member only
+ * exists so the record can be handed to the hashtable as a value; its
+ * refcount is pinned to (size_t)-1 when the record is created.
+ */
+typedef struct {
+    json_t json; /* just to integrate with hashtable */
+    int line;
+    int column;
+    int position;
+    int length;
+} json_location_t;
+
+/*
+ * File-scope table of location records, keyed by the bytes of the json_t
+ * pointer they describe. It is set up by the first store.
+ * NOTE(review): nothing here serializes access - confirm that values are
+ * never decoded or deleted from several threads at once.
+ */
+static hashtable_t location_hash;
+static int location_hash_initialized;
+
+/*
+ * atexit() hook that closes the table.
+ * NOTE(review): records still in the table are not passed to jsonp_free()
+ * here - presumably acceptable at process exit, verify with a leak checker.
+ */
+static void location_atexit(void)
+{
+    if (location_hash_initialized)
+        hashtable_close(&location_hash);
+}
+
+/*
+ * Remember the input location of json, overwriting any earlier record for
+ * the same pointer. Failures (NULL json, table setup, allocation) are
+ * silent: the value simply ends up without location information.
+ */
+void jsonp_store_location(json_t *json, int line, int column,
+                          int position, int length)
+{
+    json_location_t *loc = NULL;
+
+    /* json may be NULL if the caller failed to allocate the value; also not
+     * possible to store location for the singleton primitives as one can't
+     * distinguish them by their memory location */
+    if (!json || json->refcount == (size_t)-1)
+        return;
+
+    if (!location_hash_initialized) {
+        if (!hashtable_seed) {
+            /* Autoseed */
+            json_object_seed(0);
+        }
+        if (hashtable_init(&location_hash))
+            return;
+
+        atexit(location_atexit);
+        location_hash_initialized = 1;
+    } else {
+        loc = hashtable_get(&location_hash, (void *)&json, sizeof(json));
+    }
+    if (!loc) {
+        loc = jsonp_malloc(sizeof(*loc));
+        if (!loc)
+            return;
+
+        loc->json.refcount = (size_t)-1;
+
+        if (hashtable_set(&location_hash,
+                          (void *)&json, sizeof(json), (void *)loc)) {
+            /* the table did not take the record, so release it here */
+            jsonp_free(loc);
+            return;
+        }
+    }
+
+    loc->line = line;
+    loc->column = column;
+    loc->position = position;
+    loc->length = length;
+}
+
+/*
+ * Copy the stored location of json into whichever of line, column, position
+ * and length are non-NULL. Returns 0 on success, or -1 when no record exists
+ * for json (which includes the case that nothing was ever stored).
+ */
+int json_get_location(json_t *json, int *line, int *column,
+                      int *position, int *length)
+{
+    json_location_t *loc = NULL;
+
+    if (location_hash_initialized)
+        loc = hashtable_get(&location_hash, (void *)&json, sizeof(json));
+
+    if (!loc)
+        return -1;
+
+    if (line)
+        *line = loc->line;
+    if (column)
+        *column = loc->column;
+    if (position)
+        *position = loc->position;
+    if (length)
+        *length = loc->length;
+
+    return 0;
+}
+
+/*
+ * Drop the location record of json, if one exists. json_delete() calls this
+ * for every value it destroys.
+ */
+static void delete_location(json_t *json)
+{
+    json_location_t *loc;
+
+    if (!location_hash_initialized)
+        return;
+
+    /* look the record up before removing the entry so it can be freed */
+    loc = hashtable_get(&location_hash, (void *)&json, sizeof(json));
+    hashtable_del(&location_hash, (void *)&json, sizeof(json));
+    if (loc)
+        jsonp_free(loc);
+}
diff --git a/test/.gitignore b/test/.gitignore
index 93cc8da4..57701539 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -11,6 +11,7 @@ suites/api/test_fixed_size
 suites/api/test_load
 suites/api/test_load_callback
 suites/api/test_loadb
+suites/api/test_location
suites/api/test_memory_funcs suites/api/test_number suites/api/test_object diff --git a/test/suites/api/Makefile.am b/test/suites/api/Makefile.am index 2bc638b8..9ef422ba 100644 --- a/test/suites/api/Makefile.am +++ b/test/suites/api/Makefile.am @@ -11,6 +11,7 @@ check_PROGRAMS = \ test_load \ test_load_callback \ test_loadb \ + test_location \ test_memory_funcs \ test_number \ test_object \ @@ -28,6 +29,7 @@ test_dump_callback_SOURCES = test_dump_callback.c util.h test_fixed_size_SOURCES = test_fixed_size.c util.h test_load_SOURCES = test_load.c util.h test_loadb_SOURCES = test_loadb.c util.h +test_location_SOURCES = test_location.c util.h test_memory_funcs_SOURCES = test_memory_funcs.c util.h test_number_SOURCES = test_number.c util.h test_object_SOURCES = test_object.c util.h diff --git a/test/suites/api/test_location.c b/test/suites/api/test_location.c new file mode 100644 index 00000000..00d528dd --- /dev/null +++ b/test/suites/api/test_location.c @@ -0,0 +1,158 @@ +#include +#include +#include +#include "util.h" + + +#define INPUT "{ \"testkey\": [\"testvalue1\", \"testvalue2\"] }" +#define MULTILINE_INPUT \ +"{\n" \ +" \"root key 1\": \"root key 1 value 1\",\n" \ +" \"root key 2\": [\n" \ +" \"root key 2 array value 1\",\n" \ +" \"root key 2 array value 2\"\n" \ +" ],\n" \ +" \"root key 3\": [ true, false ],\n" \ +" \"root key 4\": null,\n" \ +" \"root key 5\": {\n" \ +" \"root key 5 object key 1\": 23,\n" \ +" \"root key 5 object key 2\": 3.1415926536\n" \ +" },\n" \ +" \"root key emoji\": \"\\uD83D\\uDE02\"\n" \ +"}\n" + +#define OUTPUT \ +"object: 1, 1, 1, 1\n" \ +"object key \"testkey\" value: 1, 14, 14, 1\n" \ +"array: 1, 14, 14, 1\n" \ +"array item 0: 1, 15, 15, 12\n" \ +"string: \"testvalue1\": 1, 15, 15, 12\n" \ +"array item 1: 1, 29, 29, 12\n" \ +"string: \"testvalue2\": 1, 29, 29, 12\n" + +#define MULTILINE_OUTPUT \ +"object: 1, 1, 1, 1\n" \ +"object key \"root key 1\" value: 2, 16, 18, 20\n" \ +"string: \"root key 1 value 1\": 2, 16, 18, 20\n" \ 
+"object key \"root key 2\" value: 3, 16, 55, 1\n" \ +"array: 3, 16, 55, 1\n" \ +"array item 0: 4, 3, 59, 26\n" \ +"string: \"root key 2 array value 1\": 4, 3, 59, 26\n" \ +"array item 1: 5, 3, 89, 26\n" \ +"string: \"root key 2 array value 2\": 5, 3, 89, 26\n" \ +"object key \"root key 3\" value: 7, 16, 135, 1\n" \ +"array: 7, 16, 135, 1\n" \ +"array item 0: 7, 18, 137, 4\n" \ +"true: 7, 18, 137, 4\n" \ +"array item 1: 7, 24, 143, 5\n" \ +"false: 7, 24, 143, 5\n" \ +"object key \"root key 4\" value: 8, 16, 167, 4\n" \ +"null: 8, 16, 167, 4\n" \ +"object key \"root key 5\" value: 9, 16, 188, 1\n" \ +"object: 9, 16, 188, 1\n" \ +"object key \"root key 5 object key 1\" value: 10, 30, 219, 2\n" \ +"integer: 23: 10, 30, 219, 2\n" \ +"object key \"root key 5 object key 2\" value: 11, 30, 252, 12\n" \ +"real: 3.141593: 11, 30, 252, 12\n" \ +"object key \"root key emoji\" value: 13, 20, 288, 14\n" \ +"string: \"😂\": 13, 20, 288, 14\n" + +static void print_location(char *outbuf, ssize_t *outbufspace, + json_t *root, const char *fmt, ...) 
+{ + va_list ap; + char buf[1024]; + int line, column, position, length; + + if (*outbufspace <= 0) + return; + + if (json_get_location(root, &line, &column, &position, &length)) + return; + + va_start(ap, fmt); + vsnprintf(buf, 1024, fmt, ap); + va_end(ap); + strncat(outbuf, buf, *outbufspace); + *outbufspace -= strlen(buf); + + if (*outbufspace <= 0) + return; + + sprintf(buf, ": %d, %d, %d, %d\n", line, column, position, length); + strncat(outbuf, buf, *outbufspace); + *outbufspace -= strlen(buf); +} + +static void parse(char *outbuf, ssize_t *outbufspace, json_t *root) +{ + unsigned int index; + const char *key; + json_t *tmp; + + switch(json_typeof(root)) { + case JSON_OBJECT: + print_location(outbuf, outbufspace, root, "object"); + json_object_foreach(root, key, tmp) { + print_location(outbuf, outbufspace, tmp, + "object key \"%s\" value", key); + parse(outbuf, outbufspace, tmp); + } + break; + case JSON_ARRAY: + print_location(outbuf, outbufspace, root, "array"); + json_array_foreach(root, index, tmp) { + print_location(outbuf, outbufspace, tmp, + "array item %u", index); + parse(outbuf, outbufspace, tmp); + } + break; + case JSON_STRING: + print_location(outbuf, outbufspace, root, + "string: \"%s\"", json_string_value(root)); + break; + case JSON_INTEGER: + print_location(outbuf, outbufspace, root, + "integer: %" JSON_INTEGER_FORMAT, + json_integer_value(root)); + break; + case JSON_REAL: + print_location(outbuf, outbufspace, root, + "real: %lf", json_real_value(root)); + break; + case JSON_TRUE: + print_location(outbuf, outbufspace, root, "true"); + break; + case JSON_FALSE: + print_location(outbuf, outbufspace, root, "false"); + break; + case JSON_NULL: + print_location(outbuf, outbufspace, root, "null"); + break; + } +} + +static void run_test(const char *input, const char *output, int flags) +{ + ssize_t bufspace; + char buf[8192]; + + json_t *root = json_loads(input, flags, NULL); + if (!root) + fail("loading input failed"); + *buf = '\0'; + bufspace = 
sizeof(buf) - 1; + parse(buf, &bufspace, root); + json_decref(root); + + if (strcmp(buf, output)) + fail_args("output doesn't match:\nexpect:\n%s\ngot:\n%s\n", output, buf); +} + +static void run_tests(void) +{ + run_test(INPUT, "", 0); + run_test(MULTILINE_INPUT, "", 0); + run_test(INPUT, OUTPUT, JSON_STORE_LOCATION); + run_test(MULTILINE_INPUT, MULTILINE_OUTPUT, JSON_STORE_LOCATION); +}