From f483ccd2c289b9dd8ac25f33c3b1a13562ab4877 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Mon, 20 Nov 2023 12:06:23 -0500 Subject: [PATCH] json_location_parser --- doc/ref/jsonpath/compile_jsonpath.md | 222 +++++++ .../jsonpath/json_location_parser.hpp | 541 ++++++++++++++++++ .../jsoncons_ext/jsonpath/jsonpath_error.hpp | 6 +- .../jsonpath/normalized_path_parser.hpp | 195 +++++-- ...sts.cpp => json_location_parser_tests.cpp} | 33 +- .../src/jsonpath_compile_jsonpath_tests.cpp | 111 ++++ .../src/jsonpath_make_expression_tests.cpp | 40 +- 7 files changed, 1061 insertions(+), 87 deletions(-) create mode 100644 doc/ref/jsonpath/compile_jsonpath.md create mode 100644 include/jsoncons_ext/jsonpath/json_location_parser.hpp rename test/jsonpath/src/{normalized_path_parser_tests.cpp => json_location_parser_tests.cpp} (56%) create mode 100644 test/jsonpath/src/jsonpath_compile_jsonpath_tests.cpp diff --git a/doc/ref/jsonpath/compile_jsonpath.md b/doc/ref/jsonpath/compile_jsonpath.md new file mode 100644 index 0000000000..326fed625e --- /dev/null +++ b/doc/ref/jsonpath/compile_jsonpath.md @@ -0,0 +1,222 @@ +### jsoncons::jsonpath::compile_jsonpath + +```cpp +#include +``` + +```cpp +template +jsonpath_expr compile_jsonpath(const Json::string_view_type& expr, + const custom_functions& funcs = custom_functions()); (1) (since 0.172.0) +``` +```cpp +template +jsonpath_expr compile_jsonpath(const Json::string_view_type& expr, + std::error_code& ec); (2) +``` +```cpp +template +jsonpath_expr compile_jsonpath(const Json::string_view_type& expr, + const custom_functions& funcs, std::error_code& ec); (3) +``` + +(1) Makes a [jsonpath_expr](jsonpath_expression.md) from the JSONPath expression `expr`. + +(2-3) Makes a [jsonpath_expr](jsonpath_expression.md) from the JSONPath expression `expr`. + +#### Parameters + + + + + + + + + + +
`expr`: JSONPath expression string
`ec`: out-parameter for reporting errors in the non-throwing overload
+ +#### Return value + +Returns a [jsonpath_expr](jsonpath_expr.md) object that represents the JSONPath expression. + +#### Exceptions + +(1) throws a [jsonpath_error](jsonpath_error.md) if JSONPath compilation fails. + +(2)-(3) sets the out-parameter `ec` to the [jsonpath_error_category](jsonpath_errc.md) if JSONPath compilation fails. + +### Examples + +The examples below uses the sample data file `books.json`, + +```json +{ + "books": + [ + { + "category": "fiction", + "title" : "A Wild Sheep Chase", + "author" : "Haruki Murakami", + "price" : 22.72 + }, + { + "category": "fiction", + "title" : "The Night Watch", + "author" : "Sergei Lukyanenko", + "price" : 23.58 + }, + { + "category": "fiction", + "title" : "The Comedians", + "author" : "Graham Greene", + "price" : 21.99 + }, + { + "category": "memoir", + "title" : "The Night Watch", + "author" : "Phillips, David Atlee" + } + ] +} +``` + +#### Return copies + +```cpp +int main() +{ + auto expr = jsonpath::compile_jsonpath("$.books[?(@.price > avg($.books[*].price))].title"); + + std::ifstream is("./input/books.json"); + json data = json::parse(is); + + json result = expr.evaluate(data); + std::cout << pretty_print(result) << "\n\n"; +} +``` +Output: +``` +[ + "The Night Watch" +] +``` + +#### Access path and reference to original value + +```cpp +int main() +{ + auto expr = jsonpath::compile_jsonpath("$.books[?(@.price >= 22.0)]"); + + std::ifstream is("./input/books.json"); + json data = json::parse(is); + + auto callback = [](const std::string& path, const json& val) + { + std::cout << path << ": " << val << "\n"; + }; + expr.evaluate(data, callback, jsonpath::result_options::path); +} +``` +Output: +``` +$['books'][0]: {"author":"Haruki Murakami","category":"fiction","price":22.72,"title":"A Wild Sheep Chase"} +$['books'][1]: {"author":"Sergei Lukyanenko","category":"fiction","price":23.58,"title":"The Night Watch"} +``` + +#### Custom functions + +```cpp +#include +#include + +using json = jsoncons::json; 
+namespace jsonpath = jsoncons::jsonpath; + +template +class my_custom_functions : public jsonpath::custom_functions +{ +public: + my_custom_functions() + { + this->register_function("divide", // function name + 2, // number of arguments + [](jsoncons::span> params, + std::error_code& ec) -> Json + { + const Json& arg0 = params[0].value(); + const Json& arg1 = params[1].value(); + + if (!(arg0.is_number() && arg1.is_number())) + { + ec = jsonpath::jsonpath_errc::invalid_type; + return Json::null(); + } + return Json(arg0.as() / arg1.as()); + } + ); + } +}; + +int main() +{ + json root = json::parse(R"([{"foo": 60, "bar": 10},{"foo": 60, "bar": 5}])"); + + auto expr = jsonpath::compile_jsonpath("$[?(divide(@.foo, @.bar) == 6)]", + my_custom_functions()); + json result = expr.evaluate(root); + + std::cout << result << "\n\n"; +} +``` +Output: +``` +[{"bar": 10,"foo": 60}] +``` + +#### compile_jsonpath with stateful allocator + +```cpp +#include // Assuming C++ 17 +#include +#include +#include "FreeListAllocator.hpp" // for FreeListAllocator + +using my_alloc = FreeListAllocator; // an allocator with a single-argument constructor +using my_json = jsoncons::basic_json; + +int main() +{ + auto alloc = my_alloc(1); + + // until 0.171.0 + // jsoncons::json_decoder decoder(jsoncons::result_allocator_arg, + // alloc, alloc); + + // since 0.170.1 + jsoncons::json_decoder decoder(alloc, alloc); + + std::ifstream is("./input/books.json"); + + jsoncons::basic_json_reader,my_alloc> reader(is, decoder, alloc); + reader.read(); + + my_json doc = decoder.get_result(); + + std::string_view p{"$.books[?(@.category == 'fiction')].title"}; + auto expr = jsoncons::jsonpath::compile_jsonpath(combine_allocators(alloc), p); + auto result = expr.evaluate(doc); + + std::cout << pretty_print(result) << "\n\n"; +} +``` +Output: +```json +[ + "A Wild Sheep Chase", + "The Night Watch", + "The Comedians" +] +``` diff --git a/include/jsoncons_ext/jsonpath/json_location_parser.hpp 
b/include/jsoncons_ext/jsonpath/json_location_parser.hpp new file mode 100644 index 0000000000..8fcbe3ef5b --- /dev/null +++ b/include/jsoncons_ext/jsonpath/json_location_parser.hpp @@ -0,0 +1,541 @@ +// Copyright 2013-2023 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONPATH_JSON_LOCATION_PARSER_HPP +#define JSONCONS_JSONPATH_JSON_LOCATION_PARSER_HPP + +#include +#include +#include +#include // std::is_const +#include // std::numeric_limits +#include // std::move +#include +#include // std::reverse +#include +#include +#include + +namespace jsoncons { +namespace jsonpath { + + template + class basic_path_element + { + public: + using char_type = CharT; + using allocator_type = Allocator; + using char_allocator_type = typename std::allocator_traits:: template rebind_alloc; + using string_type = std::basic_string,char_allocator_type>; + private: + bool has_name_; + string_type name_; + std::size_t index_; + + public: + basic_path_element(const string_type& name) + : has_name_(true), name_(name), index_(0) + { + } + + basic_path_element(string_type&& name) + : has_name_(true), name_(std::move(name)), index_(0) + { + } + + basic_path_element(std::size_t index) + : has_name_(false), index_(index) + { + } + + basic_path_element(const basic_path_element& other) = default; + + basic_path_element& operator=(const basic_path_element& other) = default; + + bool has_name() const + { + return has_name_; + } + + bool has_index() const + { + return !has_name_; + } + + const string_type& name() const + { + return name_; + } + + std::size_t index() const + { + return index_; + } + + int compare(const basic_path_element& other) const + { + int diff = 0; + if (has_name_ != other.has_name_) + { + diff = static_cast(has_name_) - static_cast(other.has_name_); + } + else + { + if 
(has_name_) + { + diff = name_.compare(other.name_); + } + else + { + diff = index_ < other.index_ ? -1 : index_ > other.index_ ? 1 : 0; + } + } + return diff; + } + }; + + +namespace detail { + + enum class json_location_state + { + start, + relative_location, + single_quoted_string, + double_quoted_string, + unquoted_string, + selector, + digit, + expect_rbracket, + quoted_string_escape_char + }; + + enum class selector_separator_kind{bracket,dot}; + + template + class json_location_parser + { + public: + using allocator_type = Allocator; + using char_type = CharT; + using string_type = std::basic_string; + using string_view_type = jsoncons::basic_string_view; + using path_element_type = basic_path_element; + using path_element_allocator_type = typename std::allocator_traits:: template rebind_alloc; + using path_type = std::vector; + + private: + + allocator_type alloc_; + std::size_t line_; + std::size_t column_; + const char_type* end_input_; + const char_type* p_; + + public: + json_location_parser(const allocator_type& alloc = allocator_type()) + : alloc_(alloc), line_(1), column_(1), + end_input_(nullptr), + p_(nullptr) + { + } + + json_location_parser(std::size_t line, std::size_t column, + const allocator_type& alloc = allocator_type()) + : alloc_(alloc), line_(line), column_(column), + end_input_(nullptr), + p_(nullptr) + { + } + + std::size_t line() const + { + return line_; + } + + std::size_t column() const + { + return column_; + } + + path_type parse(const string_view_type& path) + { + std::error_code ec; + auto result = parse(path, ec); + if (ec) + { + JSONCONS_THROW(jsonpath_error(ec, line_, column_)); + } + return result; + } + + path_type parse(const string_view_type& path, std::error_code& ec) + { + std::vector elements; + + string_type buffer(alloc_); + + end_input_ = path.data() + path.length(); + p_ = path.data(); + + + selector_separator_kind separator_kind = selector_separator_kind::bracket; + + json_location_state state = 
json_location_state::start; + + while (p_ < end_input_) + { + switch (state) + { + case json_location_state::start: + { + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '$': + case '@': + { + state = json_location_state::relative_location; + ++p_; + ++column_; + break; + } + default: + { + ec = jsonpath_errc::expected_root_or_current_node; + return path_type{}; + } + } + break; + } + case json_location_state::relative_location: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '[': + separator_kind = selector_separator_kind::bracket; + state = json_location_state::selector; + ++p_; + ++column_; + break; + case '.': + separator_kind = selector_separator_kind::dot; + state = json_location_state::selector; + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_lbracket_or_dot; + return path_type(); + }; + break; + case json_location_state::selector: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '\'': + state = json_location_state::single_quoted_string; + ++p_; + ++column_; + break; + case '\"': + state = json_location_state::double_quoted_string; + ++p_; + ++column_; + break; + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + state = json_location_state::digit; + break; + case '-': + ec = jsonpath_errc::expected_single_quote_or_digit; + return path_type(); + default: + if (separator_kind == selector_separator_kind::dot) + { + state = json_location_state::unquoted_string; + } + else + { + ec = jsonpath_errc::expected_single_quote_or_digit; + return path_type(); + } + break; + } + break; + case json_location_state::single_quoted_string: + switch (*p_) + { + case '\'': + elements.emplace_back(buffer); + buffer.clear(); + if (separator_kind == selector_separator_kind::bracket) + { + state = json_location_state::expect_rbracket; 
+ } + else + { + state = json_location_state::relative_location; + } + ++p_; + ++column_; + break; + case '\\': + state = json_location_state::quoted_string_escape_char; + ++p_; + ++column_; + break; + default: + buffer.push_back(*p_); + ++p_; + ++column_; + break; + }; + break; + case json_location_state::double_quoted_string: + switch (*p_) + { + case '\"': + elements.emplace_back(buffer); + buffer.clear(); + if (separator_kind == selector_separator_kind::bracket) + { + state = json_location_state::expect_rbracket; + } + else + { + state = json_location_state::relative_location; + } + ++p_; + ++column_; + break; + case '\\': + state = json_location_state::quoted_string_escape_char; + ++p_; + ++column_; + break; + default: + buffer.push_back(*p_); + ++p_; + ++column_; + break; + }; + break; + case json_location_state::unquoted_string: + switch (*p_) + { + case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':case 'g':case 'h':case 'i':case 'j':case 'k':case 'l':case 'm':case 'n':case 'o':case 'p':case 'q':case 'r':case 's':case 't':case 'u':case 'v':case 'w':case 'x':case 'y':case 'z': + case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':case 'G':case 'H':case 'I':case 'J':case 'K':case 'L':case 'M':case 'N':case 'O':case 'P':case 'Q':case 'R':case 'S':case 'T':case 'U':case 'V':case 'W':case 'X':case 'Y':case 'Z': + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + case '_': + buffer.push_back(*p_); + ++p_; + ++column_; + break; + case '\\': + state = json_location_state::quoted_string_escape_char; + ++p_; + ++column_; + break; + default: + if (typename std::make_unsigned::type(*p_) > 127) + { + buffer.push_back(*p_); + ++p_; + ++column_; + } + else + { + elements.emplace_back(buffer); + buffer.clear(); + advance_past_space_character(); + state = json_location_state::relative_location; + } + break; + }; + break; + case json_location_state::expect_rbracket: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + 
advance_past_space_character(); + break; + case ']': + state = json_location_state::relative_location; + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_rbracket; + return path_type(alloc_); + } + break; + + case json_location_state::digit: + switch(*p_) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + buffer.push_back(*p_); + ++p_; + ++column_; + break; + default: + std::size_t n{0}; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), n); + if (!r) + { + ec = jsonpath_errc::invalid_number; + return path_type(alloc_); + } + elements.emplace_back(n); + buffer.clear(); + if (separator_kind == selector_separator_kind::bracket) + { + state = json_location_state::expect_rbracket; + } + else + { + state = json_location_state::relative_location; + } + break; + } + break; + case json_location_state::quoted_string_escape_char: + switch (*p_) + { + case '\"': + buffer.push_back('\"'); + ++p_; + ++column_; + state = json_location_state::single_quoted_string; + break; + case '\'': + buffer.push_back('\''); + ++p_; + ++column_; + state = json_location_state::single_quoted_string; + break; + case '\\': + buffer.push_back('\\'); + ++p_; + ++column_; + state = json_location_state::single_quoted_string; + break; + case '/': + buffer.push_back('/'); + ++p_; + ++column_; + state = json_location_state::single_quoted_string; + break; + case 'b': + buffer.push_back('\b'); + ++p_; + ++column_; + state = json_location_state::single_quoted_string; + break; + case 'f': + buffer.push_back('\f'); + ++p_; + ++column_; + state = json_location_state::single_quoted_string; + break; + case 'n': + buffer.push_back('\n'); + ++p_; + ++column_; + state = json_location_state::single_quoted_string; + break; + case 'r': + buffer.push_back('\r'); + ++p_; + ++column_; + state = json_location_state::single_quoted_string; + break; + case 't': + buffer.push_back('\t'); + ++p_; + ++column_; + state = 
json_location_state::single_quoted_string; + break; + case 'u': + ++p_; + ++column_; + state = json_location_state::single_quoted_string; + break; + default: + ec = jsonpath_errc::illegal_escaped_character; + return path_type(alloc_); + } + break; + default: + ++p_; + ++column_; + break; + } + } + if (state == json_location_state::unquoted_string) + { + elements.emplace_back(buffer); + } + else if (state == json_location_state::digit) + { + std::size_t n{ 0 }; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), n); + if (!r) + { + ec = jsonpath_errc::invalid_number; + return path_type(alloc_); + } + elements.emplace_back(n); + } + else if (state != json_location_state::relative_location) + { + ec = jsonpath_errc::unexpected_eof; + return path_type(); + } + return path_type(std::move(elements)); + } + + void advance_past_space_character() + { + switch (*p_) + { + case ' ':case '\t': + ++p_; + ++column_; + break; + case '\r': + if (p_+1 < end_input_ && *(p_+1) == '\n') + ++p_; + ++line_; + column_ = 1; + ++p_; + break; + case '\n': + ++line_; + column_ = 1; + ++p_; + break; + default: + break; + } + } + }; + + } // namespace detail + + using path_element = basic_path_element>; + using wpath_element = basic_path_element>; + +} // namespace jsonpath +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/jsonpath/jsonpath_error.hpp b/include/jsoncons_ext/jsonpath/jsonpath_error.hpp index 7f381817d7..06f2dce618 100644 --- a/include/jsoncons_ext/jsonpath/jsonpath_error.hpp +++ b/include/jsoncons_ext/jsonpath/jsonpath_error.hpp @@ -16,7 +16,7 @@ namespace jsoncons { namespace jsonpath { { success = 0, expected_root_or_current_node, - expected_lbracket, + expected_lbracket_or_dot, expected_single_quote_or_digit, expected_root_or_function, expected_current_node, @@ -70,8 +70,8 @@ namespace jsoncons { namespace jsonpath { { case jsonpath_errc::expected_root_or_current_node: return "Expected '$' or '@'"; - case jsonpath_errc::expected_lbracket: 
- return "Expected '['"; + case jsonpath_errc::expected_lbracket_or_dot: + return "Expected '[' or '.'"; case jsonpath_errc::expected_single_quote_or_digit: return "Expected '\'' or digit"; case jsonpath_errc::expected_root_or_function: diff --git a/include/jsoncons_ext/jsonpath/normalized_path_parser.hpp b/include/jsoncons_ext/jsonpath/normalized_path_parser.hpp index 1880a5ea58..8fcbe3ef5b 100644 --- a/include/jsoncons_ext/jsonpath/normalized_path_parser.hpp +++ b/include/jsoncons_ext/jsonpath/normalized_path_parser.hpp @@ -4,8 +4,8 @@ // See https://github.com/danielaparker/jsoncons for latest version -#ifndef JSONCONS_JSONPATH_NORMALIZED_PATH_PARSER_HPP -#define JSONCONS_JSONPATH_NORMALIZED_PATH_PARSER_HPP +#ifndef JSONCONS_JSONPATH_JSON_LOCATION_PARSER_HPP +#define JSONCONS_JSONPATH_JSON_LOCATION_PARSER_HPP #include #include @@ -100,19 +100,23 @@ namespace jsonpath { namespace detail { - enum class normalized_path_state + enum class json_location_state { start, relative_location, single_quoted_string, - bracket_specifier, + double_quoted_string, + unquoted_string, + selector, digit, expect_rbracket, quoted_string_escape_char }; + enum class selector_separator_kind{bracket,dot}; + template - class normalized_path_parser + class json_location_parser { public: using allocator_type = Allocator; @@ -132,14 +136,14 @@ namespace detail { const char_type* p_; public: - normalized_path_parser(const allocator_type& alloc = allocator_type()) + json_location_parser(const allocator_type& alloc = allocator_type()) : alloc_(alloc), line_(1), column_(1), end_input_(nullptr), p_(nullptr) { } - normalized_path_parser(std::size_t line, std::size_t column, + json_location_parser(std::size_t line, std::size_t column, const allocator_type& alloc = allocator_type()) : alloc_(alloc), line_(line), column_(column), end_input_(nullptr), @@ -177,12 +181,16 @@ namespace detail { end_input_ = path.data() + path.length(); p_ = path.data(); - normalized_path_state state = 
normalized_path_state::start; + + selector_separator_kind separator_kind = selector_separator_kind::bracket; + + json_location_state state = json_location_state::start; + while (p_ < end_input_) { switch (state) { - case normalized_path_state::start: + case json_location_state::start: { switch (*p_) { @@ -192,7 +200,7 @@ namespace detail { case '$': case '@': { - state = normalized_path_state::relative_location; + state = json_location_state::relative_location; ++p_; ++column_; break; @@ -205,53 +213,83 @@ namespace detail { } break; } - case normalized_path_state::relative_location: + case json_location_state::relative_location: switch (*p_) { case ' ':case '\t':case '\r':case '\n': advance_past_space_character(); break; case '[': - state = normalized_path_state::bracket_specifier; + separator_kind = selector_separator_kind::bracket; + state = json_location_state::selector; + ++p_; + ++column_; + break; + case '.': + separator_kind = selector_separator_kind::dot; + state = json_location_state::selector; ++p_; ++column_; break; default: - ec = jsonpath_errc::expected_lbracket; + ec = jsonpath_errc::expected_lbracket_or_dot; return path_type(); }; break; - case normalized_path_state::bracket_specifier: + case json_location_state::selector: switch (*p_) { case ' ':case '\t':case '\r':case '\n': advance_past_space_character(); break; case '\'': - state = normalized_path_state::single_quoted_string; + state = json_location_state::single_quoted_string; + ++p_; + ++column_; + break; + case '\"': + state = json_location_state::double_quoted_string; ++p_; ++column_; break; case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': - state = normalized_path_state::digit; + state = json_location_state::digit; break; - default: + case '-': ec = jsonpath_errc::expected_single_quote_or_digit; return path_type(); + default: + if (separator_kind == selector_separator_kind::dot) + { + state = json_location_state::unquoted_string; + } + else + { + ec 
= jsonpath_errc::expected_single_quote_or_digit; + return path_type(); + } + break; } break; - case normalized_path_state::single_quoted_string: + case json_location_state::single_quoted_string: switch (*p_) { case '\'': elements.emplace_back(buffer); buffer.clear(); - state = normalized_path_state::expect_rbracket; + if (separator_kind == selector_separator_kind::bracket) + { + state = json_location_state::expect_rbracket; + } + else + { + state = json_location_state::relative_location; + } ++p_; ++column_; break; case '\\': - state = normalized_path_state::quoted_string_escape_char; + state = json_location_state::quoted_string_escape_char; ++p_; ++column_; break; @@ -262,14 +300,76 @@ namespace detail { break; }; break; - case normalized_path_state::expect_rbracket: + case json_location_state::double_quoted_string: + switch (*p_) + { + case '\"': + elements.emplace_back(buffer); + buffer.clear(); + if (separator_kind == selector_separator_kind::bracket) + { + state = json_location_state::expect_rbracket; + } + else + { + state = json_location_state::relative_location; + } + ++p_; + ++column_; + break; + case '\\': + state = json_location_state::quoted_string_escape_char; + ++p_; + ++column_; + break; + default: + buffer.push_back(*p_); + ++p_; + ++column_; + break; + }; + break; + case json_location_state::unquoted_string: + switch (*p_) + { + case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':case 'g':case 'h':case 'i':case 'j':case 'k':case 'l':case 'm':case 'n':case 'o':case 'p':case 'q':case 'r':case 's':case 't':case 'u':case 'v':case 'w':case 'x':case 'y':case 'z': + case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':case 'G':case 'H':case 'I':case 'J':case 'K':case 'L':case 'M':case 'N':case 'O':case 'P':case 'Q':case 'R':case 'S':case 'T':case 'U':case 'V':case 'W':case 'X':case 'Y':case 'Z': + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + case '_': + buffer.push_back(*p_); + ++p_; + ++column_; + break; 
+ case '\\': + state = json_location_state::quoted_string_escape_char; + ++p_; + ++column_; + break; + default: + if (typename std::make_unsigned::type(*p_) > 127) + { + buffer.push_back(*p_); + ++p_; + ++column_; + } + else + { + elements.emplace_back(buffer); + buffer.clear(); + advance_past_space_character(); + state = json_location_state::relative_location; + } + break; + }; + break; + case json_location_state::expect_rbracket: switch (*p_) { case ' ':case '\t':case '\r':case '\n': advance_past_space_character(); break; case ']': - state = normalized_path_state::relative_location; + state = json_location_state::relative_location; ++p_; ++column_; break; @@ -279,7 +379,7 @@ namespace detail { } break; - case normalized_path_state::digit: + case json_location_state::digit: switch(*p_) { case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': @@ -297,71 +397,78 @@ namespace detail { } elements.emplace_back(n); buffer.clear(); - state = normalized_path_state::expect_rbracket; + if (separator_kind == selector_separator_kind::bracket) + { + state = json_location_state::expect_rbracket; + } + else + { + state = json_location_state::relative_location; + } break; } break; - case normalized_path_state::quoted_string_escape_char: + case json_location_state::quoted_string_escape_char: switch (*p_) { case '\"': buffer.push_back('\"'); ++p_; ++column_; - state = normalized_path_state::single_quoted_string; + state = json_location_state::single_quoted_string; break; case '\'': buffer.push_back('\''); ++p_; ++column_; - state = normalized_path_state::single_quoted_string; + state = json_location_state::single_quoted_string; break; case '\\': buffer.push_back('\\'); ++p_; ++column_; - state = normalized_path_state::single_quoted_string; + state = json_location_state::single_quoted_string; break; case '/': buffer.push_back('/'); ++p_; ++column_; - state = normalized_path_state::single_quoted_string; + state = 
json_location_state::single_quoted_string; break; case 'b': buffer.push_back('\b'); ++p_; ++column_; - state = normalized_path_state::single_quoted_string; + state = json_location_state::single_quoted_string; break; case 'f': buffer.push_back('\f'); ++p_; ++column_; - state = normalized_path_state::single_quoted_string; + state = json_location_state::single_quoted_string; break; case 'n': buffer.push_back('\n'); ++p_; ++column_; - state = normalized_path_state::single_quoted_string; + state = json_location_state::single_quoted_string; break; case 'r': buffer.push_back('\r'); ++p_; ++column_; - state = normalized_path_state::single_quoted_string; + state = json_location_state::single_quoted_string; break; case 't': buffer.push_back('\t'); ++p_; ++column_; - state = normalized_path_state::single_quoted_string; + state = json_location_state::single_quoted_string; break; case 'u': ++p_; ++column_; - state = normalized_path_state::single_quoted_string; + state = json_location_state::single_quoted_string; break; default: ec = jsonpath_errc::illegal_escaped_character; @@ -374,8 +481,22 @@ namespace detail { break; } } - - if (state != normalized_path_state::relative_location) + if (state == json_location_state::unquoted_string) + { + elements.emplace_back(buffer); + } + else if (state == json_location_state::digit) + { + std::size_t n{ 0 }; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), n); + if (!r) + { + ec = jsonpath_errc::invalid_number; + return path_type(alloc_); + } + elements.emplace_back(n); + } + else if (state != json_location_state::relative_location) { ec = jsonpath_errc::unexpected_eof; return path_type(); diff --git a/test/jsonpath/src/normalized_path_parser_tests.cpp b/test/jsonpath/src/json_location_parser_tests.cpp similarity index 56% rename from test/jsonpath/src/normalized_path_parser_tests.cpp rename to test/jsonpath/src/json_location_parser_tests.cpp index 75b6692e44..86e2115039 100644 --- 
a/test/jsonpath/src/normalized_path_parser_tests.cpp +++ b/test/jsonpath/src/json_location_parser_tests.cpp @@ -2,21 +2,21 @@ // Distributed under Boost license #include -#include +#include #include #include using namespace jsoncons; -TEST_CASE("jsonpath normalized_path_parser tests") +TEST_CASE("jsonpath json_location_parser tests") { SECTION("test 1") { - jsonpath::detail::normalized_path_parser> parser; + jsonpath::detail::json_location_parser> parser; std::error_code ec; - std::vector location = parser.parse("$['foo'][3]['bar']", ec); - CHECK_FALSE(ec); + std::vector location = parser.parse(R"($['foo'][3]["bar"])", ec); + REQUIRE_FALSE(ec); CHECK(location.size() == 3); CHECK(location[0].has_name()); @@ -26,9 +26,25 @@ TEST_CASE("jsonpath normalized_path_parser tests") CHECK(location[2].has_name()); CHECK(location[2].name() == "bar"); } - SECTION("test 2") + SECTION("test dot") { - jsonpath::detail::normalized_path_parser> parser; + jsonpath::detail::json_location_parser> parser; + + std::error_code ec; + std::vector location = parser.parse(R"($.'foo'.3.bar)", ec); + REQUIRE_FALSE(ec); + + CHECK(location.size() == 3); + CHECK(location[0].has_name()); + CHECK(location[0].name() == "foo"); + CHECK(location[1].has_index()); + CHECK(location[1].index() == 3); + CHECK(location[2].has_name()); + CHECK(location[2].name() == "bar"); + } + SECTION("test errors") + { + jsonpath::detail::json_location_parser> parser; std::error_code ec; @@ -41,9 +57,6 @@ TEST_CASE("jsonpath normalized_path_parser tests") parser.parse("$['foo'][3a]['bar']", ec); CHECK(ec.value() == (int)jsonpath::jsonpath_errc::expected_rbracket); - parser.parse("$.foo", ec); - CHECK(ec.value() == (int)jsonpath::jsonpath_errc::expected_lbracket); - parser.parse("$['foo'][3]['bar'", ec); CHECK(ec.value() == (int)jsonpath::jsonpath_errc::unexpected_eof); } diff --git a/test/jsonpath/src/jsonpath_compile_jsonpath_tests.cpp b/test/jsonpath/src/jsonpath_compile_jsonpath_tests.cpp new file mode 100644 index 
0000000000..1d84cb2588 --- /dev/null +++ b/test/jsonpath/src/jsonpath_compile_jsonpath_tests.cpp @@ -0,0 +1,111 @@ +// Copyright 2013-2023 Daniel Parker +// Distributed under Boost license + +#if defined(_MSC_VER) +#include "windows.h" // test no inadvertant macro expansions +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // std::unordered_set +#include + +using namespace jsoncons; + +TEST_CASE("jsonpath compile_jsonpath tests") +{ + std::string input = R"( + { + "books": + [ + { + "category": "fiction", + "title" : "A Wild Sheep Chase", + "author" : "Haruki Murakami", + "price" : 22.72 + }, + { + "category": "fiction", + "title" : "The Night Watch", + "author" : "Sergei Lukyanenko", + "price" : 23.58 + }, + { + "category": "fiction", + "title" : "The Comedians", + "author" : "Graham Greene", + "price" : 21.99 + }, + { + "category": "memoir", + "title" : "The Night Watch", + "author" : "Phillips, David Atlee" + } + ] + } + )"; + + SECTION("test 1") + { + int count = 0; + + const json doc = json::parse(input); + + auto expr = jsoncons::jsonpath::compile_jsonpath("$.books[*]"); + + auto callback = [&](const jsonpath::path_node& /*location*/, const json& book) + { + if (book.at("category") == "memoir" && !book.contains("price")) + { + ++count; + } + }; + + expr.select(doc, callback); + + CHECK(count == 1); + CHECK_FALSE(doc["books"][3].contains("price")); + } + + SECTION("test 2") + { + int count = 0; + + json doc = json::parse(input); + + auto expr = jsoncons::jsonpath::compile_jsonpath("$.books[*]"); + + auto callback1 = [&](const jsonpath::path_node& /*location*/, const json& book) + { + if (book.at("category") == "memoir" && !book.contains("price")) + { + ++count; + } + }; + + auto callback2 = [](const jsonpath::path_node& /*location*/, json& book) + { + if (book.at("category") == "memoir" && !book.contains("price")) + { + book.try_emplace("price", 140.0); + } + }; + + expr.select(doc, 
callback1); + + CHECK(count == 1); + + CHECK_FALSE(doc["books"][3].contains("price")); + expr.update(doc, callback2); + CHECK(doc["books"][3].contains("price")); + CHECK(doc["books"][3].at("price") == 140); + } +} + diff --git a/test/jsonpath/src/jsonpath_make_expression_tests.cpp b/test/jsonpath/src/jsonpath_make_expression_tests.cpp index dd6fd39dfc..1e8048988b 100644 --- a/test/jsonpath/src/jsonpath_make_expression_tests.cpp +++ b/test/jsonpath/src/jsonpath_make_expression_tests.cpp @@ -58,9 +58,9 @@ TEST_CASE("jsonpath make_expression test") const json doc = json::parse(input); - auto expr = jsoncons::jsonpath::compile_jsonpath("$.books[*]"); + auto expr = jsoncons::jsonpath::make_expression("$.books[*]"); - auto callback = [&](const jsonpath::path_node& /*location*/, const json& book) + auto callback = [&](const std::string& /*location*/, const json& book) { if (book.at("category") == "memoir" && !book.contains("price")) { @@ -68,44 +68,10 @@ TEST_CASE("jsonpath make_expression test") } }; - expr.select(doc, callback); + expr.evaluate(doc, callback); CHECK(count == 1); CHECK_FALSE(doc["books"][3].contains("price")); } - - SECTION("test 2") - { - int count = 0; - - json doc = json::parse(input); - - auto expr = jsoncons::jsonpath::compile_jsonpath("$.books[*]"); - - auto callback1 = [&](const jsonpath::path_node& /*location*/, const json& book) - { - if (book.at("category") == "memoir" && !book.contains("price")) - { - ++count; - } - }; - - auto callback2 = [](const jsonpath::path_node& /*location*/, json& book) - { - if (book.at("category") == "memoir" && !book.contains("price")) - { - book.try_emplace("price", 140.0); - } - }; - - expr.select(doc, callback1); - - CHECK(count == 1); - - CHECK_FALSE(doc["books"][3].contains("price")); - expr.update(doc, callback2); - CHECK(doc["books"][3].contains("price")); - CHECK(doc["books"][3].at("price") == 140); - } }