From 013939954140bfbe66c01d9645b942cc50c6855c Mon Sep 17 00:00:00 2001 From: Joao Paulo Magalhaes Date: Thu, 6 Jun 2024 14:37:26 +0100 Subject: [PATCH] ensure empty values are not deserialized re #434 --- changelog/current.md | 2 +- ext/c4core | 2 +- samples/quickstart.cpp | 2 +- src/c4/yml/common.hpp | 2 +- src/c4/yml/node.hpp | 82 ++++++++++++++++++++++++++++++---- test/test_serialize.cpp | 99 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 177 insertions(+), 12 deletions(-) diff --git a/changelog/current.md b/changelog/current.md index ba80c6ac..59919749 100644 --- a/changelog/current.md +++ b/changelog/current.md @@ -5,7 +5,7 @@ Most of the changes are from the giant Parser refactor described below. Before g - [#PR431](https://github.com/biojppm/rapidyaml/pull/431) - Emitter: prevent stack overflows when emitting malicious trees by providing a max tree depth for the emit visitor. This was done by adding an `EmitOptions` structure as an argument both to the emitter and to the emit functions, which is then forwarded to the emitter. This `EmitOptions` structure has a max tree depth setting with a default value of 64. - [#PR431](https://github.com/biojppm/rapidyaml/pull/431) - Fix `_RYML_CB_ALLOC()` using `(T)` in parenthesis, making the macro unusable. - +- [#434] - Ensure empty vals are not deserialized ([#PR436](https://github.com/biojppm/rapidyaml/pull/436)). ### New features diff --git a/ext/c4core b/ext/c4core index 5aa9fb58..6da88307 160000 --- a/ext/c4core +++ b/ext/c4core @@ -1 +1 @@ -Subproject commit 5aa9fb586851dd12c013ef0c3d69d9f98b0a52d1 +Subproject commit 6da883076e27ae36855cc512589a549797d5f6c0 diff --git a/samples/quickstart.cpp b/samples/quickstart.cpp index 71c38222..b306871c 100644 --- a/samples/quickstart.cpp +++ b/samples/quickstart.cpp @@ -3221,7 +3221,7 @@ QWxsIHRoYXQgZ2xpdHRlcnMgaXMgbm90IGdvbGQu: All that glitters is not gold. result.resize(len); // trim to the length of the decoded buffer CHECK(result == c.text); // - // Note also these are just syntatic wrappers to simplify client code. + // Note also these are just syntactic wrappers to simplify client code. // You can call into the lower level functions without much effort: result.clear(); // this is not needed. We do it just to show that the first call can fail. ryml::csubstr encoded = tree[c.base64].key(); diff --git a/src/c4/yml/common.hpp b/src/c4/yml/common.hpp index 6b574710..79bc9520 100644 --- a/src/c4/yml/common.hpp +++ b/src/c4/yml/common.hpp @@ -34,7 +34,7 @@ #ifndef RYML_LOGBUF_SIZE_MAX /// size for the fallback larger log buffer. When @ref -/// RYML_LOGBUG_SIZE is not large enough to convert a value to string, +/// RYML_LOGBUF_SIZE is not large enough to convert a value to string, /// then temporary stack memory is allocated up to /// RYML_LOGBUF_SIZE_MAX. This limit is in place to prevent a stack /// overflow. If the printed value requires more than diff --git a/src/c4/yml/node.hpp b/src/c4/yml/node.hpp index 8ff63943..ccda50c3 100644 --- a/src/c4/yml/node.hpp +++ b/src/c4/yml/node.hpp @@ -603,7 +603,7 @@ struct RoNodeMethods _RYML_CB_CHECK(tree_->m_callbacks, (pos >= 0 && pos < cap)); _RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable()); _RYML_CB_CHECK(tree_->m_callbacks, tree_->is_container(id_)); - id_type ch = tree_->child(id_, pos); + const id_type ch = tree_->child(id_, pos); _RYML_CB_CHECK(tree_->m_callbacks, ch != NONE); return {tree_, ch}; } @@ -615,7 +615,8 @@ struct RoNodeMethods /** @name deserialization */ /** @{ */ - /** deserialize the node's val to the given variable */ + /** deserialize the node's val to the given variable, forwarding + * to the user-overrideable @ref read() function. */ template ConstImpl const& operator>> (T &v) const { @@ -625,13 +626,14 @@ struct RoNodeMethods return *((ConstImpl const*)this); } - /** deserialize the node's key to the given variable; use @ref key() + /** deserialize the node's key to the given variable, forwarding + * to the user-overrideable @ref read() function; use @ref key() * to disambiguate; for example: `node >> ryml::key(var)` */ template ConstImpl const& operator>> (Key v) const { _C4RR(); - if( ! from_chars(key(), &v.k)) + if(key().empty() || ! from_chars(key(), &v.k)) _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize key"); return *((ConstImpl const*)this); } @@ -1602,30 +1604,94 @@ inline void write(NodeRef *n, T const& v) n->set_val_serialized(v); } +/** convert the val of a scalar node to a particular type, by + * forwarding its val to @ref from_chars(). The full string is + * used. + * @return false if the conversion failed */ template typename std::enable_if< ! std::is_floating_point::value, bool>::type inline read(NodeRef const& n, T *v) { - return from_chars(n.val(), v); + csubstr val = n.val(); + if(val.empty()) + return false; + return from_chars(val, v); } +/** convert the val of a scalar node to a particular type, by + * forwarding its val to @ref from_chars(). The full string is + * used. + * @return false if the conversion failed */ template typename std::enable_if< ! std::is_floating_point::value, bool>::type inline read(ConstNodeRef const& n, T *v) { - return from_chars(n.val(), v); + csubstr val = n.val(); + if(val.empty()) + return false; + return from_chars(val, v); } +/** convert the val of a scalar node to a floating point type, by + * forwarding its val to @ref from_chars_float(). + * + * @return false if the conversion failed + * + * @warning Unlike non-floating types, only the leading part of the + * string that may constitute a number is processed. This happens + * because the float parsing is delegated to fast_float, which is + * implemented that way. Consequently, for example, all of `"34"`, + * `"34 "` `"34hg"` `"34 gh"` will be read as 34. If you are not sure + * about the contents of the data, you can use + * csubstr::first_real_span() to check before calling `>>`, for + * example like this: + * + * ```cpp + * csubstr val = node.val(); + * if(val.first_real_span() == val) + * node >> v; + * else + * ERROR("not a real") + * ``` + */ template typename std::enable_if::value, bool>::type inline read(NodeRef const& n, T *v) { - return from_chars_float(n.val(), v); + csubstr val = n.val(); + if(val.empty()) + return false; + return from_chars_float(val, v); } +/** convert the val of a scalar node to a floating point type, by + * forwarding its val to @ref from_chars_float(). + * + * @return false if the conversion failed + * + * @warning Unlike non-floating types, only the leading part of the + * string that may constitute a number is processed. This happens + * because the float parsing is delegated to fast_float, which is + * implemented that way. Consequently, for example, all of `"34"`, + * `"34 "` `"34hg"` `"34 gh"` will be read as 34. If you are not sure + * about the contents of the data, you can use + * csubstr::first_real_span() to check before calling `>>`, for + * example like this: + * + * ```cpp + * csubstr val = node.val(); + * if(val.first_real_span() == val) + * node >> v; + * else + * ERROR("not a real") + * ``` + */ template typename std::enable_if::value, bool>::type inline read(ConstNodeRef const& n, T *v) { - return from_chars_float(n.val(), v); + csubstr val = n.val(); + if(val.empty()) + return false; + return from_chars_float(val, v); } /** @} */ diff --git a/test/test_serialize.cpp b/test/test_serialize.cpp index 5cb3b936..78869835 100644 --- a/test/test_serialize.cpp +++ b/test/test_serialize.cpp @@ -517,6 +517,105 @@ TEST(serialize, create_anchor_ref_trip) EXPECT_EQ(emitrs_yaml(tree), expected_yaml); } +TEST(deserialize, issue434_0) +{ + Tree tree = parse_in_arena("{empty: }"); + { + int value; + EXPECT_FALSE(read(tree["empty"], &value)); + } + ExpectError::do_check(&tree, [&]{ + int value = 0; + tree["empty"] >> value; + }); + { + double value; + EXPECT_FALSE(read(tree["empty"], &value)); + } + ExpectError::do_check(&tree, [&]{ + double value = 0; + tree["empty"] >> value; + }); +} + +TEST(deserialize, issue434_1) +{ + Tree tree = parse_in_arena("{val: 0.r3}"); + ASSERT_EQ(tree["val"].val(), "0.r3"); + EXPECT_EQ(tree["val"].val().first_int_span(), ""); + EXPECT_EQ(tree["val"].val().first_real_span(), ""); + { + int value; + EXPECT_FALSE(read(tree["val"], &value)); + } + ExpectError::do_check(&tree, [&]{ + int value = 1; + tree["val"] >> value; + }); + { + float value = 1; + tree["val"] >> value; + EXPECT_EQ(value, 0); + } + { + double value = 1; + tree["val"] >> value; + EXPECT_EQ(value, 0); + } +} + +TEST(deserialize, issue434_2) +{ + Tree tree = parse_in_arena("{val: 34gf3}"); + ASSERT_EQ(tree["val"].val(), "34gf3"); + EXPECT_EQ(tree["val"].val().first_int_span(), ""); + EXPECT_EQ(tree["val"].val().first_real_span(), ""); + { + int value; + EXPECT_FALSE(read(tree["val"], &value)); + } + ExpectError::do_check(&tree, [&]{ + int value = 1; + tree["val"] >> value; + }); + { + float value = 1; + tree["val"] >> value; + EXPECT_EQ(value, 34); + } + { + double value = 1; + tree["val"] >> value; + EXPECT_EQ(value, 34); + } +} + +TEST(deserialize, issue434_3) +{ + Tree tree = parse_in_arena("{val: 34 gf3}"); + ASSERT_EQ(tree["val"].val(), "34 gf3"); + EXPECT_EQ(tree["val"].val().first_int_span(), "34"); + EXPECT_EQ(tree["val"].val().first_real_span(), "34"); + { + int value; + EXPECT_FALSE(read(tree["val"], &value)); + } + ExpectError::do_check(&tree, [&]{ + int value = 1; + tree["val"] >> value; + }); + { + float value = 1; + tree["val"] >> value; + EXPECT_EQ(value, 34); + } + { + double value = 1; + tree["val"] >> value; + EXPECT_EQ(value, 34); + } +} + //------------------------------------------- // this is needed to use the test case library