Skip to content

Commit

Permalink
ensure empty values are not deserialized
Browse files Browse the repository at this point in the history
re #434
  • Loading branch information
biojppm committed Jun 7, 2024
1 parent e432c2f commit 0139399
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 12 deletions.
2 changes: 1 addition & 1 deletion changelog/current.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Most of the changes are from the giant Parser refactor described below. Before g

- [#PR431](https://github.com/biojppm/rapidyaml/pull/431) - Emitter: prevent stack overflows when emitting malicious trees by providing a max tree depth for the emit visitor. This was done by adding an `EmitOptions` structure as an argument both to the emitter and to the emit functions, which is then forwarded to the emitter. This `EmitOptions` structure has a max tree depth setting with a default value of 64.
- [#PR431](https://github.com/biojppm/rapidyaml/pull/431) - Fix `_RYML_CB_ALLOC()` using `(T)` in parenthesis, making the macro unusable.

- [#434] - Ensure empty vals are not deserialized ([#PR436](https://github.com/biojppm/rapidyaml/pull/436)).

### New features

Expand Down
2 changes: 1 addition & 1 deletion ext/c4core
2 changes: 1 addition & 1 deletion samples/quickstart.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3221,7 +3221,7 @@ QWxsIHRoYXQgZ2xpdHRlcnMgaXMgbm90IGdvbGQu: All that glitters is not gold.
result.resize(len); // trim to the length of the decoded buffer
CHECK(result == c.text);
//
// Note also these are just syntatic wrappers to simplify client code.
// Note also these are just syntactic wrappers to simplify client code.
// You can call into the lower level functions without much effort:
result.clear(); // this is not needed. We do it just to show that the first call can fail.
ryml::csubstr encoded = tree[c.base64].key();
Expand Down
2 changes: 1 addition & 1 deletion src/c4/yml/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

#ifndef RYML_LOGBUF_SIZE_MAX
/// size for the fallback larger log buffer. When @ref
/// RYML_LOGBUG_SIZE is not large enough to convert a value to string,
/// RYML_LOGBUF_SIZE is not large enough to convert a value to string,
/// then temporary stack memory is allocated up to
/// RYML_LOGBUF_SIZE_MAX. This limit is in place to prevent a stack
/// overflow. If the printed value requires more than
Expand Down
82 changes: 74 additions & 8 deletions src/c4/yml/node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ struct RoNodeMethods
_RYML_CB_CHECK(tree_->m_callbacks, (pos >= 0 && pos < cap));
_RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable());
_RYML_CB_CHECK(tree_->m_callbacks, tree_->is_container(id_));
id_type ch = tree_->child(id_, pos);
const id_type ch = tree_->child(id_, pos);
_RYML_CB_CHECK(tree_->m_callbacks, ch != NONE);
return {tree_, ch};
}
Expand All @@ -615,7 +615,8 @@ struct RoNodeMethods
/** @name deserialization */
/** @{ */

/** deserialize the node's val to the given variable */
/** deserialize the node's val to the given variable, forwarding
* to the user-overrideable @ref read() function. */
template<class T>
ConstImpl const& operator>> (T &v) const
{
Expand All @@ -625,13 +626,14 @@ struct RoNodeMethods
return *((ConstImpl const*)this);
}

/** deserialize the node's key to the given variable; use @ref key()
/** deserialize the node's key to the given variable, forwarding
* to the user-overrideable @ref read() function; use @ref key()
* to disambiguate; for example: `node >> ryml::key(var)` */
template<class T>
ConstImpl const& operator>> (Key<T> v) const
{
_C4RR();
if( ! from_chars(key(), &v.k))
if(key().empty() || ! from_chars(key(), &v.k))
_RYML_CB_ERR(tree_->m_callbacks, "could not deserialize key");
return *((ConstImpl const*)this);
}
Expand Down Expand Up @@ -1602,30 +1604,94 @@ inline void write(NodeRef *n, T const& v)
n->set_val_serialized(v);
}

/** convert the val of a scalar node to a particular type, by
* forwarding its val to @ref from_chars<T>(). The full string is
* used.
* @return false if the conversion failed */
template<class T>
typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type
inline read(NodeRef const& n, T *v)
{
return from_chars(n.val(), v);
csubstr val = n.val();
if(val.empty())
return false;
return from_chars(val, v);
}
/** convert the val of a scalar node to a particular type, by
* forwarding its val to @ref from_chars<T>(). The full string is
* used.
* @return false if the conversion failed */
template<class T>
typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type
inline read(ConstNodeRef const& n, T *v)
{
return from_chars(n.val(), v);
csubstr val = n.val();
if(val.empty())
return false;
return from_chars(val, v);
}

/** convert the val of a scalar node to a floating point type, by
* forwarding its val to @ref from_chars_float<T>().
*
* @return false if the conversion failed
*
* @warning Unlike non-floating types, only the leading part of the
* string that may constitute a number is processed. This happens
* because the float parsing is delegated to fast_float, which is
* implemented that way. Consequently, for example, all of `"34"`,
* `"34 "` `"34hg"` `"34 gh"` will be read as 34. If you are not sure
* about the contents of the data, you can use
* csubstr::first_real_span() to check before calling `>>`, for
* example like this:
*
* ```cpp
* csubstr val = node.val();
* if(val.first_real_span() == val)
* node >> v;
* else
* ERROR("not a real")
* ```
*/
template<class T>
typename std::enable_if<std::is_floating_point<T>::value, bool>::type
inline read(NodeRef const& n, T *v)
{
return from_chars_float(n.val(), v);
csubstr val = n.val();
if(val.empty())
return false;
return from_chars_float(val, v);

Check warning on line 1663 in src/c4/yml/node.hpp

View check run for this annotation

Codecov / codecov/patch

src/c4/yml/node.hpp#L1663

Added line #L1663 was not covered by tests
}
/** convert the val of a scalar node to a floating point type, by
* forwarding its val to @ref from_chars_float<T>().
*
* @return false if the conversion failed
*
* @warning Unlike non-floating types, only the leading part of the
* string that may constitute a number is processed. This happens
* because the float parsing is delegated to fast_float, which is
* implemented that way. Consequently, for example, all of `"34"`,
* `"34 "` `"34hg"` `"34 gh"` will be read as 34. If you are not sure
* about the contents of the data, you can use
* csubstr::first_real_span() to check before calling `>>`, for
* example like this:
*
* ```cpp
* csubstr val = node.val();
* if(val.first_real_span() == val)
* node >> v;
* else
* ERROR("not a real")
* ```
*/
template<class T>
typename std::enable_if<std::is_floating_point<T>::value, bool>::type
inline read(ConstNodeRef const& n, T *v)
{
return from_chars_float(n.val(), v);
csubstr val = n.val();
if(val.empty())
return false;
return from_chars_float(val, v);
}

/** @} */
Expand Down
99 changes: 99 additions & 0 deletions test/test_serialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,105 @@ TEST(serialize, create_anchor_ref_trip)
EXPECT_EQ(emitrs_yaml<std::string>(tree), expected_yaml);
}

TEST(deserialize, issue434_0)
{
Tree tree = parse_in_arena("{empty: }");
{
int value;
EXPECT_FALSE(read(tree["empty"], &value));
}
ExpectError::do_check(&tree, [&]{
int value = 0;
tree["empty"] >> value;
});
{
double value;
EXPECT_FALSE(read(tree["empty"], &value));
}
ExpectError::do_check(&tree, [&]{
double value = 0;
tree["empty"] >> value;
});
}

TEST(deserialize, issue434_1)
{
Tree tree = parse_in_arena("{val: 0.r3}");
ASSERT_EQ(tree["val"].val(), "0.r3");
EXPECT_EQ(tree["val"].val().first_int_span(), "");
EXPECT_EQ(tree["val"].val().first_real_span(), "");
{
int value;
EXPECT_FALSE(read(tree["val"], &value));
}
ExpectError::do_check(&tree, [&]{
int value = 1;
tree["val"] >> value;
});
{
float value = 1;
tree["val"] >> value;
EXPECT_EQ(value, 0);
}
{
double value = 1;
tree["val"] >> value;
EXPECT_EQ(value, 0);
}
}

TEST(deserialize, issue434_2)
{
Tree tree = parse_in_arena("{val: 34gf3}");
ASSERT_EQ(tree["val"].val(), "34gf3");
EXPECT_EQ(tree["val"].val().first_int_span(), "");
EXPECT_EQ(tree["val"].val().first_real_span(), "");
{
int value;
EXPECT_FALSE(read(tree["val"], &value));
}
ExpectError::do_check(&tree, [&]{
int value = 1;
tree["val"] >> value;
});
{
float value = 1;
tree["val"] >> value;
EXPECT_EQ(value, 34);
}
{
double value = 1;
tree["val"] >> value;
EXPECT_EQ(value, 34);
}
}

TEST(deserialize, issue434_3)
{
Tree tree = parse_in_arena("{val: 34 gf3}");
ASSERT_EQ(tree["val"].val(), "34 gf3");
EXPECT_EQ(tree["val"].val().first_int_span(), "34");
EXPECT_EQ(tree["val"].val().first_real_span(), "34");
{
int value;
EXPECT_FALSE(read(tree["val"], &value));
}
ExpectError::do_check(&tree, [&]{
int value = 1;
tree["val"] >> value;
});
{
float value = 1;
tree["val"] >> value;
EXPECT_EQ(value, 34);
}
{
double value = 1;
tree["val"] >> value;
EXPECT_EQ(value, 34);
}
}


//-------------------------------------------
// this is needed to use the test case library
Expand Down

0 comments on commit 0139399

Please sign in to comment.