Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Post newparser #432

Merged
merged 4 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bm/bm_parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ int main(int argc, char** argv)

ryml::id_type estimate_capacity(ryml::csubstr src)
{
return (3 * ryml::Parser::estimate_tree_capacity(src)) >> 1;
return (3 * ryml::estimate_tree_capacity(src)) >> 1;
}


Expand Down
4 changes: 4 additions & 0 deletions changelog/current.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ Most of the changes are from the giant Parser refactor described below. Before g
NodeRef::depth_asc() const;
NodeRef::depth_desc() const;
```
- [#PR432](https://github.com/biojppm/rapidyaml/pull/432) - Added a function to estimate the required tree capacity, based on the YAML markup:
```cpp
size_t estimate_tree_capacity(csubstr); // estimate number of nodes resulting from yaml
```


------
Expand Down
46 changes: 23 additions & 23 deletions samples/quickstart.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,8 @@ john: doe)";
// The lower level index API is based on the indices of nodes,
// where the node's id is the node's position in the tree's data
// array. This API is very efficient, but somewhat difficult to use:
size_t root_id = tree.root_id();
size_t bar_id = tree.find_child(root_id, "bar"); // need to get the index right
ryml::id_type root_id = tree.root_id();
ryml::id_type bar_id = tree.find_child(root_id, "bar"); // need to get the index right
CHECK(tree.is_map(root_id)); // all of the index methods are in the tree
CHECK(tree.is_seq(bar_id)); // ... and receive the subject index

Expand Down Expand Up @@ -426,14 +426,14 @@ john: doe)";

// IMPORTANT. The ryml tree uses an index-based linked list for
// storing children, so the complexity of
// `Tree::operator[csubstr]` and `Tree::operator[size_t]` is O(n),
// `Tree::operator[csubstr]` and `Tree::operator[id_type]` is O(n),
// linear on the number of root children. If you use
// `Tree::operator[]` with a large tree where the root has many
// children, you will see a performance hit.
//
// To avoid this hit, you can create your own accelerator
// structure. For example, before doing a lookup, do a single
// traverse at the root level to fill a `map<csubstr,size_t>`
// traverse at the root level to fill a `map<csubstr,id_type>`
// mapping key names to node indices; with a node index, a lookup
// (via `Tree::get()`) is O(1), so this way you can get O(log n)
// lookup from a key. (But please do not use `std::map` if you
Expand Down Expand Up @@ -479,29 +479,29 @@ john: doe)";
ryml::csubstr expected_keys[] = {"foo", "bar", "john"};
// iterate children using the high-level node API:
{
size_t count = 0;
ryml::id_type count = 0;
for(ryml::ConstNodeRef const& child : root.children())
CHECK(child.key() == expected_keys[count++]);
}
// iterate siblings using the high-level node API:
{
size_t count = 0;
ryml::id_type count = 0;
for(ryml::ConstNodeRef const& child : root["foo"].siblings())
CHECK(child.key() == expected_keys[count++]);
}
// iterate children using the lower-level tree index API:
{
size_t count = 0;
for(size_t child_id = tree.first_child(root_id); child_id != ryml::NONE; child_id = tree.next_sibling(child_id))
ryml::id_type count = 0;
for(ryml::id_type child_id = tree.first_child(root_id); child_id != ryml::NONE; child_id = tree.next_sibling(child_id))
CHECK(tree.key(child_id) == expected_keys[count++]);
}
// iterate siblings using the lower-level tree index API:
// (notice the only difference from above is in the loop
// preamble, which calls tree.first_sibling(bar_id) instead of
// tree.first_child(root_id))
{
size_t count = 0;
for(size_t child_id = tree.first_sibling(bar_id); child_id != ryml::NONE; child_id = tree.next_sibling(child_id))
ryml::id_type count = 0;
for(ryml::id_type child_id = tree.first_sibling(bar_id); child_id != ryml::NONE; child_id = tree.next_sibling(child_id))
CHECK(tree.key(child_id) == expected_keys[count++]);
}
}
Expand Down Expand Up @@ -3629,7 +3629,7 @@ void write(ryml::NodeRef *n, my_type const& val)
template<class T>
bool read(ryml::ConstNodeRef const& n, my_seq_type<T> *seq)
{
seq->seq_member.resize(n.num_children()); // num_children() is O(N)
seq->seq_member.resize(static_cast<size_t>(n.num_children())); // num_children() is O(N)
size_t pos = 0;
for(auto const ch : n.children())
ch >> seq->seq_member[pos++];
Expand Down Expand Up @@ -3813,7 +3813,7 @@ void sample_float_precision()
CHECK(output.size() == reference.size());
for(size_t i = 0; i < reference.size(); ++i)
{
CHECK(get_num_digits(tree[i].val()) == num_digits_original);
CHECK(get_num_digits(tree[(ryml::id_type)i].val()) == num_digits_original);
CHECK(fabs(output[i] - reference[i]) < precision_safe);
}
}
Expand Down Expand Up @@ -4577,12 +4577,12 @@ d: 3
CHECK(tree.docref(1).id() == stream.child(1).id());
CHECK(tree.docref(2).id() == stream.child(2).id());
// equivalent: using the lower level index API
const size_t stream_id = tree.root_id();
const ryml::id_type stream_id = tree.root_id();
CHECK(tree.is_root(stream_id));
CHECK(tree.is_stream(stream_id));
CHECK(!tree.is_doc(stream_id));
CHECK(tree.num_children(stream_id) == 3);
for(size_t doc_id = tree.first_child(stream_id); doc_id != ryml::NONE; doc_id = tree.next_sibling(stream_id))
for(ryml::id_type doc_id = tree.first_child(stream_id); doc_id != ryml::NONE; doc_id = tree.next_sibling(stream_id))
CHECK(tree.is_doc(doc_id));
CHECK(tree.doc(0) == tree.child(stream_id, 0));
CHECK(tree.doc(1) == tree.child(stream_id, 1));
Expand All @@ -4594,7 +4594,7 @@ d: 3
CHECK(stream[0]["a"].val() == "0");
CHECK(stream[0]["b"].val() == "1");
// equivalent: using the index API
const size_t doc0_id = tree.first_child(stream_id);
const ryml::id_type doc0_id = tree.first_child(stream_id);
CHECK(tree.is_doc(doc0_id));
CHECK(tree.is_map(doc0_id));
CHECK(tree.val(tree.find_child(doc0_id, "a")) == "0");
Expand All @@ -4606,7 +4606,7 @@ d: 3
CHECK(stream[1]["c"].val() == "2");
CHECK(stream[1]["d"].val() == "3");
// equivalent: using the index API
const size_t doc1_id = tree.next_sibling(doc0_id);
const ryml::id_type doc1_id = tree.next_sibling(doc0_id);
CHECK(tree.is_doc(doc1_id));
CHECK(tree.is_map(doc1_id));
CHECK(tree.val(tree.find_child(doc1_id, "c")) == "2");
Expand All @@ -4620,7 +4620,7 @@ d: 3
CHECK(stream[2][2].val() == "6");
CHECK(stream[2][3].val() == "7");
// equivalent: using the index API
const size_t doc2_id = tree.next_sibling(doc1_id);
const ryml::id_type doc2_id = tree.next_sibling(doc1_id);
CHECK(tree.is_doc(doc2_id));
CHECK(tree.is_seq(doc2_id));
CHECK(tree.val(tree.child(doc2_id, 0)) == "4");
Expand All @@ -4644,18 +4644,18 @@ d: 3
};
// using the node API
{
size_t count = 0;
ryml::id_type count = 0;
const ryml::ConstNodeRef stream = tree.rootref();
CHECK(stream.num_children() == C4_COUNTOF(expected_json));
CHECK(stream.num_children() == (ryml::id_type)C4_COUNTOF(expected_json));
for(ryml::ConstNodeRef doc : stream.children())
CHECK(ryml::emitrs_json<std::string>(doc) == expected_json[count++]);
}
// equivalent: using the index API
{
size_t count = 0;
const size_t stream_id = tree.root_id();
CHECK(tree.num_children(stream_id) == C4_COUNTOF(expected_json));
for(size_t doc_id = tree.first_child(stream_id); doc_id != ryml::NONE; doc_id = tree.next_sibling(doc_id))
ryml::id_type count = 0;
const ryml::id_type stream_id = tree.root_id();
CHECK(tree.num_children(stream_id) == (ryml::id_type)C4_COUNTOF(expected_json));
for(ryml::id_type doc_id = tree.first_child(stream_id); doc_id != ryml::NONE; doc_id = tree.next_sibling(doc_id))
CHECK(ryml::emitrs_json<std::string>(tree, doc_id) == expected_json[count++]);
}
}
Expand Down
36 changes: 18 additions & 18 deletions src/c4/yml/emit.def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ void Emitter<Writer>::_emit_yaml(id_type id)
break;
++end;
}
const size_t parent = m_tree->parent(next_node);
const id_type parent = m_tree->parent(next_node);
for( ; tagds.b != end; ++tagds.b)
{
if(next_node != m_tree->first_child(parent))
Expand Down Expand Up @@ -199,7 +199,7 @@ void Emitter<Writer>::_write_doc(id_type id)
}
else // docval
{
RYML_ASSERT(m_tree->has_val(id));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_val(id));
// some plain scalars such as '...' and '---' must not
// appear at 0-indentation
const csubstr val = m_tree->val(id);
Expand Down Expand Up @@ -245,9 +245,9 @@ void Emitter<Writer>::_do_visit_flow_sl(id_type node, id_type depth, id_type ile
{
const bool prev_flow = m_flow;
m_flow = true;
RYML_ASSERT(!m_tree->is_stream(node));
RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node));
RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node)));
_RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_stream(node));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(node) || m_tree->is_doc(node));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node)));
if(C4_UNLIKELY(depth > m_opts.max_depth()))
_RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded");

Expand All @@ -273,7 +273,7 @@ void Emitter<Writer>::_do_visit_flow_sl(id_type node, id_type depth, id_type ile
}
else if(m_tree->is_container(node))
{
RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node) || m_tree->is_seq(node));

bool spc = false; // write a space

Expand Down Expand Up @@ -451,9 +451,9 @@ void Emitter<Writer>::_do_visit_block_container(id_type node, id_type depth, id_
template<class Writer>
void Emitter<Writer>::_do_visit_block(id_type node, id_type depth, id_type ilevel, id_type do_indent)
{
RYML_ASSERT(!m_tree->is_stream(node));
RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node));
RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node)));
_RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_stream(node));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(node) || m_tree->is_doc(node));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node)));
if(C4_UNLIKELY(depth > m_opts.max_depth()))
_RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded");
if(m_tree->is_doc(node))
Expand All @@ -464,7 +464,7 @@ void Emitter<Writer>::_do_visit_block(id_type node, id_type depth, id_type ileve
}
else if(m_tree->is_container(node))
{
RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node) || m_tree->is_seq(node));
bool spc = false; // write a space
bool nl = false; // write a newline
if(m_tree->has_key(node))
Expand Down Expand Up @@ -672,9 +672,9 @@ size_t Emitter<Writer>::_write_escaped_newlines(csubstr s, size_t i)
this->Writer::_do_write('\n'); // write the newline again
++i; // increase the outer loop counter!
} while(i < s.len && s.str[i] == '\n');
RYML_ASSERT(i > 0);
_RYML_CB_ASSERT(m_tree->callbacks(), i > 0);
--i;
RYML_ASSERT(s.str[i] == '\n');
_RYML_CB_ASSERT(m_tree->callbacks(), s.str[i] == '\n');
return i;
}

Expand All @@ -690,10 +690,10 @@ template<class Writer>
size_t Emitter<Writer>::_write_indented_block(csubstr s, size_t i, id_type ilevel)
{
//_c4dbgpf("indblock@i={} rem=[{}]~~~\n{}~~~", i, s.sub(i).len, s.sub(i));
RYML_ASSERT(i > 0);
RYML_ASSERT(s.str[i-1] == '\n');
RYML_ASSERT(i < s.len);
RYML_ASSERT(s.str[i] == ' ' || s.str[i] == '\t' || s.str[i] == '\n');
_RYML_CB_ASSERT(m_tree->callbacks(), i > 0);
_RYML_CB_ASSERT(m_tree->callbacks(), s.str[i-1] == '\n');
_RYML_CB_ASSERT(m_tree->callbacks(), i < s.len);
_RYML_CB_ASSERT(m_tree->callbacks(), s.str[i] == ' ' || s.str[i] == '\t' || s.str[i] == '\n');
again:
size_t pos = s.find("\n ", i);
if(pos == npos)
Expand Down Expand Up @@ -725,7 +725,7 @@ size_t Emitter<Writer>::_write_indented_block(csubstr s, size_t i, id_type ileve
template<class Writer>
void Emitter<Writer>::_write_scalar_literal(csubstr s, id_type ilevel, bool explicit_key)
{
RYML_ASSERT(s.find("\r") == csubstr::npos);
_RYML_CB_ASSERT(m_tree->callbacks(), s.find("\r") == csubstr::npos);
if(explicit_key)
this->Writer::_do_write("? ");
csubstr trimmed = s.trimr('\n');
Expand Down Expand Up @@ -773,7 +773,7 @@ void Emitter<Writer>::_write_scalar_folded(csubstr s, id_type ilevel, bool expli
{
if(explicit_key)
this->Writer::_do_write("? ");
RYML_ASSERT(s.find("\r") == csubstr::npos);
_RYML_CB_ASSERT(m_tree->callbacks(), s.find("\r") == csubstr::npos);
csubstr trimmed = s.trimr('\n');
const size_t numnewlines_at_end = s.len - trimmed.len;
const bool is_newline_only = (trimmed.len == 0 && (s.len > 0));
Expand Down
Loading
Loading