Skip to content

Commit

Permalink
wip [ci skip]
Browse files Browse the repository at this point in the history
  • Loading branch information
biojppm committed Apr 16, 2024
1 parent 95e3436 commit 19a4d79
Show file tree
Hide file tree
Showing 7 changed files with 332 additions and 175 deletions.
3 changes: 2 additions & 1 deletion samples/quickstart.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,8 @@ I am something: indeed
// Getting the location of nodes in the source:
//
// Location tracking is opt-in:
ryml::Parser parser(ryml::ParserOptions().locations(true));
ryml::EventHandlerTree evt_handler(ryml::ParserOptions().locations(true));
ryml::Parser parser(&evt_handler);
// Now the parser will start by building the accelerator structure:
ryml::Tree tree2;
parser.parse_in_arena("expected.yml", expected_result, &tree2);
Expand Down
80 changes: 40 additions & 40 deletions src/c4/yml/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,46 +368,6 @@ struct RYML_EXPORT Callbacks
/** @} */


//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------


//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------

/** Style of a block scalar, as selected by its leading indicator. */
enum BlockStyle_e {
    BLOCK_LITERAL = 0, //!< keep newlines (|)
    BLOCK_FOLD = 1     //!< replace newline with single space (>)
};

/** How the newlines at the end of a block scalar are treated. */
enum BlockChomp_e {
    CHOMP_CLIP = 0,  //!< single newline at end (default)
    CHOMP_STRIP = 1, //!< no newline at end (-)
    CHOMP_KEEP = 2   //!< all newlines from end (+)
};


/** Abstracts the fact that a filter result may not fit in the intended memory.
 *
 * The result is considered valid when `str.str` is non-null. The
 * length held in `str` is reported by required_len() whether or not
 * the result is valid. */
struct FilterResult
{
    /** true when the result string has a non-null pointer. */
    C4_ALWAYS_INLINE bool valid() const noexcept { return str.str != nullptr; }
    /** the length stored in the result string. */
    C4_ALWAYS_INLINE size_t required_len() const noexcept { return str.len; }
    /** the result string; asserts that the result is valid.
     * Const-qualified, like the other accessors, so that it can be
     * called on a const result. */
    C4_ALWAYS_INLINE csubstr get() const { RYML_ASSERT(valid()); return str; }
    csubstr str;
};
/** Abstracts the fact that a filter result may not fit in the intended memory.
 *
 * Unlike FilterResult, the required length is kept in its own member
 * (`reqlen`) instead of being read from the length of `str`. The
 * result is considered valid when `str.str` is non-null. */
struct FilterResultExtending
{
    /** true when the result string has a non-null pointer. */
    C4_ALWAYS_INLINE bool valid() const noexcept { return str.str != nullptr; }
    /** the separately stored required length. */
    C4_ALWAYS_INLINE size_t required_len() const noexcept { return reqlen; }
    /** the result string; asserts that the result is valid.
     * Const-qualified, like the other accessors, so that it can be
     * called on a const result. */
    C4_ALWAYS_INLINE csubstr get() const { RYML_ASSERT(valid()); return str; }
    csubstr str;
    size_t reqlen;
};


//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
Expand Down Expand Up @@ -469,6 +429,46 @@ do \
} while(0)



//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------

/** Style of a block scalar, as selected by its leading indicator. */
enum BlockStyle_e {
    BLOCK_LITERAL = 0, //!< keep newlines (|)
    BLOCK_FOLD = 1     //!< replace newline with single space (>)
};

/** How the newlines at the end of a block scalar are treated. */
enum BlockChomp_e {
    CHOMP_CLIP = 0,  //!< single newline at end (default)
    CHOMP_STRIP = 1, //!< no newline at end (-)
    CHOMP_KEEP = 2   //!< all newlines from end (+)
};


/** Abstracts the fact that a filter result may not fit in the intended memory.
 *
 * The result is considered valid when `str.str` is non-null. The
 * length held in `str` is reported by required_len() whether or not
 * the result is valid. */
struct FilterResult
{
    /** true when the result string has a non-null pointer. */
    C4_ALWAYS_INLINE bool valid() const noexcept { return str.str != nullptr; }
    /** the length stored in the result string. */
    C4_ALWAYS_INLINE size_t required_len() const noexcept { return str.len; }
    /** the result string; asserts that the result is valid.
     * Const-qualified, like the other accessors, so that it can be
     * called on a const result. */
    C4_ALWAYS_INLINE csubstr get() const { RYML_ASSERT(valid()); return str; }
    csubstr str;
};
/** Abstracts the fact that a filter result may not fit in the intended memory.
 *
 * Unlike FilterResult, the required length is kept in its own member
 * (`reqlen`) instead of being read from the length of `str`. The
 * result is considered valid when `str.str` is non-null. */
struct FilterResultExtending
{
    /** true when the result string has a non-null pointer. */
    C4_ALWAYS_INLINE bool valid() const noexcept { return str.str != nullptr; }
    /** the separately stored required length. */
    C4_ALWAYS_INLINE size_t required_len() const noexcept { return reqlen; }
    /** the result string; asserts that the result is valid.
     * Const-qualified, like the other accessors, so that it can be
     * called on a const result. */
    C4_ALWAYS_INLINE csubstr get() const { RYML_ASSERT(valid()); return str; }
    csubstr str;
    size_t reqlen;
};


//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------

namespace detail {
template<int8_t signedval, uint8_t unsignedval>
struct _charconstant_t
Expand Down
145 changes: 39 additions & 106 deletions src/c4/yml/event_handler_tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,16 @@
namespace c4 {
namespace yml {

/** @addtogroup doc_event_handlers
* @{ */

/** See the documentation for @ref doc_event_handlers, which has
* important notes about the event model used by rapidyaml. */
struct EventHandlerTree
{
static constexpr const bool is_events = false; // remove
static constexpr const bool is_wtree = true;

/** @name types
* @{ */

// our internal state must inherit from parser state
struct HandlerState : public ParserState
Expand All @@ -42,8 +47,14 @@ struct EventHandlerTree

using state = HandlerState;

/** @} */

public:

/** @cond dev */
static constexpr const bool is_events = false; // remove
static constexpr const bool is_wtree = true;

detail::stack<state> m_stack;
state *C4_RESTRICT m_curr;
state *C4_RESTRICT m_parent;
Expand All @@ -59,9 +70,13 @@ struct EventHandlerTree
#define _disable_(bits) _disable__<bits>()
#endif
#define _has_any_(bits) _has_any__<bits>()
/** @endcond */

public:

/** @name construction and resetting
* @{ */

/** Construct with a default-constructed state stack, null current/parent
 * state pointers, no tree, and the node id set to NONE. */
EventHandlerTree() : m_stack(), m_curr(), m_parent(), m_tree(), m_id(NONE) {}
/** Like the default constructor, but the state stack is constructed
 * from the given callbacks. */
EventHandlerTree(Callbacks const& cb) : m_stack(cb), m_curr(), m_parent(), m_tree(), m_id(NONE) {}
EventHandlerTree(Tree *tree, id_type id) : m_stack(tree->callbacks()), m_curr(), m_parent(), m_tree(tree), m_id(id)
Expand Down Expand Up @@ -98,14 +113,21 @@ struct EventHandlerTree
}
}

/** @} */

public:

/** @name parse events
* @{ */

void start_parse(const char* filename)
{
m_curr->start_parse(filename, m_curr->tr_id);
}

void finish_parse()
{
/** This pointer is temporary. Remember that:
/* This pointer is temporary. Remember that:
*
* - this handler object may be held by the user
* - it may be used with a temporary tree inside the parse function
Expand All @@ -125,6 +147,8 @@ struct EventHandlerTree
m_tree = nullptr;
}

/** @} */

public:

/** @name YAML stream events */
Expand Down Expand Up @@ -312,108 +336,8 @@ struct EventHandlerTree

/** set the previous val as the first key of a new map, with flow style.
*
* For example, consider an implicit map inside a seq: `[a: b, c:
* d]` which is parsed as `[{a: b}, {c: d}]`. The standard event
* sequence for this YAML would be the following:
*
* ```c++
* handler.begin_seq_val_flow();
* handler.begin_map_val_flow();
* handler.set_key_scalar_plain("a");
* handler.set_val_scalar_plain("b");
* handler.end_map();
* handler.add_sibling();
* handler.begin_map_val_flow();
* handler.set_key_scalar_plain("c");
* handler.set_val_scalar_plain("d");
* handler.end_map();
* handler.end_seq();
* ```
*
* The problem with this event sequence is that it forces the
* parser to delay setting the val scalar (in this case "a" and
* "c") until it knows whether the scalar is a key or a val. This
* would require the parser to store the scalar until this
* time. For instance, in the example above, the parser should
* delay setting "a" and "c", because they are in fact keys and
* not vals. Until then, the parser would have to store "a" and
* "c" in its internal state. The downside is that this complexity
* cost would apply even if there is no implicit map -- every val
* in a seq would have to be delayed until one of the
* disambiguating subsequent tokens `,-]:` is found.
*
* By calling this function, the parser can avoid this complexity,
* by preemptively setting the scalar as a val. Then a call to
* this function will create the map and rearrange the scalar as
* key. Now the cost applies only once: when a seqimap starts. So
* the following (easier and cheaper) event sequence below has the
* same effect as the event sequence above:
*
* ```c++
* handler.begin_seq_val_flow();
* handler.set_val_scalar_plain("notmap");
* handler.set_val_scalar_plain("a"); // preemptively set "a" as val!
* handler.actually_as_new_map_key(); // create a map, move the "a" val as the key of the first child of the new map
* handler.set_val_scalar_plain("b"); // now "a" is a key and "b" the val
* handler.end_map();
* handler.set_val_scalar_plain("c"); // "c" also as val!
* handler.actually_as_new_map_key(); // likewise
* handler.set_val_scalar_plain("d"); // now "c" is a key and "d" the val
* handler.end_map();
* handler.end_seq();
* ```
*
* This also applies to container keys (although ryml's tree
* cannot accommodate these): the parser can preemptively set a
* container as a val, and call this event to turn that container
* into a key. For example, consider this yaml:
*
* ```yaml
* [aa, bb]: [cc, dd]
* ^ ^ ^
* | | |
* (2) (1) (3) <- event sequence
* ```
*
* The standard event sequence for this YAML would be the
* following:
*
* ```c++
* handler.begin_map_val_block(); // (1)
* handler.begin_seq_key_flow(); // (2)
* handler.set_val_scalar_plain("aa");
* handler.add_sibling();
* handler.set_val_scalar_plain("bb");
* handler.end_seq();
* handler.begin_seq_val_flow(); // (3)
* handler.set_val_scalar_plain("cc");
* handler.add_sibling();
* handler.set_val_scalar_plain("dd");
* handler.end_seq();
* handler.end_map();
* ```
*
* The problem with the sequence above is that, reading from
* left-to-right, the parser can only detect the proper calls at
* (1) and (2) once it reaches (1) in the YAML source. So, the
* parser would have to buffer the entire event sequence starting
* from the beginning until it reaches (1). Using this function,
* the parser can do instead:
*
* ```c++
* handler.begin_seq_val_flow(); // (2) -- preemptively as val!
* handler.set_val_scalar_plain("aa");
* handler.add_sibling();
* handler.set_val_scalar_plain("bb");
* handler.end_seq();
* handler.actually_as_new_map_key(); // (1) -- adjust when finding that the prev val was actually a key.
* handler.begin_seq_val_flow(); // (3) -- go on as before
* handler.set_val_scalar_plain("cc");
* handler.add_sibling();
* handler.set_val_scalar_plain("dd");
* handler.end_seq();
* handler.end_map();
* ```
* See the documentation for @ref doc_event_handlers, which has
* important notes about this event.
*/
void actually_val_is_first_key_of_new_map_flow()
{
Expand All @@ -432,7 +356,11 @@ struct EventHandlerTree
}

/** like its flow counterpart, but this function can only be
* called after the end of a flow-val at root or doc level. */
* called after the end of a flow-val at root or doc level.
*
* See the documentation for @ref doc_event_handlers, which has
* important notes about this event.
*/
void actually_val_is_first_key_of_new_map_block()
{
_RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos);
Expand Down Expand Up @@ -635,6 +563,7 @@ struct EventHandlerTree

public:

/** @cond dev */
void _reset_parser_state(state* st, id_type parse_root, id_type node)
{
_tr_set_state_(st, node);
Expand Down Expand Up @@ -833,8 +762,12 @@ for(auto const& s : m_stack) printf("popped! state[%zu]: ind=%zu node=%zu\n", s.
#undef _enable_
#undef _disable_
#undef _has_any_

/** @endcond */
};

/** @} */

} // namespace yml
} // namespace c4

Expand Down
Loading

0 comments on commit 19a4d79

Please sign in to comment.