diff options
| author | Stefan Boberg <[email protected]> | 2025-11-07 14:49:13 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-11-07 14:49:13 +0100 |
| commit | 24e43a913f29ac3b314354e8ce5175f135bcc64f (patch) | |
| tree | ca442937ceeb63461012b33a4576e9835099f106 /thirdparty/ryml/src/c4 | |
| parent | get oplog attachments (#622) (diff) | |
| download | zen-24e43a913f29ac3b314354e8ce5175f135bcc64f.tar.xz zen-24e43a913f29ac3b314354e8ce5175f135bcc64f.zip | |
switch to xmake for package management (#611)
This change removes our dependency on vcpkg for package management, in favour of bringing some code in-tree in the `thirdparty` folder as well as using the xmake build-in package management feature. For the latter, all the package definitions are maintained in the zen repo itself, in the `repo` folder.
It should now also be easier to build the project as it will no longer depend on having the right version of vcpkg installed, which has been a common problem for new people coming in to the codebase. Now you should only need xmake to build.
* Bumps xmake requirement on github runners to 2.9.9 to resolve an issue where xmake on Windows invokes cmake with `v144` toolchain which does not exist
* BLAKE3 is now in-tree at `thirdparty/blake3`
* cpr is now in-tree at `thirdparty/cpr`
* cxxopts is now in-tree at `thirdparty/cxxopts`
* fmt is now in-tree at `thirdparty/fmt`
* robin-map is now in-tree at `thirdparty/robin-map`
* ryml is now in-tree at `thirdparty/ryml`
* sol2 is now in-tree at `thirdparty/sol2`
* spdlog is now in-tree at `thirdparty/spdlog`
* utfcpp is now in-tree at `thirdparty/utfcpp`
* xmake package repo definitions is in `repo`
* implemented support for sanitizers. ASAN is supported on windows, TSAN, UBSAN, MSAN etc are supported on Linux/MacOS though I have not yet tested it extensively on MacOS
* the zencore encryption implementation also now supports using mbedTLS which is used on MacOS, though for now we still use openssl on Linux
* crashpad
* bumps libcurl to 8.11.0 (from 8.8.0) which should address a rare build upload bug
Diffstat (limited to 'thirdparty/ryml/src/c4')
23 files changed, 14579 insertions, 0 deletions
diff --git a/thirdparty/ryml/src/c4/yml/common.cpp b/thirdparty/ryml/src/c4/yml/common.cpp new file mode 100644 index 000000000..75b873549 --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/common.cpp @@ -0,0 +1,117 @@ +#include "c4/yml/common.hpp" + +#ifndef RYML_NO_DEFAULT_CALLBACKS +# include <stdlib.h> +# include <stdio.h> +#endif // RYML_NO_DEFAULT_CALLBACKS + +namespace c4 { +namespace yml { + +namespace { +Callbacks s_default_callbacks; +} // anon namespace + +#ifndef RYML_NO_DEFAULT_CALLBACKS +void report_error_impl(const char* msg, size_t length, Location loc, FILE *f) +{ + if(!f) + f = stderr; + if(loc) + { + if(!loc.name.empty()) + { + fwrite(loc.name.str, 1, loc.name.len, f); + fputc(':', f); + } + fprintf(f, "%zu:", loc.line); + if(loc.col) + fprintf(f, "%zu:", loc.col); + if(loc.offset) + fprintf(f, " (%zuB):", loc.offset); + } + fprintf(f, "%.*s\n", (int)length, msg); + fflush(f); +} + +void error_impl(const char* msg, size_t length, Location loc, void * /*user_data*/) +{ + report_error_impl(msg, length, loc, nullptr); + ::abort(); +} + +void* allocate_impl(size_t length, void * /*hint*/, void * /*user_data*/) +{ + void *mem = ::malloc(length); + if(mem == nullptr) + { + const char msg[] = "could not allocate memory"; + error_impl(msg, sizeof(msg)-1, {}, nullptr); + } + return mem; +} + +void free_impl(void *mem, size_t /*length*/, void * /*user_data*/) +{ + ::free(mem); +} +#endif // RYML_NO_DEFAULT_CALLBACKS + + + +Callbacks::Callbacks() + : + m_user_data(nullptr), + #ifndef RYML_NO_DEFAULT_CALLBACKS + m_allocate(allocate_impl), + m_free(free_impl), + m_error(error_impl) + #else + m_allocate(nullptr), + m_free(nullptr), + m_error(nullptr) + #endif +{ +} + +Callbacks::Callbacks(void *user_data, pfn_allocate alloc_, pfn_free free_, pfn_error error_) + : + m_user_data(user_data), + #ifndef RYML_NO_DEFAULT_CALLBACKS + m_allocate(alloc_ ? alloc_ : allocate_impl), + m_free(free_ ? free_ : free_impl), + m_error(error_ ? error_ : error_impl) + #else + m_allocate(alloc_), + m_free(free_), + m_error(error_) + #endif +{ + C4_CHECK(m_allocate); + C4_CHECK(m_free); + C4_CHECK(m_error); +} + + +void set_callbacks(Callbacks const& c) +{ + s_default_callbacks = c; +} + +Callbacks const& get_callbacks() +{ + return s_default_callbacks; +} + +void reset_callbacks() +{ + set_callbacks(Callbacks()); +} + +void error(const char *msg, size_t msg_len, Location loc) +{ + s_default_callbacks.m_error(msg, msg_len, loc, s_default_callbacks.m_user_data); +} + +} // namespace yml +} // namespace c4 diff --git a/thirdparty/ryml/src/c4/yml/common.hpp b/thirdparty/ryml/src/c4/yml/common.hpp new file mode 100644 index 000000000..21aef6ba2 --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/common.hpp @@ -0,0 +1,278 @@ +#ifndef _C4_YML_COMMON_HPP_ +#define _C4_YML_COMMON_HPP_ + +#include <cstddef> +#include <c4/substr.hpp> +#include <c4/yml/export.hpp> + + +#ifndef RYML_USE_ASSERT +# define RYML_USE_ASSERT C4_USE_ASSERT +#endif + + +#if RYML_USE_ASSERT +# define RYML_ASSERT(cond) RYML_CHECK(cond) +# define RYML_ASSERT_MSG(cond, msg) RYML_CHECK_MSG(cond, msg) +#else +# define RYML_ASSERT(cond) +# define RYML_ASSERT_MSG(cond, msg) +#endif + + +#if defined(NDEBUG) || defined(C4_NO_DEBUG_BREAK) +# define RYML_DEBUG_BREAK() +#else +# define RYML_DEBUG_BREAK() \ + { \ + if(c4::get_error_flags() & c4::ON_ERROR_DEBUGBREAK) \ + { \ + C4_DEBUG_BREAK(); \ + } \ + } +#endif + + +#define RYML_CHECK(cond) \ + do { \ + if(!(cond)) \ + { \ + RYML_DEBUG_BREAK() \ + c4::yml::error("check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \ + } \ + } while(0) + +#define RYML_CHECK_MSG(cond, msg) \ + do \ + { \ + if(!(cond)) \ + { \ + RYML_DEBUG_BREAK() \ + c4::yml::error(msg ": check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \ + } \ + } while(0) + + +#if C4_CPP >= 14 +# define RYML_DEPRECATED(msg) [[deprecated(msg)]] +#else +# if defined(_MSC_VER) +# define RYML_DEPRECATED(msg) __declspec(deprecated(msg)) +# else // defined(__GNUC__) || defined(__clang__) +# define RYML_DEPRECATED(msg) __attribute__((deprecated(msg))) +# endif +#endif + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace c4 { +namespace yml { + +enum : size_t { + /** a null position */ + npos = size_t(-1), + /** an index to none */ + NONE = size_t(-1) +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +//! holds a position into a source buffer +struct RYML_EXPORT LineCol +{ + //! number of bytes from the beginning of the source buffer + size_t offset; + //! line + size_t line; + //! column + size_t col; + + LineCol() : offset(), line(), col() {} + //! construct from line and column + LineCol(size_t l, size_t c) : offset(0), line(l), col(c) {} + //! construct from offset, line and column + LineCol(size_t o, size_t l, size_t c) : offset(o), line(l), col(c) {} +}; + + +//! a source file position +struct RYML_EXPORT Location : public LineCol +{ + csubstr name; + + operator bool () const { return !name.empty() || line != 0 || offset != 0; } + + Location() : LineCol(), name() {} + Location( size_t l, size_t c) : LineCol{ l, c}, name( ) {} + Location( csubstr n, size_t l, size_t c) : LineCol{ l, c}, name(n) {} + Location( csubstr n, size_t b, size_t l, size_t c) : LineCol{b, l, c}, name(n) {} + Location(const char *n, size_t l, size_t c) : LineCol{ l, c}, name(to_csubstr(n)) {} + Location(const char *n, size_t b, size_t l, size_t c) : LineCol{b, l, c}, name(to_csubstr(n)) {} +}; + + +//----------------------------------------------------------------------------- + +/** the type of the function used to report errors. This function must + * interrupt execution, either by raising an exception or calling + * std::abort(). + * + * @warning the error callback must never return: it must either abort + * or throw an exception. Otherwise, the parser will enter into an + * infinite loop, or the program may crash. */ +using pfn_error = void (*)(const char* msg, size_t msg_len, Location location, void *user_data); +/** the type of the function used to allocate memory */ +using pfn_allocate = void* (*)(size_t len, void* hint, void *user_data); +/** the type of the function used to free memory */ +using pfn_free = void (*)(void* mem, size_t size, void *user_data); + +/** trigger an error: call the current error callback. */ +RYML_EXPORT void error(const char *msg, size_t msg_len, Location loc); +/** @overload error */ +inline void error(const char *msg, size_t msg_len) +{ + error(msg, msg_len, Location{}); +} +/** @overload error */ +template<size_t N> +inline void error(const char (&msg)[N], Location loc) +{ + error(msg, N-1, loc); +} +/** @overload error */ +template<size_t N> +inline void error(const char (&msg)[N]) +{ + error(msg, N-1, Location{}); +} + +//----------------------------------------------------------------------------- + +/** a c-style callbacks class + * + * @warning the error callback must never return: it must either abort + * or throw an exception. Otherwise, the parser will enter into an + * infinite loop, or the program may crash. */ +struct RYML_EXPORT Callbacks +{ + void * m_user_data; + pfn_allocate m_allocate; + pfn_free m_free; + pfn_error m_error; + + Callbacks(); + Callbacks(void *user_data, pfn_allocate alloc, pfn_free free, pfn_error error_); + + bool operator!= (Callbacks const& that) const { return !operator==(that); } + bool operator== (Callbacks const& that) const + { + return (m_user_data == that.m_user_data && + m_allocate == that.m_allocate && + m_free == that.m_free && + m_error == that.m_error); + } +}; + +/** set the global callbacks. + * + * @warning the error callback must never return: it must either abort + * or throw an exception. Otherwise, the parser will enter into an + * infinite loop, or the program may crash. */ +RYML_EXPORT void set_callbacks(Callbacks const& c); +/// get the global callbacks +RYML_EXPORT Callbacks const& get_callbacks(); +/// set the global callbacks back to their defaults +RYML_EXPORT void reset_callbacks(); + +/// @cond dev +#define _RYML_CB_ERR(cb, msg_literal) \ +do \ +{ \ + const char msg[] = msg_literal; \ + RYML_DEBUG_BREAK() \ + (cb).m_error(msg, sizeof(msg), c4::yml::Location(__FILE__, 0, __LINE__, 0), (cb).m_user_data); \ +} while(0) +#define _RYML_CB_CHECK(cb, cond) \ + do \ + { \ + if(!(cond)) \ + { \ + const char msg[] = "check failed: " #cond; \ + RYML_DEBUG_BREAK() \ + (cb).m_error(msg, sizeof(msg), c4::yml::Location(__FILE__, 0, __LINE__, 0), (cb).m_user_data); \ + } \ + } while(0) +#ifdef RYML_USE_ASSERT +#define _RYML_CB_ASSERT(cb, cond) _RYML_CB_CHECK((cb), (cond)) +#else +#define _RYML_CB_ASSERT(cb, cond) do {} while(0) +#endif +#define _RYML_CB_ALLOC_HINT(cb, T, num, hint) (T*) (cb).m_allocate((num) * sizeof(T), (hint), (cb).m_user_data) +#define _RYML_CB_ALLOC(cb, T, num) _RYML_CB_ALLOC_HINT((cb), (T), (num), nullptr) +#define _RYML_CB_FREE(cb, buf, T, num) \ + do { \ + (cb).m_free((buf), (num) * sizeof(T), (cb).m_user_data); \ + (buf) = nullptr; \ + } while(0) + + + +namespace detail { +template<int8_t signedval, uint8_t unsignedval> +struct _charconstant_t + : public std::conditional<std::is_signed<char>::value, + std::integral_constant<int8_t, signedval>, + std::integral_constant<uint8_t, unsignedval>>::type +{}; +#define _RYML_CHCONST(signedval, unsignedval) ::c4::yml::detail::_charconstant_t<INT8_C(signedval), UINT8_C(unsignedval)>::value +} // namespace detail + + +namespace detail { +struct _SubstrWriter +{ + substr buf; + size_t pos; + _SubstrWriter(substr buf_, size_t pos_=0) : buf(buf_), pos(pos_) {} + void append(csubstr s) + { + C4_ASSERT(!s.overlaps(buf)); + if(pos + s.len <= buf.len) + memcpy(buf.str + pos, s.str, s.len); + pos += s.len; + } + void append(char c) + { + if(pos < buf.len) + buf.str[pos] = c; + ++pos; + } + void append_n(char c, size_t numtimes) + { + if(pos + numtimes < buf.len) + memset(buf.str + pos, c, numtimes); + pos += numtimes; + } + size_t slack() const { return pos <= buf.len ? buf.len - pos : 0; } + size_t excess() const { return pos > buf.len ? pos - buf.len : 0; } + //! get the part written so far + csubstr curr() const { return pos <= buf.len ? buf.first(pos) : buf; } + //! get the part that is still free to write to (the remainder) + substr rem() { return pos < buf.len ? buf.sub(pos) : buf.last(0); } + + size_t advance(size_t more) { pos += more; return pos; } +}; +} // namespace detail + +/// @endcond + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_COMMON_HPP_ */ diff --git a/thirdparty/ryml/src/c4/yml/detail/checks.hpp b/thirdparty/ryml/src/c4/yml/detail/checks.hpp new file mode 100644 index 000000000..39b49e856 --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/detail/checks.hpp @@ -0,0 +1,200 @@ +#ifndef C4_YML_DETAIL_CHECKS_HPP_ +#define C4_YML_DETAIL_CHECKS_HPP_ + +#include "c4/yml/tree.hpp" + +#ifdef __clang__ +# pragma clang diagnostic push +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wtype-limits" // error: comparison of unsigned expression >= 0 is always true +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4296/*expression is always 'boolean_value'*/) +#endif + +namespace c4 { +namespace yml { + + +void check_invariants(Tree const& t, size_t node=NONE); +void check_free_list(Tree const& t); +void check_arena(Tree const& t); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +inline void check_invariants(Tree const& t, size_t node) +{ + if(node == NONE) + { + if(t.size() == 0) return; + node = t.root_id(); + } + + auto const& n = *t._p(node); +#ifdef RYML_DBG + if(n.m_first_child != NONE || n.m_last_child != NONE) + { + printf("check(%zu): fc=%zu lc=%zu\n", node, n.m_first_child, n.m_last_child); + } + else + { + printf("check(%zu)\n", node); + } +#endif + + C4_CHECK(n.m_parent != node); + if(n.m_parent == NONE) + { + C4_CHECK(t.is_root(node)); + } + else //if(n.m_parent != NONE) + { + C4_CHECK(t.has_child(n.m_parent, node)); + + auto const& p = *t._p(n.m_parent); + if(n.m_prev_sibling == NONE) + { + C4_CHECK(p.m_first_child == node); + C4_CHECK(t.first_sibling(node) == node); + } + else + { + C4_CHECK(p.m_first_child != node); + C4_CHECK(t.first_sibling(node) != node); + } + + if(n.m_next_sibling == NONE) + { + C4_CHECK(p.m_last_child == node); + C4_CHECK(t.last_sibling(node) == node); + } + else + { + C4_CHECK(p.m_last_child != node); + C4_CHECK(t.last_sibling(node) != node); + } + } + + C4_CHECK(n.m_first_child != node); + C4_CHECK(n.m_last_child != node); + if(n.m_first_child != NONE || n.m_last_child != NONE) + { + C4_CHECK(n.m_first_child != NONE); + C4_CHECK(n.m_last_child != NONE); + } + + C4_CHECK(n.m_prev_sibling != node); + C4_CHECK(n.m_next_sibling != node); + if(n.m_prev_sibling != NONE) + { + C4_CHECK(t._p(n.m_prev_sibling)->m_next_sibling == node); + C4_CHECK(t._p(n.m_prev_sibling)->m_prev_sibling != node); + } + if(n.m_next_sibling != NONE) + { + C4_CHECK(t._p(n.m_next_sibling)->m_prev_sibling == node); + C4_CHECK(t._p(n.m_next_sibling)->m_next_sibling != node); + } + + size_t count = 0; + for(size_t i = n.m_first_child; i != NONE; i = t.next_sibling(i)) + { +#ifdef RYML_DBG + printf("check(%zu): descend to child[%zu]=%zu\n", node, count, i); +#endif + auto const& ch = *t._p(i); + C4_CHECK(ch.m_parent == node); + C4_CHECK(ch.m_next_sibling != i); + ++count; + } + C4_CHECK(count == t.num_children(node)); + + if(n.m_prev_sibling == NONE && n.m_next_sibling == NONE) + { + if(n.m_parent != NONE) + { + C4_CHECK(t.num_children(n.m_parent) == 1); + C4_CHECK(t.num_siblings(node) == 1); + } + } + + if(node == t.root_id()) + { + C4_CHECK(t.size() == t.m_size); + C4_CHECK(t.capacity() == t.m_cap); + C4_CHECK(t.m_cap == t.m_size + t.slack()); + check_free_list(t); + check_arena(t); + } + + for(size_t i = t.first_child(node); i != NONE; i = t.next_sibling(i)) + { + check_invariants(t, i); + } +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +inline void check_free_list(Tree const& t) +{ + if(t.m_free_head == NONE) + { + C4_CHECK(t.m_free_tail == t.m_free_head); + return; + } + + C4_CHECK(t.m_free_head >= 0 && t.m_free_head < t.m_cap); + C4_CHECK(t.m_free_tail >= 0 && t.m_free_tail < t.m_cap); + + auto const& head = *t._p(t.m_free_head); + //auto const& tail = *t._p(t.m_free_tail); + + //C4_CHECK(head.m_prev_sibling == NONE); + //C4_CHECK(tail.m_next_sibling == NONE); + + size_t count = 0; + for(size_t i = t.m_free_head, prev = NONE; i != NONE; i = t._p(i)->m_next_sibling) + { + auto const& elm = *t._p(i); + if(&elm != &head) + { + C4_CHECK(elm.m_prev_sibling == prev); + } + prev = i; + ++count; + } + C4_CHECK(count == t.slack()); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +inline void check_arena(Tree const& t) +{ + C4_CHECK(t.m_arena.len == 0 || (t.m_arena_pos >= 0 && t.m_arena_pos <= t.m_arena.len)); + C4_CHECK(t.arena_size() == t.m_arena_pos); + C4_CHECK(t.arena_slack() + t.m_arena_pos == t.m_arena.len); +} + + +} /* namespace yml */ +} /* namespace c4 */ + +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#elif defined(_MSC_VER) +# pragma warning(pop) +#endif + +#endif /* C4_YML_DETAIL_CHECKS_HPP_ */ diff --git a/thirdparty/ryml/src/c4/yml/detail/parser_dbg.hpp b/thirdparty/ryml/src/c4/yml/detail/parser_dbg.hpp new file mode 100644 index 000000000..457f1700d --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/detail/parser_dbg.hpp @@ -0,0 +1,137 @@ +#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_ +#define _C4_YML_DETAIL_PARSER_DBG_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +#include "../common.hpp" +#endif +#include <cstdio> + +//----------------------------------------------------------------------------- +// some debugging scaffolds + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4068/*unknown pragma*/) +#endif + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunknown-pragmas" +//#pragma GCC diagnostic ignored "-Wpragma-system-header-outside-header" +#pragma GCC system_header + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Werror" +#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" + +// some debugging scaffolds +#ifdef RYML_DBG +#include <c4/dump.hpp> +namespace c4 { +inline void _dbg_dumper(csubstr s) { fwrite(s.str, 1, s.len, stdout); }; +template<class ...Args> +void _dbg_printf(c4::csubstr fmt, Args&& ...args) +{ + static char writebuf[256]; + auto results = c4::format_dump_resume<&_dbg_dumper>(writebuf, fmt, std::forward<Args>(args)...); + // resume writing if the results failed to fit the buffer + if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) // bufsize will be that of the largest element serialized. Eg int(1), will require 1 byte. + { + results = format_dump_resume<&_dbg_dumper>(results, writebuf, fmt, std::forward<Args>(args)...); + if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) + { + results = format_dump_resume<&_dbg_dumper>(results, writebuf, fmt, std::forward<Args>(args)...); + } + } +} +} // namespace c4 + +# define _c4dbgt(fmt, ...) this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, ## __VA_ARGS__) +# define _c4dbgpf(fmt, ...) _dbg_printf("{}:{}: " fmt "\n", __FILE__, __LINE__, ## __VA_ARGS__) +# define _c4dbgp(msg) _dbg_printf("{}:{}: " msg "\n", __FILE__, __LINE__ ) +# define _c4dbgq(msg) _dbg_printf(msg "\n") +# define _c4err(fmt, ...) \ + do { if(c4::is_debugger_attached()) { C4_DEBUG_BREAK(); } \ + this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); } while(0) +#else +# define _c4dbgt(fmt, ...) +# define _c4dbgpf(fmt, ...) +# define _c4dbgp(msg) +# define _c4dbgq(msg) +# define _c4err(fmt, ...) \ + do { if(c4::is_debugger_attached()) { C4_DEBUG_BREAK(); } \ + this->_err("ERROR: " fmt, ## __VA_ARGS__); } while(0) +#endif + +#define _c4prsp(sp) sp +#define _c4presc(s) __c4presc(s.str, s.len) +inline c4::csubstr _c4prc(const char &C4_RESTRICT c) +{ + switch(c) + { + case '\n': return c4::csubstr("\\n"); + case '\t': return c4::csubstr("\\t"); + case '\0': return c4::csubstr("\\0"); + case '\r': return c4::csubstr("\\r"); + case '\f': return c4::csubstr("\\f"); + case '\b': return c4::csubstr("\\b"); + case '\v': return c4::csubstr("\\v"); + case '\a': return c4::csubstr("\\a"); + default: return c4::csubstr(&c, 1); + } +} +inline void __c4presc(const char *s, size_t len) +{ + size_t prev = 0; + for(size_t i = 0; i < len; ++i) + { + switch(s[i]) + { + case '\n' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('n'); putchar('\n'); prev = i+1; break; + case '\t' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('t'); prev = i+1; break; + case '\0' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('0'); prev = i+1; break; + case '\r' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('r'); prev = i+1; break; + case '\f' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('f'); prev = i+1; break; + case '\b' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('b'); prev = i+1; break; + case '\v' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('v'); prev = i+1; break; + case '\a' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('a'); prev = i+1; break; + case '\x1b': fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('e'); prev = i+1; break; + case -0x3e/*0xc2u*/: + if(i+1 < len) + { + if(s[i+1] == -0x60/*0xa0u*/) + { + fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('_'); prev = i+2; ++i; + } + else if(s[i+1] == -0x7b/*0x85u*/) + { + fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('N'); prev = i+2; ++i; + } + break; + } + case -0x1e/*0xe2u*/: + if(i+2 < len && s[i+1] == -0x80/*0x80u*/) + { + if(s[i+2] == -0x58/*0xa8u*/) + { + fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('L'); prev = i+3; i += 2; + } + else if(s[i+2] == -0x57/*0xa9u*/) + { + fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('P'); prev = i+3; i += 2; + } + break; + } + } + } + fwrite(s + prev, 1, len - prev, stdout); +} + +#pragma clang diagnostic pop +#pragma GCC diagnostic pop + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + + +#endif /* _C4_YML_DETAIL_PARSER_DBG_HPP_ */ diff --git a/thirdparty/ryml/src/c4/yml/detail/print.hpp b/thirdparty/ryml/src/c4/yml/detail/print.hpp new file mode 100644 index 000000000..f88dc251d --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/detail/print.hpp @@ -0,0 +1,128 @@ +#ifndef C4_YML_DETAIL_PRINT_HPP_ +#define C4_YML_DETAIL_PRINT_HPP_ + +#include "c4/yml/tree.hpp" +#include "c4/yml/node.hpp" + + +namespace c4 { +namespace yml { + + +inline size_t print_node(Tree const& p, size_t node, int level, size_t count, bool print_children) +{ + printf("[%zd]%*s[%zd] %p", count, (2*level), "", node, (void*)p.get(node)); + if(p.is_root(node)) + { + printf(" [ROOT]"); + } + printf(" %s:", p.type_str(node)); + if(p.has_key(node)) + { + if(p.has_key_anchor(node)) + { + csubstr ka = p.key_anchor(node); + printf(" &%.*s", (int)ka.len, ka.str); + } + if(p.has_key_tag(node)) + { + csubstr kt = p.key_tag(node); + csubstr k = p.key(node); + printf(" %.*s '%.*s'", (int)kt.len, kt.str, (int)k.len, k.str); + } + else + { + csubstr k = p.key(node); + printf(" '%.*s'", (int)k.len, k.str); + } + } + else + { + RYML_ASSERT( ! p.has_key_tag(node)); + } + if(p.has_val(node)) + { + if(p.has_val_tag(node)) + { + csubstr vt = p.val_tag(node); + csubstr v = p.val(node); + printf(" %.*s '%.*s'", (int)vt.len, vt.str, (int)v.len, v.str); + } + else + { + csubstr v = p.val(node); + printf(" '%.*s'", (int)v.len, v.str); + } + } + else + { + if(p.has_val_tag(node)) + { + csubstr vt = p.val_tag(node); + printf(" %.*s", (int)vt.len, vt.str); + } + } + if(p.has_val_anchor(node)) + { + auto &a = p.val_anchor(node); + printf(" valanchor='&%.*s'", (int)a.len, a.str); + } + printf(" (%zd sibs)", p.num_siblings(node)); + + ++count; + + if(p.is_container(node)) + { + printf(" %zd children:\n", p.num_children(node)); + if(print_children) + { + for(size_t i = p.first_child(node); i != NONE; i = p.next_sibling(i)) + { + count = print_node(p, i, level+1, count, print_children); + } + } + } + else + { + printf("\n"); + } + + return count; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +inline void print_node(ConstNodeRef const& p, int level=0) +{ + print_node(*p.tree(), p.id(), level, 0, true); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +inline size_t print_tree(Tree const& p, size_t node=NONE) +{ + printf("--------------------------------------\n"); + size_t ret = 0; + if(!p.empty()) + { + if(node == NONE) + node = p.root_id(); + ret = print_node(p, node, 0, 0, true); + } + printf("#nodes=%zd vs #printed=%zd\n", p.size(), ret); + printf("--------------------------------------\n"); + return ret; +} + + +} /* namespace yml */ +} /* namespace c4 */ + + +#endif /* C4_YML_DETAIL_PRINT_HPP_ */ diff --git a/thirdparty/ryml/src/c4/yml/detail/stack.hpp b/thirdparty/ryml/src/c4/yml/detail/stack.hpp new file mode 100644 index 000000000..95677ae27 --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/detail/stack.hpp @@ -0,0 +1,274 @@ +#ifndef _C4_YML_DETAIL_STACK_HPP_ +#define _C4_YML_DETAIL_STACK_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +#include "../common.hpp" +#endif + +#ifdef RYML_DBG +# include <type_traits> +#endif + +#include <string.h> + +namespace c4 { +namespace yml { +namespace detail { + +/** A lightweight contiguous stack with SSO. This avoids a dependency on std. */ +template<class T, size_t N=16> +class stack +{ + static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable"); + static_assert(std::is_trivially_destructible<T>::value, "T must be trivially destructible"); + + enum : size_t { sso_size = N }; + +public: + + T m_buf[N]; + T * m_stack; + size_t m_size; + size_t m_capacity; + Callbacks m_callbacks; + +public: + + constexpr static bool is_contiguous() { return true; } + + stack(Callbacks const& cb) + : m_buf() + , m_stack(m_buf) + , m_size(0) + , m_capacity(N) + , m_callbacks(cb) {} + stack() : stack(get_callbacks()) {} + ~stack() + { + _free(); + } + + stack(stack const& that) noexcept : stack(that.m_callbacks) + { + resize(that.m_size); + _cp(&that); + } + + stack(stack &&that) noexcept : stack(that.m_callbacks) + { + _mv(&that); + } + + stack& operator= (stack const& that) noexcept + { + _cb(that.m_callbacks); + resize(that.m_size); + _cp(&that); + return *this; + } + + stack& operator= (stack &&that) noexcept + { + _cb(that.m_callbacks); + _mv(&that); + return *this; + } + +public: + + size_t size() const { return m_size; } + size_t empty() const { return m_size == 0; } + size_t capacity() const { return m_capacity; } + + void clear() + { + m_size = 0; + } + + void resize(size_t sz) + { + reserve(sz); + m_size = sz; + } + + void reserve(size_t sz); + + void push(T const& C4_RESTRICT n) + { + RYML_ASSERT((const char*)&n + sizeof(T) < (const char*)m_stack || &n > m_stack + m_capacity); + if(m_size == m_capacity) + { + size_t cap = m_capacity == 0 ? N : 2 * m_capacity; + reserve(cap); + } + m_stack[m_size] = n; + ++m_size; + } + + void push_top() + { + RYML_ASSERT(m_size > 0); + if(m_size == m_capacity) + { + size_t cap = m_capacity == 0 ? N : 2 * m_capacity; + reserve(cap); + } + m_stack[m_size] = m_stack[m_size - 1]; + ++m_size; + } + + T const& C4_RESTRICT pop() + { + RYML_ASSERT(m_size > 0); + --m_size; + return m_stack[m_size]; + } + + C4_ALWAYS_INLINE T const& C4_RESTRICT top() const { RYML_ASSERT(m_size > 0); return m_stack[m_size - 1]; } + C4_ALWAYS_INLINE T & C4_RESTRICT top() { RYML_ASSERT(m_size > 0); return m_stack[m_size - 1]; } + + C4_ALWAYS_INLINE T const& C4_RESTRICT bottom() const { RYML_ASSERT(m_size > 0); return m_stack[0]; } + C4_ALWAYS_INLINE T & C4_RESTRICT bottom() { RYML_ASSERT(m_size > 0); return m_stack[0]; } + + C4_ALWAYS_INLINE T const& C4_RESTRICT top(size_t i) const { RYML_ASSERT(i < m_size); return m_stack[m_size - 1 - i]; } + C4_ALWAYS_INLINE T & C4_RESTRICT top(size_t i) { RYML_ASSERT(i < m_size); return m_stack[m_size - 1 - i]; } + + C4_ALWAYS_INLINE T const& C4_RESTRICT bottom(size_t i) const { RYML_ASSERT(i < m_size); return m_stack[i]; } + C4_ALWAYS_INLINE T & C4_RESTRICT bottom(size_t i) { RYML_ASSERT(i < m_size); return m_stack[i]; } + + C4_ALWAYS_INLINE T const& C4_RESTRICT operator[](size_t i) const { RYML_ASSERT(i < m_size); return m_stack[i]; } + C4_ALWAYS_INLINE T & C4_RESTRICT operator[](size_t i) { RYML_ASSERT(i < m_size); return m_stack[i]; } + +public: + + using iterator = T *; + using const_iterator = T const *; + + iterator begin() { return m_stack; } + iterator end () { return m_stack + m_size; } + + const_iterator begin() const { return (const_iterator)m_stack; } + const_iterator end () const { return (const_iterator)m_stack + m_size; } + +public: + void _free(); + void _cp(stack const* C4_RESTRICT that); + void _mv(stack * that); + void _cb(Callbacks const& cb); +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +template<class T, size_t N> +void stack<T, N>::reserve(size_t sz) +{ + if(sz <= m_size) + return; + if(sz <= N) + { + m_stack = m_buf; + m_capacity = N; + return; + } + T *buf = (T*) m_callbacks.m_allocate(sz * sizeof(T), m_stack, m_callbacks.m_user_data); + memcpy(buf, m_stack, m_size * sizeof(T)); + if(m_stack != m_buf) + { + m_callbacks.m_free(m_stack, m_capacity * sizeof(T), m_callbacks.m_user_data); + } + m_stack = buf; + m_capacity = sz; +} + + +//----------------------------------------------------------------------------- + +template<class T, size_t N> +void stack<T, N>::_free() +{ + RYML_ASSERT(m_stack != nullptr); // this structure cannot be memset() to zero + if(m_stack != m_buf) + { + m_callbacks.m_free(m_stack, m_capacity * sizeof(T), m_callbacks.m_user_data); + m_stack = m_buf; + m_size = N; + m_capacity = N; + } + else + { + RYML_ASSERT(m_capacity == N); + } +} + + +//----------------------------------------------------------------------------- + +template<class T, size_t N> +void stack<T, N>::_cp(stack const* C4_RESTRICT that) +{ + if(that->m_stack != that->m_buf) + { + RYML_ASSERT(that->m_capacity > N); + RYML_ASSERT(that->m_size <= that->m_capacity); + } + else + { + RYML_ASSERT(that->m_capacity <= N); + RYML_ASSERT(that->m_size <= that->m_capacity); + } + memcpy(m_stack, that->m_stack, that->m_size * sizeof(T)); + m_size = that->m_size; + m_capacity = that->m_size < N ? N : that->m_size; + m_callbacks = that->m_callbacks; +} + + +//----------------------------------------------------------------------------- + +template<class T, size_t N> +void stack<T, N>::_mv(stack * that) +{ + if(that->m_stack != that->m_buf) + { + RYML_ASSERT(that->m_capacity > N); + RYML_ASSERT(that->m_size <= that->m_capacity); + m_stack = that->m_stack; + } + else + { + RYML_ASSERT(that->m_capacity <= N); + RYML_ASSERT(that->m_size <= that->m_capacity); + memcpy(m_buf, that->m_buf, that->m_size * sizeof(T)); + m_stack = m_buf; + } + m_size = that->m_size; + m_capacity = that->m_capacity; + m_callbacks = that->m_callbacks; + // make sure no deallocation happens on destruction + RYML_ASSERT(that->m_stack != m_buf); + that->m_stack = that->m_buf; + that->m_capacity = N; + that->m_size = 0; +} + + +//----------------------------------------------------------------------------- + +template<class T, size_t N> +void stack<T, N>::_cb(Callbacks const& cb) +{ + if(cb != m_callbacks) + { + _free(); + m_callbacks = cb; + } +} + +} // namespace detail +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_DETAIL_STACK_HPP_ */ diff --git a/thirdparty/ryml/src/c4/yml/emit.def.hpp b/thirdparty/ryml/src/c4/yml/emit.def.hpp new file mode 100644 index 000000000..d262a9e2a --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/emit.def.hpp @@ -0,0 +1,960 @@ +#ifndef _C4_YML_EMIT_DEF_HPP_ +#define _C4_YML_EMIT_DEF_HPP_ + +#ifndef _C4_YML_EMIT_HPP_ +#include "c4/yml/emit.hpp" +#endif + +namespace c4 { +namespace yml { + +template<class Writer> +substr Emitter<Writer>::emit_as(EmitType_e type, Tree const& t, size_t id, bool error_on_excess) +{ + if(t.empty()) + { + _RYML_CB_ASSERT(t.callbacks(), id == NONE); + return {}; + } + _RYML_CB_CHECK(t.callbacks(), id < t.size()); + m_tree = &t; + if(type == EMIT_YAML) + _emit_yaml(id); + else if(type == EMIT_JSON) + _do_visit_json(id); + else + _RYML_CB_ERR(m_tree->callbacks(), "unknown emit type"); + return this->Writer::_get(error_on_excess); +} + +template<class Writer> +substr Emitter<Writer>::emit_as(EmitType_e type, Tree const& t, bool error_on_excess) +{ + if(t.empty()) + return {}; + return this->emit_as(type, t, t.root_id(), error_on_excess); +} + +template<class Writer> +substr Emitter<Writer>::emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess) +{ + _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + return this->emit_as(type, *n.tree(), n.id(), error_on_excess); +} + + +//----------------------------------------------------------------------------- + +template<class Writer> +void Emitter<Writer>::_emit_yaml(size_t id) +{ + // save branches in the visitor by doing the initial stream/doc + // logic here, sparing the need to check stream/val/keyval inside + // the visitor functions + auto dispatch = [this](size_t node){ + NodeType ty = m_tree->type(node); + if(ty.marked_flow_sl()) + _do_visit_flow_sl(node, 0); + else if(ty.marked_flow_ml()) + _do_visit_flow_ml(node, 0); + else + { + _do_visit_block(node, 0); + } + }; + if(!m_tree->is_root(id)) + { + if(m_tree->is_container(id) && !m_tree->type(id).marked_flow()) + { + size_t ilevel = 0; + if(m_tree->has_key(id)) + { + this->Writer::_do_write(m_tree->key(id)); + this->Writer::_do_write(":\n"); + ++ilevel; + } + _do_visit_block_container(id, ilevel, ilevel); + return; + } + } + + auto *btd = m_tree->tag_directives().b; + auto *etd = m_tree->tag_directives().e; + auto write_tag_directives = [&btd, etd, this](size_t next_node){ + auto end = btd; + while(end < etd) + { + if(end->next_node_id > next_node) + break; + ++end; + } + for( ; btd != end; ++btd) + { + if(next_node != m_tree->first_child(m_tree->parent(next_node))) + this->Writer::_do_write("...\n"); + this->Writer::_do_write("%TAG "); + this->Writer::_do_write(btd->handle); + this->Writer::_do_write(' '); + this->Writer::_do_write(btd->prefix); + this->Writer::_do_write('\n'); + } + }; + if(m_tree->is_stream(id)) + { + if(m_tree->first_child(id) != NONE) + write_tag_directives(m_tree->first_child(id)); + for(size_t child = m_tree->first_child(id); child != NONE; child = m_tree->next_sibling(child)) + { + dispatch(child); + if(m_tree->next_sibling(child) != NONE) + write_tag_directives(m_tree->next_sibling(child)); + } + } + else if(m_tree->is_container(id)) + { + dispatch(id); + } + else if(m_tree->is_doc(id)) + { + _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_container(id)); // checked above + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_val(id)); // so it must be a val + _write_doc(id); + } + else if(m_tree->is_keyval(id)) + { + _writek(id, 0); + this->Writer::_do_write(": "); + _writev(id, 0); + if(!m_tree->type(id).marked_flow()) + this->Writer::_do_write('\n'); + } + else if(m_tree->is_val(id)) + { + //this->Writer::_do_write("- "); + _writev(id, 0); + if(!m_tree->type(id).marked_flow()) + this->Writer::_do_write('\n'); + } + else if(m_tree->type(id) == NOTYPE) + { + ; + } + else + { + _RYML_CB_ERR(m_tree->callbacks(), "unknown type"); + } +} + +template<class Writer> +void Emitter<Writer>::_write_doc(size_t id) +{ + RYML_ASSERT(m_tree->is_doc(id)); + if(!m_tree->is_root(id)) + { + RYML_ASSERT(m_tree->is_stream(m_tree->parent(id))); + this->Writer::_do_write("---"); + } + if(!m_tree->has_val(id)) // this is more frequent + { + if(m_tree->has_val_tag(id)) + { + if(!m_tree->is_root(id)) + this->Writer::_do_write(' '); + _write_tag(m_tree->val_tag(id)); + } + if(m_tree->has_val_anchor(id)) + { + if(!m_tree->is_root(id)) + this->Writer::_do_write(' '); + this->Writer::_do_write('&'); + this->Writer::_do_write(m_tree->val_anchor(id)); + } + } + else // docval + { + RYML_ASSERT(m_tree->has_val(id)); + RYML_ASSERT(!m_tree->has_key(id)); + if(!m_tree->is_root(id)) + this->Writer::_do_write(' '); + _writev(id, 0); + } + this->Writer::_do_write('\n'); +} + +template<class Writer> +void Emitter<Writer>::_do_visit_flow_sl(size_t node, size_t ilevel) +{ + RYML_ASSERT(!m_tree->is_stream(node)); + RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node)); + RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node))); + + if(m_tree->is_doc(node)) + { + _write_doc(node); + if(!m_tree->has_children(node)) + return; + } + else if(m_tree->is_container(node)) + { + RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node)); + + bool spc = false; // write a space + + if(m_tree->has_key(node)) + { + _writek(node, ilevel); + this->Writer::_do_write(':'); + spc = true; + } + + if(m_tree->has_val_tag(node)) + { + if(spc) + this->Writer::_do_write(' '); + _write_tag(m_tree->val_tag(node)); + spc = true; + } + + if(m_tree->has_val_anchor(node)) + { + if(spc) + this->Writer::_do_write(' '); + this->Writer::_do_write('&'); + this->Writer::_do_write(m_tree->val_anchor(node)); + spc = true; + } + + if(spc) + this->Writer::_do_write(' '); + + if(m_tree->is_map(node)) + { + this->Writer::_do_write('{'); + } + else + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_seq(node)); + this->Writer::_do_write('['); + } + } // container + + for(size_t child = m_tree->first_child(node), count = 0; child != NONE; child = m_tree->next_sibling(child)) + { + if(count++) + this->Writer::_do_write(','); + if(m_tree->is_keyval(child)) + { + _writek(child, ilevel); + this->Writer::_do_write(": "); + _writev(child, ilevel); + } + else if(m_tree->is_val(child)) + { + _writev(child, ilevel); + } + else + { + // with single-line flow, we can never go back to block + _do_visit_flow_sl(child, ilevel + 1); + } + } + + if(m_tree->is_map(node)) + { + this->Writer::_do_write('}'); + } + else if(m_tree->is_seq(node)) + { + this->Writer::_do_write(']'); + } +} + +template<class Writer> +void Emitter<Writer>::_do_visit_flow_ml(size_t id, size_t ilevel, size_t do_indent) +{ + C4_UNUSED(id); + C4_UNUSED(ilevel); + C4_UNUSED(do_indent); + RYML_CHECK(false/*not implemented*/); +} + +template<class Writer> +void Emitter<Writer>::_do_visit_block_container(size_t node, size_t next_level, size_t do_indent) +{ + RepC ind = indent_to(do_indent * next_level); + + if(m_tree->is_seq(node)) + { + for(size_t child = m_tree->first_child(node); child != NONE; child = m_tree->next_sibling(child)) + { + _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->has_key(child)); + if(m_tree->is_val(child)) + { + this->Writer::_do_write(ind); + this->Writer::_do_write("- "); + _writev(child, next_level); + this->Writer::_do_write('\n'); + } + else + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(child)); + NodeType ty = m_tree->type(child); + if(ty.marked_flow_sl()) + { + this->Writer::_do_write(ind); + this->Writer::_do_write("- "); + _do_visit_flow_sl(child, 0u); + this->Writer::_do_write('\n'); + } + else if(ty.marked_flow_ml()) + { + this->Writer::_do_write(ind); + this->Writer::_do_write("- "); + _do_visit_flow_ml(child, next_level, do_indent); + this->Writer::_do_write('\n'); + } + else + { + _do_visit_block(child, next_level, do_indent); + } + } + do_indent = true; + ind = indent_to(do_indent * next_level); + } + } + else // map + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node)); + for(size_t ich = m_tree->first_child(node); ich != NONE; ich = m_tree->next_sibling(ich)) + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_key(ich)); + if(m_tree->is_keyval(ich)) + { + this->Writer::_do_write(ind); + _writek(ich, next_level); + this->Writer::_do_write(": "); + _writev(ich, next_level); + this->Writer::_do_write('\n'); + } + else + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(ich)); + NodeType ty = m_tree->type(ich); + if(ty.marked_flow_sl()) + { + this->Writer::_do_write(ind); + _do_visit_flow_sl(ich, 0u); + this->Writer::_do_write('\n'); + } + else if(ty.marked_flow_ml()) + { + this->Writer::_do_write(ind); + _do_visit_flow_ml(ich, 0u); + this->Writer::_do_write('\n'); + } + else + { + _do_visit_block(ich, next_level, do_indent); + } + } + do_indent = true; + ind = indent_to(do_indent * next_level); + } + } +} + +template<class Writer> +void Emitter<Writer>::_do_visit_block(size_t node, size_t ilevel, size_t do_indent) +{ + RYML_ASSERT(!m_tree->is_stream(node)); + RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node)); + RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node))); + RepC ind = indent_to(do_indent * ilevel); + + if(m_tree->is_doc(node)) + { + _write_doc(node); + if(!m_tree->has_children(node)) + return; + } + else if(m_tree->is_container(node)) + { + RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node)); + + bool spc = false; // write a space + bool nl = false; // write a newline + + if(m_tree->has_key(node)) + { + this->Writer::_do_write(ind); + _writek(node, ilevel); + this->Writer::_do_write(':'); + spc = true; + } + else if(!m_tree->is_root(node)) + { + this->Writer::_do_write(ind); + this->Writer::_do_write('-'); + spc = true; + } + + if(m_tree->has_val_tag(node)) + { + if(spc) + this->Writer::_do_write(' '); + _write_tag(m_tree->val_tag(node)); + spc = true; + nl = true; + } + + if(m_tree->has_val_anchor(node)) + { + if(spc) + this->Writer::_do_write(' '); + this->Writer::_do_write('&'); + this->Writer::_do_write(m_tree->val_anchor(node)); + spc = true; + nl = true; + } + + if(m_tree->has_children(node)) + { + if(m_tree->has_key(node)) + nl = true; + else + if(!m_tree->is_root(node) && !nl) + spc = true; + } + else + { + if(m_tree->is_seq(node)) + this->Writer::_do_write(" []\n"); + else if(m_tree->is_map(node)) + this->Writer::_do_write(" {}\n"); + return; + } + + if(spc && !nl) + this->Writer::_do_write(' '); + + do_indent = 0; + if(nl) + { + this->Writer::_do_write('\n'); + do_indent = 1; + } + } // container + + size_t next_level = ilevel + 1; + if(m_tree->is_root(node) || m_tree->is_doc(node)) + next_level = ilevel; // do not indent at top level + + _do_visit_block_container(node, next_level, do_indent); +} + +template<class Writer> +void Emitter<Writer>::_do_visit_json(size_t id) +{ + _RYML_CB_CHECK(m_tree->callbacks(), !m_tree->is_stream(id)); // JSON does not have streams + if(m_tree->is_keyval(id)) + { + _writek_json(id); + this->Writer::_do_write(": "); + _writev_json(id); + } + else if(m_tree->is_val(id)) + { + _writev_json(id); + } + else if(m_tree->is_container(id)) + { + if(m_tree->has_key(id)) + { + _writek_json(id); + this->Writer::_do_write(": "); + } + if(m_tree->is_seq(id)) + this->Writer::_do_write('['); + else if(m_tree->is_map(id)) + this->Writer::_do_write('{'); + } // container + + for(size_t ich = m_tree->first_child(id); ich != NONE; ich = m_tree->next_sibling(ich)) + { + if(ich != m_tree->first_child(id)) + this->Writer::_do_write(','); + _do_visit_json(ich); + } + + if(m_tree->is_seq(id)) + this->Writer::_do_write(']'); + else if(m_tree->is_map(id)) + this->Writer::_do_write('}'); +} + +template<class Writer> +void Emitter<Writer>::_write(NodeScalar const& C4_RESTRICT sc, NodeType flags, size_t ilevel) +{ + if( ! sc.tag.empty()) + { + _write_tag(sc.tag); + this->Writer::_do_write(' '); + } + if(flags.has_anchor()) + { + RYML_ASSERT(flags.is_ref() != flags.has_anchor()); + RYML_ASSERT( ! sc.anchor.empty()); + this->Writer::_do_write('&'); + this->Writer::_do_write(sc.anchor); + this->Writer::_do_write(' '); + } + else if(flags.is_ref()) + { + if(sc.anchor != "<<") + this->Writer::_do_write('*'); + this->Writer::_do_write(sc.anchor); + return; + } + + // ensure the style flags only have one of KEY or VAL + _RYML_CB_ASSERT(m_tree->callbacks(), ((flags & (_WIP_KEY_STYLE|_WIP_VAL_STYLE)) == 0) || (((flags&_WIP_KEY_STYLE) == 0) != ((flags&_WIP_VAL_STYLE) == 0))); + + auto style_marks = flags & (_WIP_KEY_STYLE|_WIP_VAL_STYLE); + if(style_marks & (_WIP_KEY_LITERAL|_WIP_VAL_LITERAL)) + { + _write_scalar_literal(sc.scalar, ilevel, flags.has_key()); + } + else if(style_marks & (_WIP_KEY_FOLDED|_WIP_VAL_FOLDED)) + { + _write_scalar_folded(sc.scalar, ilevel, flags.has_key()); + } + else if(style_marks & (_WIP_KEY_SQUO|_WIP_VAL_SQUO)) + { + _write_scalar_squo(sc.scalar, ilevel); + } + else if(style_marks & (_WIP_KEY_DQUO|_WIP_VAL_DQUO)) + { + _write_scalar_dquo(sc.scalar, ilevel); + } + else if(style_marks & (_WIP_KEY_PLAIN|_WIP_VAL_PLAIN)) + { + _write_scalar_plain(sc.scalar, ilevel); + } + else if(!style_marks) + { + size_t first_non_nl = sc.scalar.first_not_of('\n'); + bool all_newlines = first_non_nl == npos; + bool has_leading_ws = (!all_newlines) && sc.scalar.sub(first_non_nl).begins_with_any(" \t"); + bool do_literal = ((!sc.scalar.empty() && all_newlines) || (has_leading_ws && !sc.scalar.trim(' ').empty())); + if(do_literal) + { + _write_scalar_literal(sc.scalar, ilevel, flags.has_key(), /*explicit_indentation*/has_leading_ws); + } + else + { + for(size_t i = 0; i < sc.scalar.len; ++i) + { + if(sc.scalar.str[i] == '\n') + { + _write_scalar_literal(sc.scalar, ilevel, flags.has_key(), /*explicit_indentation*/has_leading_ws); + goto wrote_special; + } + // todo: check for escaped characters requiring double quotes + } + _write_scalar(sc.scalar, flags.is_quoted()); + wrote_special: + ; + } + } + else + { + _RYML_CB_ERR(m_tree->callbacks(), "not implemented"); + } +} +template<class Writer> +void Emitter<Writer>::_write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags) +{ + if(C4_UNLIKELY( ! sc.tag.empty())) + _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have tags"); + if(C4_UNLIKELY(flags.has_anchor())) + _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have anchors"); + _write_scalar_json(sc.scalar, flags.has_key(), flags.is_quoted()); +} + +#define _rymlindent_nextline() for(size_t lv = 0; lv < ilevel+1; ++lv) { this->Writer::_do_write(' '); this->Writer::_do_write(' '); } + +template<class Writer> +void Emitter<Writer>::_write_scalar_literal(csubstr s, size_t ilevel, bool explicit_key, bool explicit_indentation) +{ + if(explicit_key) + this->Writer::_do_write("? "); + csubstr trimmed = s.trimr("\n\r"); + size_t numnewlines_at_end = s.len - trimmed.len - s.sub(trimmed.len).count('\r'); + // + if(!explicit_indentation) + this->Writer::_do_write('|'); + else + this->Writer::_do_write("|2"); + // + if(numnewlines_at_end > 1 || (trimmed.len == 0 && s.len > 0)/*only newlines*/) + this->Writer::_do_write("+\n"); + else if(numnewlines_at_end == 1) + this->Writer::_do_write('\n'); + else + this->Writer::_do_write("-\n"); + // + if(trimmed.len) + { + size_t pos = 0; // tracks the last character that was already written + for(size_t i = 0; i < trimmed.len; ++i) + { + if(trimmed[i] != '\n') + continue; + // write everything up to this point + csubstr since_pos = trimmed.range(pos, i+1); // include the newline + _rymlindent_nextline() + this->Writer::_do_write(since_pos); + pos = i+1; // already written + } + if(pos < trimmed.len) + { + _rymlindent_nextline() + this->Writer::_do_write(trimmed.sub(pos)); + } + if(numnewlines_at_end) + { + this->Writer::_do_write('\n'); + --numnewlines_at_end; + } + } + for(size_t i = 0; i < numnewlines_at_end; ++i) + { + _rymlindent_nextline() + if(i+1 < numnewlines_at_end || explicit_key) + this->Writer::_do_write('\n'); + } + if(explicit_key && !numnewlines_at_end) + this->Writer::_do_write('\n'); +} + +template<class Writer> +void Emitter<Writer>::_write_scalar_folded(csubstr s, size_t ilevel, bool explicit_key) +{ + if(explicit_key) + { + this->Writer::_do_write("? "); + } + RYML_ASSERT(s.find("\r") == csubstr::npos); + csubstr trimmed = s.trimr('\n'); + size_t numnewlines_at_end = s.len - trimmed.len; + if(numnewlines_at_end == 0) + { + this->Writer::_do_write(">-\n"); + } + else if(numnewlines_at_end == 1) + { + this->Writer::_do_write(">\n"); + } + else if(numnewlines_at_end > 1) + { + this->Writer::_do_write(">+\n"); + } + if(trimmed.len) + { + size_t pos = 0; // tracks the last character that was already written + for(size_t i = 0; i < trimmed.len; ++i) + { + if(trimmed[i] != '\n') + continue; + // write everything up to this point + csubstr since_pos = trimmed.range(pos, i+1); // include the newline + pos = i+1; // because of the newline + _rymlindent_nextline() + this->Writer::_do_write(since_pos); + this->Writer::_do_write('\n'); // write the newline twice + } + if(pos < trimmed.len) + { + _rymlindent_nextline() + this->Writer::_do_write(trimmed.sub(pos)); + } + if(numnewlines_at_end) + { + this->Writer::_do_write('\n'); + --numnewlines_at_end; + } + } + for(size_t i = 0; i < numnewlines_at_end; ++i) + { + _rymlindent_nextline() + if(i+1 < numnewlines_at_end || explicit_key) + this->Writer::_do_write('\n'); + } + if(explicit_key && !numnewlines_at_end) + this->Writer::_do_write('\n'); +} + +template<class Writer> +void Emitter<Writer>::_write_scalar_squo(csubstr s, size_t ilevel) +{ + size_t pos = 0; // tracks the last character that was already written + this->Writer::_do_write('\''); + for(size_t i = 0; i < s.len; ++i) + { + if(s[i] == '\n') + { + csubstr sub = s.range(pos, i+1); + this->Writer::_do_write(sub); // write everything up to (including) this char + this->Writer::_do_write('\n'); // write the character again + if(i + 1 < s.len) + _rymlindent_nextline() // indent the next line + pos = i+1; + } + else if(s[i] == '\'') + { + csubstr sub = s.range(pos, i+1); + this->Writer::_do_write(sub); // write everything up to (including) this char + this->Writer::_do_write('\''); // write the character again + pos = i+1; + } + } + // write missing characters at the end of the string + if(pos < s.len) + this->Writer::_do_write(s.sub(pos)); + this->Writer::_do_write('\''); +} + +template<class Writer> +void Emitter<Writer>::_write_scalar_dquo(csubstr s, size_t ilevel) +{ + size_t pos = 0; // tracks the last character that was already written + this->Writer::_do_write('"'); + for(size_t i = 0; i < s.len; ++i) + { + const char curr = s.str[i]; + if(curr == '"' || curr == '\\') + { + csubstr sub = s.range(pos, i); + this->Writer::_do_write(sub); // write everything up to (excluding) this char + this->Writer::_do_write('\\'); // write the escape + this->Writer::_do_write(curr); // write the char + pos = i+1; + } + else if(s[i] == '\n') + { + csubstr sub = s.range(pos, i+1); + this->Writer::_do_write(sub); // write everything up to (including) this newline + this->Writer::_do_write('\n'); // write the newline again + if(i + 1 < s.len) + _rymlindent_nextline() // indent the next line + pos = i+1; + if(i+1 < s.len) // escape leading whitespace after the newline + { + const char next = s.str[i+1]; + if(next == ' ' || next == '\t') + this->Writer::_do_write('\\'); + } + } + else if(curr == ' ' || curr == '\t') + { + // escape trailing whitespace before a newline + size_t next = s.first_not_of(" \t\r", i); + if(next != npos && s[next] == '\n') + { + csubstr sub = s.range(pos, i); + this->Writer::_do_write(sub); // write everything up to (excluding) this char + this->Writer::_do_write('\\'); // escape the whitespace + pos = i; + } + } + else if(C4_UNLIKELY(curr == '\r')) + { + csubstr sub = s.range(pos, i); + this->Writer::_do_write(sub); // write everything up to (excluding) this char + this->Writer::_do_write("\\r"); // write the escaped char + pos = i+1; + } + } + // write missing characters at the end of the string + if(pos < s.len) + { + csubstr sub = s.sub(pos); + this->Writer::_do_write(sub); + } + this->Writer::_do_write('"'); +} + +template<class Writer> +void Emitter<Writer>::_write_scalar_plain(csubstr s, size_t ilevel) +{ + size_t pos = 0; // tracks the last character that was already written + for(size_t i = 0; i < s.len; ++i) + { + const char curr = s.str[i]; + if(curr == '\n') + { + csubstr sub = s.range(pos, i+1); + this->Writer::_do_write(sub); // write everything up to (including) this newline + this->Writer::_do_write('\n'); // write the newline again + if(i + 1 < s.len) + _rymlindent_nextline() // indent the next line + pos = i+1; + } + } + // write missing characters at the end of the string + if(pos < s.len) + { + csubstr sub = s.sub(pos); + this->Writer::_do_write(sub); + } +} + +#undef _rymlindent_nextline + +template<class Writer> +void Emitter<Writer>::_write_scalar(csubstr s, bool was_quoted) +{ + // this block of code needed to be moved to before the needs_quotes + // assignment to work around a g++ optimizer bug where (s.str != nullptr) + // was evaluated as true even if s.str was actually a nullptr (!!!) + if(s.len == size_t(0)) + { + if(was_quoted || s.str != nullptr) + this->Writer::_do_write("''"); + return; + } + + const bool needs_quotes = ( + was_quoted + || + ( + ( ! s.is_number()) + && + ( + // has leading whitespace + // looks like reference or anchor + // would be treated as a directive + // see https://www.yaml.info/learn/quote.html#noplain + s.begins_with_any(" \n\t\r*&%@`") + || + s.begins_with("<<") + || + // has trailing whitespace + s.ends_with_any(" \n\t\r") + || + // has special chars + (s.first_of("#:-?,\n{}[]'\"") != npos) + ) + ) + ); + + if( ! needs_quotes) + { + this->Writer::_do_write(s); + } + else + { + const bool has_dquotes = s.first_of( '"') != npos; + const bool has_squotes = s.first_of('\'') != npos; + if(!has_squotes && has_dquotes) + { + this->Writer::_do_write('\''); + this->Writer::_do_write(s); + this->Writer::_do_write('\''); + } + else if(has_squotes && !has_dquotes) + { + RYML_ASSERT(s.count('\n') == 0); + this->Writer::_do_write('"'); + this->Writer::_do_write(s); + this->Writer::_do_write('"'); + } + else + { + _write_scalar_squo(s, /*FIXME FIXME FIXME*/0); + } + } +} +template<class Writer> +void Emitter<Writer>::_write_scalar_json(csubstr s, bool as_key, bool use_quotes) +{ + if((!use_quotes) + // json keys require quotes + && (!as_key) + && ( + // do not quote special cases + (s == "true" || s == "false" || s == "null") + || ( + // do not quote numbers + (s.is_number() + && ( + // quote integral numbers if they have a leading 0 + // https://github.com/biojppm/rapidyaml/issues/291 + (!(s.len > 1 && s.begins_with('0'))) + // do not quote reals with leading 0 + // https://github.com/biojppm/rapidyaml/issues/313 + || (s.find('.') != csubstr::npos) )) + ) + ) + ) + { + this->Writer::_do_write(s); + } + else + { + size_t pos = 0; + this->Writer::_do_write('"'); + for(size_t i = 0; i < s.len; ++i) + { + switch(s.str[i]) + { + case '"': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\\""); + pos = i + 1; + break; + case '\n': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\n"); + pos = i + 1; + break; + case '\t': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\t"); + pos = i + 1; + break; + case '\\': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\\\"); + pos = i + 1; + break; + case '\r': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\r"); + pos = i + 1; + break; + case '\b': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\b"); + pos = i + 1; + break; + case '\f': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\f"); + pos = i + 1; + break; + } + } + if(pos < s.len) + { + csubstr sub = s.sub(pos); + this->Writer::_do_write(sub); + } + this->Writer::_do_write('"'); + } +} + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_EMIT_DEF_HPP_ */ diff --git a/thirdparty/ryml/src/c4/yml/emit.hpp b/thirdparty/ryml/src/c4/yml/emit.hpp new file mode 100644 index 000000000..c7cdd2a1a --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/emit.hpp @@ -0,0 +1,490 @@ +#ifndef _C4_YML_EMIT_HPP_ +#define _C4_YML_EMIT_HPP_ + +#ifndef _C4_YML_WRITER_HPP_ +#include "./writer.hpp" +#endif + +#ifndef _C4_YML_TREE_HPP_ +#include "./tree.hpp" +#endif + +#ifndef _C4_YML_NODE_HPP_ +#include "./node.hpp" +#endif + + +#define RYML_DEPRECATE_EMIT \ + RYML_DEPRECATED("use emit_yaml() instead. See https://github.com/biojppm/rapidyaml/issues/120") +#ifdef emit +#error "emit is defined, likely from a Qt include. This will cause a compilation error. See https://github.com/biojppm/rapidyaml/issues/120" +#endif +#define RYML_DEPRECATE_EMITRS \ + RYML_DEPRECATED("use emitrs_yaml() instead. See https://github.com/biojppm/rapidyaml/issues/120") + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace c4 { +namespace yml { + +template<class Writer> class Emitter; + +template<class OStream> +using EmitterOStream = Emitter<WriterOStream<OStream>>; +using EmitterFile = Emitter<WriterFile>; +using EmitterBuf = Emitter<WriterBuf>; + +typedef enum { + EMIT_YAML = 0, + EMIT_JSON = 1 +} EmitType_e; + + +/** mark a tree or node to be emitted as json */ +struct as_json +{ + Tree const* tree; + size_t node; + as_json(Tree const& t) : tree(&t), node(t.empty() ? NONE : t.root_id()) {} + as_json(Tree const& t, size_t id) : tree(&t), node(id) {} + as_json(ConstNodeRef const& n) : tree(n.tree()), node(n.id()) {} +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +template<class Writer> +class Emitter : public Writer +{ +public: + + using Writer::Writer; + + /** emit! + * + * When writing to a buffer, returns a substr of the emitted YAML. + * If the given buffer has insufficient space, the returned span will + * be null and its size will be the needed space. No writes are done + * after the end of the buffer. + * + * When writing to a file, the returned substr will be null, but its + * length will be set to the number of bytes written. */ + substr emit_as(EmitType_e type, Tree const& t, size_t id, bool error_on_excess); + /** emit starting at the root node */ + substr emit_as(EmitType_e type, Tree const& t, bool error_on_excess=true); + /** emit the given node */ + substr emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess=true); + +private: + + Tree const* C4_RESTRICT m_tree; + + void _emit_yaml(size_t id); + void _do_visit_flow_sl(size_t id, size_t ilevel=0); + void _do_visit_flow_ml(size_t id, size_t ilevel=0, size_t do_indent=1); + void _do_visit_block(size_t id, size_t ilevel=0, size_t do_indent=1); + void _do_visit_block_container(size_t id, size_t next_level, size_t do_indent); + void _do_visit_json(size_t id); + +private: + + void _write(NodeScalar const& C4_RESTRICT sc, NodeType flags, size_t level); + void _write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags); + + void _write_doc(size_t id); + void _write_scalar(csubstr s, bool was_quoted); + void _write_scalar_json(csubstr s, bool as_key, bool was_quoted); + void _write_scalar_literal(csubstr s, size_t level, bool as_key, bool explicit_indentation=false); + void _write_scalar_folded(csubstr s, size_t level, bool as_key); + void _write_scalar_squo(csubstr s, size_t level); + void _write_scalar_dquo(csubstr s, size_t level); + void _write_scalar_plain(csubstr s, size_t level); + + void _write_tag(csubstr tag) + { + if(!tag.begins_with('!')) + this->Writer::_do_write('!'); + this->Writer::_do_write(tag); + } + + enum : type_bits { + _keysc = (KEY|KEYREF|KEYANCH|KEYQUO|_WIP_KEY_STYLE) | ~(VAL|VALREF|VALANCH|VALQUO|_WIP_VAL_STYLE), + _valsc = ~(KEY|KEYREF|KEYANCH|KEYQUO|_WIP_KEY_STYLE) | (VAL|VALREF|VALANCH|VALQUO|_WIP_VAL_STYLE), + _keysc_json = (KEY) | ~(VAL), + _valsc_json = ~(KEY) | (VAL), + }; + + C4_ALWAYS_INLINE void _writek(size_t id, size_t level) { _write(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~_valsc, level); } + C4_ALWAYS_INLINE void _writev(size_t id, size_t level) { _write(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~_keysc, level); } + + C4_ALWAYS_INLINE void _writek_json(size_t id) { _write_json(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~(VAL)); } + C4_ALWAYS_INLINE void _writev_json(size_t id) { _write_json(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~(KEY)); } + +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** emit YAML to the given file. A null file defaults to stdout. + * Return the number of bytes written. */ +inline size_t emit_yaml(Tree const& t, size_t id, FILE *f) +{ + EmitterFile em(f); + return em.emit_as(EMIT_YAML, t, id, /*error_on_excess*/true).len; +} +RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, size_t id, FILE *f) +{ + return emit_yaml(t, id, f); +} + +/** emit JSON to the given file. A null file defaults to stdout. + * Return the number of bytes written. */ +inline size_t emit_json(Tree const& t, size_t id, FILE *f) +{ + EmitterFile em(f); + return em.emit_as(EMIT_JSON, t, id, /*error_on_excess*/true).len; +} + + +/** emit YAML to the given file. A null file defaults to stdout. + * Return the number of bytes written. + * @overload */ +inline size_t emit_yaml(Tree const& t, FILE *f=nullptr) +{ + EmitterFile em(f); + return em.emit_as(EMIT_YAML, t, /*error_on_excess*/true).len; +} +RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, FILE *f=nullptr) +{ + return emit_yaml(t, f); +} + +/** emit JSON to the given file. A null file defaults to stdout. + * Return the number of bytes written. + * @overload */ +inline size_t emit_json(Tree const& t, FILE *f=nullptr) +{ + EmitterFile em(f); + return em.emit_as(EMIT_JSON, t, /*error_on_excess*/true).len; +} + + +/** emit YAML to the given file. A null file defaults to stdout. + * Return the number of bytes written. + * @overload */ +inline size_t emit_yaml(ConstNodeRef const& r, FILE *f=nullptr) +{ + EmitterFile em(f); + return em.emit_as(EMIT_YAML, r, /*error_on_excess*/true).len; +} +RYML_DEPRECATE_EMIT inline size_t emit(ConstNodeRef const& r, FILE *f=nullptr) +{ + return emit_yaml(r, f); +} + +/** emit JSON to the given file. A null file defaults to stdout. + * Return the number of bytes written. + * @overload */ +inline size_t emit_json(ConstNodeRef const& r, FILE *f=nullptr) +{ + EmitterFile em(f); + return em.emit_as(EMIT_JSON, r, /*error_on_excess*/true).len; +} + + +//----------------------------------------------------------------------------- + +/** emit YAML to an STL-like ostream */ +template<class OStream> +inline OStream& operator<< (OStream& s, Tree const& t) +{ + EmitterOStream<OStream> em(s); + em.emit_as(EMIT_YAML, t); + return s; +} + +/** emit YAML to an STL-like ostream + * @overload */ +template<class OStream> +inline OStream& operator<< (OStream& s, ConstNodeRef const& n) +{ + EmitterOStream<OStream> em(s); + em.emit_as(EMIT_YAML, n); + return s; +} + +/** emit json to an STL-like stream */ +template<class OStream> +inline OStream& operator<< (OStream& s, as_json const& j) +{ + EmitterOStream<OStream> em(s); + em.emit_as(EMIT_JSON, *j.tree, j.node, true); + return s; +} + + +//----------------------------------------------------------------------------- + + +/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @overload */ +inline substr emit_yaml(Tree const& t, size_t id, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(buf); + return em.emit_as(EMIT_YAML, t, id, error_on_excess); +} +RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, size_t id, substr buf, bool error_on_excess=true) +{ + return emit_yaml(t, id, buf, error_on_excess); +} + +/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @overload */ +inline substr emit_json(Tree const& t, size_t id, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(buf); + return em.emit_as(EMIT_JSON, t, id, error_on_excess); +} + + +/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @overload */ +inline substr emit_yaml(Tree const& t, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(buf); + return em.emit_as(EMIT_YAML, t, error_on_excess); +} +RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, substr buf, bool error_on_excess=true) +{ + return emit_yaml(t, buf, error_on_excess); +} + +/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @overload */ +inline substr emit_json(Tree const& t, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(buf); + return em.emit_as(EMIT_JSON, t, error_on_excess); +} + + +/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @overload + */ +inline substr emit_yaml(ConstNodeRef const& r, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(buf); + return em.emit_as(EMIT_YAML, r, error_on_excess); +} +RYML_DEPRECATE_EMIT inline substr emit(ConstNodeRef const& r, substr buf, bool error_on_excess=true) +{ + return emit_yaml(r, buf, error_on_excess); +} + +/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @overload + */ +inline substr emit_json(ConstNodeRef const& r, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(buf); + return em.emit_as(EMIT_JSON, r, error_on_excess); +} + + +//----------------------------------------------------------------------------- + +/** emit+resize: emit YAML to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted YAML. */ +template<class CharOwningContainer> +substr emitrs_yaml(Tree const& t, size_t id, CharOwningContainer * cont) +{ + substr buf = to_substr(*cont); + substr ret = emit_yaml(t, id, buf, /*error_on_excess*/false); + if(ret.str == nullptr && ret.len > 0) + { + cont->resize(ret.len); + buf = to_substr(*cont); + ret = emit_yaml(t, id, buf, /*error_on_excess*/true); + } + return ret; +} +template<class CharOwningContainer> +RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, size_t id, CharOwningContainer * cont) +{ + return emitrs_yaml(t, id, cont); +} + +/** emit+resize: emit JSON to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted JSON. */ +template<class CharOwningContainer> +substr emitrs_json(Tree const& t, size_t id, CharOwningContainer * cont) +{ + substr buf = to_substr(*cont); + substr ret = emit_json(t, id, buf, /*error_on_excess*/false); + if(ret.str == nullptr && ret.len > 0) + { + cont->resize(ret.len); + buf = to_substr(*cont); + ret = emit_json(t, id, buf, /*error_on_excess*/true); + } + return ret; +} + + +/** emit+resize: emit YAML to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted YAML. */ +template<class CharOwningContainer> +CharOwningContainer emitrs_yaml(Tree const& t, size_t id) +{ + CharOwningContainer c; + emitrs_yaml(t, id, &c); + return c; +} +template<class CharOwningContainer> +RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t, size_t id) +{ + CharOwningContainer c; + emitrs_yaml(t, id, &c); + return c; +} + +/** emit+resize: emit JSON to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted JSON. */ +template<class CharOwningContainer> +CharOwningContainer emitrs_json(Tree const& t, size_t id) +{ + CharOwningContainer c; + emitrs_json(t, id, &c); + return c; +} + + +/** emit+resize: YAML to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted YAML. */ +template<class CharOwningContainer> +substr emitrs_yaml(Tree const& t, CharOwningContainer * cont) +{ + if(t.empty()) + return {}; + return emitrs_yaml(t, t.root_id(), cont); +} +template<class CharOwningContainer> +RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, CharOwningContainer * cont) +{ + return emitrs_yaml(t, cont); +} + +/** emit+resize: JSON to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted JSON. */ +template<class CharOwningContainer> +substr emitrs_json(Tree const& t, CharOwningContainer * cont) +{ + if(t.empty()) + return {}; + return emitrs_json(t, t.root_id(), cont); +} + + +/** emit+resize: YAML to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted YAML. */ +template<class CharOwningContainer> +CharOwningContainer emitrs_yaml(Tree const& t) +{ + CharOwningContainer c; + if(t.empty()) + return c; + emitrs_yaml(t, t.root_id(), &c); + return c; +} +template<class CharOwningContainer> +RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t) +{ + return emitrs_yaml<CharOwningContainer>(t); +} + +/** emit+resize: JSON to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted JSON. */ +template<class CharOwningContainer> +CharOwningContainer emitrs_json(Tree const& t) +{ + CharOwningContainer c; + if(t.empty()) + return c; + emitrs_json(t, t.root_id(), &c); + return c; +} + + +/** emit+resize: YAML to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted YAML. */ +template<class CharOwningContainer> +substr emitrs_yaml(ConstNodeRef const& n, CharOwningContainer * cont) +{ + _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + return emitrs_yaml(*n.tree(), n.id(), cont); +} +template<class CharOwningContainer> +RYML_DEPRECATE_EMITRS substr emitrs(ConstNodeRef const& n, CharOwningContainer * cont) +{ + return emitrs_yaml(n, cont); +} + +/** emit+resize: JSON to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted JSON. */ +template<class CharOwningContainer> +substr emitrs_json(ConstNodeRef const& n, CharOwningContainer * cont) +{ + _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + return emitrs_json(*n.tree(), n.id(), cont); +} + + +/** emit+resize: YAML to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted YAML. */ +template<class CharOwningContainer> +CharOwningContainer emitrs_yaml(ConstNodeRef const& n) +{ + _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + CharOwningContainer c; + emitrs_yaml(*n.tree(), n.id(), &c); + return c; +} +template<class CharOwningContainer> +RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(ConstNodeRef const& n) +{ + return emitrs_yaml<CharOwningContainer>(n); +} + +/** emit+resize: JSON to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted JSON. */ +template<class CharOwningContainer> +CharOwningContainer emitrs_json(ConstNodeRef const& n) +{ + _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + CharOwningContainer c; + emitrs_json(*n.tree(), n.id(), &c); + return c; +} + +} // namespace yml +} // namespace c4 + +#undef RYML_DEPRECATE_EMIT +#undef RYML_DEPRECATE_EMITRS + +#include "c4/yml/emit.def.hpp" + +#endif /* _C4_YML_EMIT_HPP_ */ diff --git a/thirdparty/ryml/src/c4/yml/export.hpp b/thirdparty/ryml/src/c4/yml/export.hpp new file mode 100644 index 000000000..6b77f3f8d --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/export.hpp @@ -0,0 +1,18 @@ +#ifndef C4_YML_EXPORT_HPP_ +#define C4_YML_EXPORT_HPP_ + +#ifdef _WIN32 + #ifdef RYML_SHARED + #ifdef RYML_EXPORTS + #define RYML_EXPORT __declspec(dllexport) + #else + #define RYML_EXPORT __declspec(dllimport) + #endif + #else + #define RYML_EXPORT + #endif +#else + #define RYML_EXPORT +#endif + +#endif /* C4_YML_EXPORT_HPP_ */ diff --git a/thirdparty/ryml/src/c4/yml/node.cpp b/thirdparty/ryml/src/c4/yml/node.cpp new file mode 100644 index 000000000..50c7a0b60 --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/node.cpp @@ -0,0 +1,30 @@ +#include "c4/yml/node.hpp" + +namespace c4 { +namespace yml { + + + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +size_t NodeRef::set_key_serialized(c4::fmt::const_base64_wrapper w) +{ + _apply_seed(); + csubstr encoded = this->to_arena(w); + this->set_key(encoded); + return encoded.len; +} + +size_t NodeRef::set_val_serialized(c4::fmt::const_base64_wrapper w) +{ + _apply_seed(); + csubstr encoded = this->to_arena(w); + this->set_val(encoded); + return encoded.len; +} + +} // namespace yml +} // namespace c4 diff --git a/thirdparty/ryml/src/c4/yml/node.hpp b/thirdparty/ryml/src/c4/yml/node.hpp new file mode 100644 index 000000000..42ed50442 --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/node.hpp @@ -0,0 +1,1276 @@ +#ifndef _C4_YML_NODE_HPP_ +#define _C4_YML_NODE_HPP_ + +/** @file node.hpp + * @see NodeRef */ + +#include <cstddef> + +#include "c4/yml/tree.hpp" +#include "c4/base64.hpp" + +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wtype-limits" +#endif + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/) +# pragma warning(disable: 4296/*expression is always 'boolean_value'*/) +#endif + +namespace c4 { +namespace yml { + +template<class K> struct Key { K & k; }; +template<> struct Key<fmt::const_base64_wrapper> { fmt::const_base64_wrapper wrapper; }; +template<> struct Key<fmt::base64_wrapper> { fmt::base64_wrapper wrapper; }; + +template<class K> C4_ALWAYS_INLINE Key<K> key(K & k) { return Key<K>{k}; } +C4_ALWAYS_INLINE Key<fmt::const_base64_wrapper> key(fmt::const_base64_wrapper w) { return {w}; } +C4_ALWAYS_INLINE Key<fmt::base64_wrapper> key(fmt::base64_wrapper w) { return {w}; } + +template<class T> void write(NodeRef *n, T const& v); + +template<class T> +typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type +read(NodeRef const& n, T *v); + +template<class T> +typename std::enable_if< std::is_floating_point<T>::value, bool>::type +read(NodeRef const& n, T *v); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +// forward decls +class NodeRef; +class ConstNodeRef; + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace detail { + +template<class NodeRefType> +struct child_iterator +{ + using value_type = NodeRefType; + using tree_type = typename NodeRefType::tree_type; + + tree_type * C4_RESTRICT m_tree; + size_t m_child_id; + + child_iterator(tree_type * t, size_t id) : m_tree(t), m_child_id(id) {} + + child_iterator& operator++ () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->next_sibling(m_child_id); return *this; } + child_iterator& operator-- () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->prev_sibling(m_child_id); return *this; } + + NodeRefType operator* () const { return NodeRefType(m_tree, m_child_id); } + NodeRefType operator-> () const { return NodeRefType(m_tree, m_child_id); } + + bool operator!= (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id != that.m_child_id; } + bool operator== (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id == that.m_child_id; } +}; + +template<class NodeRefType> +struct children_view_ +{ + using n_iterator = child_iterator<NodeRefType>; + + n_iterator b, e; + + inline children_view_(n_iterator const& C4_RESTRICT b_, + n_iterator const& C4_RESTRICT e_) : b(b_), e(e_) {} + + inline n_iterator begin() const { return b; } + inline n_iterator end () const { return e; } +}; + +template<class NodeRefType, class Visitor> +bool _visit(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_root=false) +{ + size_t increment = 0; + if( ! (node.is_root() && skip_root)) + { + if(fn(node, indentation_level)) + return true; + ++increment; + } + if(node.has_children()) + { + for(auto ch : node.children()) + { + if(_visit(ch, fn, indentation_level + increment, false)) // no need to forward skip_root as it won't be root + { + return true; + } + } + } + return false; +} + +template<class NodeRefType, class Visitor> +bool _visit_stacked(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_root=false) +{ + size_t increment = 0; + if( ! (node.is_root() && skip_root)) + { + if(fn(node, indentation_level)) + { + return true; + } + ++increment; + } + if(node.has_children()) + { + fn.push(node, indentation_level); + for(auto ch : node.children()) + { + if(_visit_stacked(ch, fn, indentation_level + increment, false)) // no need to forward skip_root as it won't be root + { + fn.pop(node, indentation_level); + return true; + } + } + fn.pop(node, indentation_level); + } + return false; +} + + +//----------------------------------------------------------------------------- + +/** a CRTP base for read-only node methods */ +template<class Impl, class ConstImpl> +struct RoNodeMethods +{ + C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wcast-align") + // helper CRTP macros, undefined at the end + #define tree_ ((ConstImpl const* C4_RESTRICT)this)->m_tree + #define id_ ((ConstImpl const* C4_RESTRICT)this)->m_id + #define tree__ ((Impl const* C4_RESTRICT)this)->m_tree + #define id__ ((Impl const* C4_RESTRICT)this)->m_id + // require valid + #define _C4RV() \ + RYML_ASSERT(tree_ != nullptr); \ + _RYML_CB_ASSERT(tree_->m_callbacks, id_ != NONE) + #define _C4_IF_MUTABLE(ty) typename std::enable_if<!std::is_same<U, ConstImpl>::value, ty>::type + +public: + + /** @name node property getters */ + /** @{ */ + + /** returns the data or null when the id is NONE */ + C4_ALWAYS_INLINE C4_PURE NodeData const* get() const noexcept { RYML_ASSERT(tree_ != nullptr); return tree_->get(id_); } + /** returns the data or null when the id is NONE */ + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto get() noexcept -> _C4_IF_MUTABLE(NodeData*) { RYML_ASSERT(tree_ != nullptr); return tree__->get(id__); } + + C4_ALWAYS_INLINE C4_PURE NodeType type() const noexcept { _C4RV(); return tree_->type(id_); } + C4_ALWAYS_INLINE C4_PURE const char* type_str() const noexcept { return tree_->type_str(id_); } + + C4_ALWAYS_INLINE C4_PURE csubstr key() const noexcept { _C4RV(); return tree_->key(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr key_tag() const noexcept { _C4RV(); return tree_->key_tag(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr key_ref() const noexcept { _C4RV(); return tree_->key_ref(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr key_anchor() const noexcept { _C4RV(); return tree_->key_anchor(id_); } + + C4_ALWAYS_INLINE C4_PURE csubstr val() const noexcept { _C4RV(); return tree_->val(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr val_tag() const noexcept { _C4RV(); return tree_->val_tag(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr val_ref() const noexcept { _C4RV(); return tree_->val_ref(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr val_anchor() const noexcept { _C4RV(); return tree_->val_anchor(id_); } + + C4_ALWAYS_INLINE C4_PURE NodeScalar const& keysc() const noexcept { _C4RV(); return tree_->keysc(id_); } + C4_ALWAYS_INLINE C4_PURE NodeScalar const& valsc() const noexcept { _C4RV(); return tree_->valsc(id_); } + + C4_ALWAYS_INLINE C4_PURE bool key_is_null() const noexcept { _C4RV(); return tree_->key_is_null(id_); } + C4_ALWAYS_INLINE C4_PURE bool val_is_null() const noexcept { _C4RV(); return tree_->val_is_null(id_); } + + /** @} */ + +public: + + /** @name node property predicates */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE bool empty() const noexcept { _C4RV(); return tree_->empty(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_stream() const noexcept { _C4RV(); return tree_->is_stream(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_doc() const noexcept { _C4RV(); return tree_->is_doc(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_container() const noexcept { _C4RV(); return tree_->is_container(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_map() const noexcept { _C4RV(); return tree_->is_map(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_seq() const noexcept { _C4RV(); return tree_->is_seq(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_val() const noexcept { _C4RV(); return tree_->has_val(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_key() const noexcept { _C4RV(); return tree_->has_key(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_val() const noexcept { _C4RV(); return tree_->is_val(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_keyval() const noexcept { _C4RV(); return tree_->is_keyval(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_key_tag() const noexcept { _C4RV(); return tree_->has_key_tag(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_val_tag() const noexcept { _C4RV(); return tree_->has_val_tag(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_key_anchor() const noexcept { _C4RV(); return tree_->has_key_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_key_anchor() const noexcept { _C4RV(); return tree_->is_key_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_val_anchor() const noexcept { _C4RV(); return tree_->has_val_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_val_anchor() const noexcept { _C4RV(); return tree_->is_val_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_anchor() const noexcept { _C4RV(); return tree_->has_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_anchor() const noexcept { _C4RV(); return tree_->is_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_key_ref() const noexcept { _C4RV(); return tree_->is_key_ref(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_val_ref() const noexcept { _C4RV(); return tree_->is_val_ref(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_ref() const noexcept { _C4RV(); return tree_->is_ref(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_anchor_or_ref() const noexcept { _C4RV(); return tree_->is_anchor_or_ref(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_key_quoted() const noexcept { _C4RV(); return tree_->is_key_quoted(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_val_quoted() const noexcept { _C4RV(); return tree_->is_val_quoted(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_quoted() const noexcept { _C4RV(); return tree_->is_quoted(id_); } + C4_ALWAYS_INLINE C4_PURE bool parent_is_seq() const noexcept { _C4RV(); return tree_->parent_is_seq(id_); } + C4_ALWAYS_INLINE C4_PURE bool parent_is_map() const noexcept { _C4RV(); return tree_->parent_is_map(id_); } + + /** @} */ + +public: + + /** @name hierarchy predicates */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE bool is_root() const noexcept { _C4RV(); return tree_->is_root(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_parent() const noexcept { _C4RV(); return tree_->has_parent(id_); } + + C4_ALWAYS_INLINE C4_PURE bool has_child(ConstImpl const& ch) const noexcept { _C4RV(); return tree_->has_child(id_, ch.m_id); } + C4_ALWAYS_INLINE C4_PURE bool has_child(csubstr name) const noexcept { _C4RV(); return tree_->has_child(id_, name); } + C4_ALWAYS_INLINE C4_PURE bool has_children() const noexcept { _C4RV(); return tree_->has_children(id_); } + + C4_ALWAYS_INLINE C4_PURE bool has_sibling(ConstImpl const& n) const noexcept { _C4RV(); return tree_->has_sibling(id_, n.m_id); } + C4_ALWAYS_INLINE C4_PURE bool has_sibling(csubstr name) const noexcept { _C4RV(); return tree_->has_sibling(id_, name); } + /** counts with this */ + C4_ALWAYS_INLINE C4_PURE bool has_siblings() const noexcept { _C4RV(); return tree_->has_siblings(id_); } + /** does not count with this */ + C4_ALWAYS_INLINE C4_PURE bool has_other_siblings() const noexcept { _C4RV(); return tree_->has_other_siblings(id_); } + + /** @} */ + +public: + + /** @name hierarchy getters */ + /** @{ */ + + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto doc(size_t num) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->doc(num)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl doc(size_t num) const noexcept { _C4RV(); return {tree_, tree_->doc(num)}; } + + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto parent() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->parent(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl parent() const noexcept { _C4RV(); return {tree_, tree_->parent(id_)}; } + + + /** O(#num_children) */ + C4_ALWAYS_INLINE C4_PURE size_t child_pos(ConstImpl const& n) const noexcept { _C4RV(); return tree_->child_pos(id_, n.m_id); } + C4_ALWAYS_INLINE C4_PURE size_t num_children() const noexcept { _C4RV(); return tree_->num_children(id_); } + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto first_child() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->first_child(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl first_child() const noexcept { _C4RV(); return {tree_, tree_->first_child(id_)}; } + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto last_child() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->last_child(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl last_child () const noexcept { _C4RV(); return {tree_, tree_->last_child (id_)}; } + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto child(size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->child(id__, pos)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl child(size_t pos) const noexcept { _C4RV(); return {tree_, tree_->child(id_, pos)}; } + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto find_child(csubstr name) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->find_child(id__, name)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl find_child(csubstr name) const noexcept { _C4RV(); return {tree_, tree_->find_child(id_, name)}; } + + + /** O(#num_siblings) */ + C4_ALWAYS_INLINE C4_PURE size_t num_siblings() const noexcept { _C4RV(); return tree_->num_siblings(id_); } + C4_ALWAYS_INLINE C4_PURE size_t num_other_siblings() const noexcept { _C4RV(); return tree_->num_other_siblings(id_); } + C4_ALWAYS_INLINE C4_PURE size_t sibling_pos(ConstImpl const& n) const noexcept { _C4RV(); return tree_->child_pos(tree_->parent(id_), n.m_id); } + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto prev_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->prev_sibling(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl prev_sibling() const noexcept { _C4RV(); return {tree_, tree_->prev_sibling(id_)}; } + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto next_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->next_sibling(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl next_sibling() const noexcept { _C4RV(); return {tree_, tree_->next_sibling(id_)}; } + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto first_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->first_sibling(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl first_sibling() const noexcept { _C4RV(); return {tree_, tree_->first_sibling(id_)}; } + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto last_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->last_sibling(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl last_sibling () const noexcept { _C4RV(); return {tree_, tree_->last_sibling(id_)}; } + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto sibling(size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->sibling(id__, pos)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl sibling(size_t pos) const noexcept { _C4RV(); return {tree_, tree_->sibling(id_, pos)}; } + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto find_sibling(csubstr name) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->find_sibling(id__, name)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl find_sibling(csubstr name) const noexcept { _C4RV(); return {tree_, tree_->find_sibling(id_, name)}; } + + + /** O(num_children) */ + C4_ALWAYS_INLINE C4_PURE ConstImpl operator[] (csubstr k) const noexcept + { + _C4RV(); + size_t ch = tree_->find_child(id_, k); + _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE); + return {tree_, ch}; + } + /** Find child by key. O(num_children). returns a seed node if no such child is found. */ + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto operator[] (csubstr k) noexcept -> _C4_IF_MUTABLE(Impl) + { + _C4RV(); + size_t ch = tree__->find_child(id__, k); + return ch != NONE ? Impl(tree__, ch) : NodeRef(tree__, id__, k); + } + + /** O(num_children) */ + C4_ALWAYS_INLINE C4_PURE ConstImpl operator[] (size_t pos) const noexcept + { + _C4RV(); + size_t ch = tree_->child(id_, pos); + _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE); + return {tree_, ch}; + } + + /** Find child by position. O(pos). returns a seed node if no such child is found. */ + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto operator[] (size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) + { + _C4RV(); + size_t ch = tree__->child(id__, pos); + return ch != NONE ? Impl(tree__, ch) : NodeRef(tree__, id__, pos); + } + + /** @} */ + +public: + + /** deserialization */ + /** @{ */ + + template<class T> + ConstImpl const& operator>> (T &v) const + { + _C4RV(); + if( ! read((ConstImpl const&)*this, &v)) + _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize value"); + return *((ConstImpl const*)this); + } + + /** deserialize the node's key to the given variable */ + template<class T> + ConstImpl const& operator>> (Key<T> v) const + { + _C4RV(); + if( ! from_chars(key(), &v.k)) + _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize key"); + return *((ConstImpl const*)this); + } + + /** deserialize the node's key as base64 */ + ConstImpl const& operator>> (Key<fmt::base64_wrapper> w) const + { + deserialize_key(w.wrapper); + return *((ConstImpl const*)this); + } + + /** deserialize the node's val as base64 */ + ConstImpl const& operator>> (fmt::base64_wrapper w) const + { + deserialize_val(w); + return *((ConstImpl const*)this); + } + + /** decode the base64-encoded key and assign the + * decoded blob to the given buffer/ + * @return the size of base64-decoded blob */ + size_t deserialize_key(fmt::base64_wrapper v) const + { + _C4RV(); + return from_chars(key(), &v); + } + /** decode the base64-encoded key and assign the + * decoded blob to the given buffer/ + * @return the size of base64-decoded blob */ + size_t deserialize_val(fmt::base64_wrapper v) const + { + _C4RV(); + return from_chars(val(), &v); + }; + + template<class T> + bool get_if(csubstr name, T *var) const + { + auto ch = find_child(name); + if(!ch.valid()) + return false; + ch >> *var; + return true; + } + + template<class T> + bool get_if(csubstr name, T *var, T const& fallback) const + { + auto ch = find_child(name); + if(ch.valid()) + { + ch >> *var; + return true; + } + else + { + *var = fallback; + return false; + } + } + + /** @} */ + +public: + + #if defined(__clang__) + # pragma clang diagnostic push + # pragma clang diagnostic ignored "-Wnull-dereference" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # if __GNUC__ >= 6 + # pragma GCC diagnostic ignored "-Wnull-dereference" + # endif + #endif + + /** @name iteration */ + /** @{ */ + + using iterator = detail::child_iterator<Impl>; + using const_iterator = detail::child_iterator<ConstImpl>; + using children_view = detail::children_view_<Impl>; + using const_children_view = detail::children_view_<ConstImpl>; + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto begin() noexcept -> _C4_IF_MUTABLE(iterator) { _C4RV(); return iterator(tree__, tree__->first_child(id__)); } + C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); } + C4_ALWAYS_INLINE C4_PURE const_iterator cbegin() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); } + + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto end() noexcept -> _C4_IF_MUTABLE(iterator) { _C4RV(); return iterator(tree__, NONE); } + C4_ALWAYS_INLINE C4_PURE const_iterator end() const noexcept { _C4RV(); return const_iterator(tree_, NONE); } + C4_ALWAYS_INLINE C4_PURE const_iterator cend() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); } + + /** get an iterable view over children */ + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto children() noexcept -> _C4_IF_MUTABLE(children_view) { _C4RV(); return children_view(begin(), end()); } + /** get an iterable view over children */ + C4_ALWAYS_INLINE C4_PURE const_children_view children() const noexcept { _C4RV(); return const_children_view(begin(), end()); } + /** get an iterable view over children */ + C4_ALWAYS_INLINE C4_PURE const_children_view cchildren() const noexcept { _C4RV(); return const_children_view(begin(), end()); } + + /** get an iterable view over all siblings (including the calling node) */ + template<class U=Impl> + C4_ALWAYS_INLINE C4_PURE auto siblings() noexcept -> _C4_IF_MUTABLE(children_view) + { + _C4RV(); + NodeData const *nd = tree__->get(id__); + return (nd->m_parent != NONE) ? // does it have a parent? + children_view(iterator(tree__, tree_->get(nd->m_parent)->m_first_child), iterator(tree__, NONE)) + : + children_view(end(), end()); + } + /** get an iterable view over all siblings (including the calling node) */ + C4_ALWAYS_INLINE C4_PURE const_children_view siblings() const noexcept + { + _C4RV(); + NodeData const *nd = tree_->get(id_); + return (nd->m_parent != NONE) ? // does it have a parent? + const_children_view(const_iterator(tree_, tree_->get(nd->m_parent)->m_first_child), const_iterator(tree_, NONE)) + : + const_children_view(end(), end()); + } + /** get an iterable view over all siblings (including the calling node) */ + C4_ALWAYS_INLINE C4_PURE const_children_view csiblings() const noexcept { return siblings(); } + + /** visit every child node calling fn(node) */ + template<class Visitor> + C4_ALWAYS_INLINE C4_PURE bool visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) const noexcept + { + return detail::_visit(*(ConstImpl*)this, fn, indentation_level, skip_root); + } + /** visit every child node calling fn(node) */ + template<class Visitor, class U=Impl> + auto visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) noexcept + -> _C4_IF_MUTABLE(bool) + { + return detail::_visit(*(Impl*)this, fn, indentation_level, skip_root); + } + + /** visit every child node calling fn(node, level) */ + template<class Visitor> + C4_ALWAYS_INLINE C4_PURE bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) const noexcept + { + return detail::_visit_stacked(*(ConstImpl*)this, fn, indentation_level, skip_root); + } + /** visit every child node calling fn(node, level) */ + template<class Visitor, class U=Impl> + auto visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) noexcept + -> _C4_IF_MUTABLE(bool) + { + return detail::_visit_stacked(*(Impl*)this, fn, indentation_level, skip_root); + } + + /** @} */ + + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + + #undef _C4_IF_MUTABLE + #undef _C4RV + #undef tree_ + #undef tree__ + #undef id_ + #undef id__ + + C4_SUPPRESS_WARNING_GCC_CLANG_POP +}; + +} // namespace detail + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +class RYML_EXPORT ConstNodeRef : public detail::RoNodeMethods<ConstNodeRef, ConstNodeRef> +{ +public: + + using tree_type = Tree const; + +public: + + Tree const* C4_RESTRICT m_tree; + size_t m_id; + + friend NodeRef; + friend struct detail::RoNodeMethods<ConstNodeRef, ConstNodeRef>; + +public: + + /** @name construction */ + /** @{ */ + + ConstNodeRef() : m_tree(nullptr), m_id(NONE) {} + ConstNodeRef(Tree const &t) : m_tree(&t), m_id(t .root_id()) {} + ConstNodeRef(Tree const *t) : m_tree(t ), m_id(t->root_id()) {} + ConstNodeRef(Tree const *t, size_t id) : m_tree(t), m_id(id) {} + ConstNodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE) {} + + ConstNodeRef(ConstNodeRef const&) = default; + ConstNodeRef(ConstNodeRef &&) = default; + + ConstNodeRef(NodeRef const&); + ConstNodeRef(NodeRef &&); + + /** @} */ + +public: + + /** @name assignment */ + /** @{ */ + + ConstNodeRef& operator= (std::nullptr_t) { m_tree = nullptr; m_id = NONE; return *this; } + + ConstNodeRef& operator= (ConstNodeRef const&) = default; + ConstNodeRef& operator= (ConstNodeRef &&) = default; + + ConstNodeRef& operator= (NodeRef const&); + ConstNodeRef& operator= (NodeRef &&); + + + /** @} */ + +public: + + /** @name state queries */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE bool valid() const noexcept { return m_tree != nullptr && m_id != NONE; } + + /** @} */ + +public: + + /** @name member getters */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE Tree const* tree() const noexcept { return m_tree; } + C4_ALWAYS_INLINE C4_PURE size_t id() const noexcept { return m_id; } + + /** @} */ + +public: + + /** @name comparisons */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE bool operator== (ConstNodeRef const& that) const noexcept { RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; } + C4_ALWAYS_INLINE C4_PURE bool operator!= (ConstNodeRef const& that) const noexcept { RYML_ASSERT(that.m_tree == m_tree); return ! this->operator==(that); } + + C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return m_tree == nullptr || m_id == NONE; } + C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return ! this->operator== (nullptr); } + + C4_ALWAYS_INLINE C4_PURE bool operator== (csubstr val) const noexcept { RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; } + C4_ALWAYS_INLINE C4_PURE bool operator!= (csubstr val) const noexcept { RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; } + + /** @} */ + +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** a reference to a node in an existing yaml tree, offering a more + * convenient API than the index-based API used in the tree. */ +class RYML_EXPORT NodeRef : public detail::RoNodeMethods<NodeRef, ConstNodeRef> +{ +public: + + using tree_type = Tree; + using base_type = detail::RoNodeMethods<NodeRef, ConstNodeRef>; + +private: + + Tree *C4_RESTRICT m_tree; + size_t m_id; + + /** This member is used to enable lazy operator[] writing. When a child + * with a key or index is not found, m_id is set to the id of the parent + * and the asked-for key or index are stored in this member until a write + * does happen. Then it is given as key or index for creating the child. + * When a key is used, the csubstr stores it (so the csubstr's string is + * non-null and the csubstr's size is different from NONE). When an index is + * used instead, the csubstr's string is set to null, and only the csubstr's + * size is set to a value different from NONE. Otherwise, when operator[] + * does find the child then this member is empty: the string is null and + * the size is NONE. */ + csubstr m_seed; + + friend ConstNodeRef; + friend struct detail::RoNodeMethods<NodeRef, ConstNodeRef>; + + // require valid: a helper macro, undefined at the end + #define _C4RV() \ + RYML_ASSERT(m_tree != nullptr); \ + _RYML_CB_ASSERT(m_tree->m_callbacks, m_id != NONE && !is_seed()) + +public: + + /** @name construction */ + /** @{ */ + + NodeRef() : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); } + NodeRef(Tree &t) : m_tree(&t), m_id(t .root_id()), m_seed() { _clear_seed(); } + NodeRef(Tree *t) : m_tree(t ), m_id(t->root_id()), m_seed() { _clear_seed(); } + NodeRef(Tree *t, size_t id) : m_tree(t), m_id(id), m_seed() { _clear_seed(); } + NodeRef(Tree *t, size_t id, size_t seed_pos) : m_tree(t), m_id(id), m_seed() { m_seed.str = nullptr; m_seed.len = seed_pos; } + NodeRef(Tree *t, size_t id, csubstr seed_key) : m_tree(t), m_id(id), m_seed(seed_key) {} + NodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE), m_seed() {} + + /** @} */ + +public: + + /** @name assignment */ + /** @{ */ + + NodeRef(NodeRef const&) = default; + NodeRef(NodeRef &&) = default; + + NodeRef& operator= (NodeRef const&) = default; + NodeRef& operator= (NodeRef &&) = default; + + /** @} */ + +public: + + /** @name state queries */ + /** @{ */ + + inline bool valid() const { return m_tree != nullptr && m_id != NONE; } + inline bool is_seed() const { return m_seed.str != nullptr || m_seed.len != NONE; } + + inline void _clear_seed() { /*do this manually or an assert is triggered*/ m_seed.str = nullptr; m_seed.len = NONE; } + + /** @} */ + +public: + + /** @name comparisons */ + /** @{ */ + + inline bool operator== (NodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid() && !that.is_seed()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; } + inline bool operator!= (NodeRef const& that) const { return ! this->operator==(that); } + + inline bool operator== (ConstNodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; } + inline bool operator!= (ConstNodeRef const& that) const { return ! this->operator==(that); } + + inline bool operator== (std::nullptr_t) const { return m_tree == nullptr || m_id == NONE || is_seed(); } + inline bool operator!= (std::nullptr_t) const { return m_tree != nullptr && m_id != NONE && !is_seed(); } + + inline bool operator== (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; } + inline bool operator!= (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; } + + //inline operator bool () const { return m_tree == nullptr || m_id == NONE || is_seed(); } + + /** @} */ + +public: + + /** @name node property getters */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE Tree * tree() noexcept { return m_tree; } + C4_ALWAYS_INLINE C4_PURE Tree const* tree() const noexcept { return m_tree; } + + C4_ALWAYS_INLINE C4_PURE size_t id() const noexcept { return m_id; } + + /** @} */ + +public: + + /** @name node modifiers */ + /** @{ */ + + void change_type(NodeType t) { _C4RV(); m_tree->change_type(m_id, t); } + + void set_type(NodeType t) { _C4RV(); m_tree->_set_flags(m_id, t); } + void set_key(csubstr key) { _C4RV(); m_tree->_set_key(m_id, key); } + void set_val(csubstr val) { _C4RV(); m_tree->_set_val(m_id, val); } + void set_key_tag(csubstr key_tag) { _C4RV(); m_tree->set_key_tag(m_id, key_tag); } + void set_val_tag(csubstr val_tag) { _C4RV(); m_tree->set_val_tag(m_id, val_tag); } + void set_key_anchor(csubstr key_anchor) { _C4RV(); m_tree->set_key_anchor(m_id, key_anchor); } + void set_val_anchor(csubstr val_anchor) { _C4RV(); m_tree->set_val_anchor(m_id, val_anchor); } + void set_key_ref(csubstr key_ref) { _C4RV(); m_tree->set_key_ref(m_id, key_ref); } + void set_val_ref(csubstr val_ref) { _C4RV(); m_tree->set_val_ref(m_id, val_ref); } + + template<class T> + size_t set_key_serialized(T const& C4_RESTRICT k) + { + _C4RV(); + csubstr s = m_tree->to_arena(k); + m_tree->_set_key(m_id, s); + return s.len; + } + template<class T> + size_t set_val_serialized(T const& C4_RESTRICT v) + { + _C4RV(); + csubstr s = m_tree->to_arena(v); + m_tree->_set_val(m_id, s); + return s.len; + } + size_t set_val_serialized(std::nullptr_t) + { + _C4RV(); + m_tree->_set_val(m_id, csubstr{}); + return 0; + } + + /** encode a blob as base64, then assign the result to the node's key + * @return the size of base64-encoded blob */ + size_t set_key_serialized(fmt::const_base64_wrapper w); + /** encode a blob as base64, then assign the result to the node's val + * @return the size of base64-encoded blob */ + size_t set_val_serialized(fmt::const_base64_wrapper w); + +public: + + inline void clear() + { + if(is_seed()) + return; + m_tree->remove_children(m_id); + m_tree->_clear(m_id); + } + + inline void clear_key() + { + if(is_seed()) + return; + m_tree->_clear_key(m_id); + } + + inline void clear_val() + { + if(is_seed()) + return; + m_tree->_clear_val(m_id); + } + + inline void clear_children() + { + if(is_seed()) + return; + m_tree->remove_children(m_id); + } + + void create() { _apply_seed(); } + + inline void operator= (NodeType_e t) + { + _apply_seed(); + m_tree->_add_flags(m_id, t); + } + + inline void operator|= (NodeType_e t) + { + _apply_seed(); + m_tree->_add_flags(m_id, t); + } + + inline void operator= (NodeInit const& v) + { + _apply_seed(); + _apply(v); + } + + inline void operator= (NodeScalar const& v) + { + _apply_seed(); + _apply(v); + } + + inline void operator= (std::nullptr_t) + { + _apply_seed(); + _apply(csubstr{}); + } + + inline void operator= (csubstr v) + { + _apply_seed(); + _apply(v); + } + + template<size_t N> + inline void operator= (const char (&v)[N]) + { + _apply_seed(); + csubstr sv; + sv.assign<N>(v); + _apply(sv); + } + + /** @} */ + +public: + + /** @name serialization */ + /** @{ */ + + /** serialize a variable to the arena */ + template<class T> + inline csubstr to_arena(T const& C4_RESTRICT s) + { + _C4RV(); + return m_tree->to_arena(s); + } + + /** serialize a variable, then assign the result to the node's val */ + inline NodeRef& operator<< (csubstr s) + { + // this overload is needed to prevent ambiguity (there's also + // operator<< for writing a substr to a stream) + _apply_seed(); + write(this, s); + RYML_ASSERT(val() == s); + return *this; + } + + template<class T> + inline NodeRef& operator<< (T const& C4_RESTRICT v) + { + _apply_seed(); + write(this, v); + return *this; + } + + /** serialize a variable, then assign the result to the node's key */ + template<class T> + inline NodeRef& operator<< (Key<const T> const& C4_RESTRICT v) + { + _apply_seed(); + set_key_serialized(v.k); + return *this; + } + + /** serialize a variable, then assign the result to the node's key */ + template<class T> + inline NodeRef& operator<< (Key<T> const& C4_RESTRICT v) + { + _apply_seed(); + set_key_serialized(v.k); + return *this; + } + + NodeRef& operator<< (Key<fmt::const_base64_wrapper> w) + { + set_key_serialized(w.wrapper); + return *this; + } + + NodeRef& operator<< (fmt::const_base64_wrapper w) + { + set_val_serialized(w); + return *this; + } + + /** @} */ + +private: + + void _apply_seed() + { + if(m_seed.str) // we have a seed key: use it to create the new child + { + //RYML_ASSERT(i.key.scalar.empty() || m_key == i.key.scalar || m_key.empty()); + m_id = m_tree->append_child(m_id); + m_tree->_set_key(m_id, m_seed); + m_seed.str = nullptr; + m_seed.len = NONE; + } + else if(m_seed.len != NONE) // we have a seed index: create a child at that position + { + RYML_ASSERT(m_tree->num_children(m_id) == m_seed.len); + m_id = m_tree->append_child(m_id); + m_seed.str = nullptr; + m_seed.len = NONE; + } + else + { + RYML_ASSERT(valid()); + } + } + + inline void _apply(csubstr v) + { + m_tree->_set_val(m_id, v); + } + + inline void _apply(NodeScalar const& v) + { + m_tree->_set_val(m_id, v); + } + + inline void _apply(NodeInit const& i) + { + m_tree->_set(m_id, i); + } + +public: + + /** @name modification of hierarchy */ + /** @{ */ + + inline NodeRef insert_child(NodeRef after) + { + _C4RV(); + RYML_ASSERT(after.m_tree == m_tree); + NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id)); + return r; + } + + inline NodeRef insert_child(NodeInit const& i, NodeRef after) + { + _C4RV(); + RYML_ASSERT(after.m_tree == m_tree); + NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id)); + r._apply(i); + return r; + } + + inline NodeRef prepend_child() + { + _C4RV(); + NodeRef r(m_tree, m_tree->insert_child(m_id, NONE)); + return r; + } + + inline NodeRef prepend_child(NodeInit const& i) + { + _C4RV(); + NodeRef r(m_tree, m_tree->insert_child(m_id, NONE)); + r._apply(i); + return r; + } + + inline NodeRef append_child() + { + _C4RV(); + NodeRef r(m_tree, m_tree->append_child(m_id)); + return r; + } + + inline NodeRef append_child(NodeInit const& i) + { + _C4RV(); + NodeRef r(m_tree, m_tree->append_child(m_id)); + r._apply(i); + return r; + } + +public: + + inline NodeRef insert_sibling(ConstNodeRef const& after) + { + _C4RV(); + RYML_ASSERT(after.m_tree == m_tree); + NodeRef r(m_tree, m_tree->insert_sibling(m_id, after.m_id)); + return r; + } + + inline NodeRef insert_sibling(NodeInit const& i, ConstNodeRef const& after) + { + _C4RV(); + RYML_ASSERT(after.m_tree == m_tree); + NodeRef r(m_tree, m_tree->insert_sibling(m_id, after.m_id)); + r._apply(i); + return r; + } + + inline NodeRef prepend_sibling() + { + _C4RV(); + NodeRef r(m_tree, m_tree->prepend_sibling(m_id)); + return r; + } + + inline NodeRef prepend_sibling(NodeInit const& i) + { + _C4RV(); + NodeRef r(m_tree, m_tree->prepend_sibling(m_id)); + r._apply(i); + return r; + } + + inline NodeRef append_sibling() + { + _C4RV(); + NodeRef r(m_tree, m_tree->append_sibling(m_id)); + return r; + } + + inline NodeRef append_sibling(NodeInit const& i) + { + _C4RV(); + NodeRef r(m_tree, m_tree->append_sibling(m_id)); + r._apply(i); + return r; + } + +public: + + inline void remove_child(NodeRef & child) + { + _C4RV(); + RYML_ASSERT(has_child(child)); + RYML_ASSERT(child.parent().id() == id()); + m_tree->remove(child.id()); + child.clear(); + } + + //! remove the nth child of this node + inline void remove_child(size_t pos) + { + _C4RV(); + RYML_ASSERT(pos >= 0 && pos < num_children()); + size_t child = m_tree->child(m_id, pos); + RYML_ASSERT(child != NONE); + m_tree->remove(child); + } + + //! remove a child by name + inline void remove_child(csubstr key) + { + _C4RV(); + size_t child = m_tree->find_child(m_id, key); + RYML_ASSERT(child != NONE); + m_tree->remove(child); + } + +public: + + /** change the node's position within its parent, placing it after + * @p after. To move to the first position in the parent, simply + * pass an empty or default-constructed reference like this: + * `n.move({})`. */ + inline void move(ConstNodeRef const& after) + { + _C4RV(); + m_tree->move(m_id, after.m_id); + } + + /** move the node to a different @p parent (which may belong to a + * different tree), placing it after @p after. When the + * destination parent is in a new tree, then this node's tree + * pointer is reset to the tree of the parent node. */ + inline void move(NodeRef const& parent, ConstNodeRef const& after) + { + _C4RV(); + if(parent.m_tree == m_tree) + { + m_tree->move(m_id, parent.m_id, after.m_id); + } + else + { + parent.m_tree->move(m_tree, m_id, parent.m_id, after.m_id); + m_tree = parent.m_tree; + } + } + + /** duplicate the current node somewhere within its parent, and + * place it after the node @p after. To place into the first + * position of the parent, simply pass an empty or + * default-constructed reference like this: `n.move({})`. */ + inline NodeRef duplicate(ConstNodeRef const& after) const + { + _C4RV(); + RYML_ASSERT(m_tree == after.m_tree || after.m_id == NONE); + size_t dup = m_tree->duplicate(m_id, m_tree->parent(m_id), after.m_id); + NodeRef r(m_tree, dup); + return r; + } + + /** duplicate the current node somewhere into a different @p parent + * (possibly from a different tree), and place it after the node + * @p after. To place into the first position of the parent, + * simply pass an empty or default-constructed reference like + * this: `n.move({})`. */ + inline NodeRef duplicate(NodeRef const& parent, ConstNodeRef const& after) const + { + _C4RV(); + RYML_ASSERT(parent.m_tree == after.m_tree || after.m_id == NONE); + if(parent.m_tree == m_tree) + { + size_t dup = m_tree->duplicate(m_id, parent.m_id, after.m_id); + NodeRef r(m_tree, dup); + return r; + } + else + { + size_t dup = parent.m_tree->duplicate(m_tree, m_id, parent.m_id, after.m_id); + NodeRef r(parent.m_tree, dup); + return r; + } + } + + inline void duplicate_children(NodeRef const& parent, ConstNodeRef const& after) const + { + _C4RV(); + RYML_ASSERT(parent.m_tree == after.m_tree); + if(parent.m_tree == m_tree) + { + m_tree->duplicate_children(m_id, parent.m_id, after.m_id); + } + else + { + parent.m_tree->duplicate_children(m_tree, m_id, parent.m_id, after.m_id); + } + } + + /** @} */ + +#undef _C4RV +}; + + +//----------------------------------------------------------------------------- + +inline ConstNodeRef::ConstNodeRef(NodeRef const& that) + : m_tree(that.m_tree) + , m_id(!that.is_seed() ? that.id() : NONE) +{ +} + +inline ConstNodeRef::ConstNodeRef(NodeRef && that) + : m_tree(that.m_tree) + , m_id(!that.is_seed() ? that.id() : NONE) +{ +} + + +inline ConstNodeRef& ConstNodeRef::operator= (NodeRef const& that) +{ + m_tree = (that.m_tree); + m_id = (!that.is_seed() ? that.id() : NONE); + return *this; +} + +inline ConstNodeRef& ConstNodeRef::operator= (NodeRef && that) +{ + m_tree = (that.m_tree); + m_id = (!that.is_seed() ? that.id() : NONE); + return *this; +} + + +//----------------------------------------------------------------------------- + +template<class T> +inline void write(NodeRef *n, T const& v) +{ + n->set_val_serialized(v); +} + +template<class T> +typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type +inline read(NodeRef const& n, T *v) +{ + return from_chars(n.val(), v); +} +template<class T> +typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type +inline read(ConstNodeRef const& n, T *v) +{ + return from_chars(n.val(), v); +} + +template<class T> +typename std::enable_if<std::is_floating_point<T>::value, bool>::type +inline read(NodeRef const& n, T *v) +{ + return from_chars_float(n.val(), v); +} +template<class T> +typename std::enable_if<std::is_floating_point<T>::value, bool>::type +inline read(ConstNodeRef const& n, T *v) +{ + return from_chars_float(n.val(), v); +} + + +} // namespace yml +} // namespace c4 + + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif + +#endif /* _C4_YML_NODE_HPP_ */ diff --git a/thirdparty/ryml/src/c4/yml/parse.cpp b/thirdparty/ryml/src/c4/yml/parse.cpp new file mode 100644 index 000000000..7b038e672 --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/parse.cpp @@ -0,0 +1,5724 @@ +#include "c4/yml/parse.hpp" +#include "c4/error.hpp" +#include "c4/utf.hpp" +#include <c4/dump.hpp> + +#include <ctype.h> +#include <stdarg.h> +#include <stdio.h> + +#include "c4/yml/detail/parser_dbg.hpp" +#ifdef RYML_DBG +#include "c4/yml/detail/print.hpp" +#endif + +#ifndef RYML_ERRMSG_SIZE + #define RYML_ERRMSG_SIZE 1024 +#endif + +//#define RYML_WITH_TAB_TOKENS +#ifdef RYML_WITH_TAB_TOKENS +#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__ +#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with +#else +#define _RYML_WITH_TAB_TOKENS(...) +#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without +#endif + + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4296/*expression is always 'boolean_value'*/) +#elif defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. +# pragma clang diagnostic ignored "-Wformat-nonliteral" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +# if __GNUC__ >= 7 +# pragma GCC diagnostic ignored "-Wduplicated-branches" +# endif +#endif + +namespace c4 { +namespace yml { + +namespace { + +template<class DumpFn, class ...Args> +void _parse_dump(DumpFn dumpfn, c4::csubstr fmt, Args&& ...args) +{ + char writebuf[256]; + auto results = c4::format_dump_resume(dumpfn, writebuf, fmt, std::forward<Args>(args)...); + // resume writing if the results failed to fit the buffer + if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) // bufsize will be that of the largest element serialized. Eg int(1), will require 1 byte. + { + results = format_dump_resume(dumpfn, results, writebuf, fmt, std::forward<Args>(args)...); + if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) + { + results = format_dump_resume(dumpfn, results, writebuf, fmt, std::forward<Args>(args)...); + } + } +} + +bool _is_scalar_next__runk(csubstr s) +{ + return !(s.begins_with(": ") || s.begins_with_any("#,{}[]%&") || s.begins_with("? ") || s == "-" || s.begins_with("- ") || s.begins_with(":\"") || s.begins_with(":'")); +} + +bool _is_scalar_next__rseq_rval(csubstr s) +{ + return !(s.begins_with_any("[{!&") || s.begins_with("? ") || s.begins_with("- ") || s == "-"); +} + +bool _is_scalar_next__rmap(csubstr s) +{ + return !(s.begins_with(": ") || s.begins_with_any("#,!&") || s.begins_with("? ") _RYML_WITH_TAB_TOKENS(|| s.begins_with(":\t"))); +} + +bool _is_scalar_next__rmap_val(csubstr s) +{ + return !(s.begins_with("- ") || s.begins_with_any("{[") || s == "-"); +} + +bool _is_doc_sep(csubstr s) +{ + constexpr const csubstr dashes = "---"; + constexpr const csubstr ellipsis = "..."; + constexpr const csubstr whitesp = " \t"; + if(s.begins_with(dashes)) + return s == dashes || s.sub(3).begins_with_any(whitesp); + else if(s.begins_with(ellipsis)) + return s == ellipsis || s.sub(3).begins_with_any(whitesp); + return false; +} + +/** @p i is set to the first non whitespace character after the line + * @return the number of empty lines after the initial position */ +size_t count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation) +{ + RYML_ASSERT(r[*i] == '\n'); + size_t numnl_following = 0; + ++(*i); + for( ; *i < r.len; ++(*i)) + { + if(r.str[*i] == '\n') + { + ++numnl_following; + if(indentation) // skip the indentation after the newline + { + size_t stop = *i + indentation; + for( ; *i < r.len; ++(*i)) + { + if(r.str[*i] != ' ' && r.str[*i] != '\r') + break; + RYML_ASSERT(*i < stop); + } + C4_UNUSED(stop); + } + } + else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r') // skip leading whitespace + ; + else + break; + } + return numnl_following; +} + +} // anon namespace + + +//----------------------------------------------------------------------------- + +Parser::~Parser() +{ + _free(); + _clr(); +} + +Parser::Parser(Callbacks const& cb, ParserOptions opts) + : m_options(opts) + , m_file() + , m_buf() + , m_root_id(NONE) + , m_tree() + , m_stack(cb) + , m_state() + , m_key_tag_indentation(0) + , m_key_tag2_indentation(0) + , m_key_tag() + , m_key_tag2() + , m_val_tag_indentation(0) + , m_val_tag() + , m_key_anchor_was_before(false) + , m_key_anchor_indentation(0) + , m_key_anchor() + , m_val_anchor_indentation(0) + , m_val_anchor() + , m_filter_arena() + , m_newline_offsets() + , m_newline_offsets_size(0) + , m_newline_offsets_capacity(0) + , m_newline_offsets_buf() +{ + m_stack.push(State{}); + m_state = &m_stack.top(); +} + +Parser::Parser(Parser &&that) + : m_options(that.m_options) + , m_file(that.m_file) + , m_buf(that.m_buf) + , m_root_id(that.m_root_id) + , m_tree(that.m_tree) + , m_stack(std::move(that.m_stack)) + , m_state(&m_stack.top()) + , m_key_tag_indentation(that.m_key_tag_indentation) + , m_key_tag2_indentation(that.m_key_tag2_indentation) + , m_key_tag(that.m_key_tag) + , m_key_tag2(that.m_key_tag2) + , m_val_tag_indentation(that.m_val_tag_indentation) + , m_val_tag(that.m_val_tag) + , m_key_anchor_was_before(that.m_key_anchor_was_before) + , m_key_anchor_indentation(that.m_key_anchor_indentation) + , m_key_anchor(that.m_key_anchor) + , m_val_anchor_indentation(that.m_val_anchor_indentation) + , m_val_anchor(that.m_val_anchor) + , m_filter_arena(that.m_filter_arena) + , m_newline_offsets(that.m_newline_offsets) + , m_newline_offsets_size(that.m_newline_offsets_size) + , m_newline_offsets_capacity(that.m_newline_offsets_capacity) + , m_newline_offsets_buf(that.m_newline_offsets_buf) +{ + that._clr(); +} + +Parser::Parser(Parser const& that) + : m_options(that.m_options) + , m_file(that.m_file) + , m_buf(that.m_buf) + , m_root_id(that.m_root_id) + , m_tree(that.m_tree) + , m_stack(that.m_stack) + , m_state(&m_stack.top()) + , m_key_tag_indentation(that.m_key_tag_indentation) + , m_key_tag2_indentation(that.m_key_tag2_indentation) + , m_key_tag(that.m_key_tag) + , m_key_tag2(that.m_key_tag2) + , m_val_tag_indentation(that.m_val_tag_indentation) + , m_val_tag(that.m_val_tag) + , m_key_anchor_was_before(that.m_key_anchor_was_before) + , m_key_anchor_indentation(that.m_key_anchor_indentation) + , m_key_anchor(that.m_key_anchor) + , m_val_anchor_indentation(that.m_val_anchor_indentation) + , m_val_anchor(that.m_val_anchor) + , m_filter_arena() + , m_newline_offsets() + , m_newline_offsets_size() + , m_newline_offsets_capacity() + , m_newline_offsets_buf() +{ + if(that.m_newline_offsets_capacity) + { + _resize_locations(that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity); + memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); + m_newline_offsets_size = that.m_newline_offsets_size; + } + if(that.m_filter_arena.len) + { + _resize_filter_arena(that.m_filter_arena.len); + } +} + +Parser& Parser::operator=(Parser &&that) +{ + _free(); + m_options = (that.m_options); + m_file = (that.m_file); + m_buf = (that.m_buf); + m_root_id = (that.m_root_id); + m_tree = (that.m_tree); + m_stack = std::move(that.m_stack); + m_state = (&m_stack.top()); + m_key_tag_indentation = (that.m_key_tag_indentation); + m_key_tag2_indentation = (that.m_key_tag2_indentation); + m_key_tag = (that.m_key_tag); + m_key_tag2 = (that.m_key_tag2); + m_val_tag_indentation = (that.m_val_tag_indentation); + m_val_tag = (that.m_val_tag); + m_key_anchor_was_before = (that.m_key_anchor_was_before); + m_key_anchor_indentation = (that.m_key_anchor_indentation); + m_key_anchor = (that.m_key_anchor); + m_val_anchor_indentation = (that.m_val_anchor_indentation); + m_val_anchor = (that.m_val_anchor); + m_filter_arena = that.m_filter_arena; + m_newline_offsets = (that.m_newline_offsets); + m_newline_offsets_size = (that.m_newline_offsets_size); + m_newline_offsets_capacity = (that.m_newline_offsets_capacity); + m_newline_offsets_buf = (that.m_newline_offsets_buf); + that._clr(); + return *this; +} + +Parser& Parser::operator=(Parser const& that) +{ + _free(); + m_options = (that.m_options); + m_file = (that.m_file); + m_buf = (that.m_buf); + m_root_id = (that.m_root_id); + m_tree = (that.m_tree); + m_stack = that.m_stack; + m_state = &m_stack.top(); + m_key_tag_indentation = (that.m_key_tag_indentation); + m_key_tag2_indentation = (that.m_key_tag2_indentation); + m_key_tag = (that.m_key_tag); + m_key_tag2 = (that.m_key_tag2); + m_val_tag_indentation = (that.m_val_tag_indentation); + m_val_tag = (that.m_val_tag); + m_key_anchor_was_before = (that.m_key_anchor_was_before); + m_key_anchor_indentation = (that.m_key_anchor_indentation); + m_key_anchor = (that.m_key_anchor); + m_val_anchor_indentation = (that.m_val_anchor_indentation); + m_val_anchor = (that.m_val_anchor); + if(that.m_filter_arena.len > 0) + _resize_filter_arena(that.m_filter_arena.len); + if(that.m_newline_offsets_capacity > m_newline_offsets_capacity) + _resize_locations(that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size); + memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); + m_newline_offsets_size = that.m_newline_offsets_size; + m_newline_offsets_buf = that.m_newline_offsets_buf; + return *this; +} + +void Parser::_clr() +{ + m_options = {}; + m_file = {}; + m_buf = {}; + m_root_id = {}; + m_tree = {}; + m_stack.clear(); + m_state = {}; + m_key_tag_indentation = {}; + m_key_tag2_indentation = {}; + m_key_tag = {}; + m_key_tag2 = {}; + m_val_tag_indentation = {}; + m_val_tag = {}; + m_key_anchor_was_before = {}; + m_key_anchor_indentation = {}; + m_key_anchor = {}; + m_val_anchor_indentation = {}; + m_val_anchor = {}; + m_filter_arena = {}; + m_newline_offsets = {}; + m_newline_offsets_size = {}; + m_newline_offsets_capacity = {}; + m_newline_offsets_buf = {}; +} + +void Parser::_free() +{ + if(m_newline_offsets) + { + _RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); + m_newline_offsets = nullptr; + m_newline_offsets_size = 0u; + m_newline_offsets_capacity = 0u; + m_newline_offsets_buf = 0u; + } + if(m_filter_arena.len) + { + _RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len); + m_filter_arena = {}; + } + m_stack._free(); +} + + +//----------------------------------------------------------------------------- +void Parser::_reset() +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() == 1); + m_stack.clear(); + m_stack.push({}); + m_state = &m_stack.top(); + m_state->reset(m_file.str, m_root_id); + + m_key_tag_indentation = 0; + m_key_tag2_indentation = 0; + m_key_tag.clear(); + m_key_tag2.clear(); + m_val_tag_indentation = 0; + m_val_tag.clear(); + m_key_anchor_was_before = false; + m_key_anchor_indentation = 0; + m_key_anchor.clear(); + m_val_anchor_indentation = 0; + m_val_anchor.clear(); + + if(m_options.locations()) + { + _prepare_locations(); + } +} + +//----------------------------------------------------------------------------- +template<class DumpFn> +void Parser::_fmt_msg(DumpFn &&dumpfn) const +{ + auto const& lc = m_state->line_contents; + csubstr contents = lc.stripped; + if(contents.len) + { + // print the yaml src line + size_t offs = 3u + to_chars(substr{}, m_state->pos.line) + to_chars(substr{}, m_state->pos.col); + if(m_file.len) + { + _parse_dump(dumpfn, "{}:", m_file); + offs += m_file.len + 1; + } + _parse_dump(dumpfn, "{}:{}: ", m_state->pos.line, m_state->pos.col); + csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u)); + csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("...")); + _parse_dump(dumpfn, "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len); + // highlight the remaining portion of the previous line + size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin()); + size_t lastcol = firstcol + lc.rem.len; + for(size_t i = 0; i < offs + firstcol; ++i) + dumpfn(" "); + dumpfn("^"); + for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i) + dumpfn("~"); + _parse_dump(dumpfn, "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1); + } + else + { + dumpfn("\n"); + } + +#ifdef RYML_DBG + // next line: print the state flags + { + char flagbuf_[64]; + _parse_dump(dumpfn, "top state: {}\n", _prfl(flagbuf_, m_state->flags)); + } +#endif +} + + +//----------------------------------------------------------------------------- +template<class ...Args> +void Parser::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const +{ + char errmsg[RYML_ERRMSG_SIZE]; + detail::_SubstrWriter writer(errmsg); + auto dumpfn = [&writer](csubstr s){ writer.append(s); }; + _parse_dump(dumpfn, fmt, args...); + writer.append('\n'); + _fmt_msg(dumpfn); + size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE; + m_tree->m_callbacks.m_error(errmsg, len, m_state->pos, m_tree->m_callbacks.m_user_data); +} + +//----------------------------------------------------------------------------- +#ifdef RYML_DBG +template<class ...Args> +void Parser::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const +{ + auto dumpfn = [](csubstr s){ fwrite(s.str, 1, s.len, stdout); }; + _parse_dump(dumpfn, fmt, args...); + dumpfn("\n"); + _fmt_msg(dumpfn); +} +#endif + +//----------------------------------------------------------------------------- +bool Parser::_finished_file() const +{ + bool ret = m_state->pos.offset >= m_buf.len; + if(ret) + { + _c4dbgp("finished file!!!"); + } + return ret; +} + +//----------------------------------------------------------------------------- +bool Parser::_finished_line() const +{ + return m_state->line_contents.rem.empty(); +} + +//----------------------------------------------------------------------------- +void Parser::parse_in_place(csubstr file, substr buf, Tree *t, size_t node_id) +{ + m_file = file; + m_buf = buf; + m_root_id = node_id; + m_tree = t; + _reset(); + while( ! _finished_file()) + { + _scan_line(); + while( ! _finished_line()) + _handle_line(); + if(_finished_file()) + break; // it may have finished because of multiline blocks + _line_ended(); + } + _handle_finished_file(); +} + +//----------------------------------------------------------------------------- +void Parser::_handle_finished_file() +{ + _end_stream(); +} + +//----------------------------------------------------------------------------- +void Parser::_handle_line() +{ + _c4dbgq("\n-----------"); + _c4dbgt("handling line={}, offset={}B", m_state->pos.line, m_state->pos.offset); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! m_state->line_contents.rem.empty()); + if(has_any(RSEQ)) + { + if(has_any(FLOW)) + { + if(_handle_seq_flow()) + return; + } + else + { + if(_handle_seq_blck()) + return; + } + } + else if(has_any(RMAP)) + { + if(has_any(FLOW)) + { + if(_handle_map_flow()) + return; + } + else + { + if(_handle_map_blck()) + return; + } + } + else if(has_any(RUNK)) + { + if(_handle_unk()) + return; + } + + if(_handle_top()) + return; +} + + +//----------------------------------------------------------------------------- +bool Parser::_handle_unk() +{ + _c4dbgp("handle_unk"); + + csubstr rem = m_state->line_contents.rem; + const bool start_as_child = (node(m_state) == nullptr); + + if(C4_UNLIKELY(has_any(NDOC))) + { + if(rem == "---" || rem.begins_with("--- ")) + { + _start_new_doc(rem); + return true; + } + auto trimmed = rem.triml(' '); + if(trimmed == "---" || trimmed.begins_with("--- ")) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, rem.len >= trimmed.len); + _line_progressed(rem.len - trimmed.len); + _start_new_doc(trimmed); + _save_indentation(); + return true; + } + else if(trimmed.begins_with("...")) + { + _end_stream(); + } + else if(trimmed.first_of("#%") == csubstr::npos) // neither a doc nor a tag + { + _c4dbgpf("starting implicit doc to accomodate unexpected tokens: '{}'", rem); + size_t indref = m_state->indref; + _push_level(); + _start_doc(); + _set_indentation(indref); + } + _RYML_CB_ASSERT(m_stack.m_callbacks, !trimmed.empty()); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP)); + if(m_state->indref > 0) + { + csubstr ws = rem.left_of(rem.first_not_of(' ')); + if(m_state->indref <= ws.len) + { + _c4dbgpf("skipping base indentation of {}", m_state->indref); + _line_progressed(m_state->indref); + rem = rem.sub(m_state->indref); + } + } + + if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) + { + _c4dbgpf("it's a seq (as_child={})", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_seq(start_as_child); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem == '-') + { + _c4dbgpf("it's a seq (as_child={})", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_seq(start_as_child); + _save_indentation(); + _line_progressed(1); + return true; + } + else if(rem.begins_with('[')) + { + _c4dbgpf("it's a seq, flow (as_child={})", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(/*explicit flow*/true); + _start_seq(start_as_child); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgpf("it's a map, flow (as_child={})", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(/*explicit flow*/true); + _start_map(start_as_child); + addrem_flags(FLOW|RKEY, RVAL); + _line_progressed(1); + return true; + } + else if(rem.begins_with("? ")) + { + _c4dbgpf("it's a map (as_child={}) + this key is complex", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_map(start_as_child); + addrem_flags(RKEY|QMRK, RVAL); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem.begins_with(": ") && !has_all(SSCL)) + { + _c4dbgp("it's a map with an empty key"); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_map(start_as_child); + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem == ':' && !has_all(SSCL)) + { + _c4dbgp("it's a map with an empty key"); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_map(start_as_child); + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY); + _save_indentation(); + _line_progressed(1); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(!rem.begins_with('*') && _handle_key_anchors_and_refs()) + { + return true; + } + else if(has_all(SSCL)) + { + _c4dbgpf("there's a stored scalar: '{}'", m_state->scalar); + + csubstr saved_scalar; + bool is_quoted; + if(_scan_scalar_unk(&saved_scalar, &is_quoted)) + { + rem = m_state->line_contents.rem; + _c4dbgpf("... and there's also a scalar next! '{}'", saved_scalar); + if(rem.begins_with_any(" \t")) + { + size_t n = rem.first_not_of(" \t"); + _c4dbgpf("skipping {} spaces/tabs", n); + rem = rem.sub(n); + _line_progressed(n); + } + } + + _c4dbgpf("rem='{}'", rem); + + if(rem.begins_with(", ")) + { + _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child); + _start_seq(start_as_child); + add_flags(FLOW); + _append_val(_consume_scalar()); + _line_progressed(2); + } + else if(rem.begins_with(',')) + { + _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child); + _start_seq(start_as_child); + add_flags(FLOW); + _append_val(_consume_scalar()); + _line_progressed(1); + } + else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgpf("got a ': ' -- it's a map (as_child={})", start_as_child); + _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair + _line_progressed(2); + } + else if(rem == ":" || rem.begins_with(":\"") || rem.begins_with(":'")) + { + if(rem == ":") { _c4dbgpf("got a ':' -- it's a map (as_child={})", start_as_child); } + else { _c4dbgpf("got a '{}' -- it's a map (as_child={})", rem.first(2), start_as_child); } + _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair + _line_progressed(1); // advance only 1 + } + else if(rem.begins_with('}')) + { + if(!has_all(RMAP|FLOW)) + { + _c4err("invalid token: not reading a map"); + } + if(!has_all(SSCL)) + { + _c4err("no scalar stored"); + } + _append_key_val(saved_scalar); + _stop_map(); + _line_progressed(1); + } + else if(rem.begins_with("...")) + { + _c4dbgp("got stream end '...'"); + _end_stream(); + _line_progressed(3); + } + else if(rem.begins_with('#')) + { + _c4dbgpf("it's a comment: '{}'", rem); + _scan_comment(); + return true; + } + else if(_handle_key_anchors_and_refs()) + { + return true; + } + else if(rem.begins_with(" ") || rem.begins_with("\t")) + { + size_t n = rem.first_not_of(" \t"); + if(n == npos) + n = rem.len; + _c4dbgpf("has {} spaces/tabs, skip...", n); + _line_progressed(n); + return true; + } + else if(rem.empty()) + { + // nothing to do + } + else if(rem == "---" || rem.begins_with("--- ")) + { + _c4dbgp("caught ---: starting doc"); + _start_new_doc(rem); + return true; + } + else if(rem.begins_with('%')) + { + _c4dbgp("caught a directive: ignoring..."); + _line_progressed(rem.len); + return true; + } + else + { + _c4err("parse error"); + } + + if( ! saved_scalar.empty()) + { + _store_scalar(saved_scalar, is_quoted); + } + + return true; + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(SSCL)); + csubstr scalar; + size_t indentation = m_state->line_contents.indentation; // save + bool is_quoted; + if(_scan_scalar_unk(&scalar, &is_quoted)) + { + _c4dbgpf("got a {} scalar", is_quoted ? "quoted" : ""); + rem = m_state->line_contents.rem; + { + size_t first = rem.first_not_of(" \t"); + if(first && first != npos) + { + _c4dbgpf("skip {} whitespace characters", first); + _line_progressed(first); + rem = rem.sub(first); + } + } + _store_scalar(scalar, is_quoted); + if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgpf("got a ': ' next -- it's a map (as_child={})", start_as_child); + _push_level(); + _start_map(start_as_child); // wait for the val scalar to append the key-val pair + _set_indentation(indentation); + _line_progressed(2); // call this AFTER saving the indentation + } + else if(rem == ":") + { + _c4dbgpf("got a ':' next -- it's a map (as_child={})", start_as_child); + _push_level(); + _start_map(start_as_child); // wait for the val scalar to append the key-val pair + _set_indentation(indentation); + _line_progressed(1); // call this AFTER saving the indentation + } + else + { + // we still don't know whether it's a seq or a map + // so just store the scalar + } + return true; + } + else if(rem.begins_with_any(" \t")) + { + csubstr ws = rem.left_of(rem.first_not_of(" \t")); + rem = rem.right_of(ws); + if(has_all(RTOP) && rem.begins_with("---")) + { + _c4dbgp("there's a doc starting, and it's indented"); + _set_indentation(ws.len); + } + _c4dbgpf("skipping {} spaces/tabs", ws.len); + _line_progressed(ws.len); + return true; + } + } + + return false; +} + + +//----------------------------------------------------------------------------- +C4_ALWAYS_INLINE void Parser::_skipchars(char c) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with(c)); + size_t pos = m_state->line_contents.rem.first_not_of(c); + if(pos == npos) + pos = m_state->line_contents.rem.len; // maybe the line is just whitespace + _c4dbgpf("skip {} '{}'", pos, c); + _line_progressed(pos); +} + +template<size_t N> +C4_ALWAYS_INLINE void Parser::_skipchars(const char (&chars)[N]) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with_any(chars)); + size_t pos = m_state->line_contents.rem.first_not_of(chars); + if(pos == npos) + pos = m_state->line_contents.rem.len; // maybe the line is just whitespace + _c4dbgpf("skip {} characters", pos); + _line_progressed(pos); +} + + +//----------------------------------------------------------------------------- +bool Parser::_handle_seq_flow() +{ + _c4dbgpf("handle_seq_flow: node_id={} level={}", m_state->node_id, m_state->level); + csubstr rem = m_state->line_contents.rem; + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW)); + + if(rem.begins_with(' ')) + { + // with explicit flow, indentation does not matter + _c4dbgp("starts with spaces"); + _skipchars(' '); + return true; + } + _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t')) + { + _c4dbgp("starts with tabs"); + _skipchars('\t'); + return true; + }) + else if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + rem = _scan_comment(); // also progresses the line + return true; + } + else if(rem.begins_with(']')) + { + _c4dbgp("end the sequence"); + _pop_level(); + _line_progressed(1); + if(has_all(RSEQIMAP)) + { + _stop_seqimap(); + _pop_level(); + } + return true; + } + + if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + bool is_quoted; + if(_scan_scalar_seq_flow(&rem, &is_quoted)) + { + _c4dbgp("it's a scalar"); + addrem_flags(RNXT, RVAL); + _append_val(rem, is_quoted); + return true; + } + else if(rem.begins_with('[')) + { + _c4dbgp("val is a child seq"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _start_seq(); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgp("val is a child map"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _start_map(); + addrem_flags(FLOW|RKEY, RVAL); + _line_progressed(1); + return true; + } + else if(rem == ':') + { + _c4dbgpf("found ':' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(1); + return true; + } + else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(2); + return true; + } + else if(rem.begins_with("? ")) + { + _c4dbgpf("found '? ' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(2); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(SSCL) && m_state->scalar == ""); + addrem_flags(QMRK|RKEY, RVAL|SSCL); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(_handle_val_anchors_and_refs()) + { + return true; + } + else if(rem.begins_with(", ")) + { + _c4dbgp("found ',' -- the value was null"); + _append_val_null(rem.str - 1); + _line_progressed(2); + return true; + } + else if(rem.begins_with(',')) + { + _c4dbgp("found ',' -- the value was null"); + _append_val_null(rem.str - 1); + _line_progressed(1); + return true; + } + else if(rem.begins_with('\t')) + { + _skipchars('\t'); + return true; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + if(rem.begins_with(", ")) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); + _c4dbgp("seq: expect next val"); + addrem_flags(RVAL, RNXT); + _line_progressed(2); + return true; + } + else if(rem.begins_with(',')) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); + _c4dbgp("seq: expect next val"); + addrem_flags(RVAL, RNXT); + _line_progressed(1); + return true; + } + else if(rem == ':') + { + _c4dbgpf("found ':' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(1); + return true; + } + else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(2); + return true; + } + else + { + _c4err("was expecting a comma"); + } + } + else + { + _c4err("internal error"); + } + + return true; +} + +//----------------------------------------------------------------------------- +bool Parser::_handle_seq_blck() +{ + _c4dbgpf("handle_seq_impl: node_id={} level={}", m_state->node_id, m_state->level); + csubstr rem = m_state->line_contents.rem; + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); + + if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + rem = _scan_comment(); + return true; + } + if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + + if(_handle_indentation()) + return true; + + if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) + { + _c4dbgp("expect another val"); + addrem_flags(RVAL, RNXT); + _line_progressed(2); + return true; + } + else if(rem == '-') + { + _c4dbgp("expect another val"); + addrem_flags(RVAL, RNXT); + _line_progressed(1); + return true; + } + else if(rem.begins_with_any(" \t")) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin()); + _skipchars(" \t"); + return true; + } + else if(rem.begins_with("...")) + { + _c4dbgp("got stream end '...'"); + _end_stream(); + _line_progressed(3); + return true; + } + else if(rem.begins_with("---")) + { + _c4dbgp("got document start '---'"); + _start_new_doc(rem); + return true; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RVAL)) + { + // there can be empty values + if(_handle_indentation()) + return true; + + csubstr s; + bool is_quoted; + if(_scan_scalar_seq_blck(&s, &is_quoted)) // this also progresses the line + { + _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); + + rem = m_state->line_contents.rem; + if(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(rem.begins_with_any(" \t"), rem.begins_with(' '))) + { + _c4dbgp("skipping whitespace..."); + size_t skip = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(skip == csubstr::npos) + skip = rem.len; // maybe the line is just whitespace + _line_progressed(skip); + rem = rem.sub(skip); + } + + _c4dbgpf("rem=[{}]~~~{}~~~", rem.len, rem); + if(!rem.begins_with('#') && (rem.ends_with(':') || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))) + { + _c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope"); + if(m_key_anchor.empty()) + _move_val_anchor_to_key_anchor(); + if(m_key_tag.empty()) + _move_val_tag_to_key_tag(); + addrem_flags(RNXT, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT + _push_level(); + _start_map(); + _store_scalar(s, is_quoted); + if( ! _maybe_set_indentation_from_anchor_or_tag()) + { + _c4dbgpf("set indentation from scalar: {}", m_state->scalar_col); + _set_indentation(m_state->scalar_col); // this is the column where the scalar starts + } + _move_key_tag2_to_key_tag(); + addrem_flags(RVAL, RKEY); + _line_progressed(1); + } + else + { + _c4dbgp("appending val to current seq"); + _append_val(s, is_quoted); + addrem_flags(RNXT, RVAL); + } + return true; + } + else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) + { + if(_rval_dash_start_or_continue_seq()) + _line_progressed(2); + return true; + } + else if(rem == '-') + { + if(_rval_dash_start_or_continue_seq()) + _line_progressed(1); + return true; + } + else if(rem.begins_with('[')) + { + _c4dbgp("val is a child seq, flow"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _start_seq(); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgp("val is a child map, flow"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _start_map(); + addrem_flags(FLOW|RKEY, RVAL); + _line_progressed(1); + return true; + } + else if(rem.begins_with("? ")) + { + _c4dbgp("val is a child map + this key is complex"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(); + _start_map(); + addrem_flags(QMRK|RKEY, RVAL); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem.begins_with(' ')) + { + csubstr spc = rem.left_of(rem.first_not_of(' ')); + if(_at_line_begin()) + { + _c4dbgpf("skipping value indentation: {} spaces", spc.len); + _line_progressed(spc.len); + return true; + } + else + { + _c4dbgpf("skipping {} spaces", spc.len); + _line_progressed(spc.len); + return true; + } + } + else if(_handle_types()) + { + return true; + } + else if(_handle_val_anchors_and_refs()) + { + return true; + } + /* pathological case: + * - &key : val + * - &key : + * - : val + */ + else if((!has_all(SSCL)) && + (rem.begins_with(": ") || rem.left_of(rem.find("#")).trimr("\t") == ":")) + { + if(!m_val_anchor.empty() || !m_val_tag.empty()) + { + _c4dbgp("val is a child map + this key is empty, with anchors or tags"); + addrem_flags(RNXT, RVAL); // before _push_level! + _move_val_tag_to_key_tag(); + _move_val_anchor_to_key_anchor(); + _push_level(); + _start_map(); + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY); + RYML_CHECK(_maybe_set_indentation_from_anchor_or_tag()); // one of them must exist + _line_progressed(rem.begins_with(": ") ? 2u : 1u); + return true; + } + else + { + _c4dbgp("val is a child map + this key is empty, no anchors or tags"); + addrem_flags(RNXT, RVAL); // before _push_level! + size_t ind = m_state->indref; + _push_level(); + _start_map(); + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY); + _c4dbgpf("set indentation from map anchor: {}", ind + 2); + _set_indentation(ind + 2); // this is the column where the map starts + _line_progressed(rem.begins_with(": ") ? 2u : 1u); + return true; + } + } + else + { + _c4err("parse error"); + } + } + + return false; +} + +//----------------------------------------------------------------------------- + +bool Parser::_rval_dash_start_or_continue_seq() +{ + size_t ind = m_state->line_contents.current_col(); + _RYML_CB_ASSERT(m_stack.m_callbacks, ind >= m_state->indref); + size_t delta_ind = ind - m_state->indref; + if( ! delta_ind) + { + _c4dbgp("prev val was empty"); + addrem_flags(RNXT, RVAL); + _append_val_null(&m_state->line_contents.full[ind]); + return false; + } + _c4dbgp("val is a nested seq, indented"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(); + _start_seq(); + _save_indentation(); + return true; +} + +//----------------------------------------------------------------------------- +bool Parser::_handle_map_flow() +{ + // explicit flow, ie, inside {}, separated by commas + _c4dbgpf("handle_map_flow: node_id={} level={}", m_state->node_id, m_state->level); + csubstr rem = m_state->line_contents.rem; + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP|FLOW)); + + if(rem.begins_with(' ')) + { + // with explicit flow, indentation does not matter + _c4dbgp("starts with spaces"); + _skipchars(' '); + return true; + } + _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t')) + { + // with explicit flow, indentation does not matter + _c4dbgp("starts with tabs"); + _skipchars('\t'); + return true; + }) + else if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + rem = _scan_comment(); // also progresses the line + return true; + } + else if(rem.begins_with('}')) + { + _c4dbgp("end the map"); + if(has_all(SSCL)) + { + _c4dbgp("the last val was null"); + _append_key_val_null(rem.str - 1); + rem_flags(RVAL); + } + _pop_level(); + _line_progressed(1); + if(has_all(RSEQIMAP)) + { + _c4dbgp("stopping implicitly nested 1x map"); + _stop_seqimap(); + _pop_level(); + } + return true; + } + + if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RSEQIMAP)); + + if(rem.begins_with(", ")) + { + _c4dbgp("seq: expect next keyval"); + addrem_flags(RKEY, RNXT); + _line_progressed(2); + return true; + } + else if(rem.begins_with(',')) + { + _c4dbgp("seq: expect next keyval"); + addrem_flags(RKEY, RNXT); + _line_progressed(1); + return true; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RKEY)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + + bool is_quoted; + if(has_none(SSCL) && _scan_scalar_map_flow(&rem, &is_quoted)) + { + _c4dbgp("it's a scalar"); + _store_scalar(rem, is_quoted); + rem = m_state->line_contents.rem; + csubstr trimmed = rem.triml(" \t"); + if(trimmed.len && (trimmed.begins_with(": ") || trimmed.begins_with_any(":,}") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= rem.str); + size_t num = static_cast<size_t>(trimmed.str - rem.str); + _c4dbgpf("trimming {} whitespace after the scalar: '{}' --> '{}'", num, rem, rem.sub(num)); + rem = rem.sub(num); + _line_progressed(num); + } + } + + if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgp("wait for val"); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(2); + if(!has_all(SSCL)) + { + _c4dbgp("no key was found, defaulting to empty key ''"); + _store_scalar_null(rem.str); + } + return true; + } + else if(rem == ':') + { + _c4dbgp("wait for val"); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(1); + if(!has_all(SSCL)) + { + _c4dbgp("no key was found, defaulting to empty key ''"); + _store_scalar_null(rem.str); + } + return true; + } + else if(rem.begins_with('?')) + { + _c4dbgp("complex key"); + add_flags(QMRK); + _line_progressed(1); + return true; + } + else if(rem.begins_with(',')) + { + _c4dbgp("prev scalar was a key with null value"); + _append_key_val_null(rem.str - 1); + _line_progressed(1); + return true; + } + else if(rem.begins_with('}')) + { + _c4dbgp("map terminates after a key..."); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); + _c4dbgp("the last val was null"); + _append_key_val_null(rem.str - 1); + rem_flags(RVAL); + if(has_all(RSEQIMAP)) + { + _c4dbgp("stopping implicitly nested 1x map"); + _stop_seqimap(); + _pop_level(); + } + _pop_level(); + _line_progressed(1); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(_handle_key_anchors_and_refs()) + { + return true; + } + else if(rem == "") + { + return true; + } + else + { + size_t pos = rem.first_not_of(" \t"); + if(pos == csubstr::npos) + pos = 0; + rem = rem.sub(pos); + if(rem.begins_with(':')) + { + _c4dbgp("wait for val"); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(pos + 1); + if(!has_all(SSCL)) + { + _c4dbgp("no key was found, defaulting to empty key ''"); + _store_scalar_null(rem.str); + } + return true; + } + else if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + _line_progressed(pos); + rem = _scan_comment(); // also progresses the line + return true; + } + else + { + _c4err("parse error"); + } + } + } + else if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); + bool is_quoted; + if(_scan_scalar_map_flow(&rem, &is_quoted)) + { + _c4dbgp("it's a scalar"); + addrem_flags(RNXT, RVAL|RKEY); + _append_key_val(rem, is_quoted); + if(has_all(RSEQIMAP)) + { + _c4dbgp("stopping implicitly nested 1x map"); + _stop_seqimap(); + _pop_level(); + } + return true; + } + else if(rem.begins_with('[')) + { + _c4dbgp("val is a child seq"); + addrem_flags(RNXT, RVAL|RKEY); // before _push_level! + _push_level(/*explicit flow*/true); + _move_scalar_from_top(); + _start_seq(); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgp("val is a child map"); + addrem_flags(RNXT, RVAL|RKEY); // before _push_level! + _push_level(/*explicit flow*/true); + _move_scalar_from_top(); + _start_map(); + addrem_flags(FLOW|RKEY, RNXT|RVAL); + _line_progressed(1); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(_handle_val_anchors_and_refs()) + { + return true; + } + else if(rem.begins_with(',')) + { + _c4dbgp("appending empty val"); + _append_key_val_null(rem.str - 1); + addrem_flags(RKEY, RVAL); + _line_progressed(1); + if(has_any(RSEQIMAP)) + { + _c4dbgp("stopping implicitly nested 1x map"); + _stop_seqimap(); + _pop_level(); + } + return true; + } + else if(has_any(RSEQIMAP) && rem.begins_with(']')) + { + _c4dbgp("stopping implicitly nested 1x map"); + if(has_any(SSCL)) + { + _append_key_val_null(rem.str - 1); + } + _stop_seqimap(); + _pop_level(); + return true; + } + else + { + _c4err("parse error"); + } + } + else + { + _c4err("internal error"); + } + + return false; +} + +//----------------------------------------------------------------------------- +bool Parser::_handle_map_blck() +{ + _c4dbgpf("handle_map_blck: node_id={} level={}", m_state->node_id, m_state->level); + csubstr rem = m_state->line_contents.rem; + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); + + if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + rem = _scan_comment(); + return true; + } + + if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + // actually, we don't need RNXT in indent-based maps. + addrem_flags(RKEY, RNXT); + } + + if(_handle_indentation()) + { + _c4dbgp("indentation token"); + return true; + } + + if(has_any(RKEY)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + + _c4dbgp("RMAP|RKEY read scalar?"); + bool is_quoted; + if(_scan_scalar_map_blck(&rem, &is_quoted)) // this also progresses the line + { + _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); + if(has_all(QMRK|SSCL)) + { + _c4dbgpf("current key is QMRK; SSCL is set. so take store scalar='{}' as key and add an empty val", m_state->scalar); + _append_key_val_null(rem.str - 1); + } + _store_scalar(rem, is_quoted); + if(has_all(QMRK|RSET)) + { + _c4dbgp("it's a complex key, so use null value '~'"); + _append_key_val_null(rem.str); + } + rem = m_state->line_contents.rem; + + if(rem.begins_with(':')) + { + _c4dbgp("wait for val"); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(1); + rem = m_state->line_contents.rem; + if(rem.begins_with_any(" \t")) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin()); + rem = rem.left_of(rem.first_not_of(" \t")); + _c4dbgpf("skip {} spaces/tabs", rem.len); + _line_progressed(rem.len); + } + } + return true; + } + else if(rem.begins_with_any(" \t")) + { + size_t pos = rem.first_not_of(" \t"); + if(pos == npos) + pos = rem.len; + _c4dbgpf("skip {} spaces/tabs", pos); + _line_progressed(pos); + return true; + } + else if(rem == '?' || rem.begins_with("? ")) + { + _c4dbgp("it's a complex key"); + _line_progressed(rem.begins_with("? ") ? 2u : 1u); + if(has_any(SSCL)) + _append_key_val_null(rem.str - 1); + add_flags(QMRK); + return true; + } + else if(has_all(QMRK) && rem.begins_with(':')) + { + _c4dbgp("complex key finished"); + if(!has_any(SSCL)) + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(1); + rem = m_state->line_contents.rem; + if(rem.begins_with(' ')) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin()); + _skipchars(' '); + } + return true; + } + else if(rem == ':' || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgp("key finished"); + if(!has_all(SSCL)) + { + _c4dbgp("key was empty..."); + _store_scalar_null(rem.str); + rem_flags(QMRK); + } + addrem_flags(RVAL, RKEY); + _line_progressed(rem == ':' ? 1 : 2); + return true; + } + else if(rem.begins_with("...")) + { + _c4dbgp("end current document"); + _end_stream(); + _line_progressed(3); + return true; + } + else if(rem.begins_with("---")) + { + _c4dbgp("start new document '---'"); + _start_new_doc(rem); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(_handle_key_anchors_and_refs()) + { + return true; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + + _c4dbgp("RMAP|RVAL read scalar?"); + csubstr s; + bool is_quoted; + if(_scan_scalar_map_blck(&s, &is_quoted)) // this also progresses the line + { + _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); + + rem = m_state->line_contents.rem; + + if(rem.begins_with(": ")) + { + _c4dbgp("actually, the scalar is the first key of a map"); + addrem_flags(RKEY, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT + _push_level(); + _move_scalar_from_top(); + _move_val_anchor_to_key_anchor(); + _start_map(); + _save_indentation(m_state->scalar_col); + addrem_flags(RVAL, RKEY); + _line_progressed(2); + } + else if(rem.begins_with(':')) + { + _c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope"); + addrem_flags(RKEY, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT + _push_level(); + _move_scalar_from_top(); + _move_val_anchor_to_key_anchor(); + _start_map(); + _save_indentation(/*behind*/s.len); + addrem_flags(RVAL, RKEY); + _line_progressed(1); + } + else + { + _c4dbgp("appending keyval to current map"); + _append_key_val(s, is_quoted); + addrem_flags(RKEY, RVAL); + } + return true; + } + else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) + { + _c4dbgp("val is a nested seq, indented"); + addrem_flags(RKEY, RVAL); // before _push_level! + _push_level(); + _move_scalar_from_top(); + _start_seq(); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem == '-') + { + _c4dbgp("maybe a seq. start unknown, indented"); + _start_unk(); + _save_indentation(); + _line_progressed(1); + return true; + } + else if(rem.begins_with('[')) + { + _c4dbgp("val is a child seq, flow"); + addrem_flags(RKEY, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _move_scalar_from_top(); + _start_seq(); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgp("val is a child map, flow"); + addrem_flags(RKEY, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _move_scalar_from_top(); + _start_map(); + addrem_flags(FLOW|RKEY, RVAL); + _line_progressed(1); + return true; + } + else if(rem.begins_with(' ')) + { + csubstr spc = rem.left_of(rem.first_not_of(' ')); + if(_at_line_begin()) + { + _c4dbgpf("skipping value indentation: {} spaces", spc.len); + _line_progressed(spc.len); + return true; + } + else + { + _c4dbgpf("skipping {} spaces", spc.len); + _line_progressed(spc.len); + return true; + } + } + else if(_handle_types()) + { + return true; + } + else if(_handle_val_anchors_and_refs()) + { + return true; + } + else if(rem.begins_with("--- ") || rem == "---" || rem.begins_with("---\t")) + { + _start_new_doc(rem); + return true; + } + else if(rem.begins_with("...")) + { + _c4dbgp("end current document"); + _end_stream(); + _line_progressed(3); + return true; + } + else + { + _c4err("parse error"); + } + } + else + { + _c4err("internal error"); + } + + return false; +} + + +//----------------------------------------------------------------------------- +bool Parser::_handle_top() +{ + _c4dbgp("handle_top"); + csubstr rem = m_state->line_contents.rem; + + if(rem.begins_with('#')) + { + _c4dbgp("a comment line"); + _scan_comment(); + return true; + } + + csubstr trimmed = rem.triml(' '); + + if(trimmed.begins_with('%')) + { + _handle_directive(trimmed); + _line_progressed(rem.len); + return true; + } + else if(trimmed.begins_with("--- ") || trimmed == "---" || trimmed.begins_with("---\t")) + { + _start_new_doc(rem); + if(trimmed.len < rem.len) + { + _line_progressed(rem.len - trimmed.len); + _save_indentation(); + } + return true; + } + else if(trimmed.begins_with("...")) + { + _c4dbgp("end current document"); + _end_stream(); + if(trimmed.len < rem.len) + { + _line_progressed(rem.len - trimmed.len); + } + _line_progressed(3); + return true; + } + else + { + _c4err("parse error"); + } + + return false; +} + + +//----------------------------------------------------------------------------- + +bool Parser::_handle_key_anchors_and_refs() +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RVAL)); + const csubstr rem = m_state->line_contents.rem; + if(rem.begins_with('&')) + { + _c4dbgp("found a key anchor!!!"); + if(has_all(QMRK|SSCL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY)); + _c4dbgp("there is a stored key, so this anchor is for the next element"); + _append_key_val_null(rem.str - 1); + rem_flags(QMRK); + return true; + } + csubstr anchor = rem.left_of(rem.first_of(' ')); + _line_progressed(anchor.len); + anchor = anchor.sub(1); // skip the first character + _move_key_anchor_to_val_anchor(); + _c4dbgpf("key anchor value: '{}'", anchor); + m_key_anchor = anchor; + m_key_anchor_indentation = m_state->line_contents.current_col(rem); + return true; + } + else if(C4_UNLIKELY(rem.begins_with('*'))) + { + _c4err("not implemented - this should have been catched elsewhere"); + C4_NEVER_REACH(); + return false; + } + return false; +} + +bool Parser::_handle_val_anchors_and_refs() +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RKEY)); + const csubstr rem = m_state->line_contents.rem; + if(rem.begins_with('&')) + { + csubstr anchor = rem.left_of(rem.first_of(' ')); + _line_progressed(anchor.len); + anchor = anchor.sub(1); // skip the first character + _c4dbgpf("val: found an anchor: '{}', indentation={}!!!", anchor, m_state->line_contents.current_col(rem)); + if(m_val_anchor.empty()) + { + _c4dbgpf("save val anchor: '{}'", anchor); + m_val_anchor = anchor; + m_val_anchor_indentation = m_state->line_contents.current_col(rem); + } + else + { + _c4dbgpf("there is a pending val anchor '{}'", m_val_anchor); + if(m_tree->is_seq(m_state->node_id)) + { + if(m_tree->has_children(m_state->node_id)) + { + _c4dbgpf("current node={} is a seq, has {} children", m_state->node_id, m_tree->num_children(m_state->node_id)); + _c4dbgpf("... so take the new one as a key anchor '{}'", anchor); + m_key_anchor = anchor; + m_key_anchor_indentation = m_state->line_contents.current_col(rem); + } + else + { + _c4dbgpf("current node={} is a seq, has no children", m_state->node_id); + if(m_tree->has_val_anchor(m_state->node_id)) + { + _c4dbgpf("... node={} already has val anchor: '{}'", m_state->node_id, m_tree->val_anchor(m_state->node_id)); + _c4dbgpf("... so take the new one as a key anchor '{}'", anchor); + m_key_anchor = anchor; + m_key_anchor_indentation = m_state->line_contents.current_col(rem); + } + else + { + _c4dbgpf("... so set pending val anchor: '{}' on current node {}", m_val_anchor, m_state->node_id); + m_tree->set_val_anchor(m_state->node_id, m_val_anchor); + m_val_anchor = anchor; + m_val_anchor_indentation = m_state->line_contents.current_col(rem); + } + } + } + } + return true; + } + else if(C4_UNLIKELY(rem.begins_with('*'))) + { + _c4err("not implemented - this should have been catched elsewhere"); + C4_NEVER_REACH(); + return false; + } + return false; +} + +void Parser::_move_key_anchor_to_val_anchor() +{ + if(m_key_anchor.empty()) + return; + _c4dbgpf("move current key anchor to val slot: key='{}' -> val='{}'", m_key_anchor, m_val_anchor); + if(!m_val_anchor.empty()) + _c4err("triple-pending anchor"); + m_val_anchor = m_key_anchor; + m_val_anchor_indentation = m_key_anchor_indentation; + m_key_anchor = {}; + m_key_anchor_indentation = {}; +} + +void Parser::_move_val_anchor_to_key_anchor() +{ + if(m_val_anchor.empty()) + return; + if(!_token_is_from_this_line(m_val_anchor)) + return; + _c4dbgpf("move current val anchor to key slot: key='{}' <- val='{}'", m_key_anchor, m_val_anchor); + if(!m_key_anchor.empty()) + _c4err("triple-pending anchor"); + m_key_anchor = m_val_anchor; + m_key_anchor_indentation = m_val_anchor_indentation; + m_val_anchor = {}; + m_val_anchor_indentation = {}; +} + +void Parser::_move_key_tag_to_val_tag() +{ + if(m_key_tag.empty()) + return; + _c4dbgpf("move key tag to val tag: key='{}' -> val='{}'", m_key_tag, m_val_tag); + m_val_tag = m_key_tag; + m_val_tag_indentation = m_key_tag_indentation; + m_key_tag.clear(); + m_key_tag_indentation = 0; +} + +void Parser::_move_val_tag_to_key_tag() +{ + if(m_val_tag.empty()) + return; + if(!_token_is_from_this_line(m_val_tag)) + return; + _c4dbgpf("move val tag to key tag: key='{}' <- val='{}'", m_key_tag, m_val_tag); + m_key_tag = m_val_tag; + m_key_tag_indentation = m_val_tag_indentation; + m_val_tag.clear(); + m_val_tag_indentation = 0; +} + +void Parser::_move_key_tag2_to_key_tag() +{ + if(m_key_tag2.empty()) + return; + _c4dbgpf("move key tag2 to key tag: key='{}' <- key2='{}'", m_key_tag, m_key_tag2); + m_key_tag = m_key_tag2; + m_key_tag_indentation = m_key_tag2_indentation; + m_key_tag2.clear(); + m_key_tag2_indentation = 0; +} + + +//----------------------------------------------------------------------------- + +bool Parser::_handle_types() +{ + csubstr rem = m_state->line_contents.rem.triml(' '); + csubstr t; + + if(rem.begins_with("!!")) + { + _c4dbgp("begins with '!!'"); + t = rem.left_of(rem.first_of(" ,")); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2); + //t = t.sub(2); + if(t == "!!set") + add_flags(RSET); + } + else if(rem.begins_with("!<")) + { + _c4dbgp("begins with '!<'"); + t = rem.left_of(rem.first_of('>'), true); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2); + //t = t.sub(2, t.len-1); + } + else if(rem.begins_with("!h!")) + { + _c4dbgp("begins with '!h!'"); + t = rem.left_of(rem.first_of(' ')); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 3); + //t = t.sub(3); + } + else if(rem.begins_with('!')) + { + _c4dbgp("begins with '!'"); + t = rem.left_of(rem.first_of(' ')); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1); + //t = t.sub(1); + } + + if(t.empty()) + return false; + + if(has_all(QMRK|SSCL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY)); + _c4dbgp("there is a stored key, so this tag is for the next element"); + _append_key_val_null(rem.str - 1); + rem_flags(QMRK); + } + + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + const char *tag_beginning = rem.str; + #endif + size_t tag_indentation = m_state->line_contents.current_col(t); + _c4dbgpf("there was a tag: '{}', indentation={}", t, tag_indentation); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.end() > m_state->line_contents.rem.begin()); + _line_progressed(static_cast<size_t>(t.end() - m_state->line_contents.rem.begin())); + { + size_t pos = m_state->line_contents.rem.first_not_of(" \t"); + if(pos != csubstr::npos) + _line_progressed(pos); + } + + if(has_all(RMAP|RKEY)) + { + _c4dbgpf("saving map key tag '{}'", t); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_key_tag.empty()); + m_key_tag = t; + m_key_tag_indentation = tag_indentation; + } + else if(has_all(RMAP|RVAL)) + { + /* foo: !!str + * !!str : bar */ + rem = m_state->line_contents.rem; + rem = rem.left_of(rem.find("#")); + rem = rem.trimr(" \t"); + _c4dbgpf("rem='{}'", rem); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(rem == ':' || rem.begins_with(": ")) + { + _c4dbgp("the last val was null, and this is a tag from a null key"); + _append_key_val_null(tag_beginning - 1); + _store_scalar_null(rem.str - 1); + // do not change the flag to key, it is ~ + _RYML_CB_ASSERT(m_stack.m_callbacks, rem.begin() > m_state->line_contents.rem.begin()); + size_t token_len = rem == ':' ? 1 : 2; + _line_progressed(static_cast<size_t>(token_len + rem.begin() - m_state->line_contents.rem.begin())); + } + #endif + _c4dbgpf("saving map val tag '{}'", t); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty()); + m_val_tag = t; + m_val_tag_indentation = tag_indentation; + } + else if(has_all(RSEQ|RVAL) || has_all(RTOP|RUNK|NDOC)) + { + if(m_val_tag.empty()) + { + _c4dbgpf("saving seq/doc val tag '{}'", t); + m_val_tag = t; + m_val_tag_indentation = tag_indentation; + } + else + { + _c4dbgpf("saving seq/doc key tag '{}'", t); + m_key_tag = t; + m_key_tag_indentation = tag_indentation; + } + } + else if(has_all(RTOP|RUNK) || has_any(RUNK)) + { + rem = m_state->line_contents.rem; + rem = rem.left_of(rem.find("#")); + rem = rem.trimr(" \t"); + if(rem.empty()) + { + _c4dbgpf("saving val tag '{}'", t); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty()); + m_val_tag = t; + m_val_tag_indentation = tag_indentation; + } + else + { + _c4dbgpf("saving key tag '{}'", t); + if(m_key_tag.empty()) + { + m_key_tag = t; + m_key_tag_indentation = tag_indentation; + } + else + { + /* handle this case: + * !!str foo: !!map + * !!int 1: !!float 20.0 + * !!int 3: !!float 40.0 + * + * (m_key_tag would be !!str and m_key_tag2 would be !!int) + */ + m_key_tag2 = t; + m_key_tag2_indentation = tag_indentation; + } + } + } + else + { + _c4err("internal error"); + } + + if(m_val_tag.not_empty()) + { + YamlTag_e tag = to_tag(t); + if(tag == TAG_STR) + { + _c4dbgpf("tag '{}' is a str-type tag", t); + if(has_all(RTOP|RUNK|NDOC)) + { + _c4dbgpf("docval. slurping the string. pos={}", m_state->pos.offset); + csubstr scalar = _slurp_doc_scalar(); + _c4dbgpf("docval. after slurp: {}, at node {}: '{}'", m_state->pos.offset, m_state->node_id, scalar); + m_tree->to_val(m_state->node_id, scalar, DOC); + _c4dbgpf("docval. val tag {} -> {}", m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); + m_val_tag.clear(); + if(!m_val_anchor.empty()) + { + _c4dbgpf("setting val anchor[{}]='{}'", m_state->node_id, m_val_anchor); + m_tree->set_val_anchor(m_state->node_id, m_val_anchor); + m_val_anchor.clear(); + } + _end_stream(); + } + } + } + return true; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_slurp_doc_scalar() +{ + csubstr s = m_state->line_contents.rem; + size_t pos = m_state->pos.offset; + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.find("---") != csubstr::npos); + _c4dbgpf("slurp 0 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + if(s.len == 0) + { + _line_ended(); + _scan_line(); + s = m_state->line_contents.rem; + pos = m_state->pos.offset; + } + + size_t skipws = s.first_not_of(" \t"); + _c4dbgpf("slurp 1 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + if(skipws != npos) + { + _line_progressed(skipws); + s = m_state->line_contents.rem; + pos = m_state->pos.offset; + _c4dbgpf("slurp 2 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_anchor.empty()); + _handle_val_anchors_and_refs(); + if(!m_val_anchor.empty()) + { + s = m_state->line_contents.rem; + skipws = s.first_not_of(" \t"); + if(skipws != npos) + { + _line_progressed(skipws); + } + s = m_state->line_contents.rem; + pos = m_state->pos.offset; + _c4dbgpf("slurp 3 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + } + + if(s.begins_with('\'')) + { + m_state->scalar_col = m_state->line_contents.current_col(s); + return _scan_squot_scalar(); + } + else if(s.begins_with('"')) + { + m_state->scalar_col = m_state->line_contents.current_col(s); + return _scan_dquot_scalar(); + } + else if(s.begins_with('|') || s.begins_with('>')) + { + return _scan_block(); + } + + _c4dbgpf("slurp 4 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() + pos); + _line_progressed(static_cast<size_t>(s.end() - (m_buf.begin() + pos))); + + _c4dbgpf("slurp 5 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + + if(_at_line_end()) + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + return s; +} + + +//----------------------------------------------------------------------------- + +bool Parser::_scan_scalar_seq_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RSEQ)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RVAL)); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(FLOW)); + + csubstr s = m_state->line_contents.rem; + if(s.len == 0) + return false; + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('|') || s.begins_with('>')) + { + *scalar = _scan_block(); + *quoted = true; + return true; + } + else if(has_any(RTOP) && _is_doc_sep(s)) + { + return false; + } + + _c4dbgp("RSEQ|RVAL"); + if( ! _is_scalar_next__rseq_rval(s)) + return false; + _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) + return false; + ) + + if(s.ends_with(':')) + { + --s.len; + } + else + { + auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #"); + if(first) + s.len = first.pos; + } + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + +bool Parser::_scan_scalar_map_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _c4dbgp("_scan_scalar_map_blck"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(FLOW)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY|RVAL)); + + csubstr s = m_state->line_contents.rem; + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED + if(s.len == 0) + return false; + #endif + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('|') || s.begins_with('>')) + { + *scalar = _scan_block(); + *quoted = true; + return true; + } + else if(has_any(RTOP) && _is_doc_sep(s)) + { + return false; + } + + if( ! _is_scalar_next__rmap(s)) + return false; + + size_t colon_token = s.find(": "); + if(colon_token == npos) + { + _RYML_WITH_OR_WITHOUT_TAB_TOKENS( + // with tab tokens + colon_token = s.find(":\t"); + if(colon_token == npos) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); + colon_token = s.find(':'); + if(colon_token != s.len-1) + colon_token = npos; + } + , + // without tab tokens + colon_token = s.find(':'); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); + if(colon_token != s.len-1) + colon_token = npos; + ) + } + + if(has_all(RKEY)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' ')); + if(has_any(QMRK)) + { + _c4dbgp("RMAP|RKEY|CPLX"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); + if(s.begins_with("? ") || s == '?') + return false; + s = s.left_of(colon_token); + s = s.left_of(s.first_of("#")); + s = s.trimr(" \t"); + if(s.begins_with("---")) + return false; + else if(s.begins_with("...")) + return false; + } + else + { + _c4dbgp("RMAP|RKEY"); + _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{')); + if(s.begins_with("? ") || s == '?') + return false; + s = s.left_of(colon_token); + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(s.begins_with("---")) + { + return false; + } + else if(s.begins_with("...")) + { + return false; + } + } + } + else if(has_all(RVAL)) + { + _c4dbgp("RMAP|RVAL"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK)); + if( ! _is_scalar_next__rmap_val(s)) + return false; + _RYML_WITH_TAB_TOKENS( + else if(s.begins_with("-\t")) + return false; + ) + _c4dbgp("RMAP|RVAL: scalar"); + s = s.left_of(s.find(" #")); // is there a comment? + s = s.left_of(s.find("\t#")); // is there a comment? + s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(s.begins_with("---")) + return false; + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED + else if(s.begins_with("...")) + return false; + #endif + } + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + +bool Parser::_scan_scalar_seq_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RSEQ)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(FLOW)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RVAL)); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(RKEY)); + + csubstr s = m_state->line_contents.rem; + if(s.len == 0) + return false; + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + + if(has_all(RVAL)) + { + _c4dbgp("RSEQ|RVAL"); + if( ! _is_scalar_next__rseq_rval(s)) + return false; + _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) + return false; + ) + _c4dbgp("RSEQ|RVAL|FLOW"); + s = s.left_of(s.first_of(",]")); + if(s.ends_with(':')) + { + --s.len; + } + else + { + auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #"); + if(first) + s.len = first.pos; + } + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + } + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + +bool Parser::_scan_scalar_map_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(FLOW)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY|RVAL)); + + csubstr s = m_state->line_contents.rem; + if(s.len == 0) + return false; + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + + if( ! _is_scalar_next__rmap(s)) + return false; + + if(has_all(RKEY)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' ')); + size_t colon_token = s.find(": "); + if(colon_token == npos) + { + _RYML_WITH_OR_WITHOUT_TAB_TOKENS( + // with tab tokens + colon_token = s.find(":\t"); + if(colon_token == npos) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); + colon_token = s.find(':'); + if(colon_token != s.len-1) + colon_token = npos; + } + , + // without tab tokens + colon_token = s.find(':'); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); + if(colon_token != s.len-1) + colon_token = npos; + ) + } + if(s.begins_with("? ") || s == '?') + return false; + if(has_any(QMRK)) + { + _c4dbgp("RMAP|RKEY|CPLX"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); + s = s.left_of(colon_token); + s = s.left_of(s.first_of("#")); + s = s.left_of(s.first_of(':')); + s = s.trimr(" \t"); + if(s.begins_with("---")) + return false; + else if(s.begins_with("...")) + return false; + } + else + { + _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{')); + _c4dbgp("RMAP|RKEY"); + s = s.left_of(colon_token); + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + _c4dbgpf("RMAP|RKEY|FLOW: '{}'", s); + s = s.left_of(s.first_of(",}")); + if(s.ends_with(':')) + --s.len; + } + } + else if(has_all(RVAL)) + { + _c4dbgp("RMAP|RVAL"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK)); + if( ! _is_scalar_next__rmap_val(s)) + return false; + _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) + return false; + ) + _c4dbgp("RMAP|RVAL|FLOW"); + if(has_none(RSEQIMAP)) + s = s.left_of(s.first_of(",}")); + else + s = s.left_of(s.first_of(",]")); + s = s.left_of(s.find(" #")); // is there a comment? + s = s.left_of(s.find("\t#")); // is there a comment? + s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + } + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + +bool Parser::_scan_scalar_unk(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RUNK)); + + csubstr s = m_state->line_contents.rem; + if(s.len == 0) + return false; + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('|') || s.begins_with('>')) + { + *scalar = _scan_block(); + *quoted = true; + return true; + } + else if(has_any(RTOP) && _is_doc_sep(s)) + { + return false; + } + + _c4dbgpf("RUNK '[{}]~~~{}~~~", s.len, s); + if( ! _is_scalar_next__runk(s)) + { + _c4dbgp("RUNK: no scalar next"); + return false; + } + size_t pos = s.find(" #"); + if(pos != npos) + s = s.left_of(pos); + pos = s.find(": "); + if(pos != npos) + s = s.left_of(pos); + else if(s.ends_with(':')) + s = s.left_of(s.len-1); + _RYML_WITH_TAB_TOKENS( + else if((pos = s.find(":\t")) != npos) // TABS + s = s.left_of(pos); + ) + else + s = s.left_of(s.first_of(',')); + s = s.trim(" \t"); + _c4dbgpf("RUNK: scalar='{}'", s); + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + + +//----------------------------------------------------------------------------- + +csubstr Parser::_extend_scanned_scalar(csubstr s) +{ + if(has_all(RMAP|RKEY|QMRK)) + { + size_t scalar_indentation = has_any(FLOW) ? 0 : m_state->scalar_col; + _c4dbgpf("extend_scalar: explicit key! indref={} scalar_indentation={} scalar_col={}", m_state->indref, scalar_indentation, m_state->scalar_col); + csubstr n = _scan_to_next_nonempty_line(scalar_indentation); + if(!n.empty()) + { + substr full = _scan_complex_key(s, n).trimr(" \t\r\n"); + if(full != s) + s = _filter_plain_scalar(full, scalar_indentation); + } + } + // deal with plain (unquoted) scalars that continue to the next line + else if(!s.begins_with_any("*")) // cannot be a plain scalar if it starts with * (that's an anchor reference) + { + _c4dbgpf("extend_scalar: line ended, scalar='{}'", s); + if(has_none(FLOW)) + { + size_t scalar_indentation = m_state->indref + 1; + if(has_all(RUNK) && scalar_indentation == 1) + scalar_indentation = 0; + csubstr n = _scan_to_next_nonempty_line(scalar_indentation); + if(!n.empty()) + { + _c4dbgpf("rscalar[IMPL]: state_indref={} state_indentation={} scalar_indentation={}", m_state->indref, m_state->line_contents.indentation, scalar_indentation); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.is_super(n)); + substr full = _scan_plain_scalar_blck(s, n, scalar_indentation); + if(full.len >= s.len) + s = _filter_plain_scalar(full, scalar_indentation); + } + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); + csubstr n = _scan_to_next_nonempty_line(/*indentation*/0); + if(!n.empty()) + { + _c4dbgp("rscalar[FLOW]"); + substr full = _scan_plain_scalar_flow(s, n); + s = _filter_plain_scalar(full, /*indentation*/0); + } + } + } + + return s; +} + + +//----------------------------------------------------------------------------- + +substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line) +{ + static constexpr const csubstr chars = "[]{}?#,"; + size_t pos = peeked_line.first_of(chars); + bool first = true; + while(pos != 0) + { + if(has_all(RMAP|RKEY) || has_any(RUNK)) + { + csubstr tpkl = peeked_line.triml(' ').trimr("\r\n"); + if(tpkl.begins_with(": ") || tpkl == ':') + { + _c4dbgpf("rscalar[FLOW]: map value starts on the peeked line: '{}'", peeked_line); + peeked_line = peeked_line.first(0); + break; + } + else + { + auto colon_pos = peeked_line.first_of_any(": ", ":"); + if(colon_pos && colon_pos.pos < pos) + { + peeked_line = peeked_line.first(colon_pos.pos); + _c4dbgpf("rscalar[FLOW]: found colon at {}. peeked='{}'", colon_pos.pos, peeked_line); + _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin()); + _line_progressed(static_cast<size_t>(peeked_line.end() - m_state->line_contents.rem.begin())); + break; + } + } + } + if(pos != npos) + { + _c4dbgpf("rscalar[FLOW]: found special character '{}' at {}, stopping: '{}'", peeked_line[pos], pos, peeked_line.left_of(pos).trimr("\r\n")); + peeked_line = peeked_line.left_of(pos); + _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin()); + _line_progressed(static_cast<size_t>(peeked_line.end() - m_state->line_contents.rem.begin())); + break; + } + _c4dbgpf("rscalar[FLOW]: append another line, full: '{}'", peeked_line.trimr("\r\n")); + if(!first) + { + RYML_CHECK(_advance_to_peeked()); + } + peeked_line = _scan_to_next_nonempty_line(/*indentation*/0); + if(peeked_line.empty()) + { + _c4err("expected token or continuation"); + } + pos = peeked_line.first_of(chars); + first = false; + } + substr full(m_buf.str + (currscalar.str - m_buf.str), m_buf.begin() + m_state->pos.offset); + full = full.trimr("\n\r "); + return full; +} + + +//----------------------------------------------------------------------------- + +substr Parser::_scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar)); + // NOTE. there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice + // size_t offs = m_state->pos.offset; // so we workaround by directly counting from the end of the given scalar + _RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin()); + size_t offs = static_cast<size_t>(currscalar.end() - m_buf.begin()); + _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.begins_with(' ', indentation)); + while(true) + { + _c4dbgpf("rscalar[IMPL]: continuing... ref_indentation={}", indentation); + if(peeked_line.begins_with("...") || peeked_line.begins_with("---")) + { + _c4dbgpf("rscalar[IMPL]: document termination next -- bail now '{}'", peeked_line.trimr("\r\n")); + break; + } + else if(( ! peeked_line.begins_with(' ', indentation))) // is the line deindented? + { + if(!peeked_line.trim(" \r\n\t").empty()) // is the line not blank? + { + _c4dbgpf("rscalar[IMPL]: deindented line, not blank -- bail now '{}'", peeked_line.trimr("\r\n")); + break; + } + _c4dbgpf("rscalar[IMPL]: line is blank and has less indentation: ref={} line={}: '{}'", indentation, peeked_line.first_not_of(' ') == csubstr::npos ? 0 : peeked_line.first_not_of(' '), peeked_line.trimr("\r\n")); + _c4dbgpf("rscalar[IMPL]: ... searching for a line starting at indentation {}", indentation); + csubstr next_peeked = _scan_to_next_nonempty_line(indentation); + if(next_peeked.empty()) + { + _c4dbgp("rscalar[IMPL]: ... finished."); + break; + } + _c4dbgp("rscalar[IMPL]: ... continuing."); + peeked_line = next_peeked; + } + + _c4dbgpf("rscalar[IMPL]: line contents: '{}'", peeked_line.right_of(indentation, true).trimr("\r\n")); + size_t token_pos; + if(peeked_line.find(": ") != npos) + { + _line_progressed(peeked_line.find(": ")); + _c4err("': ' is not a valid token in plain flow (unquoted) scalars"); + } + else if(peeked_line.ends_with(':')) + { + _line_progressed(peeked_line.find(':')); + _c4err("lines cannot end with ':' in plain flow (unquoted) scalars"); + } + else if((token_pos = peeked_line.find(" #")) != npos) + { + _line_progressed(token_pos); + break; + //_c4err("' #' is not a valid token in plain flow (unquoted) scalars"); + } + + _c4dbgpf("rscalar[IMPL]: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n")); + if(!_advance_to_peeked()) + { + _c4dbgp("rscalar[IMPL]: file finishes after the scalar"); + break; + } + peeked_line = m_state->line_contents.rem; + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs); + substr full(m_buf.str + (currscalar.str - m_buf.str), + currscalar.len + (m_state->pos.offset - offs)); + full = full.trimr("\r\n "); + return full; +} + +substr Parser::_scan_complex_key(csubstr currscalar, csubstr peeked_line) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar)); + // NOTE. there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice + // size_t offs = m_state->pos.offset; // so we workaround by directly counting from the end of the given scalar + _RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin()); + size_t offs = static_cast<size_t>(currscalar.end() - m_buf.begin()); + while(true) + { + _c4dbgp("rcplxkey: continuing..."); + if(peeked_line.begins_with("...") || peeked_line.begins_with("---")) + { + _c4dbgpf("rcplxkey: document termination next -- bail now '{}'", peeked_line.trimr("\r\n")); + break; + } + else + { + size_t pos = peeked_line.first_of("?:[]{}"); + if(pos == csubstr::npos) + { + pos = peeked_line.find("- "); + } + if(pos != csubstr::npos) + { + _c4dbgpf("rcplxkey: found special characters at pos={}: '{}'", pos, peeked_line.trimr("\r\n")); + _line_progressed(pos); + break; + } + } + + _c4dbgpf("rcplxkey: no special chars found '{}'", peeked_line.trimr("\r\n")); + csubstr next_peeked = _scan_to_next_nonempty_line(0); + if(next_peeked.empty()) + { + _c4dbgp("rcplxkey: empty ... finished."); + break; + } + _c4dbgp("rcplxkey: ... continuing."); + peeked_line = next_peeked; + + _c4dbgpf("rcplxkey: line contents: '{}'", peeked_line.trimr("\r\n")); + size_t colpos; + if((colpos = peeked_line.find(": ")) != npos) + { + _c4dbgp("rcplxkey: found ': ', stopping."); + _line_progressed(colpos); + break; + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else if((colpos = peeked_line.ends_with(':'))) + { + _c4dbgp("rcplxkey: ends with ':', stopping."); + _line_progressed(colpos); + break; + } + #endif + _c4dbgpf("rcplxkey: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n")); + if(!_advance_to_peeked()) + { + _c4dbgp("rcplxkey: file finishes after the scalar"); + break; + } + peeked_line = m_state->line_contents.rem; + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs); + substr full(m_buf.str + (currscalar.str - m_buf.str), + currscalar.len + (m_state->pos.offset - offs)); + return full; +} + +//! scans to the next non-blank line starting with the given indentation +csubstr Parser::_scan_to_next_nonempty_line(size_t indentation) +{ + csubstr next_peeked; + while(true) + { + _c4dbgpf("rscalar: ... curr offset: {} indentation={}", m_state->pos.offset, indentation); + next_peeked = _peek_next_line(m_state->pos.offset); + csubstr next_peeked_triml = next_peeked.triml(' '); + _c4dbgpf("rscalar: ... next peeked line='{}'", next_peeked.trimr("\r\n")); + if(next_peeked_triml.begins_with('#')) + { + _c4dbgp("rscalar: ... first non-space character is #"); + return {}; + } + else if(next_peeked.begins_with(' ', indentation)) + { + _c4dbgpf("rscalar: ... begins at same indentation {}, assuming continuation", indentation); + _advance_to_peeked(); + return next_peeked; + } + else // check for de-indentation + { + csubstr trimmed = next_peeked_triml.trimr("\t\r\n"); + _c4dbgpf("rscalar: ... deindented! trimmed='{}'", trimmed); + if(!trimmed.empty()) + { + _c4dbgp("rscalar: ... and not empty. bailing out."); + return {}; + } + } + if(!_advance_to_peeked()) + { + _c4dbgp("rscalar: file finished"); + return {}; + } + } + return {}; +} + +// returns false when the file finished +bool Parser::_advance_to_peeked() +{ + _line_progressed(m_state->line_contents.rem.len); + _line_ended(); // advances to the peeked-at line, consuming all remaining (probably newline) characters on the current line + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.first_of("\r\n") == csubstr::npos); + _c4dbgpf("advance to peeked: scan more... pos={} len={}", m_state->pos.offset, m_buf.len); + _scan_line(); // puts the peeked-at line in the buffer + if(_finished_file()) + { + _c4dbgp("rscalar: finished file!"); + return false; + } + return true; +} + +//----------------------------------------------------------------------------- + +C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following) +{ + return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n'); +} + +//! look for the next newline chars, and jump to the right of those +csubstr from_next_line(csubstr rem) +{ + size_t nlpos = rem.first_of("\r\n"); + if(nlpos == csubstr::npos) + return {}; + const char nl = rem[nlpos]; + rem = rem.right_of(nlpos); + if(rem.empty()) + return {}; + if(_extend_from_combined_newline(nl, rem.front())) + rem = rem.sub(1); + return rem; +} + +csubstr Parser::_peek_next_line(size_t pos) const +{ + csubstr rem{}; // declare here because of the goto + size_t nlpos{}; // declare here because of the goto + pos = pos == npos ? m_state->pos.offset : pos; + if(pos >= m_buf.len) + goto next_is_empty; + + // look for the next newline chars, and jump to the right of those + rem = from_next_line(m_buf.sub(pos)); + if(rem.empty()) + goto next_is_empty; + + // now get everything up to and including the following newline chars + nlpos = rem.first_of("\r\n"); + if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len)) + nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]); + rem = rem.left_of(nlpos, /*include_pos*/true); + + _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n")); + return rem; + +next_is_empty: + _c4dbgpf("peek next line @ {}: (len=0)''", pos); + return {}; +} + + +//----------------------------------------------------------------------------- +void Parser::LineContents::reset_with_next_line(csubstr buf, size_t offset) +{ + RYML_ASSERT(offset <= buf.len); + char const* C4_RESTRICT b = &buf[offset]; + char const* C4_RESTRICT e = b; + // get the current line stripped of newline chars + while(e < buf.end() && (*e != '\n' && *e != '\r')) + ++e; + RYML_ASSERT(e >= b); + const csubstr stripped_ = buf.sub(offset, static_cast<size_t>(e - b)); + // advance pos to include the first line ending + if(e != buf.end() && *e == '\r') + ++e; + if(e != buf.end() && *e == '\n') + ++e; + RYML_ASSERT(e >= b); + const csubstr full_ = buf.sub(offset, static_cast<size_t>(e - b)); + reset(full_, stripped_); +} + +void Parser::_scan_line() +{ + if(m_state->pos.offset >= m_buf.len) + { + m_state->line_contents.reset(m_buf.last(0), m_buf.last(0)); + return; + } + m_state->line_contents.reset_with_next_line(m_buf, m_state->pos.offset); +} + + +//----------------------------------------------------------------------------- +void Parser::_line_progressed(size_t ahead) +{ + _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, ahead, m_state->pos.col, m_state->pos.col+ahead, m_state->pos.offset, m_state->pos.offset+ahead); + m_state->pos.offset += ahead; + m_state->pos.col += ahead; + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col <= m_state->line_contents.stripped.len+1); + m_state->line_contents.rem = m_state->line_contents.rem.sub(ahead); +} + +void Parser::_line_ended() +{ + _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, m_state->pos.offset, m_state->pos.offset+m_state->line_contents.full.len - m_state->line_contents.stripped.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == m_state->line_contents.stripped.len+1); + m_state->pos.offset += m_state->line_contents.full.len - m_state->line_contents.stripped.len; + ++m_state->pos.line; + m_state->pos.col = 1; +} + +void Parser::_line_ended_undo() +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == 1u); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line > 0u); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_state->line_contents.full.len - m_state->line_contents.stripped.len); + size_t delta = m_state->line_contents.full.len - m_state->line_contents.stripped.len; + _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_state->pos.line, m_state->pos.line, m_state->pos.line - 1, m_state->pos.offset, m_state->pos.offset - delta); + m_state->pos.offset -= delta; + --m_state->pos.line; + m_state->pos.col = m_state->line_contents.stripped.len + 1u; + // don't forget to undo also the changes to the remainder of the line + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_buf.len || m_buf[m_state->pos.offset] == '\n' || m_buf[m_state->pos.offset] == '\r'); + m_state->line_contents.rem = m_buf.sub(m_state->pos.offset, 0); +} + + +//----------------------------------------------------------------------------- +void Parser::_set_indentation(size_t indentation) +{ + m_state->indref = indentation; + _c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref); +} + +void Parser::_save_indentation(size_t behind) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begin() >= m_state->line_contents.full.begin()); + m_state->indref = static_cast<size_t>(m_state->line_contents.rem.begin() - m_state->line_contents.full.begin()); + _RYML_CB_ASSERT(m_stack.m_callbacks, behind <= m_state->indref); + m_state->indref -= behind; + _c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref); +} + +bool Parser::_maybe_set_indentation_from_anchor_or_tag() +{ + if(m_key_anchor.not_empty()) + { + _c4dbgpf("set indentation from key anchor: {}", m_key_anchor_indentation); + _set_indentation(m_key_anchor_indentation); // this is the column where the anchor starts + return true; + } + else if(m_key_tag.not_empty()) + { + _c4dbgpf("set indentation from key tag: {}", m_key_tag_indentation); + _set_indentation(m_key_tag_indentation); // this is the column where the tag starts + return true; + } + return false; +} + + +//----------------------------------------------------------------------------- +void Parser::_write_key_anchor(size_t node_id) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->has_key(node_id)); + if( ! m_key_anchor.empty()) + { + _c4dbgpf("node={}: set key anchor to '{}'", node_id, m_key_anchor); + m_tree->set_key_anchor(node_id, m_key_anchor); + m_key_anchor.clear(); + m_key_anchor_was_before = false; + m_key_anchor_indentation = 0; + } + else if( ! m_tree->is_key_quoted(node_id)) + { + csubstr r = m_tree->key(node_id); + if(r.begins_with('*')) + { + _c4dbgpf("node={}: set key reference: '{}'", node_id, r); + m_tree->set_key_ref(node_id, r.sub(1)); + } + else if(r == "<<") + { + m_tree->set_key_ref(node_id, r); + _c4dbgpf("node={}: it's an inheriting reference", node_id); + if(m_tree->is_seq(node_id)) + { + _c4dbgpf("node={}: inheriting from seq of {}", node_id, m_tree->num_children(node_id)); + for(size_t i = m_tree->first_child(node_id); i != NONE; i = m_tree->next_sibling(i)) + { + if( ! (m_tree->val(i).begins_with('*'))) + _c4err("malformed reference: '{}'", m_tree->val(i)); + } + } + else if( ! m_tree->val(node_id).begins_with('*')) + { + _c4err("malformed reference: '{}'", m_tree->val(node_id)); + } + //m_tree->set_key_ref(node_id, r); + } + } +} + +//----------------------------------------------------------------------------- +void Parser::_write_val_anchor(size_t node_id) +{ + if( ! m_val_anchor.empty()) + { + _c4dbgpf("node={}: set val anchor to '{}'", node_id, m_val_anchor); + m_tree->set_val_anchor(node_id, m_val_anchor); + m_val_anchor.clear(); + } + csubstr r = m_tree->has_val(node_id) ? m_tree->val(node_id) : ""; + if(!m_tree->is_val_quoted(node_id) && r.begins_with('*')) + { + _c4dbgpf("node={}: set val reference: '{}'", node_id, r); + RYML_CHECK(!m_tree->has_val_anchor(node_id)); + m_tree->set_val_ref(node_id, r.sub(1)); + } +} + +//----------------------------------------------------------------------------- +void Parser::_push_level(bool explicit_flow_chars) +{ + _c4dbgpf("pushing level! currnode={} currlevel={} stacksize={} stackcap={}", m_state->node_id, m_state->level, m_stack.size(), m_stack.capacity()); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top()); + if(node(m_state) == nullptr) + { + _c4dbgp("pushing level! actually no, current node is null"); + //_RYML_CB_ASSERT(m_stack.m_callbacks, ! explicit_flow_chars); + return; + } + flag_t st = RUNK; + if(explicit_flow_chars || has_all(FLOW)) + { + st |= FLOW; + } + m_stack.push_top(); + m_state = &m_stack.top(); + set_flags(st); + m_state->node_id = (size_t)NONE; + m_state->indref = (size_t)NONE; + ++m_state->level; + _c4dbgpf("pushing level: now, currlevel={}", m_state->level); +} + +void Parser::_pop_level() +{ + _c4dbgpf("popping level! currnode={} currlevel={}", m_state->node_id, m_state->level); + if(has_any(RMAP) || m_tree->is_map(m_state->node_id)) + { + _stop_map(); + } + if(has_any(RSEQ) || m_tree->is_seq(m_state->node_id)) + { + _stop_seq(); + } + if(m_tree->is_doc(m_state->node_id)) + { + _stop_doc(); + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() > 1); + _prepare_pop(); + m_stack.pop(); + m_state = &m_stack.top(); + /*if(has_any(RMAP)) + { + _toggle_key_val(); + }*/ + if(m_state->line_contents.indentation == 0) + { + //_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RTOP)); + add_flags(RTOP); + } + _c4dbgpf("popping level: now, currnode={} currlevel={}", m_state->node_id, m_state->level); +} + +//----------------------------------------------------------------------------- +void Parser::_start_unk(bool /*as_child*/) +{ + _c4dbgp("start_unk"); + _push_level(); + _move_scalar_from_top(); +} + +//----------------------------------------------------------------------------- +void Parser::_start_doc(bool as_child) +{ + _c4dbgpf("start_doc (as child={})", as_child); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); + size_t parent_id = m_stack.size() < 2 ? m_root_id : m_stack.top(1).node_id; + _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_root(parent_id)); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); + if(as_child) + { + _c4dbgpf("start_doc: parent={}", parent_id); + if( ! m_tree->is_stream(parent_id)) + { + _c4dbgp("start_doc: rearranging with root as STREAM"); + m_tree->set_root_as_stream(); + } + m_state->node_id = m_tree->append_child(parent_id); + m_tree->to_doc(m_state->node_id); + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(parent_id) || m_tree->empty(parent_id)); + m_state->node_id = parent_id; + if( ! m_tree->is_doc(parent_id)) + { + m_tree->to_doc(parent_id, DOC); + } + } + #endif + _c4dbgpf("start_doc: id={}", m_state->node_id); + add_flags(RUNK|RTOP|NDOC); + _handle_types(); + rem_flags(NDOC); +} + +void Parser::_stop_doc() +{ + size_t doc_node = m_state->node_id; + _c4dbgpf("stop_doc[{}]", doc_node); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_doc(doc_node)); + if(!m_tree->is_seq(doc_node) && !m_tree->is_map(doc_node) && !m_tree->is_val(doc_node)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL)); + _c4dbgpf("stop_doc[{}]: there was nothing; adding null val", doc_node); + m_tree->to_val(doc_node, {}, DOC); + } +} + +void Parser::_end_stream() +{ + _c4dbgpf("end_stream, level={} node_id={}", m_state->level, m_state->node_id); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! m_stack.empty()); + NodeData *added = nullptr; + if(has_any(SSCL)) + { + if(m_tree->is_seq(m_state->node_id)) + { + _c4dbgp("append val..."); + added = _append_val(_consume_scalar()); + } + else if(m_tree->is_map(m_state->node_id)) + { + _c4dbgp("append null key val..."); + added = _append_key_val_null(m_state->line_contents.rem.str); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(has_any(RSEQIMAP)) + { + _stop_seqimap(); + _pop_level(); + } + #endif + } + else if(m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE) + { + NodeType_e quoted = has_any(QSCL) ? VALQUO : NOTYPE; // do this before consuming the scalar + csubstr scalar = _consume_scalar(); + _c4dbgpf("node[{}]: to docval '{}'{}", m_state->node_id, scalar, quoted == VALQUO ? ", quoted" : ""); + m_tree->to_val(m_state->node_id, scalar, DOC|quoted); + added = m_tree->get(m_state->node_id); + } + else + { + _c4err("internal error"); + } + } + else if(has_all(RSEQ|RVAL) && has_none(FLOW)) + { + _c4dbgp("add last..."); + added = _append_val_null(m_state->line_contents.rem.str); + } + else if(!m_val_tag.empty() && (m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE)) + { + csubstr scalar = m_state->line_contents.rem.first(0); + _c4dbgpf("node[{}]: add null scalar as docval", m_state->node_id); + m_tree->to_val(m_state->node_id, scalar, DOC); + added = m_tree->get(m_state->node_id); + } + + if(added) + { + size_t added_id = m_tree->id(added); + if(m_tree->is_seq(m_state->node_id) || m_tree->is_doc(m_state->node_id)) + { + if(!m_key_anchor.empty()) + { + _c4dbgpf("node[{}]: move key to val anchor: '{}'", added_id, m_key_anchor); + m_val_anchor = m_key_anchor; + m_key_anchor = {}; + } + if(!m_key_tag.empty()) + { + _c4dbgpf("node[{}]: move key to val tag: '{}'", added_id, m_key_tag); + m_val_tag = m_key_tag; + m_key_tag = {}; + } + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(!m_key_anchor.empty()) + { + _c4dbgpf("node[{}]: set key anchor='{}'", added_id, m_key_anchor); + m_tree->set_key_anchor(added_id, m_key_anchor); + m_key_anchor = {}; + } + #endif + if(!m_val_anchor.empty()) + { + _c4dbgpf("node[{}]: set val anchor='{}'", added_id, m_val_anchor); + m_tree->set_val_anchor(added_id, m_val_anchor); + m_val_anchor = {}; + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(!m_key_tag.empty()) + { + _c4dbgpf("node[{}]: set key tag='{}' -> '{}'", added_id, m_key_tag, normalize_tag(m_key_tag)); + m_tree->set_key_tag(added_id, normalize_tag(m_key_tag)); + m_key_tag = {}; + } + #endif + if(!m_val_tag.empty()) + { + _c4dbgpf("node[{}]: set val tag='{}' -> '{}'", added_id, m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(added_id, normalize_tag(m_val_tag)); + m_val_tag = {}; + } + } + + while(m_stack.size() > 1) + { + _c4dbgpf("popping level: {} (stack sz={})", m_state->level, m_stack.size()); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(SSCL, &m_stack.top())); + if(has_all(RSEQ|FLOW)) + _err("closing ] not found"); + _pop_level(); + } + add_flags(NDOC); +} + +void Parser::_start_new_doc(csubstr rem) +{ + _c4dbgp("_start_new_doc"); + _RYML_CB_ASSERT(m_stack.m_callbacks, rem.begins_with("---")); + C4_UNUSED(rem); + + _end_stream(); + + size_t indref = m_state->indref; + _c4dbgpf("start a document, indentation={}", indref); + _line_progressed(3); + _push_level(); + _start_doc(); + _set_indentation(indref); +} + + +//----------------------------------------------------------------------------- +void Parser::_start_map(bool as_child) +{ + _c4dbgpf("start_map (as child={})", as_child); + addrem_flags(RMAP|RVAL, RKEY|RUNK); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); + size_t parent_id = m_stack.size() < 2 ? m_root_id : m_stack.top(1).node_id; + _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); + if(as_child) + { + m_state->node_id = m_tree->append_child(parent_id); + if(has_all(SSCL)) + { + type_bits key_quoted = NOTYPE; + if(m_state->flags & QSCL) // before consuming the scalar + key_quoted |= KEYQUO; + csubstr key = _consume_scalar(); + m_tree->to_map(m_state->node_id, key, key_quoted); + _c4dbgpf("start_map: id={} key='{}'", m_state->node_id, m_tree->key(m_state->node_id)); + _write_key_anchor(m_state->node_id); + if( ! m_key_tag.empty()) + { + _c4dbgpf("node[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag)); + m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag)); + m_key_tag.clear(); + } + } + else + { + m_tree->to_map(m_state->node_id); + _c4dbgpf("start_map: id={}", m_state->node_id); + } + m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str; + _write_val_anchor(m_state->node_id); + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); + m_state->node_id = parent_id; + _c4dbgpf("start_map: id={}", m_state->node_id); + type_bits as_doc = 0; + if(m_tree->is_doc(m_state->node_id)) + as_doc |= DOC; + if(!m_tree->is_map(parent_id)) + { + RYML_CHECK(!m_tree->has_children(parent_id)); + m_tree->to_map(parent_id, as_doc); + } + else + { + m_tree->_add_flags(parent_id, as_doc); + } + _move_scalar_from_top(); + if(m_key_anchor.not_empty()) + m_key_anchor_was_before = true; + _write_val_anchor(parent_id); + if(m_stack.size() >= 2) + { + State const& parent_state = m_stack.top(1); + if(parent_state.flags & RSET) + add_flags(RSET); + } + m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str; + } + if( ! m_val_tag.empty()) + { + _c4dbgpf("node[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); + m_val_tag.clear(); + } +} + +void Parser::_start_map_unk(bool as_child) +{ + if(!m_key_anchor_was_before) + { + _c4dbgpf("stash key anchor before starting map... '{}'", m_key_anchor); + csubstr ka = m_key_anchor; + m_key_anchor = {}; + _start_map(as_child); + m_key_anchor = ka; + } + else + { + _start_map(as_child); + m_key_anchor_was_before = false; + } + if(m_key_tag2.not_empty()) + { + m_key_tag = m_key_tag2; + m_key_tag_indentation = m_key_tag2_indentation; + m_key_tag2.clear(); + m_key_tag2_indentation = 0; + } +} + +void Parser::_stop_map() +{ + _c4dbgpf("stop_map[{}]", m_state->node_id); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id)); + if(has_all(QMRK|RKEY) && !has_all(SSCL)) + { + _c4dbgpf("stop_map[{}]: RKEY", m_state->node_id); + _store_scalar_null(m_state->line_contents.rem.str); + _append_key_val_null(m_state->line_contents.rem.str); + } +} + + +//----------------------------------------------------------------------------- +void Parser::_start_seq(bool as_child) +{ + _c4dbgpf("start_seq (as child={})", as_child); + if(has_all(RTOP|RUNK)) + { + _c4dbgpf("start_seq: moving key tag to val tag: '{}'", m_key_tag); + m_val_tag = m_key_tag; + m_key_tag.clear(); + } + addrem_flags(RSEQ|RVAL, RUNK); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); + size_t parent_id = m_stack.size() < 2 ? m_root_id : m_stack.top(1).node_id; + _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); + if(as_child) + { + m_state->node_id = m_tree->append_child(parent_id); + if(has_all(SSCL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(parent_id)); + type_bits key_quoted = 0; + if(m_state->flags & QSCL) // before consuming the scalar + key_quoted |= KEYQUO; + csubstr key = _consume_scalar(); + m_tree->to_seq(m_state->node_id, key, key_quoted); + _c4dbgpf("start_seq: id={} name='{}'", m_state->node_id, m_tree->key(m_state->node_id)); + _write_key_anchor(m_state->node_id); + if( ! m_key_tag.empty()) + { + _c4dbgpf("start_seq[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag)); + m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag)); + m_key_tag.clear(); + } + } + else + { + type_bits as_doc = 0; + _RYML_CB_ASSERT(m_stack.m_callbacks, !m_tree->is_doc(m_state->node_id)); + m_tree->to_seq(m_state->node_id, as_doc); + _c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as doc" : ""); + } + _write_val_anchor(m_state->node_id); + m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str; + } + else + { + m_state->node_id = parent_id; + type_bits as_doc = 0; + if(m_tree->is_doc(m_state->node_id)) + as_doc |= DOC; + if(!m_tree->is_seq(parent_id)) + { + RYML_CHECK(!m_tree->has_children(parent_id)); + m_tree->to_seq(parent_id, as_doc); + } + else + { + m_tree->_add_flags(parent_id, as_doc); + } + _move_scalar_from_top(); + _c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as_doc" : ""); + _write_val_anchor(parent_id); + m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str; + } + if( ! m_val_tag.empty()) + { + _c4dbgpf("start_seq[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); + m_val_tag.clear(); + } +} + +void Parser::_stop_seq() +{ + _c4dbgp("stop_seq"); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id)); +} + + +//----------------------------------------------------------------------------- +void Parser::_start_seqimap() +{ + _c4dbgpf("start_seqimap at node={}. has_children={}", m_state->node_id, m_tree->has_children(m_state->node_id)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW)); + // create a map, and turn the last scalar of this sequence + // into the key of the map's first child. This scalar was + // understood to be a value in the sequence, but it is + // actually a key of a map, implicitly opened here. + // Eg [val, key: val] + // + // Yep, YAML is crazy. + if(m_tree->has_children(m_state->node_id) && m_tree->has_val(m_tree->last_child(m_state->node_id))) + { + size_t prev = m_tree->last_child(m_state->node_id); + NodeType ty = m_tree->_p(prev)->m_type; // don't use type() because it masks out the quotes + NodeScalar tmp = m_tree->valsc(prev); + _c4dbgpf("has children and last child={} has val. saving the scalars, val='{}' quoted={}", prev, tmp.scalar, ty.is_val_quoted()); + m_tree->remove(prev); + _push_level(); + _start_map(); + _store_scalar(tmp.scalar, ty.is_val_quoted()); + m_key_anchor = tmp.anchor; + m_key_tag = tmp.tag; + } + else + { + _c4dbgpf("node {} has no children yet, using empty key", m_state->node_id); + _push_level(); + _start_map(); + _store_scalar_null(m_state->line_contents.rem.str); + } + add_flags(RSEQIMAP|FLOW); +} + +void Parser::_stop_seqimap() +{ + _c4dbgp("stop_seqimap"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQIMAP)); +} + + +//----------------------------------------------------------------------------- +NodeData* Parser::_append_val(csubstr val, flag_t quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_all(SSCL)); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) != nullptr); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id)); + type_bits additional_flags = quoted ? VALQUO : NOTYPE; + _c4dbgpf("append val: '{}' to parent id={} (level={}){}", val, m_state->node_id, m_state->level, quoted ? " VALQUO!" : ""); + size_t nid = m_tree->append_child(m_state->node_id); + m_tree->to_val(nid, val, additional_flags); + + _c4dbgpf("append val: id={} val='{}'", nid, m_tree->get(nid)->m_val.scalar); + if( ! m_val_tag.empty()) + { + _c4dbgpf("append val[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(nid, normalize_tag(m_val_tag)); + m_val_tag.clear(); + } + _write_val_anchor(nid); + return m_tree->get(nid); +} + +NodeData* Parser::_append_key_val(csubstr val, flag_t val_quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id)); + type_bits additional_flags = 0; + if(m_state->flags & QSCL) + additional_flags |= KEYQUO; + if(val_quoted) + additional_flags |= VALQUO; + + csubstr key = _consume_scalar(); + _c4dbgpf("append keyval: '{}' '{}' to parent id={} (level={}){}{}", key, val, m_state->node_id, m_state->level, (additional_flags & KEYQUO) ? " KEYQUO!" : "", (additional_flags & VALQUO) ? " VALQUO!" : ""); + size_t nid = m_tree->append_child(m_state->node_id); + m_tree->to_keyval(nid, key, val, additional_flags); + _c4dbgpf("append keyval: id={} key='{}' val='{}'", nid, m_tree->key(nid), m_tree->val(nid)); + if( ! m_key_tag.empty()) + { + _c4dbgpf("append keyval[{}]: set key tag='{}' -> '{}'", nid, m_key_tag, normalize_tag(m_key_tag)); + m_tree->set_key_tag(nid, normalize_tag(m_key_tag)); + m_key_tag.clear(); + } + if( ! m_val_tag.empty()) + { + _c4dbgpf("append keyval[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(nid, normalize_tag(m_val_tag)); + m_val_tag.clear(); + } + _write_key_anchor(nid); + _write_val_anchor(nid); + rem_flags(QMRK); + return m_tree->get(nid); +} + + +//----------------------------------------------------------------------------- +void Parser::_store_scalar(csubstr s, flag_t is_quoted) +{ + _c4dbgpf("state[{}]: storing scalar '{}' (flag: {}) (old scalar='{}')", + m_state-m_stack.begin(), s, m_state->flags & SSCL, m_state->scalar); + RYML_CHECK(has_none(SSCL)); + add_flags(SSCL | (is_quoted * QSCL)); + m_state->scalar = s; +} + +csubstr Parser::_consume_scalar() +{ + _c4dbgpf("state[{}]: consuming scalar '{}' (flag: {}))", m_state-m_stack.begin(), m_state->scalar, m_state->flags & SSCL); + RYML_CHECK(m_state->flags & SSCL); + csubstr s = m_state->scalar; + rem_flags(SSCL | QSCL); + m_state->scalar.clear(); + return s; +} + +void Parser::_move_scalar_from_top() +{ + if(m_stack.size() < 2) return; + State &prev = m_stack.top(1); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top()); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state != &prev); + if(prev.flags & SSCL) + { + _c4dbgpf("moving scalar '{}' from state[{}] to state[{}] (overwriting '{}')", prev.scalar, &prev-m_stack.begin(), m_state-m_stack.begin(), m_state->scalar); + add_flags(prev.flags & (SSCL | QSCL)); + m_state->scalar = prev.scalar; + rem_flags(SSCL | QSCL, &prev); + prev.scalar.clear(); + } +} + +//----------------------------------------------------------------------------- +/** @todo this function is a monster and needs love. Likely, it needs + * to be split like _scan_scalar_*() */ +bool Parser::_handle_indentation() +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); + if( ! _at_line_begin()) + return false; + + size_t ind = m_state->line_contents.indentation; + csubstr rem = m_state->line_contents.rem; + /** @todo instead of trimming, we should use the indentation index from above */ + csubstr remt = rem.triml(' '); + + if(remt.empty() || remt.begins_with('#')) // this is a blank or comment line + { + _line_progressed(rem.size()); + return true; + } + + _c4dbgpf("indentation? ind={} indref={}", ind, m_state->indref); + if(ind == m_state->indref) + { + _c4dbgpf("same indentation: {}", ind); + if(!rem.sub(ind).begins_with('-')) + { + _c4dbgp("does not begin with -"); + if(has_any(RMAP)) + { + if(has_all(SSCL|RVAL)) + { + _c4dbgp("add with null val"); + _append_key_val_null(rem.str + ind - 1); + addrem_flags(RKEY, RVAL); + } + } + else if(has_any(RSEQ)) + { + if(m_stack.size() > 2) // do not pop to root level + { + if(has_any(RNXT)) + { + _c4dbgp("end the indentless seq"); + _pop_level(); + return true; + } + else if(has_any(RVAL)) + { + _c4dbgp("add with null val"); + _append_val_null(rem.str); + _c4dbgp("end the indentless seq"); + _pop_level(); + return true; + } + } + } + } + _line_progressed(ind); + return ind > 0; + } + else if(ind < m_state->indref) + { + _c4dbgpf("smaller indentation ({} < {})!!!", ind, m_state->indref); + if(has_all(RVAL)) + { + _c4dbgp("there was an empty val -- appending"); + if(has_all(RMAP)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); + _append_key_val_null(rem.sub(ind).str - 1); + } + else if(has_all(RSEQ)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL)); + _append_val_null(rem.sub(ind).str - 1); + } + } + // search the stack frame to jump to based on its indentation + State const* popto = nullptr; + _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.is_contiguous()); // this search relies on the stack being contiguous + for(State const* s = m_state-1; s >= m_stack.begin(); --s) + { + _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id); + if(s->indref == ind) + { + _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id); + popto = s; + // while it may be tempting to think we're done at this + // point, we must still determine whether we're jumping to a + // parent with the same indentation. Consider this case with + // an indentless sequence: + // + // product: + // - sku: BL394D + // quantity: 4 + // description: Basketball + // price: 450.00 + // - sku: BL4438H + // quantity: 1 + // description: Super Hoop + // price: 2392.00 # jumping one level here would be wrong. + // tax: 1234.5 # we must jump two levels + if(popto > m_stack.begin()) + { + auto parent = popto - 1; + if(parent->indref == popto->indref) + { + _c4dbgpf("the parent (level={},node={}) has the same indentation ({}). is this in an indentless sequence?", parent->level, parent->node_id, popto->indref); + _c4dbgpf("isseq(popto)={} ismap(parent)={}", m_tree->is_seq(popto->node_id), m_tree->is_map(parent->node_id)); + if(m_tree->is_seq(popto->node_id) && m_tree->is_map(parent->node_id)) + { + if( ! remt.begins_with('-')) + { + _c4dbgp("this is an indentless sequence"); + popto = parent; + } + else + { + _c4dbgp("not an indentless sequence"); + } + } + } + } + break; + } + } + if(!popto || popto >= m_state || popto->level >= m_state->level) + { + _c4err("parse error: incorrect indentation?"); + } + _c4dbgpf("popping {} levels: from level {} to level {}", m_state->level-popto->level, m_state->level, popto->level); + while(m_state != popto) + { + _c4dbgpf("popping level {} (indentation={})", m_state->level, m_state->indref); + _pop_level(); + } + _RYML_CB_ASSERT(m_stack.m_callbacks, ind == m_state->indref); + _line_progressed(ind); + return true; + } + else + { + _c4dbgpf("larger indentation ({} > {})!!!", ind, m_state->indref); + _RYML_CB_ASSERT(m_stack.m_callbacks, ind > m_state->indref); + if(has_all(RMAP|RVAL)) + { + if(_is_scalar_next__rmap_val(remt) && remt.first_of(":?") == npos) + { + _c4dbgpf("actually it seems a value: '{}'", remt); + } + else + { + addrem_flags(RKEY, RVAL); + _start_unk(); + //_move_scalar_from_top(); + _line_progressed(ind); + _save_indentation(); + return true; + } + } + else if(has_all(RSEQ|RVAL)) + { + // nothing to do here + } + else + { + _c4err("parse error - indentation should not increase at this point"); + } + } + + return false; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_scan_comment() +{ + csubstr s = m_state->line_contents.rem; + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('#')); + _line_progressed(s.len); + // skip the # character + s = s.sub(1); + // skip leading whitespace + s = s.right_of(s.first_not_of(' '), /*include_pos*/true); + _c4dbgpf("comment was '{}'", s); + return s; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_scan_squot_scalar() +{ + // quoted scalars can spread over multiple lines! + // nice explanation here: http://yaml-multiline.info/ + + // a span to the end of the file + size_t b = m_state->pos.offset; + substr s = m_buf.sub(b); + if(s.begins_with(' ')) + { + s = s.triml(' '); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s)); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin()); + _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin())); + } + b = m_state->pos.offset; // take this into account + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('\'')); + + // skip the opening quote + _line_progressed(1); + s = s.sub(1); + + bool needs_filter = false; + + size_t numlines = 1; // we already have one line + size_t pos = npos; // find the pos of the matching quote + while( ! _finished_file()) + { + const csubstr line = m_state->line_contents.rem; + bool line_is_blank = true; + _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_state->pos.line, line); + for(size_t i = 0; i < line.len; ++i) + { + const char curr = line.str[i]; + if(curr == '\'') // single quotes are escaped with two single quotes + { + const char next = i+1 < line.len ? line.str[i+1] : '~'; + if(next != '\'') // so just look for the first quote + { // without another after it + pos = i; + break; + } + else + { + needs_filter = true; // needs filter to remove escaped quotes + ++i; // skip the escaped quote + } + } + else if(curr != ' ') + { + line_is_blank = false; + } + } + + // leading whitespace also needs filtering + needs_filter = needs_filter + || (numlines > 1) + || line_is_blank + || (_at_line_begin() && line.begins_with(' ')); + + if(pos == npos) + { + _line_progressed(line.len); + ++numlines; + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '\''); + _line_progressed(pos + 1); // progress beyond the quote + pos = m_state->pos.offset - b - 1; // but we stop before it + break; + } + + _line_ended(); + _scan_line(); + } + + if(pos == npos) + { + _c4err("reached end of file while looking for closing quote"); + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end()); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\''); + s = s.sub(0, pos-1); + } + + if(needs_filter) + { + csubstr ret = _filter_squot_scalar(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty()); + _c4dbgpf("final scalar: \"{}\"", ret); + return ret; + } + + _c4dbgpf("final scalar: \"{}\"", s); + + return s; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_scan_dquot_scalar() +{ + // quoted scalars can spread over multiple lines! + // nice explanation here: http://yaml-multiline.info/ + + // a span to the end of the file + size_t b = m_state->pos.offset; + substr s = m_buf.sub(b); + if(s.begins_with(' ')) + { + s = s.triml(' '); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s)); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin()); + _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin())); + } + b = m_state->pos.offset; // take this into account + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('"')); + + // skip the opening quote + _line_progressed(1); + s = s.sub(1); + + bool needs_filter = false; + + size_t numlines = 1; // we already have one line + size_t pos = npos; // find the pos of the matching quote + while( ! _finished_file()) + { + const csubstr line = m_state->line_contents.rem; + bool line_is_blank = true; + _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_state->pos.line, line); + for(size_t i = 0; i < line.len; ++i) + { + const char curr = line.str[i]; + if(curr != ' ') + line_is_blank = false; + // every \ is an escape + if(curr == '\\') + { + const char next = i+1 < line.len ? line.str[i+1] : '~'; + needs_filter = true; + if(next == '"' || next == '\\') + ++i; + } + else if(curr == '"') + { + pos = i; + break; + } + } + + // leading whitespace also needs filtering + needs_filter = needs_filter + || (numlines > 1) + || line_is_blank + || (_at_line_begin() && line.begins_with(' ')); + + if(pos == npos) + { + _line_progressed(line.len); + ++numlines; + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '"'); + _line_progressed(pos + 1); // progress beyond the quote + pos = m_state->pos.offset - b - 1; // but we stop before it + break; + } + + _line_ended(); + _scan_line(); + } + + if(pos == npos) + { + _c4err("reached end of file looking for closing quote"); + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"'); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end()); + s = s.sub(0, pos-1); + } + + if(needs_filter) + { + csubstr ret = _filter_dquot_scalar(s); + _c4dbgpf("final scalar: [{}]\"{}\"", ret.len, ret); + _RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty()); + return ret; + } + + _c4dbgpf("final scalar: \"{}\"", s); + + return s; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_scan_block() +{ + // nice explanation here: http://yaml-multiline.info/ + csubstr s = m_state->line_contents.rem; + csubstr trimmed = s.triml(' '); + if(trimmed.str > s.str) + { + _c4dbgp("skipping whitespace"); + _RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= s.str); + _line_progressed(static_cast<size_t>(trimmed.str - s.str)); + s = trimmed; + } + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>')); + + _c4dbgpf("scanning block: specs=\"{}\"", s); + + // parse the spec + BlockStyle_e newline = s.begins_with('>') ? BLOCK_FOLD : BLOCK_LITERAL; + BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used + size_t indentation = npos; // have to find out if no spec is given + csubstr digits; + if(s.len > 1) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with_any("|>")); + csubstr t = s.sub(1); + _c4dbgpf("scanning block: spec is multichar: '{}'", t); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1); + size_t pos = t.first_of("-+"); + _c4dbgpf("scanning block: spec chomp char at {}", pos); + if(pos != npos) + { + if(t[pos] == '-') + chomp = CHOMP_STRIP; + else if(t[pos] == '+') + chomp = CHOMP_KEEP; + if(pos == 0) + t = t.sub(1); + else + t = t.first(pos); + } + // from here to the end, only digits are considered + digits = t.left_of(t.first_not_of("0123456789")); + if( ! digits.empty()) + { + if( ! c4::atou(digits, &indentation)) + _c4err("parse error: could not read decimal"); + _c4dbgpf("scanning block: indentation specified: {}. add {} from curr state -> {}", indentation, m_state->indref, indentation+m_state->indref); + indentation += m_state->indref; + } + } + + // finish the current line + _line_progressed(s.len); + _line_ended(); + _scan_line(); + + _c4dbgpf("scanning block: style={} chomp={} indentation={}", newline==BLOCK_FOLD ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation); + + // start with a zero-length block, already pointing at the right place + substr raw_block(m_buf.data() + m_state->pos.offset, size_t(0));// m_state->line_contents.full.sub(0, 0); + _RYML_CB_ASSERT(m_stack.m_callbacks, raw_block.begin() == m_state->line_contents.full.begin()); + + // read every full line into a raw block, + // from which newlines are to be stripped as needed. + // + // If no explicit indentation was given, pick it from the first + // non-empty line. See + // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator + size_t num_lines = 0, first = m_state->pos.line, provisional_indentation = npos; + LineContents lc; + while(( ! _finished_file())) + { + // peek next line, but do not advance immediately + lc.reset_with_next_line(m_buf, m_state->pos.offset); + _c4dbgpf("scanning block: peeking at '{}'", lc.stripped); + // evaluate termination conditions + if(indentation != npos) + { + // stop when the line is deindented and not empty + if(lc.indentation < indentation && ( ! lc.rem.trim(" \t\r\n").empty())) + { + _c4dbgpf("scanning block: indentation decreased ref={} thisline={}", indentation, lc.indentation); + break; + } + else if(indentation == 0) + { + if((lc.rem == "..." || lc.rem.begins_with("... ")) + || + (lc.rem == "---" || lc.rem.begins_with("--- "))) + { + _c4dbgp("scanning block: stop. indentation=0 and stream ended"); + break; + } + } + } + else + { + _c4dbgpf("scanning block: indentation ref not set. firstnonws={}", lc.stripped.first_not_of(' ')); + if(lc.stripped.first_not_of(' ') != npos) // non-empty line + { + _c4dbgpf("scanning block: line not empty. indref={} indprov={} indentation={}", m_state->indref, provisional_indentation, lc.indentation); + if(provisional_indentation == npos) + { + if(lc.indentation < m_state->indref) + { + _c4dbgpf("scanning block: block terminated indentation={} < indref={}", lc.indentation, m_state->indref); + if(raw_block.len == 0) + { + _c4dbgp("scanning block: was empty, undo next line"); + _line_ended_undo(); + } + break; + } + else if(lc.indentation == m_state->indref) + { + if(has_any(RSEQ|RMAP)) + { + _c4dbgpf("scanning block: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_state->indref); + break; + } + } + _c4dbgpf("scanning block: set indentation ref from this line: ref={}", lc.indentation); + indentation = lc.indentation; + } + else + { + if(lc.indentation >= provisional_indentation) + { + _c4dbgpf("scanning block: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation); + //indentation = provisional_indentation ? provisional_indentation : lc.indentation; + indentation = lc.indentation; + } + else + { + break; + //_c4err("parse error: first non-empty block line should have at least the original indentation"); + } + } + } + else // empty line + { + _c4dbgpf("scanning block: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation); + if(provisional_indentation != npos) + { + if(lc.stripped.len >= provisional_indentation) + { + _c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len); + provisional_indentation = lc.stripped.len; + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else if(lc.indentation >= provisional_indentation && lc.indentation != npos) + { + _c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation); + provisional_indentation = lc.indentation; + } + #endif + } + else + { + provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL); + _c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation); + if(provisional_indentation == npos) + { + provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(RSEQ|RVAL); + _c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation); + } + } + } + } + // advance now that we know the folded scalar continues + m_state->line_contents = lc; + _c4dbgpf("scanning block: append '{}'", m_state->line_contents.rem); + raw_block.len += m_state->line_contents.full.len; + _line_progressed(m_state->line_contents.rem.len); + _line_ended(); + ++num_lines; + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line == (first + num_lines) || (raw_block.len == 0)); + C4_UNUSED(num_lines); + C4_UNUSED(first); + + if(indentation == npos) + { + _c4dbgpf("scanning block: set indentation from provisional: {}", provisional_indentation); + indentation = provisional_indentation; + } + + if(num_lines) + _line_ended_undo(); + + _c4dbgpf("scanning block: raw=~~~{}~~~", raw_block); + + // ok! now we strip the newlines and spaces according to the specs + s = _filter_block_scalar(raw_block, newline, chomp, indentation); + + _c4dbgpf("scanning block: final=~~~{}~~~", s); + + return s; +} + + +//----------------------------------------------------------------------------- + +template<bool backslash_is_escape, bool keep_trailing_whitespace> +bool Parser::_filter_nl(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos, size_t indentation) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfnl(fmt, ...) _c4dbgpf("filter_nl[{}]: " fmt, *i, __VA_ARGS__) + #else + #define _c4dbgfnl(...) + #endif + + const char curr = r[*i]; + bool replaced = false; + + _RYML_CB_ASSERT(m_stack.m_callbacks, indentation != npos); + _RYML_CB_ASSERT(m_stack.m_callbacks, curr == '\n'); + + _c4dbgfnl("found newline. sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos)); + size_t ii = *i; + size_t numnl_following = count_following_newlines(r, &ii, indentation); + if(numnl_following) + { + _c4dbgfnl("{} consecutive (empty) lines {} in the middle. totalws={}", 1+numnl_following, ii < r.len ? "in the middle" : "at the end", ii - *i); + for(size_t j = 0; j < numnl_following; ++j) + m_filter_arena.str[(*pos)++] = '\n'; + } + else + { + if(r.first_not_of(" \t", *i+1) != npos) + { + m_filter_arena.str[(*pos)++] = ' '; + _c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos)); + replaced = true; + } + else + { + if C4_IF_CONSTEXPR (keep_trailing_whitespace) + { + m_filter_arena.str[(*pos)++] = ' '; + _c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos)); + replaced = true; + } + else + { + _c4dbgfnl("last newline, everything else is whitespace. ii={}/{}", ii, r.len); + *i = r.len; + } + } + if C4_IF_CONSTEXPR (backslash_is_escape) + { + if(ii < r.len && r.str[ii] == '\\') + { + const char next = ii+1 < r.len ? r.str[ii+1] : '\0'; + if(next == ' ' || next == '\t') + { + _c4dbgfnl("extend skip to backslash{}", ""); + ++ii; + } + } + } + } + *i = ii - 1; // correct for the loop increment + + #undef _c4dbgfnl + + return replaced; +} + + +//----------------------------------------------------------------------------- + +template<bool keep_trailing_whitespace> +void Parser::_filter_ws(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_nl[{}]: " fmt, *i, __VA_ARGS__) + #else + #define _c4dbgfws(...) + #endif + + const char curr = r[*i]; + _c4dbgfws("found whitespace '{}'", _c4prc(curr)); + _RYML_CB_ASSERT(m_stack.m_callbacks, curr == ' ' || curr == '\t'); + + size_t first = *i > 0 ? r.first_not_of(" \t", *i) : r.first_not_of(' ', *i); + if(first != npos) + { + if(r[first] == '\n' || r[first] == '\r') // skip trailing whitespace + { + _c4dbgfws("whitespace is trailing on line. firstnonws='{}'@{}", _c4prc(r[first]), first); + *i = first - 1; // correct for the loop increment + } + else // a legit whitespace + { + m_filter_arena.str[(*pos)++] = curr; + _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos)); + } + } + else + { + _c4dbgfws("... everything else is trailing whitespace{}", ""); + if C4_IF_CONSTEXPR (keep_trailing_whitespace) + for(size_t j = *i; j < r.len; ++j) + m_filter_arena.str[(*pos)++] = r[j]; + *i = r.len; + } + + #undef _c4dbgfws +} + + +//----------------------------------------------------------------------------- +csubstr Parser::_filter_plain_scalar(substr s, size_t indentation) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfps(...) _c4dbgpf("filt_plain_scalar" __VA_ARGS__) + #else + #define _c4dbgfps(...) + #endif + + _c4dbgfps("before=~~~{}~~~", s); + + substr r = s.triml(" \t"); + _grow_filter_arena(r.len); + size_t pos = 0; // the filtered size + bool filtered_chars = false; + for(size_t i = 0; i < r.len; ++i) + { + const char curr = r.str[i]; + _c4dbgfps("[{}]: '{}'", i, _c4prc(curr)); + if(curr == ' ' || curr == '\t') + { + _filter_ws</*keep_trailing_ws*/false>(r, &i, &pos); + } + else if(curr == '\n') + { + filtered_chars = _filter_nl</*backslash_is_escape*/false, /*keep_trailing_ws*/false>(r, &i, &pos, indentation); + } + else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 + { + ; + } + else + { + m_filter_arena.str[pos++] = r[i]; + } + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + if(pos < r.len || filtered_chars) + { + r = _finish_filter_arena(r, pos); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); + _c4dbgfps("#filteredchars={} after=~~~{}~~~", s.len - r.len, r); + + #undef _c4dbgfps + return r; +} + + +//----------------------------------------------------------------------------- +csubstr Parser::_filter_squot_scalar(substr s) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfsq(...) _c4dbgpf("filt_squo_scalar") + #else + #define _c4dbgfsq(...) + #endif + + // from the YAML spec for double-quoted scalars: + // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted + + _c4dbgfsq(": before=~~~{}~~~", s); + + _grow_filter_arena(s.len); + substr r = s; + size_t pos = 0; // the filtered size + bool filtered_chars = false; + for(size_t i = 0; i < r.len; ++i) + { + const char curr = r[i]; + _c4dbgfsq("[{}]: '{}'", i, _c4prc(curr)); + if(curr == ' ' || curr == '\t') + { + _filter_ws</*keep_trailing_ws*/true>(r, &i, &pos); + } + else if(curr == '\n') + { + filtered_chars = _filter_nl</*backslash_is_escape*/false, /*keep_trailing_ws*/true>(r, &i, &pos, /*indentation*/0); + } + else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 + { + ; + } + else if(curr == '\'') + { + char next = i+1 < r.len ? r[i+1] : '\0'; + if(next == '\'') + { + _c4dbgfsq("[{}]: two consecutive quotes", i); + filtered_chars = true; + m_filter_arena.str[pos++] = '\''; + ++i; + } + } + else + { + m_filter_arena.str[pos++] = curr; + } + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + if(pos < r.len || filtered_chars) + { + r = _finish_filter_arena(r, pos); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); + _c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); + + #undef _c4dbgfsq + return r; +} + + +//----------------------------------------------------------------------------- +csubstr Parser::_filter_dquot_scalar(substr s) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfdq(...) _c4dbgpf("filt_dquo_scalar" __VA_ARGS__) + #else + #define _c4dbgfdq(...) + #endif + + _c4dbgfdq(": before=~~~{}~~~", s); + + // from the YAML spec for double-quoted scalars: + // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted + // + // All leading and trailing white space characters are excluded + // from the content. Each continuation line must therefore contain + // at least one non-space character. Empty lines, if any, are + // consumed as part of the line folding. + + _grow_filter_arena(s.len + 2u * s.count('\\')); + substr r = s; + size_t pos = 0; // the filtered size + bool filtered_chars = false; + for(size_t i = 0; i < r.len; ++i) + { + const char curr = r[i]; + _c4dbgfdq("[{}]: '{}'", i, _c4prc(curr)); + if(curr == ' ' || curr == '\t') + { + _filter_ws</*keep_trailing_ws*/true>(r, &i, &pos); + } + else if(curr == '\n') + { + filtered_chars = _filter_nl</*backslash_is_escape*/true, /*keep_trailing_ws*/true>(r, &i, &pos, /*indentation*/0); + } + else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 + { + ; + } + else if(curr == '\\') + { + char next = i+1 < r.len ? r[i+1] : '\0'; + _c4dbgfdq("[{}]: backslash, next='{}'", i, _c4prc(next)); + filtered_chars = true; + if(next == '\r') + { + if(i+2 < r.len && r[i+2] == '\n') + { + ++i; // newline escaped with \ -- skip both (add only one as i is loop-incremented) + next = '\n'; + _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", i); + } + } + // remember the loop will also increment i + if(next == '\n') + { + size_t ii = i + 2; + for( ; ii < r.len; ++ii) + { + if(r.str[ii] == ' ' || r.str[ii] == '\t') // skip leading whitespace + ; + else + break; + } + i += ii - i - 1; + } + else if(next == '"' || next == '/' || next == ' ' || next == '\t') // escapes for json compatibility + { + m_filter_arena.str[pos++] = next; + ++i; + } + else if(next == '\r') + { + //++i; + } + else if(next == 'n') + { + m_filter_arena.str[pos++] = '\n'; + ++i; + } + else if(next == 'r') + { + m_filter_arena.str[pos++] = '\r'; + ++i; // skip + } + else if(next == 't') + { + m_filter_arena.str[pos++] = '\t'; + ++i; + } + else if(next == '\\') + { + m_filter_arena.str[pos++] = '\\'; + ++i; + } + else if(next == 'x') // UTF8 + { + if(i + 1u + 2u >= r.len) + _c4err("\\x requires 2 hex digits"); + uint8_t byteval = {}; + if(!read_hex(r.sub(i + 2u, 2u), &byteval)) + _c4err("failed to read \\x codepoint"); + m_filter_arena.str[pos++] = *(char*)&byteval; + i += 1u + 2u; + } + else if(next == 'u') // UTF16 + { + if(i + 1u + 4u >= r.len) + _c4err("\\u requires 4 hex digits"); + char readbuf[8]; + csubstr codepoint = r.sub(i + 2u, 4u); + uint32_t codepoint_val = {}; + if(!read_hex(codepoint, &codepoint_val)) + _c4err("failed to parse \\u codepoint"); + size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); + C4_ASSERT(numbytes <= 4); + memcpy(m_filter_arena.str + pos, readbuf, numbytes); + pos += numbytes; + i += 1u + 4u; + } + else if(next == 'U') // UTF32 + { + if(i + 1u + 8u >= r.len) + _c4err("\\U requires 8 hex digits"); + char readbuf[8]; + csubstr codepoint = r.sub(i + 2u, 8u); + uint32_t codepoint_val = {}; + if(!read_hex(codepoint, &codepoint_val)) + _c4err("failed to parse \\U codepoint"); + size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); + C4_ASSERT(numbytes <= 4); + memcpy(m_filter_arena.str + pos, readbuf, numbytes); + pos += numbytes; + i += 1u + 8u; + } + // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char + else if(next == '0') + { + m_filter_arena.str[pos++] = '\0'; + ++i; + } + else if(next == 'b') // backspace + { + m_filter_arena.str[pos++] = '\b'; + ++i; + } + else if(next == 'f') // form feed + { + m_filter_arena.str[pos++] = '\f'; + ++i; + } + else if(next == 'a') // bell character + { + m_filter_arena.str[pos++] = '\a'; + ++i; + } + else if(next == 'v') // vertical tab + { + m_filter_arena.str[pos++] = '\v'; + ++i; + } + else if(next == 'e') // escape character + { + m_filter_arena.str[pos++] = '\x1b'; + ++i; + } + else if(next == '_') // unicode non breaking space \u00a0 + { + // https://www.compart.com/en/unicode/U+00a0 + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x60, 0xa0); + ++i; + } + else if(next == 'N') // unicode next line \u0085 + { + // https://www.compart.com/en/unicode/U+0085 + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x7b, 0x85); + ++i; + } + else if(next == 'L') // unicode line separator \u2028 + { + // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x58, 0xa8); + ++i; + } + else if(next == 'P') // unicode paragraph separator \u2029 + { + // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x57, 0xa9); + ++i; + } + _c4dbgfdq("[{}]: backslash...sofar=[{}]~~~{}~~~", i, pos, m_filter_arena.first(pos)); + } + else + { + m_filter_arena.str[pos++] = curr; + } + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + if(pos < r.len || filtered_chars) + { + r = _finish_filter_arena(r, pos); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); + _c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); + + #undef _c4dbgfdq + + return r; +} + + +//----------------------------------------------------------------------------- +bool Parser::_apply_chomp(substr buf, size_t *C4_RESTRICT pos, BlockChomp_e chomp) +{ + substr trimmed = buf.first(*pos).trimr('\n'); + bool added_newline = false; + switch(chomp) + { + case CHOMP_KEEP: + if(trimmed.len == *pos) + { + _c4dbgpf("chomp=KEEP: add missing newline @{}", *pos); + //m_filter_arena.str[(*pos)++] = '\n'; + added_newline = true; + } + break; + case CHOMP_CLIP: + if(trimmed.len == *pos) + { + _c4dbgpf("chomp=CLIP: add missing newline @{}", *pos); + m_filter_arena.str[(*pos)++] = '\n'; + added_newline = true; + } + else + { + _c4dbgpf("chomp=CLIP: include single trailing newline @{}", trimmed.len+1); + *pos = trimmed.len + 1; + } + break; + case CHOMP_STRIP: + _c4dbgpf("chomp=STRIP: strip {}-{}-{} newlines", *pos, trimmed.len, *pos-trimmed.len); + *pos = trimmed.len; + break; + default: + _c4err("unknown chomp style"); + } + return added_newline; +} + + +//----------------------------------------------------------------------------- +csubstr Parser::_filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block" fmt, __VA_ARGS__) + #else + #define _c4dbgfbl(...) + #endif + + _c4dbgfbl(": indentation={} before=[{}]~~~{}~~~", indentation, s.len, s); + + if(chomp != CHOMP_KEEP && s.trim(" \n\r").len == 0u) + { + _c4dbgp("filt_block: empty scalar"); + return s.first(0); + } + + substr r = s; + + switch(style) + { + case BLOCK_LITERAL: + { + _c4dbgp("filt_block: style=literal"); + // trim leading whitespace up to indentation + { + size_t numws = r.first_not_of(' '); + if(numws != npos) + { + if(numws > indentation) + r = r.sub(indentation); + else + r = r.sub(numws); + _c4dbgfbl(": after triml=[{}]~~~{}~~~", r.len, r); + } + else + { + if(chomp != CHOMP_KEEP || r.len == 0) + { + _c4dbgfbl(": all spaces {}, return empty", r.len); + return r.first(0); + } + else + { + r[0] = '\n'; + return r.first(1); + } + } + } + _grow_filter_arena(s.len + 2u); // use s.len! because we may need to add a newline at the end, so the leading indentation will allow space for that newline + size_t pos = 0; // the filtered size + for(size_t i = 0; i < r.len; ++i) + { + const char curr = r.str[i]; + _c4dbgfbl("[{}]='{}' pos={}", i, _c4prc(curr), pos); + if(curr == '\r') + continue; + m_filter_arena.str[pos++] = curr; + if(curr == '\n') + { + _c4dbgfbl("[{}]: found newline", i); + // skip indentation on the next line + csubstr rem = r.sub(i+1); + size_t first = rem.first_not_of(' '); + if(first != npos) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len); + _c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, rem.str[first]); + if(first < indentation) + { + _c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation); + i += first; + } + else + { + _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); + i += indentation; + } + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len); + first = rem.len; + _c4dbgfbl("[{}]: {} spaces to the end", i, first); + if(first) + { + if(first < indentation) + { + _c4dbgfbl("[{}]: skip everything", i); + --pos; + break; + } + else + { + _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); + i += indentation; + } + } + else if(i+1 == r.len) + { + if(chomp == CHOMP_STRIP) + --pos; + break; + } + } + } + } + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= pos); + _c4dbgfbl(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); + bool changed = _apply_chomp(m_filter_arena, &pos, chomp); + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= s.len); + if(pos < r.len || changed) + { + r = _finish_filter_arena(s, pos); // write into s + } + break; + } + case BLOCK_FOLD: + { + _c4dbgp("filt_block: style=fold"); + _grow_filter_arena(r.len + 2); + size_t pos = 0; // the filtered size + bool filtered_chars = false; + bool started = false; + bool is_indented = false; + size_t i = r.first_not_of(' '); + _c4dbgfbl(": first non space at {}", i); + if(i > indentation) + { + is_indented = true; + i = indentation; + } + _c4dbgfbl(": start folding at {}, is_indented={}", i, (int)is_indented); + auto on_change_indentation = [&](size_t numnl_following, size_t last_newl, size_t first_non_whitespace){ + _c4dbgfbl("[{}]: add 1+{} newlines", i, numnl_following); + for(size_t j = 0; j < 1 + numnl_following; ++j) + m_filter_arena.str[pos++] = '\n'; + for(i = last_newl + 1 + indentation; i < first_non_whitespace; ++i) + { + if(r.str[i] == '\r') + continue; + _c4dbgfbl("[{}]: add '{}'", i, _c4prc(r.str[i])); + m_filter_arena.str[pos++] = r.str[i]; + } + --i; + }; + for( ; i < r.len; ++i) + { + const char curr = r.str[i]; + _c4dbgfbl("[{}]='{}'", i, _c4prc(curr)); + if(curr == '\n') + { + filtered_chars = true; + // skip indentation on the next line, and advance over the next non-indented blank lines as well + size_t first_non_whitespace; + size_t numnl_following = (size_t)-1; + while(r[i] == '\n') + { + ++numnl_following; + csubstr rem = r.sub(i+1); + size_t first = rem.first_not_of(' '); + _c4dbgfbl("[{}]: found newline. first={} rem.len={}", i, first, rem.len); + if(first != npos) + { + first_non_whitespace = first + i+1; + while(first_non_whitespace < r.len && r[first_non_whitespace] == '\r') + ++first_non_whitespace; + _RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len); + _c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, _c4prc(rem.str[first])); + if(first < indentation) + { + _c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation); + i += first; + } + else + { + _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); + i += indentation; + if(first > indentation) + { + _c4dbgfbl("[{}]: {} further indented than {}, stop newlining", i, first, indentation); + goto finished_counting_newlines; + } + } + // prepare the next while loop iteration + // by setting i at the next newline after + // an empty line + if(r[first_non_whitespace] == '\n') + i = first_non_whitespace; + else + goto finished_counting_newlines; + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len); + first = rem.len; + first_non_whitespace = first + i+1; + if(first) + { + _c4dbgfbl("[{}]: {} spaces to the end", i, first); + if(first < indentation) + { + _c4dbgfbl("[{}]: skip everything", i); + i += first; + } + else + { + _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); + i += indentation; + if(first > indentation) + { + _c4dbgfbl("[{}]: {} spaces missing. not done yet", i, indentation - first); + goto finished_counting_newlines; + } + } + } + else // if(i+1 == r.len) + { + _c4dbgfbl("[{}]: it's the final newline", i); + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 == r.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, rem.len == 0); + } + goto end_of_scalar; + } + } + end_of_scalar: + // Write all the trailing newlines. Since we're + // at the end no folding is needed, so write every + // newline (add 1). + _c4dbgfbl("[{}]: add {} trailing newlines", i, 1+numnl_following); + for(size_t j = 0; j < 1 + numnl_following; ++j) + m_filter_arena.str[pos++] = '\n'; + break; + finished_counting_newlines: + _c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace); + while(first_non_whitespace < r.len && r[first_non_whitespace] == '\t') + ++first_non_whitespace; + _c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace); + _RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace <= r.len); + size_t last_newl = r.last_of('\n', first_non_whitespace); + size_t this_indentation = first_non_whitespace - last_newl - 1; + _c4dbgfbl("[{}]: #newlines={} firstnonws={} lastnewl={} this_indentation={} vs indentation={}", i, numnl_following, first_non_whitespace, last_newl, this_indentation, indentation); + _RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace >= last_newl + 1); + _RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation >= indentation); + if(!started) + { + _c4dbgfbl("[{}]: #newlines={}. write all leading newlines", i, numnl_following); + for(size_t j = 0; j < 1 + numnl_following; ++j) + m_filter_arena.str[pos++] = '\n'; + if(this_indentation > indentation) + { + is_indented = true; + _c4dbgfbl("[{}]: advance ->{}", i, last_newl + indentation); + i = last_newl + indentation; + } + else + { + i = first_non_whitespace - 1; + _c4dbgfbl("[{}]: advance ->{}", i, first_non_whitespace); + } + } + else if(this_indentation == indentation) + { + _c4dbgfbl("[{}]: same indentation", i); + if(!is_indented) + { + if(numnl_following == 0) + { + _c4dbgfbl("[{}]: fold!", i); + m_filter_arena.str[pos++] = ' '; + } + else + { + _c4dbgfbl("[{}]: add {} newlines", i, 1 + numnl_following); + for(size_t j = 0; j < numnl_following; ++j) + m_filter_arena.str[pos++] = '\n'; + } + i = first_non_whitespace - 1; + _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); + } + else + { + _c4dbgfbl("[{}]: back to ref indentation", i); + is_indented = false; + on_change_indentation(numnl_following, last_newl, first_non_whitespace); + _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); + } + } + else + { + _c4dbgfbl("[{}]: increased indentation.", i); + is_indented = true; + _RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation > indentation); + on_change_indentation(numnl_following, last_newl, first_non_whitespace); + _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); + } + } + else if(curr != '\r') + { + if(curr != '\t') + started = true; + m_filter_arena.str[pos++] = curr; + } + } + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + _c4dbgfbl(": #filteredchars={} after=[{}]~~~{}~~~", (int)s.len - (int)pos, pos, m_filter_arena.first(pos)); + bool changed = _apply_chomp(m_filter_arena, &pos, chomp); + if(pos < r.len || filtered_chars || changed) + { + r = _finish_filter_arena(s, pos); // write into s + } + } + break; + default: + _c4err("unknown block style"); + } + + _c4dbgfbl(": final=[{}]~~~{}~~~", r.len, r); + + #undef _c4dbgfbl + + return r; +} + +//----------------------------------------------------------------------------- +size_t Parser::_count_nlines(csubstr src) +{ + return 1 + src.count('\n'); +} + +//----------------------------------------------------------------------------- +void Parser::_handle_directive(csubstr directive_) +{ + csubstr directive = directive_; + if(directive.begins_with("%TAG")) + { + TagDirective td; + _c4dbgpf("%TAG directive: {}", directive_); + directive = directive.sub(4); + if(!directive.begins_with(' ')) + _c4err("malformed tag directive: {}", directive_); + directive = directive.triml(' '); + size_t pos = directive.find(' '); + if(pos == npos) + _c4err("malformed tag directive: {}", directive_); + td.handle = directive.first(pos); + directive = directive.sub(td.handle.len).triml(' '); + pos = directive.find(' '); + if(pos != npos) + directive = directive.first(pos); + td.prefix = directive; + td.next_node_id = m_tree->size(); + if(m_tree->size() > 0) + { + size_t prev = m_tree->size() - 1; + if(m_tree->is_root(prev) && m_tree->type(prev) != NOTYPE && !m_tree->is_stream(prev)) + ++td.next_node_id; + } + _c4dbgpf("%TAG: handle={} prefix={} next_node={}", td.handle, td.prefix, td.next_node_id); + m_tree->add_tag_directive(td); + } + else if(directive.begins_with("%YAML")) + { + _c4dbgpf("%YAML directive! ignoring...: {}", directive); + } +} + +//----------------------------------------------------------------------------- +void Parser::set_flags(flag_t f, State * s) +{ +#ifdef RYML_DBG + char buf1_[64], buf2_[64]; + csubstr buf1 = _prfl(buf1_, f); + csubstr buf2 = _prfl(buf2_, s->flags); + _c4dbgpf("state[{}]: setting flags to {}: before={}", s-m_stack.begin(), buf1, buf2); +#endif + s->flags = f; +} + +void Parser::add_flags(flag_t on, State * s) +{ +#ifdef RYML_DBG + char buf1_[64], buf2_[64], buf3_[64]; + csubstr buf1 = _prfl(buf1_, on); + csubstr buf2 = _prfl(buf2_, s->flags); + csubstr buf3 = _prfl(buf3_, s->flags|on); + _c4dbgpf("state[{}]: adding flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3); +#endif + s->flags |= on; +} + +void Parser::addrem_flags(flag_t on, flag_t off, State * s) +{ +#ifdef RYML_DBG + char buf1_[64], buf2_[64], buf3_[64], buf4_[64]; + csubstr buf1 = _prfl(buf1_, on); + csubstr buf2 = _prfl(buf2_, off); + csubstr buf3 = _prfl(buf3_, s->flags); + csubstr buf4 = _prfl(buf4_, ((s->flags|on)&(~off))); + _c4dbgpf("state[{}]: adding flags {} / removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3, buf4); +#endif + s->flags |= on; + s->flags &= ~off; +} + +void Parser::rem_flags(flag_t off, State * s) +{ +#ifdef RYML_DBG + char buf1_[64], buf2_[64], buf3_[64]; + csubstr buf1 = _prfl(buf1_, off); + csubstr buf2 = _prfl(buf2_, s->flags); + csubstr buf3 = _prfl(buf3_, s->flags&(~off)); + _c4dbgpf("state[{}]: removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3); +#endif + s->flags &= ~off; +} + +//----------------------------------------------------------------------------- + +csubstr Parser::_prfl(substr buf, flag_t flags) +{ + size_t pos = 0; + bool gotone = false; + + #define _prflag(fl) \ + if((flags & fl) == (fl)) \ + { \ + if(gotone) \ + { \ + if(pos + 1 < buf.len) \ + buf[pos] = '|'; \ + ++pos; \ + } \ + csubstr fltxt = #fl; \ + if(pos + fltxt.len <= buf.len) \ + memcpy(buf.str + pos, fltxt.str, fltxt.len); \ + pos += fltxt.len; \ + gotone = true; \ + } + + _prflag(RTOP); + _prflag(RUNK); + _prflag(RMAP); + _prflag(RSEQ); + _prflag(FLOW); + _prflag(QMRK); + _prflag(RKEY); + _prflag(RVAL); + _prflag(RNXT); + _prflag(SSCL); + _prflag(QSCL); + _prflag(RSET); + _prflag(NDOC); + _prflag(RSEQIMAP); + + #undef _prflag + + RYML_ASSERT(pos <= buf.len); + + return buf.first(pos); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +void Parser::_grow_filter_arena(size_t num_characters_needed) +{ + _c4dbgpf("grow: arena={} numchars={}", m_filter_arena.len, num_characters_needed); + if(num_characters_needed <= m_filter_arena.len) + return; + size_t sz = m_filter_arena.len << 1; + _c4dbgpf("grow: sz={}", sz); + sz = num_characters_needed > sz ? num_characters_needed : sz; + _c4dbgpf("grow: sz={}", sz); + sz = sz < 128u ? 128u : sz; + _c4dbgpf("grow: sz={}", sz); + _RYML_CB_ASSERT(m_stack.m_callbacks, sz >= num_characters_needed); + _resize_filter_arena(sz); +} + +void Parser::_resize_filter_arena(size_t num_characters) +{ + if(num_characters > m_filter_arena.len) + { + _c4dbgpf("resize: sz={}", num_characters); + char *prev = m_filter_arena.str; + if(m_filter_arena.str) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_filter_arena.len > 0); + _RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len); + } + m_filter_arena.str = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, char, num_characters, prev); + m_filter_arena.len = num_characters; + } +} + +substr Parser::_finish_filter_arena(substr dst, size_t pos) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= dst.len); + memcpy(dst.str, m_filter_arena.str, pos); + return dst.first(pos); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +csubstr Parser::location_contents(Location const& loc) const +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, loc.offset < m_buf.len); + return m_buf.sub(loc.offset); +} + +Location Parser::location(ConstNodeRef node) const +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, node.valid()); + return location(*node.tree(), node.id()); +} + +Location Parser::location(Tree const& tree, size_t node) const +{ + // try hard to avoid getting the location from a null string. + Location loc; + if(_location_from_node(tree, node, &loc, 0)) + return loc; + return val_location(m_buf.str); +} + +bool Parser::_location_from_node(Tree const& tree, size_t node, Location *C4_RESTRICT loc, size_t level) const +{ + if(tree.has_key(node)) + { + csubstr k = tree.key(node); + if(C4_LIKELY(k.str != nullptr)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, k.is_sub(m_buf)); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(k)); + *loc = val_location(k.str); + return true; + } + } + + if(tree.has_val(node)) + { + csubstr v = tree.val(node); + if(C4_LIKELY(v.str != nullptr)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, v.is_sub(m_buf)); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(v)); + *loc = val_location(v.str); + return true; + } + } + + if(tree.is_container(node)) + { + if(_location_from_cont(tree, node, loc)) + return true; + } + + if(tree.type(node) != NOTYPE && level == 0) + { + // try the prev sibling + { + const size_t prev = tree.prev_sibling(node); + if(prev != NONE) + { + if(_location_from_node(tree, prev, loc, level+1)) + return true; + } + } + // try the next sibling + { + const size_t next = tree.next_sibling(node); + if(next != NONE) + { + if(_location_from_node(tree, next, loc, level+1)) + return true; + } + } + // try the parent + { + const size_t parent = tree.parent(node); + if(parent != NONE) + { + if(_location_from_node(tree, parent, loc, level+1)) + return true; + } + } + } + + return false; +} + +bool Parser::_location_from_cont(Tree const& tree, size_t node, Location *C4_RESTRICT loc) const +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, tree.is_container(node)); + if(!tree.is_stream(node)) + { + const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container + if(tree.has_children(node)) + { + size_t child = tree.first_child(node); + if(tree.has_key(child)) + { + // when a map starts, the container was set after the key + csubstr k = tree.key(child); + if(k.str && node_start > k.str) + node_start = k.str; + } + } + *loc = val_location(node_start); + return true; + } + else // it's a stream + { + *loc = val_location(m_buf.str); // just return the front of the buffer + } + return true; +} + + +Location Parser::val_location(const char *val) const +{ + if(C4_UNLIKELY(val == nullptr)) + return {m_file, 0, 0, 0}; + + _RYML_CB_CHECK(m_stack.m_callbacks, m_options.locations()); + // NOTE: if any of these checks fails, the parser needs to be + // instantiated with locations enabled. + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_options.locations()); + _RYML_CB_ASSERT(m_stack.m_callbacks, !_locations_dirty()); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets != nullptr); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size > 0); + // NOTE: the pointer needs to belong to the buffer that was used to parse. + csubstr src = m_buf; + _RYML_CB_CHECK(m_stack.m_callbacks, val != nullptr || src.str == nullptr); + _RYML_CB_CHECK(m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr)); + // ok. search the first stored newline after the given ptr + using lineptr_type = size_t const* C4_RESTRICT; + lineptr_type lineptr = nullptr; + size_t offset = (size_t)(val - src.begin()); + if(m_newline_offsets_size < 30) // TODO magic number + { + // just do a linear search if the size is small. + for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr) + { + if(*curr > offset) + { + lineptr = curr; + break; + } + } + } + else + { + // do a bisection search if the size is not small. + // + // We could use std::lower_bound but this is simple enough and + // spares the include of <algorithm>. + size_t count = m_newline_offsets_size; + size_t step; + lineptr_type it; + lineptr = m_newline_offsets; + while(count) + { + step = count >> 1; + it = lineptr + step; + if(*it < offset) + { + lineptr = ++it; + count -= step + 1; + } + else + { + count = step; + } + } + } + _RYML_CB_ASSERT(m_stack.m_callbacks, lineptr >= m_newline_offsets); + _RYML_CB_ASSERT(m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size); + _RYML_CB_ASSERT(m_stack.m_callbacks, *lineptr > offset); + Location loc; + loc.name = m_file; + loc.offset = offset; + loc.line = (size_t)(lineptr - m_newline_offsets); + if(lineptr > m_newline_offsets) + loc.col = (offset - *(lineptr-1) - 1u); + else + loc.col = offset; + return loc; +} + +void Parser::_prepare_locations() +{ + m_newline_offsets_buf = m_buf; + size_t numnewlines = 1u + m_buf.count('\n'); + _resize_locations(numnewlines); + m_newline_offsets_size = 0; + for(size_t i = 0; i < m_buf.len; i++) + if(m_buf[i] == '\n') + m_newline_offsets[m_newline_offsets_size++] = i; + m_newline_offsets[m_newline_offsets_size++] = m_buf.len; + _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size == numnewlines); +} + +void Parser::_resize_locations(size_t numnewlines) +{ + if(numnewlines > m_newline_offsets_capacity) + { + if(m_newline_offsets) + _RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); + m_newline_offsets = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets); + m_newline_offsets_capacity = numnewlines; + } +} + +bool Parser::_locations_dirty() const +{ + return !m_newline_offsets_size; +} + +} // namespace yml +} // namespace c4 + + +#if defined(_MSC_VER) +# pragma warning(pop) +#elif defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif diff --git a/thirdparty/ryml/src/c4/yml/parse.hpp b/thirdparty/ryml/src/c4/yml/parse.hpp new file mode 100644 index 000000000..659edf7e0 --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/parse.hpp @@ -0,0 +1,706 @@ +#ifndef _C4_YML_PARSE_HPP_ +#define _C4_YML_PARSE_HPP_ + +#ifndef _C4_YML_TREE_HPP_ +#include "c4/yml/tree.hpp" +#endif + +#ifndef _C4_YML_NODE_HPP_ +#include "c4/yml/node.hpp" +#endif + +#ifndef _C4_YML_DETAIL_STACK_HPP_ +#include "c4/yml/detail/stack.hpp" +#endif + +#include <stdarg.h> + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/) +#endif + +namespace c4 { +namespace yml { + +struct RYML_EXPORT ParserOptions +{ +private: + + typedef enum : uint32_t { + LOCATIONS = (1 << 0), + DEFAULTS = 0, + } Flags_e; + + uint32_t flags = DEFAULTS; +public: + ParserOptions() = default; + + /** @name source location tracking */ + /** @{ */ + + /** enable/disable source location tracking */ + ParserOptions& locations(bool enabled) + { + if(enabled) + flags |= LOCATIONS; + else + flags &= ~LOCATIONS; + return *this; + } + bool locations() const { return (flags & LOCATIONS) != 0u; } + + /** @} */ +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +class RYML_EXPORT Parser +{ +public: + + /** @name construction and assignment */ + /** @{ */ + + Parser(Callbacks const& cb, ParserOptions opts={}); + Parser(ParserOptions opts={}) : Parser(get_callbacks(), opts) {} + ~Parser(); + + Parser(Parser &&); + Parser(Parser const&); + Parser& operator=(Parser &&); + Parser& operator=(Parser const&); + + /** @} */ + +public: + + /** @name modifiers */ + /** @{ */ + + /** Reserve a certain capacity for the parsing stack. + * This should be larger than the expected depth of the parsed + * YAML tree. + * + * The parsing stack is the only (potential) heap memory used by + * the parser. + * + * If the requested capacity is below the default + * stack size of 16, the memory is used directly in the parser + * object; otherwise it will be allocated from the heap. + * + * @note this reserves memory only for the parser itself; all the + * allocations for the parsed tree will go through the tree's + * allocator. + * + * @note the tree and the arena can (and should) also be reserved. */ + void reserve_stack(size_t capacity) + { + m_stack.reserve(capacity); + } + + /** Reserve a certain capacity for the array used to track node + * locations in the source buffer. */ + void reserve_locations(size_t num_source_lines) + { + _resize_locations(num_source_lines); + } + + /** Reserve a certain capacity for the character arena used to + * filter scalars. */ + void reserve_filter_arena(size_t num_characters) + { + _resize_filter_arena(num_characters); + } + + /** @} */ + +public: + + /** @name getters and modifiers */ + /** @{ */ + + /** Get the current callbacks in the parser. */ + Callbacks callbacks() const { return m_stack.m_callbacks; } + + /** Get the name of the latest file parsed by this object. */ + csubstr filename() const { return m_file; } + + /** Get the latest YAML buffer parsed by this object. */ + csubstr source() const { return m_buf; } + + size_t stack_capacity() const { return m_stack.capacity(); } + size_t locations_capacity() const { return m_newline_offsets_capacity; } + size_t filter_arena_capacity() const { return m_filter_arena.len; } + + ParserOptions const& options() const { return m_options; } + + /** @} */ + +public: + + /** @name parse_in_place */ + /** @{ */ + + /** Create a new tree and parse into its root. + * The tree is created with the callbacks currently in the parser. */ + Tree parse_in_place(csubstr filename, substr src) + { + Tree t(callbacks()); + t.reserve(_estimate_capacity(src)); + this->parse_in_place(filename, src, &t, t.root_id()); + return t; + } + + /** Parse into an existing tree, starting at its root node. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_place(csubstr filename, substr src, Tree *t) + { + this->parse_in_place(filename, src, t, t->root_id()); + } + + /** Parse into an existing node. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_place(csubstr filename, substr src, Tree *t, size_t node_id); + // ^^^^^^^^^^^^^ this is the workhorse overload; everything else is syntactic candy + + /** Parse into an existing node. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_place(csubstr filename, substr src, NodeRef node) + { + this->parse_in_place(filename, src, node.tree(), node.id()); + } + + RYML_DEPRECATED("use parse_in_place() instead") Tree parse(csubstr filename, substr src) { return parse_in_place(filename, src); } + RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, Tree *t) { parse_in_place(filename, src, t); } + RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, Tree *t, size_t node_id) { parse_in_place(filename, src, t, node_id); } + RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, NodeRef node) { parse_in_place(filename, src, node); } + + /** @} */ + +public: + + /** @name parse_in_arena: copy the YAML source buffer to the + * tree's arena, then parse the copy in situ + * + * @note overloads receiving a substr YAML buffer are intentionally + * left undefined, such that calling parse_in_arena() with a substr + * will cause a linker error. This is to prevent an accidental + * copy of the source buffer to the tree's arena, because substr + * is implicitly convertible to csubstr. If you really intend to parse + * a mutable buffer in the tree's arena, convert it first to immutable + * by assigning the substr to a csubstr prior to calling parse_in_arena(). + * This is not needed for parse_in_place() because csubstr is not + * implicitly convertible to substr. */ + /** @{ */ + + // READ THE NOTE ABOVE! + #define RYML_DONT_PARSE_SUBSTR_IN_ARENA "Do not pass a (mutable) substr to parse_in_arena(); if you have a substr, it should be parsed in place. Consider using parse_in_place() instead, or convert the buffer to csubstr prior to calling. This function is deliberately left undefined and will cause a linker error." + RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr csrc); + RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t); + RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t, size_t node_id); + RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, NodeRef node); + + /** Create a new tree and parse into its root. + * The immutable YAML source is first copied to the tree's arena, + * and parsed from there. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + Tree parse_in_arena(csubstr filename, csubstr csrc) + { + Tree t(callbacks()); + substr src = t.copy_to_arena(csrc); + t.reserve(_estimate_capacity(csrc)); + this->parse_in_place(filename, src, &t, t.root_id()); + return t; + } + + /** Parse into an existing tree, starting at its root node. + * The immutable YAML source is first copied to the tree's arena, + * and parsed from there. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_arena(csubstr filename, csubstr csrc, Tree *t) + { + substr src = t->copy_to_arena(csrc); + this->parse_in_place(filename, src, t, t->root_id()); + } + + /** Parse into a specific node in an existing tree. + * The immutable YAML source is first copied to the tree's arena, + * and parsed from there. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_arena(csubstr filename, csubstr csrc, Tree *t, size_t node_id) + { + substr src = t->copy_to_arena(csrc); + this->parse_in_place(filename, src, t, node_id); + } + + /** Parse into a specific node in an existing tree. + * The immutable YAML source is first copied to the tree's arena, + * and parsed from there. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_arena(csubstr filename, csubstr csrc, NodeRef node) + { + substr src = node.tree()->copy_to_arena(csrc); + this->parse_in_place(filename, src, node.tree(), node.id()); + } + + RYML_DEPRECATED("use parse_in_arena() instead") Tree parse(csubstr filename, csubstr csrc) { return parse_in_arena(filename, csrc); } + RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, Tree *t) { parse_in_arena(filename, csrc, t); } + RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, Tree *t, size_t node_id) { parse_in_arena(filename, csrc, t, node_id); } + RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, NodeRef node) { parse_in_arena(filename, csrc, node); } + + /** @} */ + +public: + + /** @name locations */ + /** @{ */ + + /** Get the location of a node of the last tree to be parsed by this parser. */ + Location location(Tree const& tree, size_t node_id) const; + /** Get the location of a node of the last tree to be parsed by this parser. */ + Location location(ConstNodeRef node) const; + /** Get the string starting at a particular location, to the end + * of the parsed source buffer. */ + csubstr location_contents(Location const& loc) const; + /** Given a pointer to a buffer position, get the location. @p val + * must be pointing to somewhere in the source buffer that was + * last parsed by this object. */ + Location val_location(const char *val) const; + + /** @} */ + +private: + + typedef enum { + BLOCK_LITERAL, //!< keep newlines (|) + BLOCK_FOLD //!< replace newline with single space (>) + } BlockStyle_e; + + typedef enum { + CHOMP_CLIP, //!< single newline at end (default) + CHOMP_STRIP, //!< no newline at end (-) + CHOMP_KEEP //!< all newlines from end (+) + } BlockChomp_e; + +private: + + using flag_t = int; + + static size_t _estimate_capacity(csubstr src) { size_t c = _count_nlines(src); c = c >= 16 ? c : 16; return c; } + + void _reset(); + + bool _finished_file() const; + bool _finished_line() const; + + csubstr _peek_next_line(size_t pos=npos) const; + bool _advance_to_peeked(); + void _scan_line(); + + csubstr _slurp_doc_scalar(); + + /** + * @param [out] quoted + * Will only be written to if this method returns true. + * Will be set to true if the scanned scalar was quoted, by '', "", > or |. + */ + bool _scan_scalar_seq_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + bool _scan_scalar_map_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + bool _scan_scalar_seq_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + bool _scan_scalar_map_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + bool _scan_scalar_unk(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + + csubstr _scan_comment(); + csubstr _scan_squot_scalar(); + csubstr _scan_dquot_scalar(); + csubstr _scan_block(); + substr _scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation); + substr _scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line); + substr _scan_complex_key(csubstr currscalar, csubstr peeked_line); + csubstr _scan_to_next_nonempty_line(size_t indentation); + csubstr _extend_scanned_scalar(csubstr currscalar); + + csubstr _filter_squot_scalar(const substr s); + csubstr _filter_dquot_scalar(substr s); + csubstr _filter_plain_scalar(substr s, size_t indentation); + csubstr _filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation); + template<bool backslash_is_escape, bool keep_trailing_whitespace> + bool _filter_nl(substr scalar, size_t *C4_RESTRICT pos, size_t *C4_RESTRICT filter_arena_pos, size_t indentation); + template<bool keep_trailing_whitespace> + void _filter_ws(substr scalar, size_t *C4_RESTRICT pos, size_t *C4_RESTRICT filter_arena_pos); + bool _apply_chomp(substr buf, size_t *C4_RESTRICT pos, BlockChomp_e chomp); + + void _handle_finished_file(); + void _handle_line(); + + bool _handle_indentation(); + + bool _handle_unk(); + bool _handle_map_flow(); + bool _handle_map_blck(); + bool _handle_seq_flow(); + bool _handle_seq_blck(); + bool _handle_top(); + bool _handle_types(); + bool _handle_key_anchors_and_refs(); + bool _handle_val_anchors_and_refs(); + void _move_val_tag_to_key_tag(); + void _move_key_tag_to_val_tag(); + void _move_key_tag2_to_key_tag(); + void _move_val_anchor_to_key_anchor(); + void _move_key_anchor_to_val_anchor(); + + void _push_level(bool explicit_flow_chars = false); + void _pop_level(); + + void _start_unk(bool as_child=true); + + void _start_map(bool as_child=true); + void _start_map_unk(bool as_child); + void _stop_map(); + + void _start_seq(bool as_child=true); + void _stop_seq(); + + void _start_seqimap(); + void _stop_seqimap(); + + void _start_doc(bool as_child=true); + void _stop_doc(); + void _start_new_doc(csubstr rem); + void _end_stream(); + + NodeData* _append_val(csubstr val, flag_t quoted=false); + NodeData* _append_key_val(csubstr val, flag_t val_quoted=false); + bool _rval_dash_start_or_continue_seq(); + + void _store_scalar(csubstr s, flag_t is_quoted); + csubstr _consume_scalar(); + void _move_scalar_from_top(); + + inline NodeData* _append_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_val({nullptr, size_t(0)}); } + inline NodeData* _append_key_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_key_val({nullptr, size_t(0)}); } + inline void _store_scalar_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); _store_scalar({nullptr, size_t(0)}, false); } + + void _set_indentation(size_t behind); + void _save_indentation(size_t behind=0); + bool _maybe_set_indentation_from_anchor_or_tag(); + + void _write_key_anchor(size_t node_id); + void _write_val_anchor(size_t node_id); + + void _handle_directive(csubstr directive); + + void _skipchars(char c); + template<size_t N> + void _skipchars(const char (&chars)[N]); + +private: + + static size_t _count_nlines(csubstr src); + +private: + + typedef enum : flag_t { + RTOP = 0x01 << 0, ///< reading at top level + RUNK = 0x01 << 1, ///< reading an unknown: must determine whether scalar, map or seq + RMAP = 0x01 << 2, ///< reading a map + RSEQ = 0x01 << 3, ///< reading a seq + FLOW = 0x01 << 4, ///< reading is inside explicit flow chars: [] or {} + QMRK = 0x01 << 5, ///< reading an explicit key (`? key`) + RKEY = 0x01 << 6, ///< reading a scalar as key + RVAL = 0x01 << 7, ///< reading a scalar as val + RNXT = 0x01 << 8, ///< read next val or keyval + SSCL = 0x01 << 9, ///< there's a stored scalar + QSCL = 0x01 << 10, ///< stored scalar was quoted + RSET = 0x01 << 11, ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html + NDOC = 0x01 << 12, ///< no document mode. a document has ended and another has not started yet. + //! reading an implicit map nested in an explicit seq. + //! eg, {key: [key2: value2, key3: value3]} + //! is parsed as {key: [{key2: value2}, {key3: value3}]} + RSEQIMAP = 0x01 << 13, + } State_e; + + struct LineContents + { + csubstr full; ///< the full line, including newlines on the right + csubstr stripped; ///< the stripped line, excluding newlines on the right + csubstr rem; ///< the stripped line remainder; initially starts at the first non-space character + size_t indentation; ///< the number of spaces on the beginning of the line + + LineContents() : full(), stripped(), rem(), indentation() {} + + void reset_with_next_line(csubstr buf, size_t pos); + + void reset(csubstr full_, csubstr stripped_) + { + full = full_; + stripped = stripped_; + rem = stripped_; + // find the first column where the character is not a space + indentation = full.first_not_of(' '); + } + + size_t current_col() const + { + return current_col(rem); + } + + size_t current_col(csubstr s) const + { + RYML_ASSERT(s.str >= full.str); + RYML_ASSERT(full.is_super(s)); + size_t col = static_cast<size_t>(s.str - full.str); + return col; + } + }; + + struct State + { + flag_t flags; + size_t level; + size_t node_id; // don't hold a pointer to the node as it will be relocated during tree resizes + csubstr scalar; + size_t scalar_col; // the column where the scalar (or its quotes) begin + + Location pos; + LineContents line_contents; + size_t indref; + + State() : flags(), level(), node_id(), scalar(), scalar_col(), pos(), line_contents(), indref() {} + + void reset(const char *file, size_t node_id_) + { + flags = RUNK|RTOP; + level = 0; + pos.name = to_csubstr(file); + pos.offset = 0; + pos.line = 1; + pos.col = 1; + node_id = node_id_; + scalar_col = 0; + scalar.clear(); + indref = 0; + } + }; + + void _line_progressed(size_t ahead); + void _line_ended(); + void _line_ended_undo(); + + void _prepare_pop() + { + RYML_ASSERT(m_stack.size() > 1); + State const& curr = m_stack.top(); + State & next = m_stack.top(1); + next.pos = curr.pos; + next.line_contents = curr.line_contents; + next.scalar = curr.scalar; + } + + inline bool _at_line_begin() const + { + return m_state->line_contents.rem.begin() == m_state->line_contents.full.begin(); + } + inline bool _at_line_end() const + { + csubstr r = m_state->line_contents.rem; + return r.empty() || r.begins_with(' ', r.len); + } + inline bool _token_is_from_this_line(csubstr token) const + { + return token.is_sub(m_state->line_contents.full); + } + + inline NodeData * node(State const* s) const { return m_tree->get(s->node_id); } + inline NodeData * node(State const& s) const { return m_tree->get(s .node_id); } + inline NodeData * node(size_t node_id) const { return m_tree->get( node_id); } + + inline bool has_all(flag_t f) const { return (m_state->flags & f) == f; } + inline bool has_any(flag_t f) const { return (m_state->flags & f) != 0; } + inline bool has_none(flag_t f) const { return (m_state->flags & f) == 0; } + + static inline bool has_all(flag_t f, State const* s) { return (s->flags & f) == f; } + static inline bool has_any(flag_t f, State const* s) { return (s->flags & f) != 0; } + static inline bool has_none(flag_t f, State const* s) { return (s->flags & f) == 0; } + + inline void set_flags(flag_t f) { set_flags(f, m_state); } + inline void add_flags(flag_t on) { add_flags(on, m_state); } + inline void addrem_flags(flag_t on, flag_t off) { addrem_flags(on, off, m_state); } + inline void rem_flags(flag_t off) { rem_flags(off, m_state); } + + void set_flags(flag_t f, State * s); + void add_flags(flag_t on, State * s); + void addrem_flags(flag_t on, flag_t off, State * s); + void rem_flags(flag_t off, State * s); + + void _resize_filter_arena(size_t num_characters); + void _grow_filter_arena(size_t num_characters); + substr _finish_filter_arena(substr dst, size_t pos); + + void _prepare_locations(); + void _resize_locations(size_t sz); + bool _locations_dirty() const; + + bool _location_from_cont(Tree const& tree, size_t node, Location *C4_RESTRICT loc) const; + bool _location_from_node(Tree const& tree, size_t node, Location *C4_RESTRICT loc, size_t level) const; + +private: + + void _free(); + void _clr(); + void _cp(Parser const* that); + void _mv(Parser *that); + +#ifdef RYML_DBG + template<class ...Args> void _dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const; +#endif + template<class ...Args> void _err(csubstr fmt, Args const& C4_RESTRICT ...args) const; + template<class DumpFn> void _fmt_msg(DumpFn &&dumpfn) const; + static csubstr _prfl(substr buf, flag_t v); + +private: + + ParserOptions m_options; + + csubstr m_file; + substr m_buf; + + size_t m_root_id; + Tree * m_tree; + + detail::stack<State> m_stack; + State * m_state; + + size_t m_key_tag_indentation; + size_t m_key_tag2_indentation; + csubstr m_key_tag; + csubstr m_key_tag2; + size_t m_val_tag_indentation; + csubstr m_val_tag; + + bool m_key_anchor_was_before; + size_t m_key_anchor_indentation; + csubstr m_key_anchor; + size_t m_val_anchor_indentation; + csubstr m_val_anchor; + + substr m_filter_arena; + + size_t *m_newline_offsets; + size_t m_newline_offsets_size; + size_t m_newline_offsets_capacity; + csubstr m_newline_offsets_buf; +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** @name parse_in_place + * + * @desc parse a mutable YAML source buffer. + * + * @note These freestanding functions use a temporary parser object, + * and are convenience functions to easily parse YAML without the need + * to instantiate a separate parser. Note that some properties + * (notably node locations in the original source code) are only + * available through the parser object after it has parsed the + * code. If you need access to any of these properties, use + * Parser::parse_in_place() */ +/** @{ */ + +inline Tree parse_in_place( substr yaml ) { Parser np; return np.parse_in_place({} , yaml); } //!< parse in-situ a modifiable YAML source buffer. +inline Tree parse_in_place(csubstr filename, substr yaml ) { Parser np; return np.parse_in_place(filename, yaml); } //!< parse in-situ a modifiable YAML source buffer, providing a filename for error messages. +inline void parse_in_place( substr yaml, Tree *t ) { Parser np; np.parse_in_place({} , yaml, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer +inline void parse_in_place(csubstr filename, substr yaml, Tree *t ) { Parser np; np.parse_in_place(filename, yaml, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. +inline void parse_in_place( substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place({} , yaml, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer +inline void parse_in_place(csubstr filename, substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. +inline void parse_in_place( substr yaml, NodeRef node ) { Parser np; np.parse_in_place({} , yaml, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer +inline void parse_in_place(csubstr filename, substr yaml, NodeRef node ) { Parser np; np.parse_in_place(filename, yaml, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. + +RYML_DEPRECATED("use parse_in_place() instead") inline Tree parse( substr yaml ) { Parser np; return np.parse_in_place({} , yaml); } +RYML_DEPRECATED("use parse_in_place() instead") inline Tree parse(csubstr filename, substr yaml ) { Parser np; return np.parse_in_place(filename, yaml); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, Tree *t ) { Parser np; np.parse_in_place({} , yaml, t); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, Tree *t ) { Parser np; np.parse_in_place(filename, yaml, t); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place({} , yaml, t, node_id); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place(filename, yaml, t, node_id); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, NodeRef node ) { Parser np; np.parse_in_place({} , yaml, node); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, NodeRef node ) { Parser np; np.parse_in_place(filename, yaml, node); } + +/** @} */ + + +//----------------------------------------------------------------------------- + +/** @name parse_in_arena + * @desc parse a read-only YAML source buffer, copying it first to the tree's arena. + * + * @note These freestanding functions use a temporary parser object, + * and are convenience functions to easily parse YAML without the need + * to instantiate a separate parser. Note that some properties + * (notably node locations in the original source code) are only + * available through the parser object after it has parsed the + * code. If you need access to any of these properties, use + * Parser::parse_in_arena(). + * + * @note overloads receiving a substr YAML buffer are intentionally + * left undefined, such that calling parse_in_arena() with a substr + * will cause a linker error. This is to prevent an accidental + * copy of the source buffer to the tree's arena, because substr + * is implicitly convertible to csubstr. If you really intend to parse + * a mutable buffer in the tree's arena, convert it first to immutable + * by assigning the substr to a csubstr prior to calling parse_in_arena(). + * This is not needed for parse_in_place() because csubstr is not + * implicitly convertible to substr. */ +/** @{ */ + +/* READ THE NOTE ABOVE! */ +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena( substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t, size_t node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t, size_t node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, NodeRef node ); + +inline Tree parse_in_arena( csubstr yaml ) { Parser np; return np.parse_in_arena({} , yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena. +inline Tree parse_in_arena(csubstr filename, csubstr yaml ) { Parser np; return np.parse_in_arena(filename, yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +inline void parse_in_arena( csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena({} , yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +inline void parse_in_arena(csubstr filename, csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena(filename, yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +inline void parse_in_arena( csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena({} , yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +inline void parse_in_arena(csubstr filename, csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +inline void parse_in_arena( csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena({} , yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +inline void parse_in_arena(csubstr filename, csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena(filename, yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. + +RYML_DEPRECATED("use parse_in_arena() instead") inline Tree parse( csubstr yaml ) { Parser np; return np.parse_in_arena({} , yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena. +RYML_DEPRECATED("use parse_in_arena() instead") inline Tree parse(csubstr filename, csubstr yaml ) { Parser np; return np.parse_in_arena(filename, yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena({} , yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena(filename, yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena({} , yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena({} , yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena(filename, yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. + +/** @} */ + +} // namespace yml +} // namespace c4 + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + +#endif /* _C4_YML_PARSE_HPP_ */ diff --git a/thirdparty/ryml/src/c4/yml/preprocess.cpp b/thirdparty/ryml/src/c4/yml/preprocess.cpp new file mode 100644 index 000000000..2e92dce14 --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/preprocess.cpp @@ -0,0 +1,110 @@ +#include "c4/yml/preprocess.hpp" +#include "c4/yml/detail/parser_dbg.hpp" + +/** @file preprocess.hpp Functions for preprocessing YAML prior to parsing. */ + +namespace c4 { +namespace yml { + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace { +C4_ALWAYS_INLINE bool _is_idchar(char c) +{ + return (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') + || (c == '_' || c == '-' || c == '~' || c == '$'); +} + +typedef enum { kReadPending = 0, kKeyPending = 1, kValPending = 2 } _ppstate; +C4_ALWAYS_INLINE _ppstate _next(_ppstate s) +{ + int n = (int)s + 1; + return (_ppstate)(n <= (int)kValPending ? n : 0); +} +} // empty namespace + + +//----------------------------------------------------------------------------- + +size_t preprocess_rxmap(csubstr s, substr buf) +{ + detail::_SubstrWriter writer(buf); + _ppstate state = kReadPending; + size_t last = 0; + + if(s.begins_with('{')) + { + RYML_CHECK(s.ends_with('}')); + s = s.offs(1, 1); + } + + writer.append('{'); + + for(size_t i = 0; i < s.len; ++i) + { + const char curr = s[i]; + const char next = i+1 < s.len ? s[i+1] : '\0'; + + if(curr == '\'' || curr == '"') + { + csubstr ss = s.sub(i).pair_range_esc(curr, '\\'); + i += static_cast<size_t>(ss.end() - (s.str + i)); + state = _next(state); + } + else if(state == kReadPending && _is_idchar(curr)) + { + state = _next(state); + } + + switch(state) + { + case kKeyPending: + { + if(curr == ':' && next == ' ') + { + state = _next(state); + } + else if(curr == ',' && next == ' ') + { + writer.append(s.range(last, i)); + writer.append(": 1, "); + last = i + 2; + } + break; + } + case kValPending: + { + if(curr == '[' || curr == '{' || curr == '(') + { + csubstr ss = s.sub(i).pair_range_nested(curr, '\\'); + i += static_cast<size_t>(ss.end() - (s.str + i)); + state = _next(state); + } + else if(curr == ',' && next == ' ') + { + state = _next(state); + } + break; + } + default: + // nothing to do + break; + } + } + + writer.append(s.sub(last)); + if(state == kKeyPending) + writer.append(": 1"); + writer.append('}'); + + return writer.pos; +} + + +} // namespace yml +} // namespace c4 diff --git a/thirdparty/ryml/src/c4/yml/preprocess.hpp b/thirdparty/ryml/src/c4/yml/preprocess.hpp new file mode 100644 index 000000000..def066389 --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/preprocess.hpp @@ -0,0 +1,99 @@ +#ifndef _C4_YML_PREPROCESS_HPP_ +#define _C4_YML_PREPROCESS_HPP_ + +/** @file preprocess.hpp Functions for preprocessing YAML prior to parsing. */ + +/** @defgroup Preprocessors Preprocessor functions + * + * These are the existing preprocessors: + * + * @code{.cpp} + * size_t preprocess_json(csubstr json, substr buf) + * size_t preprocess_rxmap(csubstr json, substr buf) + * @endcode + */ + +#ifndef _C4_YML_COMMON_HPP_ +#include "./common.hpp" +#endif +#include <c4/substr.hpp> + + +namespace c4 { +namespace yml { + +namespace detail { +using Preprocessor = size_t(csubstr, substr); +template<Preprocessor PP, class CharContainer> +substr preprocess_into_container(csubstr input, CharContainer *out) +{ + // try to write once. the preprocessor will stop writing at the end of + // the container, but will process all the input to determine the + // required container size. + size_t sz = PP(input, to_substr(*out)); + // if the container size is not enough, resize, and run again in the + // resized container + if(sz > out->size()) + { + out->resize(sz); + sz = PP(input, to_substr(*out)); + } + return to_substr(*out).first(sz); +} +} // namespace detail + + +//----------------------------------------------------------------------------- + +/** @name preprocess_rxmap + * Convert flow-type relaxed maps (with implicit bools) into strict YAML + * flow map. + * + * @code{.yaml} + * {a, b, c, d: [e, f], g: {a, b}} + * # is converted into this: + * {a: 1, b: 1, c: 1, d: [e, f], g: {a, b}} + * @endcode + + * @note this is NOT recursive - conversion happens only in the top-level map + * @param rxmap A relaxed map + * @param buf output buffer + * @param out output container + */ + +//@{ + +/** Write into a given output buffer. This function is safe to call with + * empty or small buffers; it won't write beyond the end of the buffer. + * + * @return the number of characters required for output + */ +RYML_EXPORT size_t preprocess_rxmap(csubstr rxmap, substr buf); + + +/** Write into an existing container. It is resized to contained the output. + * @return a substr of the container + * @overload preprocess_rxmap */ +template<class CharContainer> +substr preprocess_rxmap(csubstr rxmap, CharContainer *out) +{ + return detail::preprocess_into_container<preprocess_rxmap>(rxmap, out); +} + + +/** Create a container with the result. + * @overload preprocess_rxmap */ +template<class CharContainer> +CharContainer preprocess_rxmap(csubstr rxmap) +{ + CharContainer out; + preprocess_rxmap(rxmap, &out); + return out; +} + +//@} + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_PREPROCESS_HPP_ */ diff --git a/thirdparty/ryml/src/c4/yml/std/map.hpp b/thirdparty/ryml/src/c4/yml/std/map.hpp new file mode 100644 index 000000000..fc48dc5e6 --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/std/map.hpp @@ -0,0 +1,45 @@ +#ifndef _C4_YML_STD_MAP_HPP_ +#define _C4_YML_STD_MAP_HPP_ + +/** @file map.hpp write/read std::map to/from a YAML tree. */ + +#include "c4/yml/node.hpp" +#include <map> + +namespace c4 { +namespace yml { + +// std::map requires child nodes in the data +// tree hierarchy (a MAP node in ryml parlance). +// So it should be serialized via write()/read(). + +template<class K, class V, class Less, class Alloc> +void write(c4::yml::NodeRef *n, std::map<K, V, Less, Alloc> const& m) +{ + *n |= c4::yml::MAP; + for(auto const& C4_RESTRICT p : m) + { + auto ch = n->append_child(); + ch << c4::yml::key(p.first); + ch << p.second; + } +} + +template<class K, class V, class Less, class Alloc> +bool read(c4::yml::ConstNodeRef const& n, std::map<K, V, Less, Alloc> * m) +{ + K k{}; + V v{}; + for(auto const& C4_RESTRICT ch : n) + { + ch >> c4::yml::key(k); + ch >> v; + m->emplace(std::make_pair(std::move(k), std::move(v))); + } + return true; +} + +} // namespace yml +} // namespace c4 + +#endif // _C4_YML_STD_MAP_HPP_ diff --git a/thirdparty/ryml/src/c4/yml/std/std.hpp b/thirdparty/ryml/src/c4/yml/std/std.hpp new file mode 100644 index 000000000..08e80d155 --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/std/std.hpp @@ -0,0 +1,8 @@ +#ifndef _C4_YML_STD_STD_HPP_ +#define _C4_YML_STD_STD_HPP_ + +#include "c4/yml/std/string.hpp" +#include "c4/yml/std/vector.hpp" +#include "c4/yml/std/map.hpp" + +#endif // _C4_YML_STD_STD_HPP_ diff --git a/thirdparty/ryml/src/c4/yml/std/string.hpp b/thirdparty/ryml/src/c4/yml/std/string.hpp new file mode 100644 index 000000000..e3318f91c --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/std/string.hpp @@ -0,0 +1,9 @@ +#ifndef C4_YML_STD_STRING_HPP_ +#define C4_YML_STD_STRING_HPP_ + +/** @file string.hpp substring conversions for/from std::string */ + +// everything we need is implemented here: +#include <c4/std/string.hpp> + +#endif // C4_YML_STD_STRING_HPP_ diff --git a/thirdparty/ryml/src/c4/yml/std/vector.hpp b/thirdparty/ryml/src/c4/yml/std/vector.hpp new file mode 100644 index 000000000..1b6a4610a --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/std/vector.hpp @@ -0,0 +1,53 @@ +#ifndef _C4_YML_STD_VECTOR_HPP_ +#define _C4_YML_STD_VECTOR_HPP_ + +#include "c4/yml/node.hpp" +#include <c4/std/vector.hpp> +#include <vector> + +namespace c4 { +namespace yml { + +// vector is a sequence-like type, and it requires child nodes +// in the data tree hierarchy (a SEQ node in ryml parlance). +// So it should be serialized via write()/read(). + + +template<class V, class Alloc> +void write(c4::yml::NodeRef *n, std::vector<V, Alloc> const& vec) +{ + *n |= c4::yml::SEQ; + for(auto const& v : vec) + n->append_child() << v; +} + +template<class V, class Alloc> +bool read(c4::yml::ConstNodeRef const& n, std::vector<V, Alloc> *vec) +{ + vec->resize(n.num_children()); + size_t pos = 0; + for(auto const ch : n) + ch >> (*vec)[pos++]; + return true; +} + +/** specialization: std::vector<bool> uses std::vector<bool>::reference as + * the return value of its operator[]. */ +template<class Alloc> +bool read(c4::yml::ConstNodeRef const& n, std::vector<bool, Alloc> *vec) +{ + vec->resize(n.num_children()); + size_t pos = 0; + bool tmp; + for(auto const ch : n) + { + ch >> tmp; + (*vec)[pos++] = tmp; + } + return true; +} + +} // namespace yml +} // namespace c4 + +#endif // _C4_YML_STD_VECTOR_HPP_ diff --git a/thirdparty/ryml/src/c4/yml/tree.cpp b/thirdparty/ryml/src/c4/yml/tree.cpp new file mode 100644 index 000000000..b16ff8f5d --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/tree.cpp @@ -0,0 +1,2183 @@ +#include "c4/yml/tree.hpp" +#include "c4/yml/detail/parser_dbg.hpp" +#include "c4/yml/node.hpp" +#include "c4/yml/detail/stack.hpp" + + +C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wtype-limits") +C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4296/*expression is always 'boolean_value'*/) + +namespace c4 { +namespace yml { + + +csubstr normalize_tag(csubstr tag) +{ + YamlTag_e t = to_tag(tag); + if(t != TAG_NONE) + return from_tag(t); + if(tag.begins_with("!<")) + tag = tag.sub(1); + if(tag.begins_with("<!")) + return tag; + return tag; +} + +csubstr normalize_tag_long(csubstr tag) +{ + YamlTag_e t = to_tag(tag); + if(t != TAG_NONE) + return from_tag_long(t); + if(tag.begins_with("!<")) + tag = tag.sub(1); + if(tag.begins_with("<!")) + return tag; + return tag; +} + +YamlTag_e to_tag(csubstr tag) +{ + if(tag.begins_with("!<")) + tag = tag.sub(1); + if(tag.begins_with("!!")) + tag = tag.sub(2); + else if(tag.begins_with('!')) + return TAG_NONE; + else if(tag.begins_with("tag:yaml.org,2002:")) + { + RYML_ASSERT(csubstr("tag:yaml.org,2002:").len == 18); + tag = tag.sub(18); + } + else if(tag.begins_with("<tag:yaml.org,2002:")) + { + RYML_ASSERT(csubstr("<tag:yaml.org,2002:").len == 19); + tag = tag.sub(19); + if(!tag.len) + return TAG_NONE; + tag = tag.offs(0, 1); + } + + if(tag == "map") + return TAG_MAP; + else if(tag == "omap") + return TAG_OMAP; + else if(tag == "pairs") + return TAG_PAIRS; + else if(tag == "set") + return TAG_SET; + else if(tag == "seq") + return TAG_SEQ; + else if(tag == "binary") + return TAG_BINARY; + else if(tag == "bool") + return TAG_BOOL; + else if(tag == "float") + return TAG_FLOAT; + else if(tag == "int") + return TAG_INT; + else if(tag == "merge") + return TAG_MERGE; + else if(tag == "null") + return TAG_NULL; + else if(tag == "str") + return TAG_STR; + else if(tag == "timestamp") + return TAG_TIMESTAMP; + else if(tag == "value") + return TAG_VALUE; + + return TAG_NONE; +} + +csubstr from_tag_long(YamlTag_e tag) +{ + switch(tag) + { + case TAG_MAP: + return {"<tag:yaml.org,2002:map>"}; + case TAG_OMAP: + return {"<tag:yaml.org,2002:omap>"}; + case TAG_PAIRS: + return {"<tag:yaml.org,2002:pairs>"}; + case TAG_SET: + return {"<tag:yaml.org,2002:set>"}; + case TAG_SEQ: + return {"<tag:yaml.org,2002:seq>"}; + case TAG_BINARY: + return {"<tag:yaml.org,2002:binary>"}; + case TAG_BOOL: + return {"<tag:yaml.org,2002:bool>"}; + case TAG_FLOAT: + return {"<tag:yaml.org,2002:float>"}; + case TAG_INT: + return {"<tag:yaml.org,2002:int>"}; + case TAG_MERGE: + return {"<tag:yaml.org,2002:merge>"}; + case TAG_NULL: + return {"<tag:yaml.org,2002:null>"}; + case TAG_STR: + return {"<tag:yaml.org,2002:str>"}; + case TAG_TIMESTAMP: + return {"<tag:yaml.org,2002:timestamp>"}; + case TAG_VALUE: + return {"<tag:yaml.org,2002:value>"}; + case TAG_YAML: + return {"<tag:yaml.org,2002:yaml>"}; + case TAG_NONE: + return {""}; + } + return {""}; +} + +csubstr from_tag(YamlTag_e tag) +{ + switch(tag) + { + case TAG_MAP: + return {"!!map"}; + case TAG_OMAP: + return {"!!omap"}; + case TAG_PAIRS: + return {"!!pairs"}; + case TAG_SET: + return {"!!set"}; + case TAG_SEQ: + return {"!!seq"}; + case TAG_BINARY: + return {"!!binary"}; + case TAG_BOOL: + return {"!!bool"}; + case TAG_FLOAT: + return {"!!float"}; + case TAG_INT: + return {"!!int"}; + case TAG_MERGE: + return {"!!merge"}; + case TAG_NULL: + return {"!!null"}; + case TAG_STR: + return {"!!str"}; + case TAG_TIMESTAMP: + return {"!!timestamp"}; + case TAG_VALUE: + return {"!!value"}; + case TAG_YAML: + return {"!!yaml"}; + case TAG_NONE: + return {""}; + } + return {""}; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +const char* NodeType::type_str(NodeType_e ty) +{ + switch(ty & _TYMASK) + { + case KEYVAL: + return "KEYVAL"; + case KEY: + return "KEY"; + case VAL: + return "VAL"; + case MAP: + return "MAP"; + case SEQ: + return "SEQ"; + case KEYMAP: + return "KEYMAP"; + case KEYSEQ: + return "KEYSEQ"; + case DOCSEQ: + return "DOCSEQ"; + case DOCMAP: + return "DOCMAP"; + case DOCVAL: + return "DOCVAL"; + case DOC: + return "DOC"; + case STREAM: + return "STREAM"; + case NOTYPE: + return "NOTYPE"; + default: + if((ty & KEYVAL) == KEYVAL) + return "KEYVAL***"; + if((ty & KEYMAP) == KEYMAP) + return "KEYMAP***"; + if((ty & KEYSEQ) == KEYSEQ) + return "KEYSEQ***"; + if((ty & DOCSEQ) == DOCSEQ) + return "DOCSEQ***"; + if((ty & DOCMAP) == DOCMAP) + return "DOCMAP***"; + if((ty & DOCVAL) == DOCVAL) + return "DOCVAL***"; + if(ty & KEY) + return "KEY***"; + if(ty & VAL) + return "VAL***"; + if(ty & MAP) + return "MAP***"; + if(ty & SEQ) + return "SEQ***"; + if(ty & DOC) + return "DOC***"; + return "(unk)"; + } +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +NodeRef Tree::rootref() +{ + return NodeRef(this, root_id()); +} +ConstNodeRef Tree::rootref() const +{ + return ConstNodeRef(this, root_id()); +} + +ConstNodeRef Tree::crootref() +{ + return ConstNodeRef(this, root_id()); +} +ConstNodeRef Tree::crootref() const +{ + return ConstNodeRef(this, root_id()); +} + +NodeRef Tree::ref(size_t id) +{ + _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); + return NodeRef(this, id); +} +ConstNodeRef Tree::ref(size_t id) const +{ + _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); + return ConstNodeRef(this, id); +} + +ConstNodeRef Tree::cref(size_t id) +{ + _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); + return ConstNodeRef(this, id); +} +ConstNodeRef Tree::cref(size_t id) const +{ + _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); + return ConstNodeRef(this, id); +} + +NodeRef Tree::operator[] (csubstr key) +{ + return rootref()[key]; +} +ConstNodeRef Tree::operator[] (csubstr key) const +{ + return rootref()[key]; +} + +NodeRef Tree::operator[] (size_t i) +{ + return rootref()[i]; +} +ConstNodeRef Tree::operator[] (size_t i) const +{ + return rootref()[i]; +} + +NodeRef Tree::docref(size_t i) +{ + return ref(doc(i)); +} +ConstNodeRef Tree::docref(size_t i) const +{ + return cref(doc(i)); +} + + +//----------------------------------------------------------------------------- +Tree::Tree(Callbacks const& cb) + : m_buf(nullptr) + , m_cap(0) + , m_size(0) + , m_free_head(NONE) + , m_free_tail(NONE) + , m_arena() + , m_arena_pos(0) + , m_callbacks(cb) +{ +} + +Tree::Tree(size_t node_capacity, size_t arena_capacity, Callbacks const& cb) + : Tree(cb) +{ + reserve(node_capacity); + reserve_arena(arena_capacity); +} + +Tree::~Tree() +{ + _free(); +} + + +Tree::Tree(Tree const& that) noexcept : Tree(that.m_callbacks) +{ + _copy(that); +} + +Tree& Tree::operator= (Tree const& that) noexcept +{ + _free(); + m_callbacks = that.m_callbacks; + _copy(that); + return *this; +} + +Tree::Tree(Tree && that) noexcept : Tree(that.m_callbacks) +{ + _move(that); +} + +Tree& Tree::operator= (Tree && that) noexcept +{ + _free(); + m_callbacks = that.m_callbacks; + _move(that); + return *this; +} + +void Tree::_free() +{ + if(m_buf) + { + _RYML_CB_ASSERT(m_callbacks, m_cap > 0); + _RYML_CB_FREE(m_callbacks, m_buf, NodeData, m_cap); + } + if(m_arena.str) + { + _RYML_CB_ASSERT(m_callbacks, m_arena.len > 0); + _RYML_CB_FREE(m_callbacks, m_arena.str, char, m_arena.len); + } + _clear(); +} + + +C4_SUPPRESS_WARNING_GCC_PUSH +#if defined(__GNUC__) && __GNUC__>= 8 + C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wclass-memaccess") // error: ‘void* memset(void*, int, size_t)’ clearing an object of type ‘class c4::yml::Tree’ with no trivial copy-assignment; use assignment or value-initialization instead +#endif + +void Tree::_clear() +{ + m_buf = nullptr; + m_cap = 0; + m_size = 0; + m_free_head = 0; + m_free_tail = 0; + m_arena = {}; + m_arena_pos = 0; + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = {}; +} + +void Tree::_copy(Tree const& that) +{ + _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0); + m_buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, that.m_cap, that.m_buf); + memcpy(m_buf, that.m_buf, that.m_cap * sizeof(NodeData)); + m_cap = that.m_cap; + m_size = that.m_size; + m_free_head = that.m_free_head; + m_free_tail = that.m_free_tail; + m_arena_pos = that.m_arena_pos; + m_arena = that.m_arena; + if(that.m_arena.str) + { + _RYML_CB_ASSERT(m_callbacks, that.m_arena.len > 0); + substr arena; + arena.str = _RYML_CB_ALLOC_HINT(m_callbacks, char, that.m_arena.len, that.m_arena.str); + arena.len = that.m_arena.len; + _relocate(arena); // does a memcpy of the arena and updates nodes using the old arena + m_arena = arena; + } + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = that.m_tag_directives[i]; +} + +void Tree::_move(Tree & that) +{ + _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0); + m_buf = that.m_buf; + m_cap = that.m_cap; + m_size = that.m_size; + m_free_head = that.m_free_head; + m_free_tail = that.m_free_tail; + m_arena = that.m_arena; + m_arena_pos = that.m_arena_pos; + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = that.m_tag_directives[i]; + that._clear(); +} + +void Tree::_relocate(substr next_arena) +{ + _RYML_CB_ASSERT(m_callbacks, next_arena.not_empty()); + _RYML_CB_ASSERT(m_callbacks, next_arena.len >= m_arena.len); + memcpy(next_arena.str, m_arena.str, m_arena_pos); + for(NodeData *C4_RESTRICT n = m_buf, *e = m_buf + m_cap; n != e; ++n) + { + if(in_arena(n->m_key.scalar)) + n->m_key.scalar = _relocated(n->m_key.scalar, next_arena); + if(in_arena(n->m_key.tag)) + n->m_key.tag = _relocated(n->m_key.tag, next_arena); + if(in_arena(n->m_key.anchor)) + n->m_key.anchor = _relocated(n->m_key.anchor, next_arena); + if(in_arena(n->m_val.scalar)) + n->m_val.scalar = _relocated(n->m_val.scalar, next_arena); + if(in_arena(n->m_val.tag)) + n->m_val.tag = _relocated(n->m_val.tag, next_arena); + if(in_arena(n->m_val.anchor)) + n->m_val.anchor = _relocated(n->m_val.anchor, next_arena); + } + for(TagDirective &C4_RESTRICT td : m_tag_directives) + { + if(in_arena(td.prefix)) + td.prefix = _relocated(td.prefix, next_arena); + if(in_arena(td.handle)) + td.handle = _relocated(td.handle, next_arena); + } +} + + +//----------------------------------------------------------------------------- +void Tree::reserve(size_t cap) +{ + if(cap > m_cap) + { + NodeData *buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, cap, m_buf); + if(m_buf) + { + memcpy(buf, m_buf, m_cap * sizeof(NodeData)); + _RYML_CB_FREE(m_callbacks, m_buf, NodeData, m_cap); + } + size_t first = m_cap, del = cap - m_cap; + m_cap = cap; + m_buf = buf; + _clear_range(first, del); + if(m_free_head != NONE) + { + _RYML_CB_ASSERT(m_callbacks, m_buf != nullptr); + _RYML_CB_ASSERT(m_callbacks, m_free_tail != NONE); + m_buf[m_free_tail].m_next_sibling = first; + m_buf[first].m_prev_sibling = m_free_tail; + m_free_tail = cap-1; + } + else + { + _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE); + m_free_head = first; + m_free_tail = cap-1; + } + _RYML_CB_ASSERT(m_callbacks, m_free_head == NONE || (m_free_head >= 0 && m_free_head < cap)); + _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE || (m_free_tail >= 0 && m_free_tail < cap)); + + if( ! m_size) + _claim_root(); + } +} + + +//----------------------------------------------------------------------------- +void Tree::clear() +{ + _clear_range(0, m_cap); + m_size = 0; + if(m_buf) + { + _RYML_CB_ASSERT(m_callbacks, m_cap >= 0); + m_free_head = 0; + m_free_tail = m_cap-1; + _claim_root(); + } + else + { + m_free_head = NONE; + m_free_tail = NONE; + } + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = {}; +} + +void Tree::_claim_root() +{ + size_t r = _claim(); + _RYML_CB_ASSERT(m_callbacks, r == 0); + _set_hierarchy(r, NONE, NONE); +} + + +//----------------------------------------------------------------------------- +void Tree::_clear_range(size_t first, size_t num) +{ + if(num == 0) + return; // prevent overflow when subtracting + _RYML_CB_ASSERT(m_callbacks, first >= 0 && first + num <= m_cap); + memset(m_buf + first, 0, num * sizeof(NodeData)); // TODO we should not need this + for(size_t i = first, e = first + num; i < e; ++i) + { + _clear(i); + NodeData *n = m_buf + i; + n->m_prev_sibling = i - 1; + n->m_next_sibling = i + 1; + } + m_buf[first + num - 1].m_next_sibling = NONE; +} + +C4_SUPPRESS_WARNING_GCC_POP + + +//----------------------------------------------------------------------------- +void Tree::_release(size_t i) +{ + _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); + + _rem_hierarchy(i); + _free_list_add(i); + _clear(i); + + --m_size; +} + +//----------------------------------------------------------------------------- +// add to the front of the free list +void Tree::_free_list_add(size_t i) +{ + _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); + NodeData &C4_RESTRICT w = m_buf[i]; + + w.m_parent = NONE; + w.m_next_sibling = m_free_head; + w.m_prev_sibling = NONE; + if(m_free_head != NONE) + m_buf[m_free_head].m_prev_sibling = i; + m_free_head = i; + if(m_free_tail == NONE) + m_free_tail = m_free_head; +} + +void Tree::_free_list_rem(size_t i) +{ + if(m_free_head == i) + m_free_head = _p(i)->m_next_sibling; + _rem_hierarchy(i); +} + +//----------------------------------------------------------------------------- +size_t Tree::_claim() +{ + if(m_free_head == NONE || m_buf == nullptr) + { + size_t sz = 2 * m_cap; + sz = sz ? sz : 16; + reserve(sz); + _RYML_CB_ASSERT(m_callbacks, m_free_head != NONE); + } + + _RYML_CB_ASSERT(m_callbacks, m_size < m_cap); + _RYML_CB_ASSERT(m_callbacks, m_free_head >= 0 && m_free_head < m_cap); + + size_t ichild = m_free_head; + NodeData *child = m_buf + ichild; + + ++m_size; + m_free_head = child->m_next_sibling; + if(m_free_head == NONE) + { + m_free_tail = NONE; + _RYML_CB_ASSERT(m_callbacks, m_size == m_cap); + } + + _clear(ichild); + + return ichild; +} + +//----------------------------------------------------------------------------- + +C4_SUPPRESS_WARNING_GCC_PUSH +C4_SUPPRESS_WARNING_CLANG_PUSH +C4_SUPPRESS_WARNING_CLANG("-Wnull-dereference") +#if defined(__GNUC__) && (__GNUC__ >= 6) +C4_SUPPRESS_WARNING_GCC("-Wnull-dereference") +#endif + +void Tree::_set_hierarchy(size_t ichild, size_t iparent, size_t iprev_sibling) +{ + _RYML_CB_ASSERT(m_callbacks, iparent == NONE || (iparent >= 0 && iparent < m_cap)); + _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE || (iprev_sibling >= 0 && iprev_sibling < m_cap)); + + NodeData *C4_RESTRICT child = get(ichild); + + child->m_parent = iparent; + child->m_prev_sibling = NONE; + child->m_next_sibling = NONE; + + if(iparent == NONE) + { + _RYML_CB_ASSERT(m_callbacks, ichild == 0); + _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE); + } + + if(iparent == NONE) + return; + + size_t inext_sibling = iprev_sibling != NONE ? next_sibling(iprev_sibling) : first_child(iparent); + NodeData *C4_RESTRICT parent = get(iparent); + NodeData *C4_RESTRICT psib = get(iprev_sibling); + NodeData *C4_RESTRICT nsib = get(inext_sibling); + + if(psib) + { + _RYML_CB_ASSERT(m_callbacks, next_sibling(iprev_sibling) == id(nsib)); + child->m_prev_sibling = id(psib); + psib->m_next_sibling = id(child); + _RYML_CB_ASSERT(m_callbacks, psib->m_prev_sibling != psib->m_next_sibling || psib->m_prev_sibling == NONE); + } + + if(nsib) + { + _RYML_CB_ASSERT(m_callbacks, prev_sibling(inext_sibling) == id(psib)); + child->m_next_sibling = id(nsib); + nsib->m_prev_sibling = id(child); + _RYML_CB_ASSERT(m_callbacks, nsib->m_prev_sibling != nsib->m_next_sibling || nsib->m_prev_sibling == NONE); + } + + if(parent->m_first_child == NONE) + { + _RYML_CB_ASSERT(m_callbacks, parent->m_last_child == NONE); + parent->m_first_child = id(child); + parent->m_last_child = id(child); + } + else + { + if(child->m_next_sibling == parent->m_first_child) + parent->m_first_child = id(child); + + if(child->m_prev_sibling == parent->m_last_child) + parent->m_last_child = id(child); + } +} + +C4_SUPPRESS_WARNING_GCC_POP +C4_SUPPRESS_WARNING_CLANG_POP + + +//----------------------------------------------------------------------------- +void Tree::_rem_hierarchy(size_t i) +{ + _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); + + NodeData &C4_RESTRICT w = m_buf[i]; + + // remove from the parent + if(w.m_parent != NONE) + { + NodeData &C4_RESTRICT p = m_buf[w.m_parent]; + if(p.m_first_child == i) + { + p.m_first_child = w.m_next_sibling; + } + if(p.m_last_child == i) + { + p.m_last_child = w.m_prev_sibling; + } + } + + // remove from the used list + if(w.m_prev_sibling != NONE) + { + NodeData *C4_RESTRICT prev = get(w.m_prev_sibling); + prev->m_next_sibling = w.m_next_sibling; + } + if(w.m_next_sibling != NONE) + { + NodeData *C4_RESTRICT next = get(w.m_next_sibling); + next->m_prev_sibling = w.m_prev_sibling; + } +} + +//----------------------------------------------------------------------------- +void Tree::reorder() +{ + size_t r = root_id(); + _do_reorder(&r, 0); +} + +//----------------------------------------------------------------------------- +size_t Tree::_do_reorder(size_t *node, size_t count) +{ + // swap this node if it's not in place + if(*node != count) + { + _swap(*node, count); + *node = count; + } + ++count; // bump the count from this node + + // now descend in the hierarchy + for(size_t i = first_child(*node); i != NONE; i = next_sibling(i)) + { + // this child may have been relocated to a different index, + // so get an updated version + count = _do_reorder(&i, count); + } + return count; +} + +//----------------------------------------------------------------------------- +void Tree::_swap(size_t n_, size_t m_) +{ + _RYML_CB_ASSERT(m_callbacks, (parent(n_) != NONE) || type(n_) == NOTYPE); + _RYML_CB_ASSERT(m_callbacks, (parent(m_) != NONE) || type(m_) == NOTYPE); + NodeType tn = type(n_); + NodeType tm = type(m_); + if(tn != NOTYPE && tm != NOTYPE) + { + _swap_props(n_, m_); + _swap_hierarchy(n_, m_); + } + else if(tn == NOTYPE && tm != NOTYPE) + { + _copy_props(n_, m_); + _free_list_rem(n_); + _copy_hierarchy(n_, m_); + _clear(m_); + _free_list_add(m_); + } + else if(tn != NOTYPE && tm == NOTYPE) + { + _copy_props(m_, n_); + _free_list_rem(m_); + _copy_hierarchy(m_, n_); + _clear(n_); + _free_list_add(n_); + } + else + { + C4_NEVER_REACH(); + } +} + +//----------------------------------------------------------------------------- +void Tree::_swap_hierarchy(size_t ia, size_t ib) +{ + if(ia == ib) return; + + for(size_t i = first_child(ia); i != NONE; i = next_sibling(i)) + { + if(i == ib || i == ia) + continue; + _p(i)->m_parent = ib; + } + + for(size_t i = first_child(ib); i != NONE; i = next_sibling(i)) + { + if(i == ib || i == ia) + continue; + _p(i)->m_parent = ia; + } + + auto & C4_RESTRICT a = *_p(ia); + auto & C4_RESTRICT b = *_p(ib); + auto & C4_RESTRICT pa = *_p(a.m_parent); + auto & C4_RESTRICT pb = *_p(b.m_parent); + + if(&pa == &pb) + { + if((pa.m_first_child == ib && pa.m_last_child == ia) + || + (pa.m_first_child == ia && pa.m_last_child == ib)) + { + std::swap(pa.m_first_child, pa.m_last_child); + } + else + { + bool changed = false; + if(pa.m_first_child == ia) + { + pa.m_first_child = ib; + changed = true; + } + if(pa.m_last_child == ia) + { + pa.m_last_child = ib; + changed = true; + } + if(pb.m_first_child == ib && !changed) + { + pb.m_first_child = ia; + } + if(pb.m_last_child == ib && !changed) + { + pb.m_last_child = ia; + } + } + } + else + { + if(pa.m_first_child == ia) + pa.m_first_child = ib; + if(pa.m_last_child == ia) + pa.m_last_child = ib; + if(pb.m_first_child == ib) + pb.m_first_child = ia; + if(pb.m_last_child == ib) + pb.m_last_child = ia; + } + std::swap(a.m_first_child , b.m_first_child); + std::swap(a.m_last_child , b.m_last_child); + + if(a.m_prev_sibling != ib && b.m_prev_sibling != ia && + a.m_next_sibling != ib && b.m_next_sibling != ia) + { + if(a.m_prev_sibling != NONE && a.m_prev_sibling != ib) + _p(a.m_prev_sibling)->m_next_sibling = ib; + if(a.m_next_sibling != NONE && a.m_next_sibling != ib) + _p(a.m_next_sibling)->m_prev_sibling = ib; + if(b.m_prev_sibling != NONE && b.m_prev_sibling != ia) + _p(b.m_prev_sibling)->m_next_sibling = ia; + if(b.m_next_sibling != NONE && b.m_next_sibling != ia) + _p(b.m_next_sibling)->m_prev_sibling = ia; + std::swap(a.m_prev_sibling, b.m_prev_sibling); + std::swap(a.m_next_sibling, b.m_next_sibling); + } + else + { + if(a.m_next_sibling == ib) // n will go after m + { + _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling == ia); + if(a.m_prev_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ib); + _p(a.m_prev_sibling)->m_next_sibling = ib; + } + if(b.m_next_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ia); + _p(b.m_next_sibling)->m_prev_sibling = ia; + } + size_t ns = b.m_next_sibling; + b.m_prev_sibling = a.m_prev_sibling; + b.m_next_sibling = ia; + a.m_prev_sibling = ib; + a.m_next_sibling = ns; + } + else if(a.m_prev_sibling == ib) // m will go after n + { + _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling == ia); + if(b.m_prev_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ia); + _p(b.m_prev_sibling)->m_next_sibling = ia; + } + if(a.m_next_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ib); + _p(a.m_next_sibling)->m_prev_sibling = ib; + } + size_t ns = b.m_prev_sibling; + a.m_prev_sibling = b.m_prev_sibling; + a.m_next_sibling = ib; + b.m_prev_sibling = ia; + b.m_next_sibling = ns; + } + else + { + C4_NEVER_REACH(); + } + } + _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ia); + _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ia); + _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ib); + _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ib); + + if(a.m_parent != ib && b.m_parent != ia) + { + std::swap(a.m_parent, b.m_parent); + } + else + { + if(a.m_parent == ib && b.m_parent != ia) + { + a.m_parent = b.m_parent; + b.m_parent = ia; + } + else if(a.m_parent != ib && b.m_parent == ia) + { + b.m_parent = a.m_parent; + a.m_parent = ib; + } + else + { + C4_NEVER_REACH(); + } + } +} + +//----------------------------------------------------------------------------- +void Tree::_copy_hierarchy(size_t dst_, size_t src_) +{ + auto const& C4_RESTRICT src = *_p(src_); + auto & C4_RESTRICT dst = *_p(dst_); + auto & C4_RESTRICT prt = *_p(src.m_parent); + for(size_t i = src.m_first_child; i != NONE; i = next_sibling(i)) + { + _p(i)->m_parent = dst_; + } + if(src.m_prev_sibling != NONE) + { + _p(src.m_prev_sibling)->m_next_sibling = dst_; + } + if(src.m_next_sibling != NONE) + { + _p(src.m_next_sibling)->m_prev_sibling = dst_; + } + if(prt.m_first_child == src_) + { + prt.m_first_child = dst_; + } + if(prt.m_last_child == src_) + { + prt.m_last_child = dst_; + } + dst.m_parent = src.m_parent; + dst.m_first_child = src.m_first_child; + dst.m_last_child = src.m_last_child; + dst.m_prev_sibling = src.m_prev_sibling; + dst.m_next_sibling = src.m_next_sibling; +} + +//----------------------------------------------------------------------------- +void Tree::_swap_props(size_t n_, size_t m_) +{ + NodeData &C4_RESTRICT n = *_p(n_); + NodeData &C4_RESTRICT m = *_p(m_); + std::swap(n.m_type, m.m_type); + std::swap(n.m_key, m.m_key); + std::swap(n.m_val, m.m_val); +} + +//----------------------------------------------------------------------------- +void Tree::move(size_t node, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, node != after); + _RYML_CB_ASSERT(m_callbacks, ! is_root(node)); + _RYML_CB_ASSERT(m_callbacks, (after == NONE) || (has_sibling(node, after) && has_sibling(after, node))); + + _rem_hierarchy(node); + _set_hierarchy(node, parent(node), after); +} + +//----------------------------------------------------------------------------- + +void Tree::move(size_t node, size_t new_parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, node != after); + _RYML_CB_ASSERT(m_callbacks, new_parent != NONE); + _RYML_CB_ASSERT(m_callbacks, new_parent != node); + _RYML_CB_ASSERT(m_callbacks, new_parent != after); + _RYML_CB_ASSERT(m_callbacks, ! is_root(node)); + + _rem_hierarchy(node); + _set_hierarchy(node, new_parent, after); +} + +size_t Tree::move(Tree *src, size_t node, size_t new_parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, new_parent != NONE); + _RYML_CB_ASSERT(m_callbacks, new_parent != after); + + size_t dup = duplicate(src, node, new_parent, after); + src->remove(node); + return dup; +} + +void Tree::set_root_as_stream() +{ + size_t root = root_id(); + if(is_stream(root)) + return; + // don't use _add_flags() because it's checked and will fail + if(!has_children(root)) + { + if(is_val(root)) + { + _p(root)->m_type.add(SEQ); + size_t next_doc = append_child(root); + _copy_props_wo_key(next_doc, root); + _p(next_doc)->m_type.add(DOC); + _p(next_doc)->m_type.rem(SEQ); + } + _p(root)->m_type = STREAM; + return; + } + _RYML_CB_ASSERT(m_callbacks, !has_key(root)); + size_t next_doc = append_child(root); + _copy_props_wo_key(next_doc, root); + _add_flags(next_doc, DOC); + for(size_t prev = NONE, ch = first_child(root), next = next_sibling(ch); ch != NONE; ) + { + if(ch == next_doc) + break; + move(ch, next_doc, prev); + prev = ch; + ch = next; + next = next_sibling(next); + } + _p(root)->m_type = STREAM; +} + + +//----------------------------------------------------------------------------- +void Tree::remove_children(size_t node) +{ + _RYML_CB_ASSERT(m_callbacks, get(node) != nullptr); + size_t ich = get(node)->m_first_child; + while(ich != NONE) + { + remove_children(ich); + _RYML_CB_ASSERT(m_callbacks, get(ich) != nullptr); + size_t next = get(ich)->m_next_sibling; + _release(ich); + if(ich == get(node)->m_last_child) + break; + ich = next; + } +} + +bool Tree::change_type(size_t node, NodeType type) +{ + _RYML_CB_ASSERT(m_callbacks, type.is_val() || type.is_map() || type.is_seq()); + _RYML_CB_ASSERT(m_callbacks, type.is_val() + type.is_map() + type.is_seq() == 1); + _RYML_CB_ASSERT(m_callbacks, type.has_key() == has_key(node) || (has_key(node) && !type.has_key())); + NodeData *d = _p(node); + if(type.is_map() && is_map(node)) + return false; + else if(type.is_seq() && is_seq(node)) + return false; + else if(type.is_val() && is_val(node)) + return false; + d->m_type = (d->m_type & (~(MAP|SEQ|VAL))) | type; + remove_children(node); + return true; +} + + +//----------------------------------------------------------------------------- +size_t Tree::duplicate(size_t node, size_t parent, size_t after) +{ + return duplicate(this, node, parent, after); +} + +size_t Tree::duplicate(Tree const* src, size_t node, size_t parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, parent != NONE); + _RYML_CB_ASSERT(m_callbacks, ! src->is_root(node)); + + size_t copy = _claim(); + + _copy_props(copy, src, node); + _set_hierarchy(copy, parent, after); + duplicate_children(src, node, copy, NONE); + + return copy; +} + +//----------------------------------------------------------------------------- +size_t Tree::duplicate_children(size_t node, size_t parent, size_t after) +{ + return duplicate_children(this, node, parent, after); +} + +size_t Tree::duplicate_children(Tree const* src, size_t node, size_t parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, parent != NONE); + _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after)); + + size_t prev = after; + for(size_t i = src->first_child(node); i != NONE; i = src->next_sibling(i)) + { + prev = duplicate(src, i, parent, prev); + } + + return prev; +} + +//----------------------------------------------------------------------------- +void Tree::duplicate_contents(size_t node, size_t where) +{ + duplicate_contents(this, node, where); +} + +void Tree::duplicate_contents(Tree const *src, size_t node, size_t where) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, where != NONE); + _copy_props_wo_key(where, src, node); + duplicate_children(src, node, where, last_child(where)); +} + +//----------------------------------------------------------------------------- +size_t Tree::duplicate_children_no_rep(size_t node, size_t parent, size_t after) +{ + return duplicate_children_no_rep(this, node, parent, after); +} + +size_t Tree::duplicate_children_no_rep(Tree const *src, size_t node, size_t parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, parent != NONE); + _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after)); + + // don't loop using pointers as there may be a relocation + + // find the position where "after" is + size_t after_pos = NONE; + if(after != NONE) + { + for(size_t i = first_child(parent), icount = 0; i != NONE; ++icount, i = next_sibling(i)) + { + if(i == after) + { + after_pos = icount; + break; + } + } + _RYML_CB_ASSERT(m_callbacks, after_pos != NONE); + } + + // for each child to be duplicated... + size_t prev = after; + for(size_t i = src->first_child(node), icount = 0; i != NONE; ++icount, i = src->next_sibling(i)) + { + if(is_seq(parent)) + { + prev = duplicate(i, parent, prev); + } + else + { + _RYML_CB_ASSERT(m_callbacks, is_map(parent)); + // does the parent already have a node with key equal to that of the current duplicate? + size_t rep = NONE, rep_pos = NONE; + for(size_t j = first_child(parent), jcount = 0; j != NONE; ++jcount, j = next_sibling(j)) + { + if(key(j) == key(i)) + { + rep = j; + rep_pos = jcount; + break; + } + } + if(rep == NONE) // there is no repetition; just duplicate + { + prev = duplicate(src, i, parent, prev); + } + else // yes, there is a repetition + { + if(after_pos != NONE && rep_pos < after_pos) + { + // rep is located before the node which will be inserted, + // and will be overridden by the duplicate. So replace it. + remove(rep); + prev = duplicate(src, i, parent, prev); + } + else if(prev == NONE) + { + // first iteration with prev = after = NONE and repetition + prev = rep; + } + else if(rep != prev) + { + // rep is located after the node which will be inserted + // and overrides it. So move the rep into this node's place. + move(rep, prev); + prev = rep; + } + } // there's a repetition + } + } + + return prev; +} + + +//----------------------------------------------------------------------------- + +void Tree::merge_with(Tree const *src, size_t src_node, size_t dst_node) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + if(src_node == NONE) + src_node = src->root_id(); + if(dst_node == NONE) + dst_node = root_id(); + _RYML_CB_ASSERT(m_callbacks, src->has_val(src_node) || src->is_seq(src_node) || src->is_map(src_node)); + + if(src->has_val(src_node)) + { + if( ! has_val(dst_node)) + { + if(has_children(dst_node)) + remove_children(dst_node); + } + if(src->is_keyval(src_node)) + _copy_props(dst_node, src, src_node); + else if(src->is_val(src_node)) + _copy_props_wo_key(dst_node, src, src_node); + else + C4_NEVER_REACH(); + } + else if(src->is_seq(src_node)) + { + if( ! is_seq(dst_node)) + { + if(has_children(dst_node)) + remove_children(dst_node); + _clear_type(dst_node); + if(src->has_key(src_node)) + to_seq(dst_node, src->key(src_node)); + else + to_seq(dst_node); + } + for(size_t sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) + { + size_t dch = append_child(dst_node); + _copy_props_wo_key(dch, src, sch); + merge_with(src, sch, dch); + } + } + else if(src->is_map(src_node)) + { + if( ! is_map(dst_node)) + { + if(has_children(dst_node)) + remove_children(dst_node); + _clear_type(dst_node); + if(src->has_key(src_node)) + to_map(dst_node, src->key(src_node)); + else + to_map(dst_node); + } + for(size_t sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) + { + size_t dch = find_child(dst_node, src->key(sch)); + if(dch == NONE) + { + dch = append_child(dst_node); + _copy_props(dch, src, sch); + } + merge_with(src, sch, dch); + } + } + else + { + C4_NEVER_REACH(); + } +} + + +//----------------------------------------------------------------------------- + +namespace detail { +/** @todo make this part of the public API, refactoring as appropriate + * to be able to use the same resolver to handle multiple trees (one + * at a time) */ +struct ReferenceResolver +{ + struct refdata + { + NodeType type; + size_t node; + size_t prev_anchor; + size_t target; + size_t parent_ref; + size_t parent_ref_sibling; + }; + + Tree *t; + /** from the specs: "an alias node refers to the most recent + * node in the serialization having the specified anchor". So + * we need to start looking upward from ref nodes. + * + * @see http://yaml.org/spec/1.2/spec.html#id2765878 */ + stack<refdata> refs; + + ReferenceResolver(Tree *t_) : t(t_), refs(t_->callbacks()) + { + resolve(); + } + + void store_anchors_and_refs() + { + // minimize (re-)allocations by counting first + size_t num_anchors_and_refs = count_anchors_and_refs(t->root_id()); + if(!num_anchors_and_refs) + return; + refs.reserve(num_anchors_and_refs); + + // now descend through the hierarchy + _store_anchors_and_refs(t->root_id()); + + // finally connect the reference list + size_t prev_anchor = npos; + size_t count = 0; + for(auto &rd : refs) + { + rd.prev_anchor = prev_anchor; + if(rd.type.is_anchor()) + prev_anchor = count; + ++count; + } + } + + size_t count_anchors_and_refs(size_t n) + { + size_t c = 0; + c += t->has_key_anchor(n); + c += t->has_val_anchor(n); + c += t->is_key_ref(n); + c += t->is_val_ref(n); + for(size_t ch = t->first_child(n); ch != NONE; ch = t->next_sibling(ch)) + c += count_anchors_and_refs(ch); + return c; + } + + void _store_anchors_and_refs(size_t n) + { + if(t->is_key_ref(n) || t->is_val_ref(n) || (t->has_key(n) && t->key(n) == "<<")) + { + if(t->is_seq(n)) + { + // for merging multiple inheritance targets + // <<: [ *CENTER, *BIG ] + for(size_t ich = t->first_child(n); ich != NONE; ich = t->next_sibling(ich)) + { + RYML_ASSERT(t->num_children(ich) == 0); + refs.push({VALREF, ich, npos, npos, n, t->next_sibling(n)}); + } + return; + } + if(t->is_key_ref(n) && t->key(n) != "<<") // insert key refs BEFORE inserting val refs + { + RYML_CHECK((!t->has_key(n)) || t->key(n).ends_with(t->key_ref(n))); + refs.push({KEYREF, n, npos, npos, NONE, NONE}); + } + if(t->is_val_ref(n)) + { + RYML_CHECK((!t->has_val(n)) || t->val(n).ends_with(t->val_ref(n))); + refs.push({VALREF, n, npos, npos, NONE, NONE}); + } + } + if(t->has_key_anchor(n)) + { + RYML_CHECK(t->has_key(n)); + refs.push({KEYANCH, n, npos, npos, NONE, NONE}); + } + if(t->has_val_anchor(n)) + { + RYML_CHECK(t->has_val(n) || t->is_container(n)); + refs.push({VALANCH, n, npos, npos, NONE, NONE}); + } + for(size_t ch = t->first_child(n); ch != NONE; ch = t->next_sibling(ch)) + { + _store_anchors_and_refs(ch); + } + } + + size_t lookup_(refdata *C4_RESTRICT ra) + { + RYML_ASSERT(ra->type.is_key_ref() || ra->type.is_val_ref()); + RYML_ASSERT(ra->type.is_key_ref() != ra->type.is_val_ref()); + csubstr refname; + if(ra->type.is_val_ref()) + { + refname = t->val_ref(ra->node); + } + else + { + RYML_ASSERT(ra->type.is_key_ref()); + refname = t->key_ref(ra->node); + } + while(ra->prev_anchor != npos) + { + ra = &refs[ra->prev_anchor]; + if(t->has_anchor(ra->node, refname)) + return ra->node; + } + + #ifndef RYML_ERRMSG_SIZE + #define RYML_ERRMSG_SIZE 1024 + #endif + + char errmsg[RYML_ERRMSG_SIZE]; + snprintf(errmsg, RYML_ERRMSG_SIZE, "anchor does not exist: '%.*s'", + static_cast<int>(refname.size()), refname.data()); + c4::yml::error(errmsg); + return NONE; + } + + void resolve() + { + store_anchors_and_refs(); + if(refs.empty()) + return; + + /* from the specs: "an alias node refers to the most recent + * node in the serialization having the specified anchor". So + * we need to start looking upward from ref nodes. + * + * @see http://yaml.org/spec/1.2/spec.html#id2765878 */ + for(size_t i = 0, e = refs.size(); i < e; ++i) + { + auto &C4_RESTRICT rd = refs.top(i); + if( ! rd.type.is_ref()) + continue; + rd.target = lookup_(&rd); + } + } + +}; // ReferenceResolver +} // namespace detail + +void Tree::resolve() +{ + if(m_size == 0) + return; + + detail::ReferenceResolver rr(this); + + // insert the resolved references + size_t prev_parent_ref = NONE; + size_t prev_parent_ref_after = NONE; + for(auto const& C4_RESTRICT rd : rr.refs) + { + if( ! rd.type.is_ref()) + continue; + if(rd.parent_ref != NONE) + { + _RYML_CB_ASSERT(m_callbacks, is_seq(rd.parent_ref)); + size_t after, p = parent(rd.parent_ref); + if(prev_parent_ref != rd.parent_ref) + { + after = rd.parent_ref;//prev_sibling(rd.parent_ref_sibling); + prev_parent_ref_after = after; + } + else + { + after = prev_parent_ref_after; + } + prev_parent_ref = rd.parent_ref; + prev_parent_ref_after = duplicate_children_no_rep(rd.target, p, after); + remove(rd.node); + } + else + { + if(has_key(rd.node) && is_key_ref(rd.node) && key(rd.node) == "<<") + { + _RYML_CB_ASSERT(m_callbacks, is_keyval(rd.node)); + size_t p = parent(rd.node); + size_t after = prev_sibling(rd.node); + duplicate_children_no_rep(rd.target, p, after); + remove(rd.node); + } + else if(rd.type.is_key_ref()) + { + _RYML_CB_ASSERT(m_callbacks, is_key_ref(rd.node)); + _RYML_CB_ASSERT(m_callbacks, has_key_anchor(rd.target) || has_val_anchor(rd.target)); + if(has_val_anchor(rd.target) && val_anchor(rd.target) == key_ref(rd.node)) + { + _RYML_CB_CHECK(m_callbacks, !is_container(rd.target)); + _RYML_CB_CHECK(m_callbacks, has_val(rd.target)); + _p(rd.node)->m_key.scalar = val(rd.target); + _add_flags(rd.node, KEY); + } + else + { + _RYML_CB_CHECK(m_callbacks, key_anchor(rd.target) == key_ref(rd.node)); + _p(rd.node)->m_key.scalar = key(rd.target); + _add_flags(rd.node, VAL); + } + } + else + { + _RYML_CB_ASSERT(m_callbacks, rd.type.is_val_ref()); + if(has_key_anchor(rd.target) && key_anchor(rd.target) == val_ref(rd.node)) + { + _RYML_CB_CHECK(m_callbacks, !is_container(rd.target)); + _RYML_CB_CHECK(m_callbacks, has_val(rd.target)); + _p(rd.node)->m_val.scalar = key(rd.target); + _add_flags(rd.node, VAL); + } + else + { + duplicate_contents(rd.target, rd.node); + } + } + } + } + + // clear anchors and refs + for(auto const& C4_RESTRICT ar : rr.refs) + { + rem_anchor_ref(ar.node); + if(ar.parent_ref != NONE) + if(type(ar.parent_ref) != NOTYPE) + remove(ar.parent_ref); + } + +} + +//----------------------------------------------------------------------------- + +size_t Tree::num_children(size_t node) const +{ + size_t count = 0; + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + ++count; + return count; +} + +size_t Tree::child(size_t node, size_t pos) const +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + size_t count = 0; + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + { + if(count++ == pos) + return i; + } + return NONE; +} + +size_t Tree::child_pos(size_t node, size_t ch) const +{ + size_t count = 0; + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + { + if(i == ch) + return count; + ++count; + } + return npos; +} + +#if defined(__clang__) +# pragma clang diagnostic push +# pragma GCC diagnostic ignored "-Wnull-dereference" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# if __GNUC__ >= 6 +# pragma GCC diagnostic ignored "-Wnull-dereference" +# endif +#endif + +size_t Tree::find_child(size_t node, csubstr const& name) const +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, is_map(node)); + if(get(node)->m_first_child == NONE) + { + _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child == NONE); + return NONE; + } + else + { + _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child != NONE); + } + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + { + if(_p(i)->m_key.scalar == name) + { + return i; + } + } + return NONE; +} + +#if defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + + +//----------------------------------------------------------------------------- + +void Tree::to_val(size_t node, csubstr val, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node)); + _set_flags(node, VAL|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val = val; +} + +void Tree::to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); + _set_flags(node, KEYVAL|more_flags); + _p(node)->m_key = key; + _p(node)->m_val = val; +} + +void Tree::to_map(size_t node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node)); // parent must not have children with keys + _set_flags(node, MAP|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + +void Tree::to_map(size_t node, csubstr key, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); + _set_flags(node, KEY|MAP|more_flags); + _p(node)->m_key = key; + _p(node)->m_val.clear(); +} + +void Tree::to_seq(size_t node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_seq(node)); + _set_flags(node, SEQ|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + +void Tree::to_seq(size_t node, csubstr key, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); + _set_flags(node, KEY|SEQ|more_flags); + _p(node)->m_key = key; + _p(node)->m_val.clear(); +} + +void Tree::to_doc(size_t node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _set_flags(node, DOC|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + +void Tree::to_stream(size_t node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _set_flags(node, STREAM|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + + +//----------------------------------------------------------------------------- +size_t Tree::num_tag_directives() const +{ + // this assumes we have a very small number of tag directives + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + if(m_tag_directives[i].handle.empty()) + return i; + return RYML_MAX_TAG_DIRECTIVES; +} + +void Tree::clear_tag_directives() +{ + for(TagDirective &td : m_tag_directives) + td = {}; +} + +size_t Tree::add_tag_directive(TagDirective const& td) +{ + _RYML_CB_CHECK(m_callbacks, !td.handle.empty()); + _RYML_CB_CHECK(m_callbacks, !td.prefix.empty()); + _RYML_CB_ASSERT(m_callbacks, td.handle.begins_with('!')); + _RYML_CB_ASSERT(m_callbacks, td.handle.ends_with('!')); + // https://yaml.org/spec/1.2.2/#rule-ns-word-char + _RYML_CB_ASSERT(m_callbacks, td.handle == '!' || td.handle == "!!" || td.handle.trim('!').first_not_of("01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-") == npos); + size_t pos = num_tag_directives(); + _RYML_CB_CHECK(m_callbacks, pos < RYML_MAX_TAG_DIRECTIVES); + m_tag_directives[pos] = td; + return pos; +} + +size_t Tree::resolve_tag(substr output, csubstr tag, size_t node_id) const +{ + // lookup from the end. We want to find the first directive that + // matches the tag and has a target node id leq than the given + // node_id. + for(size_t i = RYML_MAX_TAG_DIRECTIVES-1; i != (size_t)-1; --i) + { + auto const& td = m_tag_directives[i]; + if(td.handle.empty()) + continue; + if(tag.begins_with(td.handle) && td.next_node_id <= node_id) + { + _RYML_CB_ASSERT(m_callbacks, tag.len >= td.handle.len); + csubstr rest = tag.sub(td.handle.len); + size_t len = 1u + td.prefix.len + rest.len + 1u; + size_t numpc = rest.count('%'); + if(numpc == 0) + { + if(len <= output.len) + { + output.str[0] = '<'; + memcpy(1u + output.str, td.prefix.str, td.prefix.len); + memcpy(1u + output.str + td.prefix.len, rest.str, rest.len); + output.str[1u + td.prefix.len + rest.len] = '>'; + } + } + else + { + // need to decode URI % sequences + size_t pos = rest.find('%'); + _RYML_CB_ASSERT(m_callbacks, pos != npos); + do { + size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1); + if(next == npos) + next = rest.len; + _RYML_CB_CHECK(m_callbacks, pos+1 < next); + _RYML_CB_CHECK(m_callbacks, pos+1 + 2 <= next); + size_t delta = next - (pos+1); + len -= delta; + pos = rest.find('%', pos+1); + } while(pos != npos); + if(len <= output.len) + { + size_t prev = 0, wpos = 0; + auto appendstr = [&](csubstr s) { memcpy(output.str + wpos, s.str, s.len); wpos += s.len; }; + auto appendchar = [&](char c) { output.str[wpos++] = c; }; + appendchar('<'); + appendstr(td.prefix); + pos = rest.find('%'); + _RYML_CB_ASSERT(m_callbacks, pos != npos); + do { + size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1); + if(next == npos) + next = rest.len; + _RYML_CB_CHECK(m_callbacks, pos+1 < next); + _RYML_CB_CHECK(m_callbacks, pos+1 + 2 <= next); + uint8_t val; + if(C4_UNLIKELY(!read_hex(rest.range(pos+1, next), &val) || val > 127)) + _RYML_CB_ERR(m_callbacks, "invalid URI character"); + appendstr(rest.range(prev, pos)); + appendchar((char)val); + prev = next; + pos = rest.find('%', pos+1); + } while(pos != npos); + _RYML_CB_ASSERT(m_callbacks, pos == npos); + _RYML_CB_ASSERT(m_callbacks, prev > 0); + _RYML_CB_ASSERT(m_callbacks, rest.len >= prev); + appendstr(rest.sub(prev)); + appendchar('>'); + _RYML_CB_ASSERT(m_callbacks, wpos == len); + } + } + return len; + } + } + return 0; // return 0 to signal that the tag is local and cannot be resolved +} + +namespace { +csubstr _transform_tag(Tree *t, csubstr tag, size_t node) +{ + size_t required_size = t->resolve_tag(substr{}, tag, node); + if(!required_size) + return tag; + const char *prev_arena = t->arena().str; + substr buf = t->alloc_arena(required_size); + _RYML_CB_ASSERT(t->m_callbacks, t->arena().str == prev_arena); + size_t actual_size = t->resolve_tag(buf, tag, node); + _RYML_CB_ASSERT(t->m_callbacks, actual_size <= required_size); + return buf.first(actual_size); +} +void _resolve_tags(Tree *t, size_t node) +{ + for(size_t child = t->first_child(node); child != NONE; child = t->next_sibling(child)) + { + if(t->has_key(child) && t->has_key_tag(child)) + t->set_key_tag(child, _transform_tag(t, t->key_tag(child), child)); + if(t->has_val(child) && t->has_val_tag(child)) + t->set_val_tag(child, _transform_tag(t, t->val_tag(child), child)); + _resolve_tags(t, child); + } +} +size_t _count_resolved_tags_size(Tree const* t, size_t node) +{ + size_t sz = 0; + for(size_t child = t->first_child(node); child != NONE; child = t->next_sibling(child)) + { + if(t->has_key(child) && t->has_key_tag(child)) + sz += t->resolve_tag(substr{}, t->key_tag(child), child); + if(t->has_val(child) && t->has_val_tag(child)) + sz += t->resolve_tag(substr{}, t->val_tag(child), child); + sz += _count_resolved_tags_size(t, child); + } + return sz; +} +} // namespace + +void Tree::resolve_tags() +{ + if(empty()) + return; + if(num_tag_directives() == 0) + return; + size_t needed_size = _count_resolved_tags_size(this, root_id()); + if(needed_size) + reserve_arena(arena_size() + needed_size); + _resolve_tags(this, root_id()); +} + + +//----------------------------------------------------------------------------- + +csubstr Tree::lookup_result::resolved() const +{ + csubstr p = path.first(path_pos); + if(p.ends_with('.')) + p = p.first(p.len-1); + return p; +} + +csubstr Tree::lookup_result::unresolved() const +{ + return path.sub(path_pos); +} + +void Tree::_advance(lookup_result *r, size_t more) const +{ + r->path_pos += more; + if(r->path.sub(r->path_pos).begins_with('.')) + ++r->path_pos; +} + +Tree::lookup_result Tree::lookup_path(csubstr path, size_t start) const +{ + if(start == NONE) + start = root_id(); + lookup_result r(path, start); + if(path.empty()) + return r; + _lookup_path(&r); + if(r.target == NONE && r.closest == start) + r.closest = NONE; + return r; +} + +size_t Tree::lookup_path_or_modify(csubstr default_value, csubstr path, size_t start) +{ + size_t target = _lookup_path_or_create(path, start); + if(parent_is_map(target)) + to_keyval(target, key(target), default_value); + else + to_val(target, default_value); + return target; +} + +size_t Tree::lookup_path_or_modify(Tree const *src, size_t src_node, csubstr path, size_t start) +{ + size_t target = _lookup_path_or_create(path, start); + merge_with(src, src_node, target); + return target; +} + +size_t Tree::_lookup_path_or_create(csubstr path, size_t start) +{ + if(start == NONE) + start = root_id(); + lookup_result r(path, start); + _lookup_path(&r); + if(r.target != NONE) + { + C4_ASSERT(r.unresolved().empty()); + return r.target; + } + _lookup_path_modify(&r); + return r.target; +} + +void Tree::_lookup_path(lookup_result *r) const +{ + C4_ASSERT( ! r->unresolved().empty()); + _lookup_path_token parent{"", type(r->closest)}; + size_t node; + do + { + node = _next_node(r, &parent); + if(node != NONE) + r->closest = node; + if(r->unresolved().empty()) + { + r->target = node; + return; + } + } while(node != NONE); +} + +void Tree::_lookup_path_modify(lookup_result *r) +{ + C4_ASSERT( ! r->unresolved().empty()); + _lookup_path_token parent{"", type(r->closest)}; + size_t node; + do + { + node = _next_node_modify(r, &parent); + if(node != NONE) + r->closest = node; + if(r->unresolved().empty()) + { + r->target = node; + return; + } + } while(node != NONE); +} + +size_t Tree::_next_node(lookup_result * r, _lookup_path_token *parent) const +{ + _lookup_path_token token = _next_token(r, *parent); + if( ! token) + return NONE; + + size_t node = NONE; + csubstr prev = token.value; + if(token.type == MAP || token.type == SEQ) + { + _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('[')); + //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE); + _RYML_CB_ASSERT(m_callbacks, is_map(r->closest)); + node = find_child(r->closest, token.value); + } + else if(token.type == KEYVAL) + { + _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty()); + if(is_map(r->closest)) + node = find_child(r->closest, token.value); + } + else if(token.type == KEY) + { + _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']')); + token.value = token.value.offs(1, 1).trim(' '); + size_t idx = 0; + _RYML_CB_CHECK(m_callbacks, from_chars(token.value, &idx)); + node = child(r->closest, idx); + } + else + { + C4_NEVER_REACH(); + } + + if(node != NONE) + { + *parent = token; + } + else + { + csubstr p = r->path.sub(r->path_pos > 0 ? r->path_pos - 1 : r->path_pos); + r->path_pos -= prev.len; + if(p.begins_with('.')) + r->path_pos -= 1u; + } + + return node; +} + +size_t Tree::_next_node_modify(lookup_result * r, _lookup_path_token *parent) +{ + _lookup_path_token token = _next_token(r, *parent); + if( ! token) + return NONE; + + size_t node = NONE; + if(token.type == MAP || token.type == SEQ) + { + _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('[')); + //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE); + if( ! is_container(r->closest)) + { + if(has_key(r->closest)) + to_map(r->closest, key(r->closest)); + else + to_map(r->closest); + } + else + { + if(is_map(r->closest)) + node = find_child(r->closest, token.value); + else + { + size_t pos = NONE; + _RYML_CB_CHECK(m_callbacks, c4::atox(token.value, &pos)); + _RYML_CB_ASSERT(m_callbacks, pos != NONE); + node = child(r->closest, pos); + } + } + if(node == NONE) + { + _RYML_CB_ASSERT(m_callbacks, is_map(r->closest)); + node = append_child(r->closest); + NodeData *n = _p(node); + n->m_key.scalar = token.value; + n->m_type.add(KEY); + } + } + else if(token.type == KEYVAL) + { + _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty()); + if(is_map(r->closest)) + { + node = find_child(r->closest, token.value); + if(node == NONE) + node = append_child(r->closest); + } + else + { + _RYML_CB_ASSERT(m_callbacks, !is_seq(r->closest)); + _add_flags(r->closest, MAP); + node = append_child(r->closest); + } + NodeData *n = _p(node); + n->m_key.scalar = token.value; + n->m_val.scalar = ""; + n->m_type.add(KEYVAL); + } + else if(token.type == KEY) + { + _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']')); + token.value = token.value.offs(1, 1).trim(' '); + size_t idx; + if( ! from_chars(token.value, &idx)) + return NONE; + if( ! is_container(r->closest)) + { + if(has_key(r->closest)) + { + csubstr k = key(r->closest); + _clear_type(r->closest); + to_seq(r->closest, k); + } + else + { + _clear_type(r->closest); + to_seq(r->closest); + } + } + _RYML_CB_ASSERT(m_callbacks, is_container(r->closest)); + node = child(r->closest, idx); + if(node == NONE) + { + _RYML_CB_ASSERT(m_callbacks, num_children(r->closest) <= idx); + for(size_t i = num_children(r->closest); i <= idx; ++i) + { + node = append_child(r->closest); + if(i < idx) + { + if(is_map(r->closest)) + to_keyval(node, /*"~"*/{}, /*"~"*/{}); + else if(is_seq(r->closest)) + to_val(node, /*"~"*/{}); + } + } + } + } + else + { + C4_NEVER_REACH(); + } + + _RYML_CB_ASSERT(m_callbacks, node != NONE); + *parent = token; + return node; +} + +/** types of tokens: + * - seeing "map." ---> "map"/MAP + * - finishing "scalar" ---> "scalar"/KEYVAL + * - seeing "seq[n]" ---> "seq"/SEQ (--> "[n]"/KEY) + * - seeing "[n]" ---> "[n]"/KEY + */ +Tree::_lookup_path_token Tree::_next_token(lookup_result *r, _lookup_path_token const& parent) const +{ + csubstr unres = r->unresolved(); + if(unres.empty()) + return {}; + + // is it an indexation like [0], [1], etc? + if(unres.begins_with('[')) + { + size_t pos = unres.find(']'); + if(pos == csubstr::npos) + return {}; + csubstr idx = unres.first(pos + 1); + _advance(r, pos + 1); + return {idx, KEY}; + } + + // no. so it must be a name + size_t pos = unres.first_of(".["); + if(pos == csubstr::npos) + { + _advance(r, unres.len); + NodeType t; + if(( ! parent) || parent.type.is_seq()) + return {unres, VAL}; + return {unres, KEYVAL}; + } + + // it's either a map or a seq + _RYML_CB_ASSERT(m_callbacks, unres[pos] == '.' || unres[pos] == '['); + if(unres[pos] == '.') + { + _RYML_CB_ASSERT(m_callbacks, pos != 0); + _advance(r, pos + 1); + return {unres.first(pos), MAP}; + } + + _RYML_CB_ASSERT(m_callbacks, unres[pos] == '['); + _advance(r, pos); + return {unres.first(pos), SEQ}; +} + + +} // namespace ryml +} // namespace c4 + + +C4_SUPPRESS_WARNING_GCC_POP +C4_SUPPRESS_WARNING_MSVC_POP diff --git a/thirdparty/ryml/src/c4/yml/tree.hpp b/thirdparty/ryml/src/c4/yml/tree.hpp new file mode 100644 index 000000000..5adc5583a --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/tree.hpp @@ -0,0 +1,1495 @@ +#ifndef _C4_YML_TREE_HPP_ +#define _C4_YML_TREE_HPP_ + + +#include "c4/error.hpp" +#include "c4/types.hpp" +#ifndef _C4_YML_COMMON_HPP_ +#include "c4/yml/common.hpp" +#endif + +#include <c4/charconv.hpp> +#include <cmath> +#include <limits> + + +C4_SUPPRESS_WARNING_MSVC_PUSH +C4_SUPPRESS_WARNING_MSVC(4251) // needs to have dll-interface to be used by clients of struct +C4_SUPPRESS_WARNING_MSVC(4296) // expression is always 'boolean_value' +C4_SUPPRESS_WARNING_GCC_CLANG_PUSH +C4_SUPPRESS_WARNING_GCC("-Wtype-limits") + + +namespace c4 { +namespace yml { + +struct NodeScalar; +struct NodeInit; +struct NodeData; +class NodeRef; +class ConstNodeRef; +class Tree; + + +/** encode a floating point value to a string. */ +template<class T> +size_t to_chars_float(substr buf, T val) +{ + C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wfloat-equal"); + static_assert(std::is_floating_point<T>::value, "must be floating point"); + if(C4_UNLIKELY(std::isnan(val))) + return to_chars(buf, csubstr(".nan")); + else if(C4_UNLIKELY(val == std::numeric_limits<T>::infinity())) + return to_chars(buf, csubstr(".inf")); + else if(C4_UNLIKELY(val == -std::numeric_limits<T>::infinity())) + return to_chars(buf, csubstr("-.inf")); + return to_chars(buf, val); + C4_SUPPRESS_WARNING_GCC_CLANG_POP +} + + +/** decode a floating point from string. Accepts special values: .nan, + * .inf, -.inf */ +template<class T> +bool from_chars_float(csubstr buf, T *C4_RESTRICT val) +{ + static_assert(std::is_floating_point<T>::value, "must be floating point"); + if(C4_LIKELY(from_chars(buf, val))) + { + return true; + } + else if(C4_UNLIKELY(buf == ".nan" || buf == ".NaN" || buf == ".NAN")) + { + *val = std::numeric_limits<T>::quiet_NaN(); + return true; + } + else if(C4_UNLIKELY(buf == ".inf" || buf == ".Inf" || buf == ".INF")) + { + *val = std::numeric_limits<T>::infinity(); + return true; + } + else if(C4_UNLIKELY(buf == "-.inf" || buf == "-.Inf" || buf == "-.INF")) + { + *val = -std::numeric_limits<T>::infinity(); + return true; + } + else + { + return false; + } +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** the integral type necessary to cover all the bits marking node tags */ +using tag_bits = uint16_t; + +/** a bit mask for marking tags for types */ +typedef enum : tag_bits { + // container types + TAG_NONE = 0, + TAG_MAP = 1, /**< !!map Unordered set of key: value pairs without duplicates. @see https://yaml.org/type/map.html */ + TAG_OMAP = 2, /**< !!omap Ordered sequence of key: value pairs without duplicates. @see https://yaml.org/type/omap.html */ + TAG_PAIRS = 3, /**< !!pairs Ordered sequence of key: value pairs allowing duplicates. @see https://yaml.org/type/pairs.html */ + TAG_SET = 4, /**< !!set Unordered set of non-equal values. @see https://yaml.org/type/set.html */ + TAG_SEQ = 5, /**< !!seq Sequence of arbitrary values. @see https://yaml.org/type/seq.html */ + // scalar types + TAG_BINARY = 6, /**< !!binary A sequence of zero or more octets (8 bit values). @see https://yaml.org/type/binary.html */ + TAG_BOOL = 7, /**< !!bool Mathematical Booleans. @see https://yaml.org/type/bool.html */ + TAG_FLOAT = 8, /**< !!float Floating-point approximation to real numbers. https://yaml.org/type/float.html */ + TAG_INT = 9, /**< !!float Mathematical integers. https://yaml.org/type/int.html */ + TAG_MERGE = 10, /**< !!merge Specify one or more mapping to be merged with the current one. https://yaml.org/type/merge.html */ + TAG_NULL = 11, /**< !!null Devoid of value. https://yaml.org/type/null.html */ + TAG_STR = 12, /**< !!str A sequence of zero or more Unicode characters. https://yaml.org/type/str.html */ + TAG_TIMESTAMP = 13, /**< !!timestamp A point in time https://yaml.org/type/timestamp.html */ + TAG_VALUE = 14, /**< !!value Specify the default value of a mapping https://yaml.org/type/value.html */ + TAG_YAML = 15, /**< !!yaml Specify the default value of a mapping https://yaml.org/type/yaml.html */ +} YamlTag_e; + +YamlTag_e to_tag(csubstr tag); +csubstr from_tag(YamlTag_e tag); +csubstr from_tag_long(YamlTag_e tag); +csubstr normalize_tag(csubstr tag); +csubstr normalize_tag_long(csubstr tag); + +struct TagDirective +{ + /** Eg `!e!` in `%TAG !e! tag:example.com,2000:app/` */ + csubstr handle; + /** Eg `tag:example.com,2000:app/` in `%TAG !e! tag:example.com,2000:app/` */ + csubstr prefix; + /** The next node to which this tag directive applies */ + size_t next_node_id; +}; + +#ifndef RYML_MAX_TAG_DIRECTIVES +/** the maximum number of tag directives in a Tree */ +#define RYML_MAX_TAG_DIRECTIVES 4 +#endif + + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + + +/** the integral type necessary to cover all the bits marking node types */ +using type_bits = uint64_t; + + +/** a bit mask for marking node types */ +typedef enum : type_bits { + // a convenience define, undefined below + #define c4bit(v) (type_bits(1) << v) + NOTYPE = 0, ///< no node type is set + VAL = c4bit(0), ///< a leaf node, has a (possibly empty) value + KEY = c4bit(1), ///< is member of a map, must have non-empty key + MAP = c4bit(2), ///< a map: a parent of keyvals + SEQ = c4bit(3), ///< a seq: a parent of vals + DOC = c4bit(4), ///< a document + STREAM = c4bit(5)|SEQ, ///< a stream: a seq of docs + KEYREF = c4bit(6), ///< a *reference: the key references an &anchor + VALREF = c4bit(7), ///< a *reference: the val references an &anchor + KEYANCH = c4bit(8), ///< the key has an &anchor + VALANCH = c4bit(9), ///< the val has an &anchor + KEYTAG = c4bit(10), ///< the key has an explicit tag/type + VALTAG = c4bit(11), ///< the val has an explicit tag/type + _TYMASK = c4bit(12)-1, // all the bits up to here + VALQUO = c4bit(12), ///< the val is quoted by '', "", > or | + KEYQUO = c4bit(13), ///< the key is quoted by '', "", > or | + KEYVAL = KEY|VAL, + KEYSEQ = KEY|SEQ, + KEYMAP = KEY|MAP, + DOCMAP = DOC|MAP, + DOCSEQ = DOC|SEQ, + DOCVAL = DOC|VAL, + _KEYMASK = KEY | KEYQUO | KEYANCH | KEYREF | KEYTAG, + _VALMASK = VAL | VALQUO | VALANCH | VALREF | VALTAG, + // these flags are from a work in progress and should not be used yet + _WIP_STYLE_FLOW_SL = c4bit(14), ///< mark container with single-line flow format (seqs as '[val1,val2], maps as '{key: val, key2: val2}') + _WIP_STYLE_FLOW_ML = c4bit(15), ///< mark container with multi-line flow format (seqs as '[val1,\nval2], maps as '{key: val,\nkey2: val2}') + _WIP_STYLE_BLOCK = c4bit(16), ///< mark container with block format (seqs as '- val\n', maps as 'key: val') + _WIP_KEY_LITERAL = c4bit(17), ///< mark key scalar as multiline, block literal | + _WIP_VAL_LITERAL = c4bit(18), ///< mark val scalar as multiline, block literal | + _WIP_KEY_FOLDED = c4bit(19), ///< mark key scalar as multiline, block folded > + _WIP_VAL_FOLDED = c4bit(20), ///< mark val scalar as multiline, block folded > + _WIP_KEY_SQUO = c4bit(21), ///< mark key scalar as single quoted + _WIP_VAL_SQUO = c4bit(22), ///< mark val scalar as single quoted + _WIP_KEY_DQUO = c4bit(23), ///< mark key scalar as double quoted + _WIP_VAL_DQUO = c4bit(24), ///< mark val scalar as double quoted + _WIP_KEY_PLAIN = c4bit(25), ///< mark key scalar as plain scalar (unquoted, even when multiline) + _WIP_VAL_PLAIN = c4bit(26), ///< mark val scalar as plain scalar (unquoted, even when multiline) + _WIP_KEY_STYLE = _WIP_KEY_LITERAL|_WIP_KEY_FOLDED|_WIP_KEY_SQUO|_WIP_KEY_DQUO|_WIP_KEY_PLAIN, + _WIP_VAL_STYLE = _WIP_VAL_LITERAL|_WIP_VAL_FOLDED|_WIP_VAL_SQUO|_WIP_VAL_DQUO|_WIP_VAL_PLAIN, + _WIP_KEY_FT_NL = c4bit(27), ///< features: mark key scalar as having \n in its contents + _WIP_VAL_FT_NL = c4bit(28), ///< features: mark val scalar as having \n in its contents + _WIP_KEY_FT_SQ = c4bit(29), ///< features: mark key scalar as having single quotes in its contents + _WIP_VAL_FT_SQ = c4bit(30), ///< features: mark val scalar as having single quotes in its contents + _WIP_KEY_FT_DQ = c4bit(31), ///< features: mark key scalar as having double quotes in its contents + _WIP_VAL_FT_DQ = c4bit(32), ///< features: mark val scalar as having double quotes in its contents + #undef c4bit +} NodeType_e; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** wraps a NodeType_e element with some syntactic sugar and predicates */ +struct NodeType +{ +public: + + NodeType_e type; + +public: + + C4_ALWAYS_INLINE NodeType() : type(NOTYPE) {} + C4_ALWAYS_INLINE NodeType(NodeType_e t) : type(t) {} + C4_ALWAYS_INLINE NodeType(type_bits t) : type((NodeType_e)t) {} + + C4_ALWAYS_INLINE const char *type_str() const { return type_str(type); } + static const char* type_str(NodeType_e t); + + C4_ALWAYS_INLINE void set(NodeType_e t) { type = t; } + C4_ALWAYS_INLINE void set(type_bits t) { type = (NodeType_e)t; } + + C4_ALWAYS_INLINE void add(NodeType_e t) { type = (NodeType_e)(type|t); } + C4_ALWAYS_INLINE void add(type_bits t) { type = (NodeType_e)(type|t); } + + C4_ALWAYS_INLINE void rem(NodeType_e t) { type = (NodeType_e)(type & ~t); } + C4_ALWAYS_INLINE void rem(type_bits t) { type = (NodeType_e)(type & ~t); } + + C4_ALWAYS_INLINE void clear() { type = NOTYPE; } + +public: + + C4_ALWAYS_INLINE operator NodeType_e & C4_RESTRICT () { return type; } + C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const { return type; } + + C4_ALWAYS_INLINE bool operator== (NodeType_e t) const { return type == t; } + C4_ALWAYS_INLINE bool operator!= (NodeType_e t) const { return type != t; } + +public: + + #if defined(__clang__) + # pragma clang diagnostic push + # pragma clang diagnostic ignored "-Wnull-dereference" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # if __GNUC__ >= 6 + # pragma GCC diagnostic ignored "-Wnull-dereference" + # endif + #endif + + C4_ALWAYS_INLINE bool is_notype() const { return type == NOTYPE; } + C4_ALWAYS_INLINE bool is_stream() const { return ((type & STREAM) == STREAM) != 0; } + C4_ALWAYS_INLINE bool is_doc() const { return (type & DOC) != 0; } + C4_ALWAYS_INLINE bool is_container() const { return (type & (MAP|SEQ|STREAM)) != 0; } + C4_ALWAYS_INLINE bool is_map() const { return (type & MAP) != 0; } + C4_ALWAYS_INLINE bool is_seq() const { return (type & SEQ) != 0; } + C4_ALWAYS_INLINE bool has_key() const { return (type & KEY) != 0; } + C4_ALWAYS_INLINE bool has_val() const { return (type & VAL) != 0; } + C4_ALWAYS_INLINE bool is_val() const { return (type & KEYVAL) == VAL; } + C4_ALWAYS_INLINE bool is_keyval() const { return (type & KEYVAL) == KEYVAL; } + C4_ALWAYS_INLINE bool has_key_tag() const { return (type & (KEY|KEYTAG)) == (KEY|KEYTAG); } + C4_ALWAYS_INLINE bool has_val_tag() const { return ((type & VALTAG) && (type & (VAL|MAP|SEQ))); } + C4_ALWAYS_INLINE bool has_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); } + C4_ALWAYS_INLINE bool is_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); } + C4_ALWAYS_INLINE bool has_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; } + C4_ALWAYS_INLINE bool is_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; } + C4_ALWAYS_INLINE bool has_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; } + C4_ALWAYS_INLINE bool is_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; } + C4_ALWAYS_INLINE bool is_key_ref() const { return (type & KEYREF) != 0; } + C4_ALWAYS_INLINE bool is_val_ref() const { return (type & VALREF) != 0; } + C4_ALWAYS_INLINE bool is_ref() const { return (type & (KEYREF|VALREF)) != 0; } + C4_ALWAYS_INLINE bool is_anchor_or_ref() const { return (type & (KEYANCH|VALANCH|KEYREF|VALREF)) != 0; } + C4_ALWAYS_INLINE bool is_key_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO); } + C4_ALWAYS_INLINE bool is_val_quoted() const { return (type & (VAL|VALQUO)) == (VAL|VALQUO); } + C4_ALWAYS_INLINE bool is_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO) || (type & (VAL|VALQUO)) == (VAL|VALQUO); } + + // these predicates are a work in progress and subject to change. Don't use yet. + C4_ALWAYS_INLINE bool default_block() const { return (type & (_WIP_STYLE_BLOCK|_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) == 0; } + C4_ALWAYS_INLINE bool marked_block() const { return (type & (_WIP_STYLE_BLOCK)) != 0; } + C4_ALWAYS_INLINE bool marked_flow_sl() const { return (type & (_WIP_STYLE_FLOW_SL)) != 0; } + C4_ALWAYS_INLINE bool marked_flow_ml() const { return (type & (_WIP_STYLE_FLOW_ML)) != 0; } + C4_ALWAYS_INLINE bool marked_flow() const { return (type & (_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) != 0; } + C4_ALWAYS_INLINE bool key_marked_literal() const { return (type & (_WIP_KEY_LITERAL)) != 0; } + C4_ALWAYS_INLINE bool val_marked_literal() const { return (type & (_WIP_VAL_LITERAL)) != 0; } + C4_ALWAYS_INLINE bool key_marked_folded() const { return (type & (_WIP_KEY_FOLDED)) != 0; } + C4_ALWAYS_INLINE bool val_marked_folded() const { return (type & (_WIP_VAL_FOLDED)) != 0; } + C4_ALWAYS_INLINE bool key_marked_squo() const { return (type & (_WIP_KEY_SQUO)) != 0; } + C4_ALWAYS_INLINE bool val_marked_squo() const { return (type & (_WIP_VAL_SQUO)) != 0; } + C4_ALWAYS_INLINE bool key_marked_dquo() const { return (type & (_WIP_KEY_DQUO)) != 0; } + C4_ALWAYS_INLINE bool val_marked_dquo() const { return (type & (_WIP_VAL_DQUO)) != 0; } + C4_ALWAYS_INLINE bool key_marked_plain() const { return (type & (_WIP_KEY_PLAIN)) != 0; } + C4_ALWAYS_INLINE bool val_marked_plain() const { return (type & (_WIP_VAL_PLAIN)) != 0; } + + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** a node scalar is a csubstr, which may be tagged and anchored. */ +struct NodeScalar +{ + csubstr tag; + csubstr scalar; + csubstr anchor; + +public: + + /// initialize as an empty scalar + inline NodeScalar() noexcept : tag(), scalar(), anchor() {} + + /// initialize as an untagged scalar + template<size_t N> + inline NodeScalar(const char (&s)[N]) noexcept : tag(), scalar(s), anchor() {} + inline NodeScalar(csubstr s ) noexcept : tag(), scalar(s), anchor() {} + + /// initialize as a tagged scalar + template<size_t N, size_t M> + inline NodeScalar(const char (&t)[N], const char (&s)[N]) noexcept : tag(t), scalar(s), anchor() {} + inline NodeScalar(csubstr t , csubstr s ) noexcept : tag(t), scalar(s), anchor() {} + +public: + + ~NodeScalar() noexcept = default; + NodeScalar(NodeScalar &&) noexcept = default; + NodeScalar(NodeScalar const&) noexcept = default; + NodeScalar& operator= (NodeScalar &&) noexcept = default; + NodeScalar& operator= (NodeScalar const&) noexcept = default; + +public: + + bool empty() const noexcept { return tag.empty() && scalar.empty() && anchor.empty(); } + + void clear() noexcept { tag.clear(); scalar.clear(); anchor.clear(); } + + void set_ref_maybe_replacing_scalar(csubstr ref, bool has_scalar) noexcept + { + csubstr trimmed = ref.begins_with('*') ? ref.sub(1) : ref; + anchor = trimmed; + if((!has_scalar) || !scalar.ends_with(trimmed)) + scalar = ref; + } +}; +C4_MUST_BE_TRIVIAL_COPY(NodeScalar); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** convenience class to initialize nodes */ +struct NodeInit +{ + + NodeType type; + NodeScalar key; + NodeScalar val; + +public: + + /// initialize as an empty node + NodeInit() : type(NOTYPE), key(), val() {} + /// initialize as a typed node + NodeInit(NodeType_e t) : type(t), key(), val() {} + /// initialize as a sequence member + NodeInit(NodeScalar const& v) : type(VAL), key(), val(v) { _add_flags(); } + /// initialize as a mapping member + NodeInit( NodeScalar const& k, NodeScalar const& v) : type(KEYVAL), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); } + /// initialize as a mapping member with explicit type + NodeInit(NodeType_e t, NodeScalar const& k, NodeScalar const& v) : type(t ), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); } + /// initialize as a mapping member with explicit type (eg SEQ or MAP) + NodeInit(NodeType_e t, NodeScalar const& k ) : type(t ), key(k.tag, k.scalar), val( ) { _add_flags(KEY); } + +public: + + void clear() + { + type.clear(); + key.clear(); + val.clear(); + } + + void _add_flags(type_bits more_flags=0) + { + type = (type|more_flags); + if( ! key.tag.empty()) + type = (type|KEYTAG); + if( ! val.tag.empty()) + type = (type|VALTAG); + if( ! key.anchor.empty()) + type = (type|KEYANCH); + if( ! val.anchor.empty()) + type = (type|VALANCH); + } + + bool _check() const + { + // key cannot be empty + RYML_ASSERT(key.scalar.empty() == ((type & KEY) == 0)); + // key tag cannot be empty + RYML_ASSERT(key.tag.empty() == ((type & KEYTAG) == 0)); + // val may be empty even though VAL is set. But when VAL is not set, val must be empty + RYML_ASSERT(((type & VAL) != 0) || val.scalar.empty()); + // val tag cannot be empty + RYML_ASSERT(val.tag.empty() == ((type & VALTAG) == 0)); + return true; + } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** contains the data for each YAML node. */ +struct NodeData +{ + NodeType m_type; + + NodeScalar m_key; + NodeScalar m_val; + + size_t m_parent; + size_t m_first_child; + size_t m_last_child; + size_t m_next_sibling; + size_t m_prev_sibling; +}; +C4_MUST_BE_TRIVIAL_COPY(NodeData); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +class RYML_EXPORT Tree +{ +public: + + /** @name construction and assignment */ + /** @{ */ + + Tree() : Tree(get_callbacks()) {} + Tree(Callbacks const& cb); + Tree(size_t node_capacity, size_t arena_capacity=0) : Tree(node_capacity, arena_capacity, get_callbacks()) {} + Tree(size_t node_capacity, size_t arena_capacity, Callbacks const& cb); + + ~Tree(); + + Tree(Tree const& that) noexcept; + Tree(Tree && that) noexcept; + + Tree& operator= (Tree const& that) noexcept; + Tree& operator= (Tree && that) noexcept; + + /** @} */ + +public: + + /** @name memory and sizing */ + /** @{ */ + + void reserve(size_t node_capacity); + + /** clear the tree and zero every node + * @note does NOT clear the arena + * @see clear_arena() */ + void clear(); + inline void clear_arena() { m_arena_pos = 0; } + + inline bool empty() const { return m_size == 0; } + + inline size_t size() const { return m_size; } + inline size_t capacity() const { return m_cap; } + inline size_t slack() const { RYML_ASSERT(m_cap >= m_size); return m_cap - m_size; } + + Callbacks const& callbacks() const { return m_callbacks; } + void callbacks(Callbacks const& cb) { m_callbacks = cb; } + + /** @} */ + +public: + + /** @name node getters */ + /** @{ */ + + //! get the index of a node belonging to this tree. + //! @p n can be nullptr, in which case a + size_t id(NodeData const* n) const + { + if( ! n) + { + return NONE; + } + RYML_ASSERT(n >= m_buf && n < m_buf + m_cap); + return static_cast<size_t>(n - m_buf); + } + + //! get a pointer to a node's NodeData. + //! i can be NONE, in which case a nullptr is returned + inline NodeData *get(size_t i) + { + if(i == NONE) + return nullptr; + RYML_ASSERT(i >= 0 && i < m_cap); + return m_buf + i; + } + //! get a pointer to a node's NodeData. + //! i can be NONE, in which case a nullptr is returned. + inline NodeData const *get(size_t i) const + { + if(i == NONE) + return nullptr; + RYML_ASSERT(i >= 0 && i < m_cap); + return m_buf + i; + } + + //! An if-less form of get() that demands a valid node index. + //! This function is implementation only; use at your own risk. + inline NodeData * _p(size_t i) { RYML_ASSERT(i != NONE && i >= 0 && i < m_cap); return m_buf + i; } + //! An if-less form of get() that demands a valid node index. + //! This function is implementation only; use at your own risk. + inline NodeData const * _p(size_t i) const { RYML_ASSERT(i != NONE && i >= 0 && i < m_cap); return m_buf + i; } + + //! Get the id of the root node + size_t root_id() { if(m_cap == 0) { reserve(16); } RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; } + //! Get the id of the root node + size_t root_id() const { RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; } + + //! Get a NodeRef of a node by id + NodeRef ref(size_t id); + //! Get a NodeRef of a node by id + ConstNodeRef ref(size_t id) const; + //! Get a NodeRef of a node by id + ConstNodeRef cref(size_t id); + //! Get a NodeRef of a node by id + ConstNodeRef cref(size_t id) const; + + //! Get the root as a NodeRef + NodeRef rootref(); + //! Get the root as a NodeRef + ConstNodeRef rootref() const; + //! Get the root as a NodeRef + ConstNodeRef crootref(); + //! Get the root as a NodeRef + ConstNodeRef crootref() const; + + //! find a root child by name, return it as a NodeRef + //! @note requires the root to be a map. + NodeRef operator[] (csubstr key); + //! find a root child by name, return it as a NodeRef + //! @note requires the root to be a map. + ConstNodeRef operator[] (csubstr key) const; + + //! find a root child by index: return the root node's @p i-th child as a NodeRef + //! @note @i is NOT the node id, but the child's position + NodeRef operator[] (size_t i); + //! find a root child by index: return the root node's @p i-th child as a NodeRef + //! @note @i is NOT the node id, but the child's position + ConstNodeRef operator[] (size_t i) const; + + //! get the i-th document of the stream + //! @note @i is NOT the node id, but the doc position within the stream + NodeRef docref(size_t i); + //! get the i-th document of the stream + //! @note @i is NOT the node id, but the doc position within the stream + ConstNodeRef docref(size_t i) const; + + /** @} */ + +public: + + /** @name node property getters */ + /** @{ */ + + NodeType type(size_t node) const { return _p(node)->m_type; } + const char* type_str(size_t node) const { return NodeType::type_str(_p(node)->m_type); } + + csubstr const& key (size_t node) const { RYML_ASSERT(has_key(node)); return _p(node)->m_key.scalar; } + csubstr const& key_tag (size_t node) const { RYML_ASSERT(has_key_tag(node)); return _p(node)->m_key.tag; } + csubstr const& key_ref (size_t node) const { RYML_ASSERT(is_key_ref(node) && ! has_key_anchor(node)); return _p(node)->m_key.anchor; } + csubstr const& key_anchor(size_t node) const { RYML_ASSERT( ! is_key_ref(node) && has_key_anchor(node)); return _p(node)->m_key.anchor; } + NodeScalar const& keysc (size_t node) const { RYML_ASSERT(has_key(node)); return _p(node)->m_key; } + + csubstr const& val (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val.scalar; } + csubstr const& val_tag (size_t node) const { RYML_ASSERT(has_val_tag(node)); return _p(node)->m_val.tag; } + csubstr const& val_ref (size_t node) const { RYML_ASSERT(is_val_ref(node) && ! has_val_anchor(node)); return _p(node)->m_val.anchor; } + csubstr const& val_anchor(size_t node) const { RYML_ASSERT( ! is_val_ref(node) && has_val_anchor(node)); return _p(node)->m_val.anchor; } + NodeScalar const& valsc (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val; } + + /** @} */ + +public: + + /** @name node predicates */ + /** @{ */ + + C4_ALWAYS_INLINE bool is_stream(size_t node) const { return _p(node)->m_type.is_stream(); } + C4_ALWAYS_INLINE bool is_doc(size_t node) const { return _p(node)->m_type.is_doc(); } + C4_ALWAYS_INLINE bool is_container(size_t node) const { return _p(node)->m_type.is_container(); } + C4_ALWAYS_INLINE bool is_map(size_t node) const { return _p(node)->m_type.is_map(); } + C4_ALWAYS_INLINE bool is_seq(size_t node) const { return _p(node)->m_type.is_seq(); } + C4_ALWAYS_INLINE bool has_key(size_t node) const { return _p(node)->m_type.has_key(); } + C4_ALWAYS_INLINE bool has_val(size_t node) const { return _p(node)->m_type.has_val(); } + C4_ALWAYS_INLINE bool is_val(size_t node) const { return _p(node)->m_type.is_val(); } + C4_ALWAYS_INLINE bool is_keyval(size_t node) const { return _p(node)->m_type.is_keyval(); } + C4_ALWAYS_INLINE bool has_key_tag(size_t node) const { return _p(node)->m_type.has_key_tag(); } + C4_ALWAYS_INLINE bool has_val_tag(size_t node) const { return _p(node)->m_type.has_val_tag(); } + C4_ALWAYS_INLINE bool has_key_anchor(size_t node) const { return _p(node)->m_type.has_key_anchor(); } + C4_ALWAYS_INLINE bool is_key_anchor(size_t node) const { return _p(node)->m_type.is_key_anchor(); } + C4_ALWAYS_INLINE bool has_val_anchor(size_t node) const { return _p(node)->m_type.has_val_anchor(); } + C4_ALWAYS_INLINE bool is_val_anchor(size_t node) const { return _p(node)->m_type.is_val_anchor(); } + C4_ALWAYS_INLINE bool has_anchor(size_t node) const { return _p(node)->m_type.has_anchor(); } + C4_ALWAYS_INLINE bool is_anchor(size_t node) const { return _p(node)->m_type.is_anchor(); } + C4_ALWAYS_INLINE bool is_key_ref(size_t node) const { return _p(node)->m_type.is_key_ref(); } + C4_ALWAYS_INLINE bool is_val_ref(size_t node) const { return _p(node)->m_type.is_val_ref(); } + C4_ALWAYS_INLINE bool is_ref(size_t node) const { return _p(node)->m_type.is_ref(); } + C4_ALWAYS_INLINE bool is_anchor_or_ref(size_t node) const { return _p(node)->m_type.is_anchor_or_ref(); } + C4_ALWAYS_INLINE bool is_key_quoted(size_t node) const { return _p(node)->m_type.is_key_quoted(); } + C4_ALWAYS_INLINE bool is_val_quoted(size_t node) const { return _p(node)->m_type.is_val_quoted(); } + C4_ALWAYS_INLINE bool is_quoted(size_t node) const { return _p(node)->m_type.is_quoted(); } + + C4_ALWAYS_INLINE bool parent_is_seq(size_t node) const { RYML_ASSERT(has_parent(node)); return is_seq(_p(node)->m_parent); } + C4_ALWAYS_INLINE bool parent_is_map(size_t node) const { RYML_ASSERT(has_parent(node)); return is_map(_p(node)->m_parent); } + + /** true when key and val are empty, and has no children */ + C4_ALWAYS_INLINE bool empty(size_t node) const { return ! has_children(node) && _p(node)->m_key.empty() && (( ! (_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); } + /** true when the node has an anchor named a */ + C4_ALWAYS_INLINE bool has_anchor(size_t node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; } + + C4_ALWAYS_INLINE bool key_is_null(size_t node) const { RYML_ASSERT(has_key(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_key_quoted() && _is_null(n->m_key.scalar); } + C4_ALWAYS_INLINE bool val_is_null(size_t node) const { RYML_ASSERT(has_val(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_val_quoted() && _is_null(n->m_val.scalar); } + static bool _is_null(csubstr s) noexcept + { + return s.str == nullptr || + s == "~" || + s == "null" || + s == "Null" || + s == "NULL"; + } + + /** @} */ + +public: + + /** @name hierarchy predicates */ + /** @{ */ + + bool is_root(size_t node) const { RYML_ASSERT(_p(node)->m_parent != NONE || node == 0); return _p(node)->m_parent == NONE; } + + bool has_parent(size_t node) const { return _p(node)->m_parent != NONE; } + + /** true if @p node has a child with id @p ch */ + bool has_child(size_t node, size_t ch) const { return _p(ch)->m_parent == node; } + /** true if @p node has a child with key @p key */ + bool has_child(size_t node, csubstr key) const { return find_child(node, key) != npos; } + /** true if @p node has any children key */ + bool has_children(size_t node) const { return _p(node)->m_first_child != NONE; } + + /** true if @p node has a sibling with id @p sib */ + bool has_sibling(size_t node, size_t sib) const { return _p(node)->m_parent == _p(sib)->m_parent; } + /** true if one of the node's siblings has the given key */ + bool has_sibling(size_t node, csubstr key) const { return find_sibling(node, key) != npos; } + /** true if node is not a single child */ + bool has_other_siblings(size_t node) const + { + NodeData const *n = _p(node); + if(C4_LIKELY(n->m_parent != NONE)) + { + n = _p(n->m_parent); + return n->m_first_child != n->m_last_child; + } + return false; + } + + RYML_DEPRECATED("use has_other_siblings()") bool has_siblings(size_t /*node*/) const { return true; } + + /** @} */ + +public: + + /** @name hierarchy getters */ + /** @{ */ + + size_t parent(size_t node) const { return _p(node)->m_parent; } + + size_t prev_sibling(size_t node) const { return _p(node)->m_prev_sibling; } + size_t next_sibling(size_t node) const { return _p(node)->m_next_sibling; } + + /** O(#num_children) */ + size_t num_children(size_t node) const; + size_t child_pos(size_t node, size_t ch) const; + size_t first_child(size_t node) const { return _p(node)->m_first_child; } + size_t last_child(size_t node) const { return _p(node)->m_last_child; } + size_t child(size_t node, size_t pos) const; + size_t find_child(size_t node, csubstr const& key) const; + + /** O(#num_siblings) */ + /** counts with this */ + size_t num_siblings(size_t node) const { return is_root(node) ? 1 : num_children(_p(node)->m_parent); } + /** does not count with this */ + size_t num_other_siblings(size_t node) const { size_t ns = num_siblings(node); RYML_ASSERT(ns > 0); return ns-1; } + size_t sibling_pos(size_t node, size_t sib) const { RYML_ASSERT( ! is_root(node) || node == root_id()); return child_pos(_p(node)->m_parent, sib); } + size_t first_sibling(size_t node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_first_child; } + size_t last_sibling(size_t node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_last_child; } + size_t sibling(size_t node, size_t pos) const { return child(_p(node)->m_parent, pos); } + size_t find_sibling(size_t node, csubstr const& key) const { return find_child(_p(node)->m_parent, key); } + + size_t doc(size_t i) const { size_t rid = root_id(); RYML_ASSERT(is_stream(rid)); return child(rid, i); } //!< gets the @p i document node index. requires that the root node is a stream. + + /** @} */ + +public: + + /** @name node modifiers */ + /** @{ */ + + void to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags=0); + void to_map(size_t node, csubstr key, type_bits more_flags=0); + void to_seq(size_t node, csubstr key, type_bits more_flags=0); + void to_val(size_t node, csubstr val, type_bits more_flags=0); + void to_map(size_t node, type_bits more_flags=0); + void to_seq(size_t node, type_bits more_flags=0); + void to_doc(size_t node, type_bits more_flags=0); + void to_stream(size_t node, type_bits more_flags=0); + + void set_key(size_t node, csubstr key) { RYML_ASSERT(has_key(node)); _p(node)->m_key.scalar = key; } + void set_val(size_t node, csubstr val) { RYML_ASSERT(has_val(node)); _p(node)->m_val.scalar = val; } + + void set_key_tag(size_t node, csubstr tag) { RYML_ASSERT(has_key(node)); _p(node)->m_key.tag = tag; _add_flags(node, KEYTAG); } + void set_val_tag(size_t node, csubstr tag) { RYML_ASSERT(has_val(node) || is_container(node)); _p(node)->m_val.tag = tag; _add_flags(node, VALTAG); } + + void set_key_anchor(size_t node, csubstr anchor) { RYML_ASSERT( ! is_key_ref(node)); _p(node)->m_key.anchor = anchor.triml('&'); _add_flags(node, KEYANCH); } + void set_val_anchor(size_t node, csubstr anchor) { RYML_ASSERT( ! is_val_ref(node)); _p(node)->m_val.anchor = anchor.triml('&'); _add_flags(node, VALANCH); } + void set_key_ref (size_t node, csubstr ref ) { RYML_ASSERT( ! has_key_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_key.set_ref_maybe_replacing_scalar(ref, n->m_type.has_key()); _add_flags(node, KEY|KEYREF); } + void set_val_ref (size_t node, csubstr ref ) { RYML_ASSERT( ! has_val_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_val.set_ref_maybe_replacing_scalar(ref, n->m_type.has_val()); _add_flags(node, VAL|VALREF); } + + void rem_key_anchor(size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYANCH); } + void rem_val_anchor(size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALANCH); } + void rem_key_ref (size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYREF); } + void rem_val_ref (size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALREF); } + void rem_anchor_ref(size_t node) { _p(node)->m_key.anchor.clear(); _p(node)->m_val.anchor.clear(); _rem_flags(node, KEYANCH|VALANCH|KEYREF|VALREF); } + + /** @} */ + +public: + + /** @name tree modifiers */ + /** @{ */ + + /** reorder the tree in memory so that all the nodes are stored + * in a linear sequence when visited in depth-first order. + * This will invalidate existing ids, since the node id is its + * position in the node array. */ + void reorder(); + + /** Resolve references (aliases <- anchors) in the tree. + * + * Dereferencing is opt-in; after parsing, Tree::resolve() + * has to be called explicitly for obtaining resolved references in the + * tree. This method will resolve all references and substitute the + * anchored values in place of the reference. + * + * This method first does a full traversal of the tree to gather all + * anchors and references in a separate collection, then it goes through + * that collection to locate the names, which it does by obeying the YAML + * standard diktat that "an alias node refers to the most recent node in + * the serialization having the specified anchor" + * + * So, depending on the number of anchor/alias nodes, this is a + * potentially expensive operation, with a best-case linear complexity + * (from the initial traversal). This potential cost is the reason for + * requiring an explicit call. + */ + void resolve(); + + /** @} */ + +public: + + /** @name tag directives */ + /** @{ */ + + void resolve_tags(); + + size_t num_tag_directives() const; + size_t add_tag_directive(TagDirective const& td); + void clear_tag_directives(); + + size_t resolve_tag(substr output, csubstr tag, size_t node_id) const; + csubstr resolve_tag_sub(substr output, csubstr tag, size_t node_id) const + { + size_t needed = resolve_tag(output, tag, node_id); + return needed <= output.len ? output.first(needed) : output; + } + + using tag_directive_const_iterator = TagDirective const*; + tag_directive_const_iterator begin_tag_directives() const { return m_tag_directives; } + tag_directive_const_iterator end_tag_directives() const { return m_tag_directives + num_tag_directives(); } + + struct TagDirectiveProxy + { + tag_directive_const_iterator b, e; + tag_directive_const_iterator begin() const { return b; } + tag_directive_const_iterator end() const { return e; } + }; + + TagDirectiveProxy tag_directives() const { return TagDirectiveProxy{begin_tag_directives(), end_tag_directives()}; } + + /** @} */ + +public: + + /** @name modifying hierarchy */ + /** @{ */ + + /** create and insert a new child of @p parent. insert after the (to-be) + * sibling @p after, which must be a child of @p parent. To insert as the + * first child, set after to NONE */ + C4_ALWAYS_INLINE size_t insert_child(size_t parent, size_t after) + { + RYML_ASSERT(parent != NONE); + RYML_ASSERT(is_container(parent) || is_root(parent)); + RYML_ASSERT(after == NONE || (_p(after)->m_parent == parent)); + size_t child = _claim(); + _set_hierarchy(child, parent, after); + return child; + } + /** create and insert a node as the first child of @p parent */ + C4_ALWAYS_INLINE size_t prepend_child(size_t parent) { return insert_child(parent, NONE); } + /** create and insert a node as the last child of @p parent */ + C4_ALWAYS_INLINE size_t append_child(size_t parent) { return insert_child(parent, _p(parent)->m_last_child); } + +public: + + #if defined(__clang__) + # pragma clang diagnostic push + # pragma clang diagnostic ignored "-Wnull-dereference" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # if __GNUC__ >= 6 + # pragma GCC diagnostic ignored "-Wnull-dereference" + # endif + #endif + + //! create and insert a new sibling of n. insert after "after" + C4_ALWAYS_INLINE size_t insert_sibling(size_t node, size_t after) + { + return insert_child(_p(node)->m_parent, after); + } + /** create and insert a node as the first node of @p parent */ + C4_ALWAYS_INLINE size_t prepend_sibling(size_t node) { return prepend_child(_p(node)->m_parent); } + C4_ALWAYS_INLINE size_t append_sibling(size_t node) { return append_child(_p(node)->m_parent); } + +public: + + /** remove an entire branch at once: ie remove the children and the node itself */ + inline void remove(size_t node) + { + remove_children(node); + _release(node); + } + + /** remove all the node's children, but keep the node itself */ + void remove_children(size_t node); + + /** change the @p type of the node to one of MAP, SEQ or VAL. @p + * type must have one and only one of MAP,SEQ,VAL; @p type may + * possibly have KEY, but if it does, then the @p node must also + * have KEY. Changing to the same type is a no-op. Otherwise, + * changing to a different type will initialize the node with an + * empty value of the desired type: changing to VAL will + * initialize with a null scalar (~), changing to MAP will + * initialize with an empty map ({}), and changing to SEQ will + * initialize with an empty seq ([]). */ + bool change_type(size_t node, NodeType type); + + bool change_type(size_t node, type_bits type) + { + return change_type(node, (NodeType)type); + } + + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + +public: + + /** change the node's position in the parent */ + void move(size_t node, size_t after); + + /** change the node's parent and position */ + void move(size_t node, size_t new_parent, size_t after); + + /** change the node's parent and position to a different tree + * @return the index of the new node in the destination tree */ + size_t move(Tree * src, size_t node, size_t new_parent, size_t after); + + /** ensure the first node is a stream. Eg, change this tree + * + * DOCMAP + * MAP + * KEYVAL + * KEYVAL + * SEQ + * VAL + * + * to + * + * STREAM + * DOCMAP + * MAP + * KEYVAL + * KEYVAL + * SEQ + * VAL + * + * If the root is already a stream, this is a no-op. + */ + void set_root_as_stream(); + +public: + + /** recursively duplicate a node from this tree into a new parent, + * placing it after one of its children + * @return the index of the copy */ + size_t duplicate(size_t node, size_t new_parent, size_t after); + /** recursively duplicate a node from a different tree into a new parent, + * placing it after one of its children + * @return the index of the copy */ + size_t duplicate(Tree const* src, size_t node, size_t new_parent, size_t after); + + /** recursively duplicate the node's children (but not the node) + * @return the index of the last duplicated child */ + size_t duplicate_children(size_t node, size_t parent, size_t after); + /** recursively duplicate the node's children (but not the node), where + * the node is from a different tree + * @return the index of the last duplicated child */ + size_t duplicate_children(Tree const* src, size_t node, size_t parent, size_t after); + + void duplicate_contents(size_t node, size_t where); + void duplicate_contents(Tree const* src, size_t node, size_t where); + + /** duplicate the node's children (but not the node) in a new parent, but + * omit repetitions where a duplicated node has the same key (in maps) or + * value (in seqs). If one of the duplicated children has the same key + * (in maps) or value (in seqs) as one of the parent's children, the one + * that is placed closest to the end will prevail. */ + size_t duplicate_children_no_rep(size_t node, size_t parent, size_t after); + size_t duplicate_children_no_rep(Tree const* src, size_t node, size_t parent, size_t after); + +public: + + void merge_with(Tree const* src, size_t src_node=NONE, size_t dst_root=NONE); + + /** @} */ + +public: + + /** @name internal string arena */ + /** @{ */ + + /** get the current size of the tree's internal arena */ + RYML_DEPRECATED("use arena_size() instead") size_t arena_pos() const { return m_arena_pos; } + /** get the current size of the tree's internal arena */ + inline size_t arena_size() const { return m_arena_pos; } + /** get the current capacity of the tree's internal arena */ + inline size_t arena_capacity() const { return m_arena.len; } + /** get the current slack of the tree's internal arena */ + inline size_t arena_slack() const { RYML_ASSERT(m_arena.len >= m_arena_pos); return m_arena.len - m_arena_pos; } + + /** get the current arena */ + substr arena() const { return m_arena.first(m_arena_pos); } + + /** return true if the given substring is part of the tree's string arena */ + bool in_arena(csubstr s) const + { + return m_arena.is_super(s); + } + + /** serialize the given floating-point variable to the tree's + * arena, growing it as needed to accomodate the serialization. + * + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena() + * + * @see alloc_arena() */ + template<class T> + typename std::enable_if<std::is_floating_point<T>::value, csubstr>::type + to_arena(T const& C4_RESTRICT a) + { + substr rem(m_arena.sub(m_arena_pos)); + size_t num = to_chars_float(rem, a); + if(num > rem.len) + { + rem = _grow_arena(num); + num = to_chars_float(rem, a); + RYML_ASSERT(num <= rem.len); + } + rem = _request_span(num); + return rem; + } + + /** serialize the given non-floating-point variable to the tree's + * arena, growing it as needed to accomodate the serialization. + * + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena() + * + * @see alloc_arena() */ + template<class T> + typename std::enable_if<!std::is_floating_point<T>::value, csubstr>::type + to_arena(T const& C4_RESTRICT a) + { + substr rem(m_arena.sub(m_arena_pos)); + size_t num = to_chars(rem, a); + if(num > rem.len) + { + rem = _grow_arena(num); + num = to_chars(rem, a); + RYML_ASSERT(num <= rem.len); + } + rem = _request_span(num); + return rem; + } + + /** serialize the given csubstr to the tree's arena, growing the + * arena as needed to accomodate the serialization. + * + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena() + * + * @see alloc_arena() */ + csubstr to_arena(csubstr a) + { + if(a.len > 0) + { + substr rem(m_arena.sub(m_arena_pos)); + size_t num = to_chars(rem, a); + if(num > rem.len) + { + rem = _grow_arena(num); + num = to_chars(rem, a); + RYML_ASSERT(num <= rem.len); + } + return _request_span(num); + } + else + { + if(a.str == nullptr) + { + return csubstr{}; + } + else if(m_arena.str == nullptr) + { + // Arena is empty and we want to store a non-null + // zero-length string. + // Even though the string has zero length, we need + // some "memory" to store a non-nullptr string + _grow_arena(1); + } + return _request_span(0); + } + } + C4_ALWAYS_INLINE csubstr to_arena(const char *s) + { + return to_arena(to_csubstr(s)); + } + C4_ALWAYS_INLINE csubstr to_arena(std::nullptr_t) + { + return csubstr{}; + } + + /** copy the given substr to the tree's arena, growing it by the + * required size + * + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena() + * + * @see alloc_arena() */ + substr copy_to_arena(csubstr s) + { + substr cp = alloc_arena(s.len); + RYML_ASSERT(cp.len == s.len); + RYML_ASSERT(!s.overlaps(cp)); + #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10) + C4_SUPPRESS_WARNING_GCC_PUSH + C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow=") // no need for terminating \0 + C4_SUPPRESS_WARNING_GCC( "-Wrestrict") // there's an assert to ensure no violation of restrict behavior + #endif + if(s.len) + memcpy(cp.str, s.str, s.len); + #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10) + C4_SUPPRESS_WARNING_GCC_POP + #endif + return cp; + } + + /** grow the tree's string arena by the given size and return a substr + * of the added portion + * + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena(). + * + * @see reserve_arena() */ + substr alloc_arena(size_t sz) + { + if(sz > arena_slack()) + _grow_arena(sz - arena_slack()); + substr s = _request_span(sz); + return s; + } + + /** ensure the tree's internal string arena is at least the given capacity + * @note This operation has a potential complexity of O(numNodes)+O(arenasize). + * Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual nodes. */ + void reserve_arena(size_t arena_cap) + { + if(arena_cap > m_arena.len) + { + substr buf; + buf.str = (char*) m_callbacks.m_allocate(arena_cap, m_arena.str, m_callbacks.m_user_data); + buf.len = arena_cap; + if(m_arena.str) + { + RYML_ASSERT(m_arena.len >= 0); + _relocate(buf); // does a memcpy and changes nodes using the arena + m_callbacks.m_free(m_arena.str, m_arena.len, m_callbacks.m_user_data); + } + m_arena = buf; + } + } + + /** @} */ + +private: + + substr _grow_arena(size_t more) + { + size_t cap = m_arena.len + more; + cap = cap < 2 * m_arena.len ? 2 * m_arena.len : cap; + cap = cap < 64 ? 64 : cap; + reserve_arena(cap); + return m_arena.sub(m_arena_pos); + } + + substr _request_span(size_t sz) + { + substr s; + s = m_arena.sub(m_arena_pos, sz); + m_arena_pos += sz; + return s; + } + + substr _relocated(csubstr s, substr next_arena) const + { + RYML_ASSERT(m_arena.is_super(s)); + RYML_ASSERT(m_arena.sub(0, m_arena_pos).is_super(s)); + auto pos = (s.str - m_arena.str); + substr r(next_arena.str + pos, s.len); + RYML_ASSERT(r.str - next_arena.str == pos); + RYML_ASSERT(next_arena.sub(0, m_arena_pos).is_super(r)); + return r; + } + +public: + + /** @name lookup */ + /** @{ */ + + struct lookup_result + { + size_t target; + size_t closest; + size_t path_pos; + csubstr path; + + inline operator bool() const { return target != NONE; } + + lookup_result() : target(NONE), closest(NONE), path_pos(0), path() {} + lookup_result(csubstr path_, size_t start) : target(NONE), closest(start), path_pos(0), path(path_) {} + + /** get the part ot the input path that was resolved */ + csubstr resolved() const; + /** get the part ot the input path that was unresolved */ + csubstr unresolved() const; + }; + + /** for example foo.bar[0].baz */ + lookup_result lookup_path(csubstr path, size_t start=NONE) const; + + /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify + * the tree so that the corresponding lookup_path() would return the + * default value. + * @see lookup_path() */ + size_t lookup_path_or_modify(csubstr default_value, csubstr path, size_t start=NONE); + + /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify + * the tree so that the corresponding lookup_path() would return the + * branch @p src_node (from the tree @p src). + * @see lookup_path() */ + size_t lookup_path_or_modify(Tree const *src, size_t src_node, csubstr path, size_t start=NONE); + + /** @} */ + +private: + + struct _lookup_path_token + { + csubstr value; + NodeType type; + _lookup_path_token() : value(), type() {} + _lookup_path_token(csubstr v, NodeType t) : value(v), type(t) {} + inline operator bool() const { return type != NOTYPE; } + bool is_index() const { return value.begins_with('[') && value.ends_with(']'); } + }; + + size_t _lookup_path_or_create(csubstr path, size_t start); + + void _lookup_path (lookup_result *r) const; + void _lookup_path_modify(lookup_result *r); + + size_t _next_node (lookup_result *r, _lookup_path_token *parent) const; + size_t _next_node_modify(lookup_result *r, _lookup_path_token *parent); + + void _advance(lookup_result *r, size_t more) const; + + _lookup_path_token _next_token(lookup_result *r, _lookup_path_token const& parent) const; + +private: + + void _clear(); + void _free(); + void _copy(Tree const& that); + void _move(Tree & that); + + void _relocate(substr next_arena); + +public: + + #if ! RYML_USE_ASSERT + C4_ALWAYS_INLINE void _check_next_flags(size_t, type_bits) {} + #else + void _check_next_flags(size_t node, type_bits f) + { + auto n = _p(node); + type_bits o = n->m_type; // old + C4_UNUSED(o); + if(f & MAP) + { + RYML_ASSERT_MSG((f & SEQ) == 0, "cannot mark simultaneously as map and seq"); + RYML_ASSERT_MSG((f & VAL) == 0, "cannot mark simultaneously as map and val"); + RYML_ASSERT_MSG((o & SEQ) == 0, "cannot turn a seq into a map; clear first"); + RYML_ASSERT_MSG((o & VAL) == 0, "cannot turn a val into a map; clear first"); + } + else if(f & SEQ) + { + RYML_ASSERT_MSG((f & MAP) == 0, "cannot mark simultaneously as seq and map"); + RYML_ASSERT_MSG((f & VAL) == 0, "cannot mark simultaneously as seq and val"); + RYML_ASSERT_MSG((o & MAP) == 0, "cannot turn a map into a seq; clear first"); + RYML_ASSERT_MSG((o & VAL) == 0, "cannot turn a val into a seq; clear first"); + } + if(f & KEY) + { + RYML_ASSERT(!is_root(node)); + auto pid = parent(node); C4_UNUSED(pid); + RYML_ASSERT(is_map(pid)); + } + if((f & VAL) && !is_root(node)) + { + auto pid = parent(node); C4_UNUSED(pid); + RYML_ASSERT(is_map(pid) || is_seq(pid)); + } + } + #endif + + inline void _set_flags(size_t node, NodeType_e f) { _check_next_flags(node, f); _p(node)->m_type = f; } + inline void _set_flags(size_t node, type_bits f) { _check_next_flags(node, f); _p(node)->m_type = f; } + + inline void _add_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = f | d->m_type; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } + inline void _add_flags(size_t node, type_bits f) { NodeData *d = _p(node); f |= d->m_type; _check_next_flags(node, f); d->m_type = f; } + + inline void _rem_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = d->m_type & ~f; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } + inline void _rem_flags(size_t node, type_bits f) { NodeData *d = _p(node); f = d->m_type & ~f; _check_next_flags(node, f); d->m_type = f; } + + void _set_key(size_t node, csubstr key, type_bits more_flags=0) + { + _p(node)->m_key.scalar = key; + _add_flags(node, KEY|more_flags); + } + void _set_key(size_t node, NodeScalar const& key, type_bits more_flags=0) + { + _p(node)->m_key = key; + _add_flags(node, KEY|more_flags); + } + + void _set_val(size_t node, csubstr val, type_bits more_flags=0) + { + RYML_ASSERT(num_children(node) == 0); + RYML_ASSERT(!is_seq(node) && !is_map(node)); + _p(node)->m_val.scalar = val; + _add_flags(node, VAL|more_flags); + } + void _set_val(size_t node, NodeScalar const& val, type_bits more_flags=0) + { + RYML_ASSERT(num_children(node) == 0); + RYML_ASSERT( ! is_container(node)); + _p(node)->m_val = val; + _add_flags(node, VAL|more_flags); + } + + void _set(size_t node, NodeInit const& i) + { + RYML_ASSERT(i._check()); + NodeData *n = _p(node); + RYML_ASSERT(n->m_key.scalar.empty() || i.key.scalar.empty() || i.key.scalar == n->m_key.scalar); + _add_flags(node, i.type); + if(n->m_key.scalar.empty()) + { + if( ! i.key.scalar.empty()) + { + _set_key(node, i.key.scalar); + } + } + n->m_key.tag = i.key.tag; + n->m_val = i.val; + } + + void _set_parent_as_container_if_needed(size_t in) + { + NodeData const* n = _p(in); + size_t ip = parent(in); + if(ip != NONE) + { + if( ! (is_seq(ip) || is_map(ip))) + { + if((in == first_child(ip)) && (in == last_child(ip))) + { + if( ! n->m_key.empty() || has_key(in)) + { + _add_flags(ip, MAP); + } + else + { + _add_flags(ip, SEQ); + } + } + } + } + } + + void _seq2map(size_t node) + { + RYML_ASSERT(is_seq(node)); + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + { + NodeData *C4_RESTRICT ch = _p(i); + if(ch->m_type.is_keyval()) + continue; + ch->m_type.add(KEY); + ch->m_key = ch->m_val; + } + auto *C4_RESTRICT n = _p(node); + n->m_type.rem(SEQ); + n->m_type.add(MAP); + } + + size_t _do_reorder(size_t *node, size_t count); + + void _swap(size_t n_, size_t m_); + void _swap_props(size_t n_, size_t m_); + void _swap_hierarchy(size_t n_, size_t m_); + void _copy_hierarchy(size_t dst_, size_t src_); + + inline void _copy_props(size_t dst_, size_t src_) + { + _copy_props(dst_, this, src_); + } + + inline void _copy_props_wo_key(size_t dst_, size_t src_) + { + _copy_props_wo_key(dst_, this, src_); + } + + void _copy_props(size_t dst_, Tree const* that_tree, size_t src_) + { + auto & C4_RESTRICT dst = *_p(dst_); + auto const& C4_RESTRICT src = *that_tree->_p(src_); + dst.m_type = src.m_type; + dst.m_key = src.m_key; + dst.m_val = src.m_val; + } + + void _copy_props_wo_key(size_t dst_, Tree const* that_tree, size_t src_) + { + auto & C4_RESTRICT dst = *_p(dst_); + auto const& C4_RESTRICT src = *that_tree->_p(src_); + dst.m_type = (src.m_type & ~_KEYMASK) | (dst.m_type & _KEYMASK); + dst.m_val = src.m_val; + } + + inline void _clear_type(size_t node) + { + _p(node)->m_type = NOTYPE; + } + + inline void _clear(size_t node) + { + auto *C4_RESTRICT n = _p(node); + n->m_type = NOTYPE; + n->m_key.clear(); + n->m_val.clear(); + n->m_parent = NONE; + n->m_first_child = NONE; + n->m_last_child = NONE; + } + + inline void _clear_key(size_t node) + { + _p(node)->m_key.clear(); + _rem_flags(node, KEY); + } + + inline void _clear_val(size_t node) + { + _p(node)->m_val.clear(); + _rem_flags(node, VAL); + } + +private: + + void _clear_range(size_t first, size_t num); + + size_t _claim(); + void _claim_root(); + void _release(size_t node); + void _free_list_add(size_t node); + void _free_list_rem(size_t node); + + void _set_hierarchy(size_t node, size_t parent, size_t after_sibling); + void _rem_hierarchy(size_t node); + +public: + + // members are exposed, but you should NOT access them directly + + NodeData * m_buf; + size_t m_cap; + + size_t m_size; + + size_t m_free_head; + size_t m_free_tail; + + substr m_arena; + size_t m_arena_pos; + + Callbacks m_callbacks; + + TagDirective m_tag_directives[RYML_MAX_TAG_DIRECTIVES]; + +}; + +} // namespace yml +} // namespace c4 + + +C4_SUPPRESS_WARNING_MSVC_POP +C4_SUPPRESS_WARNING_GCC_CLANG_POP + + +#endif /* _C4_YML_TREE_HPP_ */ diff --git a/thirdparty/ryml/src/c4/yml/writer.hpp b/thirdparty/ryml/src/c4/yml/writer.hpp new file mode 100644 index 000000000..29e32d47b --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/writer.hpp @@ -0,0 +1,229 @@ +#ifndef _C4_YML_WRITER_HPP_ +#define _C4_YML_WRITER_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +#include "./common.hpp" +#endif + +#include <c4/substr.hpp> +#include <stdio.h> // fwrite(), fputc() +#include <string.h> // memcpy() + + +namespace c4 { +namespace yml { + + +/** Repeat-Character: a character to be written a number of times. */ +struct RepC +{ + char c; + size_t num_times; +}; +inline RepC indent_to(size_t num_levels) +{ + return {' ', size_t(2) * num_levels}; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** A writer that outputs to a file. Defaults to stdout. */ +struct WriterFile +{ + FILE * m_file; + size_t m_pos; + + WriterFile(FILE *f = nullptr) : m_file(f ? f : stdout), m_pos(0) {} + + inline substr _get(bool /*error_on_excess*/) + { + substr sp; + sp.str = nullptr; + sp.len = m_pos; + return sp; + } + + template<size_t N> + inline void _do_write(const char (&a)[N]) + { + fwrite(a, sizeof(char), N - 1, m_file); + m_pos += N - 1; + } + + inline void _do_write(csubstr sp) + { + #if defined(__clang__) + # pragma clang diagnostic push + # pragma GCC diagnostic ignored "-Wsign-conversion" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # pragma GCC diagnostic ignored "-Wsign-conversion" + #endif + if(sp.empty()) return; + fwrite(sp.str, sizeof(csubstr::char_type), sp.len, m_file); + m_pos += sp.len; + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + } + + inline void _do_write(const char c) + { + fputc(c, m_file); + ++m_pos; + } + + inline void _do_write(RepC const rc) + { + for(size_t i = 0; i < rc.num_times; ++i) + { + fputc(rc.c, m_file); + } + m_pos += rc.num_times; + } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** A writer that outputs to an STL-like ostream. */ +template<class OStream> +struct WriterOStream +{ + OStream& m_stream; + size_t m_pos; + + WriterOStream(OStream &s) : m_stream(s), m_pos(0) {} + + inline substr _get(bool /*error_on_excess*/) + { + substr sp; + sp.str = nullptr; + sp.len = m_pos; + return sp; + } + + template<size_t N> + inline void _do_write(const char (&a)[N]) + { + m_stream.write(a, N - 1); + m_pos += N - 1; + } + + inline void _do_write(csubstr sp) + { + #if defined(__clang__) + # pragma clang diagnostic push + # pragma GCC diagnostic ignored "-Wsign-conversion" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # pragma GCC diagnostic ignored "-Wsign-conversion" + #endif + if(sp.empty()) return; + m_stream.write(sp.str, sp.len); + m_pos += sp.len; + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + } + + inline void _do_write(const char c) + { + m_stream.put(c); + ++m_pos; + } + + inline void _do_write(RepC const rc) + { + for(size_t i = 0; i < rc.num_times; ++i) + { + m_stream.put(rc.c); + } + m_pos += rc.num_times; + } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** a writer to a substr */ +struct WriterBuf +{ + substr m_buf; + size_t m_pos; + + WriterBuf(substr sp) : m_buf(sp), m_pos(0) {} + + inline substr _get(bool error_on_excess) + { + if(m_pos <= m_buf.len) + { + return m_buf.first(m_pos); + } + if(error_on_excess) + { + c4::yml::error("not enough space in the given buffer"); + } + substr sp; + sp.str = nullptr; + sp.len = m_pos; + return sp; + } + + template<size_t N> + inline void _do_write(const char (&a)[N]) + { + RYML_ASSERT( ! m_buf.overlaps(a)); + if(m_pos + N-1 <= m_buf.len) + { + memcpy(&(m_buf[m_pos]), a, N-1); + } + m_pos += N-1; + } + + inline void _do_write(csubstr sp) + { + if(sp.empty()) return; + RYML_ASSERT( ! sp.overlaps(m_buf)); + if(m_pos + sp.len <= m_buf.len) + { + memcpy(&(m_buf[m_pos]), sp.str, sp.len); + } + m_pos += sp.len; + } + + inline void _do_write(const char c) + { + if(m_pos + 1 <= m_buf.len) + { + m_buf[m_pos] = c; + } + ++m_pos; + } + + inline void _do_write(RepC const rc) + { + if(m_pos + rc.num_times <= m_buf.len) + { + for(size_t i = 0; i < rc.num_times; ++i) + { + m_buf[m_pos + i] = rc.c; + } + } + m_pos += rc.num_times; + } +}; + + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_WRITER_HPP_ */ diff --git a/thirdparty/ryml/src/c4/yml/yml.hpp b/thirdparty/ryml/src/c4/yml/yml.hpp new file mode 100644 index 000000000..36f78fe82 --- /dev/null +++ b/thirdparty/ryml/src/c4/yml/yml.hpp @@ -0,0 +1,10 @@ +#ifndef _C4_YML_YML_HPP_ +#define _C4_YML_YML_HPP_ + +#include "c4/yml/tree.hpp" +#include "c4/yml/node.hpp" +#include "c4/yml/emit.hpp" +#include "c4/yml/parse.hpp" +#include "c4/yml/preprocess.hpp" + +#endif // _C4_YML_YML_HPP_ |